ofproto: Probe for masked set action support.
ofproto/ofproto-dpif.c (cascardo/ovs.git)
/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"

#include <errno.h>

#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "connectivity.h"
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "mcast-snooping.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netdev.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-util.h"
#include "odp-execute.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-monitor.h"
#include "ofproto-dpif-rid.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
#include "seq.h"
#include "simap.h"
#include "smap.h"
#include "timer.h"
#include "tunnel.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(packet_in_overflow);

struct flow_miss;

struct rule_dpif {
    struct rule up;

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;
    struct dpif_flow_stats stats OVS_GUARDED;

    /* If non-zero, the recirculation id that has been allocated for use
     * with this rule.  The recirculation id and associated internal flow
     * should be freed when the rule is freed. */
    uint32_t recirc_id;
};

/* RULE_CAST() depends on this. */
BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
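/* Because 'up' is the first member, at offset 0, code can cast directly
 * between "struct rule *" and "struct rule_dpif *" (see rule_dpif_cast());
 * the assertion above turns any reordering of the members into a
 * compile-time error. */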

static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
                           long long int *used);
static struct rule_dpif *rule_dpif_cast(const struct rule *);
static void rule_expire(struct rule_dpif *);

struct group_dpif {
    struct ofgroup up;

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
};

struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
};

static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);

static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);

static void rstp_run(struct ofproto_dpif *ofproto);
static void set_rstp_port(struct ofport *,
                         const struct ofproto_port_rstp_settings *);

struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;

    odp_port_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    bool is_layer3;             /* This is a layer 3 port. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    /* Rapid Spanning Tree. */
    struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
    enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;
    size_t n_qdscp;

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
    int vlandev_vid;
};

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    ofp_port_t realdev_ofp_port;
    ofp_port_t vlandev_ofp_port;
    int vid;
};

static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);

static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                       ofp_port_t);

static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
                                       odp_port_t);

static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}

static void port_run(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_update_peer(struct ofport_dpif *);

/* Reasons that we might need to revalidate every datapath flow, and
 * corresponding coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_RSTP,                  /* RSTP port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_rstp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_mcast_snooping);

/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
    char *type;
    int refcount;
    struct dpif *dpif;
    struct udpif *udpif;

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    enum revalidate_reason need_revalidate; /* Revalidate all flows. */

    bool recv_set_enable; /* Enables or disables receiving packets. */

    /* Recirculation. */
    struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
    bool enable_recirc;   /* True if the datapath supports recirculation */

    /* True if the datapath supports variable-length
     * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
     * False if the datapath supports only 8-byte (or shorter) userdata. */
    bool variable_length_userdata;

    /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET
     * actions. */
    bool masked_set_action;

    /* Maximum number of MPLS label stack entries that the datapath supports
     * in a match */
    size_t max_mpls_depth;
};
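
/* The feature fields above (enable_recirc, variable_length_userdata,
 * masked_set_action, max_mpls_depth) are filled in once by the check_*()
 * probes called from open_dpif_backer() and are not changed again for the
 * lifetime of the backer. */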

/* All existing ofproto_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);

struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif_backer *backer;

    uint64_t dump_seq; /* Last read of udpif_dump_seq(). */

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    struct mcast_snooping *ms;
    bool has_bonded_bundles;
    bool lacp_enabled;
    struct mbridge *mbridge;

    struct ovs_mutex stats_mutex;
    struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
                                            * consumed in userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* Rapid Spanning Tree. */
    struct rstp *rstp;
    long long int rstp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    /* Ports. */
    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */
    uint64_t change_seq;           /* Connectivity status changes. */

    /* Work queues. */
    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
    struct seq *pins_seq;          /* For notifying 'pins' reception. */
    uint64_t pins_seqno;
};

/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);

static void ofproto_dpif_unixctl_init(void);

static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}

size_t
ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->max_mpls_depth;
}

bool
ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->enable_recirc;
}

static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                        ofp_port_t ofp_port);
static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                          const struct ofpbuf *packet,
                          const struct ofpact[], size_t ofpacts_len,
                          struct ds *);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Initial mappings of port names to bridges. */
static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);

/* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
 * it. */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ofproto_flow_mod(&ofproto->up, fm);
}

/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->packet. */
void
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                            struct ofproto_packet_in *pin)
{
    if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
        COVERAGE_INC(packet_in_overflow);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }

    /* Wakes up main thread for packet-in I/O. */
    seq_change(ofproto->pins_seq);
}

/* The default "table-miss" behaviour for OpenFlow 1.3+ is to drop the
 * packet rather than to send the packet to the controller.
 *
 * This function returns true if a packet_in message for a "table-miss"
 * should be sent to at least one controller, false otherwise. */
bool
ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
{
    return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
}
\f
/* Factory functions. */

static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }
}

static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}

static int
enumerate_names(const char *type, struct sset *names)
{
    struct ofproto_dpif *ofproto;

    sset_clear(names);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (strcmp(type, ofproto->up.type)) {
            continue;
        }
        sset_add(names, ofproto->up.name);
    }

    return 0;
}

static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error;

    error = dpif_open(name, type, &dpif);
    if (!error) {
        error = dpif_delete(dpif);
        dpif_close(dpif);
    }
    return error;
}
\f
static const char *
port_open_type(const char *datapath_type, const char *port_type)
{
    return dpif_port_open_type(datapath_type, port_type);
}

/* Type functions. */

static void process_dpif_port_changes(struct dpif_backer *);
static void process_dpif_all_ports_changed(struct dpif_backer *);
static void process_dpif_port_change(struct dpif_backer *,
                                     const char *devname);
static void process_dpif_port_error(struct dpif_backer *, int error);

static struct ofproto_dpif *
lookup_ofproto_dpif_by_port_name(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (sset_contains(&ofproto->ports, name)) {
            return ofproto;
        }
    }

    return NULL;
}

static int
type_run(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return 0;
    }

    dpif_run(backer->dpif);
    udpif_run(backer->udpif);

    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
        int error;

        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
        if (error) {
            VLOG_ERR("Failed to enable receiving packets in dpif.");
            return error;
        }
        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);
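        /* From this point, 'tmp_backers' holds every datapath port that was
         * backing a tunnel.  Entries still in use are moved back into
         * 'backer->tnl_backers' below; whatever is left over in
         * 'tmp_backers' afterwards is stale and is deleted from the
         * datapath. */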

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {
                continue;
            }

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
                const char *dp_port;

                if (!iter->is_tunnel) {
                    continue;
                }

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);
                if (node) {
                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);
                } else {
                    node = simap_find(&backer->tnl_backers, dp_port);
                    if (!node) {
                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,
                                           &odp_port)) {
                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);
                        }
                    }
                }

                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,
                                         iter->odp_port)) {
                    backer->need_revalidate = REV_RECONFIGURE;
                }
            }
        }

        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);

        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
        case REV_STP:            COVERAGE_INC(rev_stp);            break;
        case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
        case REV_BOND:           COVERAGE_INC(rev_bond);           break;
        case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
        case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
        case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
        case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
        }
        backer->need_revalidate = 0;

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *ofport;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {
                continue;
            }

            xlate_txn_start();
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->miss_rule,
                              ofproto->no_packet_in_rule, ofproto->ml,
                              ofproto->stp, ofproto->rstp, ofproto->ms,
                              ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
                              ofproto->netflow, ofproto->up.frag_handling,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
                              ofproto->backer->enable_recirc,
                              ofproto->backer->variable_length_userdata,
                              ofproto->backer->max_mpls_depth,
                              ofproto->backer->masked_set_action);

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,
                                 bundle->floodable);
            }

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)
                    : -1;
                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->rstp_port, ofport->qdscp,
                                 ofport->n_qdscp, ofport->up.pp.config,
                                 ofport->up.pp.state, ofport->is_tunnel,
                                 ofport->may_enable);
            }
            xlate_txn_commit();
        }

        udpif_revalidate(backer->udpif);
    }

    process_dpif_port_changes(backer);

    return 0;
}

/* Check for and handle port changes in 'backer''s dpif. */
static void
process_dpif_port_changes(struct dpif_backer *backer)
{
    for (;;) {
        char *devname;
        int error;

        error = dpif_port_poll(backer->dpif, &devname);
        switch (error) {
        case EAGAIN:
            return;

        case ENOBUFS:
            process_dpif_all_ports_changed(backer);
            break;

        case 0:
            process_dpif_port_change(backer, devname);
            free(devname);
            break;

        default:
            process_dpif_port_error(backer, error);
            break;
        }
    }
}

static void
process_dpif_all_ports_changed(struct dpif_backer *backer)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port dpif_port;
    struct dpif_port_dump dump;
    struct sset devnames;
    const char *devname;

    sset_init(&devnames);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            struct ofport *ofport;

            HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
                sset_add(&devnames, netdev_get_name(ofport->netdev));
            }
        }
    }
    DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
        sset_add(&devnames, dpif_port.name);
    }

    SSET_FOR_EACH (devname, &devnames) {
        process_dpif_port_change(backer, devname);
    }
    sset_destroy(&devnames);
}

static void
process_dpif_port_change(struct dpif_backer *backer, const char *devname)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port port;

    /* Don't report on the datapath's device. */
    if (!strcmp(devname, dpif_base_name(backer->dpif))) {
        return;
    }

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
                   &all_ofproto_dpifs) {
        if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
            return;
        }
    }

    ofproto = lookup_ofproto_dpif_by_port_name(devname);
    if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
        /* The port was removed.  If we know the datapath,
         * report it through poll_set().  If we don't, it may be
         * notifying us of a removal we initiated, so ignore it.
         * If there's a pending ENOBUFS, let it stand, since
         * everything will be reevaluated. */
        if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
            sset_add(&ofproto->port_poll_set, devname);
            ofproto->port_poll_errno = 0;
        }
    } else if (!ofproto) {
        /* The port was added, but we don't know with which
         * ofproto we should associate it.  Delete it. */
        dpif_port_del(backer->dpif, port.port_no);
    } else {
        struct ofport_dpif *ofport;

        ofport = ofport_dpif_cast(shash_find_data(
                                      &ofproto->up.port_by_name, devname));
        if (ofport
            && ofport->odp_port != port.port_no
            && !odp_port_to_ofport(backer, port.port_no))
        {
            /* 'ofport''s datapath port number has changed from
             * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
             * structures to match. */
            ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
            hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
            ofport->odp_port = port.port_no;
            hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
                        hash_odp_port(port.port_no));
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            backer->need_revalidate = REV_RECONFIGURE;
        }
    }
    dpif_port_destroy(&port);
}

/* Propagate 'error' to all ofprotos based on 'backer'. */
static void
process_dpif_port_error(struct dpif_backer *backer, int error)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            sset_clear(&ofproto->port_poll_set);
            ofproto->port_poll_errno = error;
        }
    }
}

static void
type_wait(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return;
    }

    dpif_wait(backer->dpif);
}
\f
/* Basic life-cycle. */

static int add_internal_flows(struct ofproto_dpif *);

static struct ofproto *
alloc(void)
{
    struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
    return &ofproto->up;
}

static void
dealloc(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    free(ofproto);
}

static void
close_dpif_backer(struct dpif_backer *backer)
{
    ovs_assert(backer->refcount > 0);

    if (--backer->refcount) {
        return;
    }

    udpif_destroy(backer->udpif);

    simap_destroy(&backer->tnl_backers);
    ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
    hmap_destroy(&backer->odp_to_ofport_map);
    shash_find_and_delete(&all_dpif_backers, backer->type);
    recirc_id_pool_destroy(backer->rid_pool);
    free(backer->type);
    dpif_close(backer->dpif);
    free(backer);
}

/* Datapath port slated for removal from datapath. */
struct odp_garbage {
    struct list list_node;
    odp_port_t odp_port;
};

static bool check_variable_length_userdata(struct dpif_backer *backer);
static size_t check_max_mpls_depth(struct dpif_backer *backer);
static bool check_recirc(struct dpif_backer *backer);
static bool check_masked_set_action(struct dpif_backer *backer);

static int
open_dpif_backer(const char *type, struct dpif_backer **backerp)
{
    struct dpif_backer *backer;
    struct dpif_port_dump port_dump;
    struct dpif_port port;
    struct shash_node *node;
    struct list garbage_list;
    struct odp_garbage *garbage, *next;

    struct sset names;
    char *backer_name;
    const char *name;
    int error;

    backer = shash_find_data(&all_dpif_backers, type);
    if (backer) {
        backer->refcount++;
        *backerp = backer;
        return 0;
    }

    backer_name = xasprintf("ovs-%s", type);

    /* Remove any existing datapaths, since we assume we're the only
     * userspace controlling the datapath. */
    sset_init(&names);
    dp_enumerate_names(type, &names);
    SSET_FOR_EACH(name, &names) {
        struct dpif *old_dpif;

        /* Don't remove our backer if it exists. */
        if (!strcmp(name, backer_name)) {
            continue;
        }

        if (dpif_open(name, type, &old_dpif)) {
            VLOG_WARN("couldn't open old datapath %s to remove it", name);
        } else {
            dpif_delete(old_dpif);
            dpif_close(old_dpif);
        }
    }
    sset_destroy(&names);

    backer = xmalloc(sizeof *backer);

    error = dpif_create_and_open(backer_name, type, &backer->dpif);
    free(backer_name);
    if (error) {
        VLOG_ERR("failed to open datapath of type %s: %s", type,
                 ovs_strerror(error));
        free(backer);
        return error;
    }
    backer->udpif = udpif_create(backer, backer->dpif);

    backer->type = xstrdup(type);
    backer->refcount = 1;
    hmap_init(&backer->odp_to_ofport_map);
    ovs_rwlock_init(&backer->odp_to_ofport_lock);
    backer->need_revalidate = 0;
    simap_init(&backer->tnl_backers);
    backer->recv_set_enable = !ofproto_get_flow_restore_wait();
    *backerp = backer;

    if (backer->recv_set_enable) {
        dpif_flow_flush(backer->dpif);
    }

    /* Loop through the ports already on the datapath and remove any
     * that we don't need anymore. */
    list_init(&garbage_list);
    dpif_port_dump_start(&port_dump, backer->dpif);
    while (dpif_port_dump_next(&port_dump, &port)) {
        node = shash_find(&init_ofp_ports, port.name);
        if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
            garbage = xmalloc(sizeof *garbage);
            garbage->odp_port = port.port_no;
            list_push_front(&garbage_list, &garbage->list_node);
        }
    }
    dpif_port_dump_done(&port_dump);

    LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
        dpif_port_del(backer->dpif, garbage->odp_port);
        list_remove(&garbage->list_node);
        free(garbage);
    }

    shash_add(&all_dpif_backers, type, backer);

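    /* Probe for datapath features.  The results are stored on the backer,
     * so every bridge sharing this backer reuses them (see "struct
     * dpif_backer" above). */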
    backer->enable_recirc = check_recirc(backer);
    backer->variable_length_userdata = check_variable_length_userdata(backer);
    backer->max_mpls_depth = check_max_mpls_depth(backer);
    backer->masked_set_action = check_masked_set_action(backer);
    backer->rid_pool = recirc_id_pool_create();

    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
    if (error) {
        VLOG_ERR("failed to listen on datapath of type %s: %s",
                 type, ovs_strerror(error));
        close_dpif_backer(backer);
        return error;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    return error;
}

/* Tests whether 'backer''s datapath supports recirculation.  Only newer
 * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
 * features on older datapaths that don't support this feature.
 *
 * Returns false if 'backer' definitely does not support recirculation, true if
 * it seems to support recirculation or if at least the error we get is
 * ambiguous. */
static bool
check_recirc(struct dpif_backer *backer)
{
    struct flow flow;
    struct odputil_keybuf keybuf;
    struct ofpbuf key;
    int error;
    bool enable_recirc = false;

    memset(&flow, 0, sizeof flow);
    flow.recirc_id = 1;
    flow.dp_hash = 1;

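    /* Try to install a trivial flow whose key carries the nonzero recirc_id
     * and dp_hash set above.  A datapath without recirculation support does
     * not recognize those key attributes and rejects the flow with EINVAL,
     * which is treated as "unsupported" below. */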
    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
    odp_flow_key_from_flow(&key, &flow, NULL, 0, true);

    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE,
                          ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
                          0, NULL);
    if (error && error != EEXIST) {
        if (error != EINVAL) {
            VLOG_WARN("%s: Recirculation flow probe failed (%s)",
                      dpif_name(backer->dpif), ovs_strerror(error));
        }
        goto done;
    }

    error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
                          NULL);
    if (error) {
        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
                  dpif_name(backer->dpif));
    }

    enable_recirc = true;

done:
    if (enable_recirc) {
        VLOG_INFO("%s: Datapath supports recirculation",
                  dpif_name(backer->dpif));
    } else {
        VLOG_INFO("%s: Datapath does not support recirculation",
                  dpif_name(backer->dpif));
    }

    return enable_recirc;
}

/* Tests whether 'backer''s datapath supports variable-length
 * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
 * to disable some features on older datapaths that don't support this
 * feature.
 *
 * Returns false if 'backer' definitely does not support variable-length
 * userdata, true if it seems to support them or if at least the error we get
 * is ambiguous. */
static bool
check_variable_length_userdata(struct dpif_backer *backer)
{
    struct eth_header *eth;
    struct ofpbuf actions;
    struct dpif_execute execute;
    struct ofpbuf packet;
    size_t start;
    int error;

    /* Compose a userspace action that will cause an ERANGE error on older
     * datapaths that don't support variable-length userdata.
     *
     * We really test for using userdata longer than 8 bytes, but older
     * datapaths accepted these, silently truncating the userdata to 8 bytes.
     * The same older datapaths rejected userdata shorter than 8 bytes, so we
     * test for that instead as a proxy for longer userdata support. */
    ofpbuf_init(&actions, 64);
    start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
    nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
    nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
    nl_msg_end_nested(&actions, start);
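    /* At this point 'actions' holds a single userspace() action whose
     * userdata is 4 bytes of zeros: short enough to draw ERANGE from a
     * datapath that insists on exactly 8-byte userdata, yet accepted by a
     * datapath that supports variable lengths. */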

    /* Compose a dummy ethernet packet. */
    ofpbuf_init(&packet, ETH_HEADER_LEN);
    eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
    eth->eth_type = htons(0x1234);

    /* Execute the actions.  On older datapaths this fails with ERANGE, on
     * newer datapaths it succeeds. */
    execute.actions = ofpbuf_data(&actions);
    execute.actions_len = ofpbuf_size(&actions);
    execute.packet = &packet;
    execute.md = PKT_METADATA_INITIALIZER(0);
    execute.needs_help = false;

    error = dpif_execute(backer->dpif, &execute);

    ofpbuf_uninit(&packet);
    ofpbuf_uninit(&actions);

    switch (error) {
    case 0:
        return true;

    case ERANGE:
        /* Variable-length userdata is not supported. */
        VLOG_WARN("%s: datapath does not support variable-length userdata "
                  "feature (needs Linux 3.10+ or kernel module from OVS "
                  "1.11+).  The NXAST_SAMPLE action will be ignored.",
                  dpif_name(backer->dpif));
        return false;

    default:
        /* Something odd happened.  We're not sure whether variable-length
         * userdata is supported.  Default to "yes". */
        VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
                  dpif_name(backer->dpif), ovs_strerror(error));
        return true;
    }
}

/* Tests the MPLS label stack depth supported by 'backer''s datapath.
 *
 * Returns the number of elements in a struct flow's mpls_lse field
 * if the datapath supports at least that many entries in an
 * MPLS label stack.
 * Otherwise returns the greatest MPLS label stack depth that the
 * datapath supports matching on. */
static size_t
check_max_mpls_depth(struct dpif_backer *backer)
{
    struct flow flow;
    int n;

    for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
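        /* Each pass probes a flow key whose MPLS label stack is one entry
         * deeper than the last; the first key the datapath rejects ends the
         * loop, and 'n' is reported as the supported depth. */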
        struct odputil_keybuf keybuf;
        struct ofpbuf key;
        int error;

        memset(&flow, 0, sizeof flow);
        flow.dl_type = htons(ETH_TYPE_MPLS);
        flow_set_mpls_bos(&flow, n, 1);

        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
        odp_flow_key_from_flow(&key, &flow, NULL, 0, false);

        error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE,
                              ofpbuf_data(&key), ofpbuf_size(&key),
                              NULL, 0, NULL, 0, NULL);
        if (error && error != EEXIST) {
            if (error != EINVAL) {
                VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
                          dpif_name(backer->dpif), ovs_strerror(error));
            }
            break;
        }

        error = dpif_flow_del(backer->dpif, ofpbuf_data(&key),
                              ofpbuf_size(&key), NULL);
        if (error) {
            VLOG_WARN("%s: failed to delete MPLS feature probe flow",
                      dpif_name(backer->dpif));
        }
    }

    VLOG_INFO("%s: MPLS label stack length probed as %d",
              dpif_name(backer->dpif), n);
    return n;
}

/* Tests whether 'backer''s datapath supports masked data in
 * OVS_ACTION_ATTR_SET actions.  We need to disable some features on older
 * datapaths that don't support this feature. */
static bool
check_masked_set_action(struct dpif_backer *backer)
{
    struct eth_header *eth;
    struct ofpbuf actions;
    struct dpif_execute execute;
    struct ofpbuf packet;
    int error;
    struct ovs_key_ethernet key, mask;

    /* Compose a set action that will cause an EINVAL error on older
     * datapaths that don't support masked set actions.
     * Avoid using a full mask, as it could be translated to a non-masked
     * set action instead. */
    ofpbuf_init(&actions, 64);
    memset(&key, 0x53, sizeof key);
    memset(&mask, 0x7f, sizeof mask);
    commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
                             sizeof key);
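    /* 'actions' now holds a single masked set of the Ethernet key (key bytes
     * 0x53 under mask bytes 0x7f).  Executing it is expected to fail with
     * EINVAL on a datapath that predates masked set actions. */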

    /* Compose a dummy ethernet packet. */
    ofpbuf_init(&packet, ETH_HEADER_LEN);
    eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
    eth->eth_type = htons(0x1234);

    /* Execute the actions.  On older datapaths this fails with EINVAL, on
     * newer datapaths it succeeds. */
    execute.actions = ofpbuf_data(&actions);
    execute.actions_len = ofpbuf_size(&actions);
    execute.packet = &packet;
    execute.md = PKT_METADATA_INITIALIZER(0);
    execute.needs_help = false;

    error = dpif_execute(backer->dpif, &execute);

    ofpbuf_uninit(&packet);
    ofpbuf_uninit(&actions);

    if (error) {
        /* Masked set action is not supported. */
        VLOG_INFO("%s: datapath does not support masked set action feature.",
                  dpif_name(backer->dpif));
    }
    return !error;
}

static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;
    int error;

    error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
    if (error) {
        return error;
    }

    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->ipfix = NULL;
    ofproto->stp = NULL;
    ofproto->rstp = NULL;
    ofproto->dump_seq = 0;
    hmap_init(&ofproto->bundles);
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    ofproto->ms = NULL;
    ofproto->mbridge = mbridge_create();
    ofproto->has_bonded_bundles = false;
    ofproto->lacp_enabled = false;
    ovs_mutex_init_adaptive(&ofproto->stats_mutex);
    ovs_mutex_init(&ofproto->vsp_mutex);

    guarded_list_init(&ofproto->pins);

    ofproto_dpif_unixctl_init();

    hmap_init(&ofproto->vlandev_map);
    hmap_init(&ofproto->realdev_vid_map);

    sset_init(&ofproto->ports);
    sset_init(&ofproto->ghost_ports);
    sset_init(&ofproto->port_poll_set);
    ofproto->port_poll_errno = 0;
    ofproto->change_seq = 0;
    ofproto->pins_seq = seq_create();
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);

    SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
            /* Check if the datapath already has this port. */
            if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                sset_add(&ofproto->ports, node->name);
            }

            free(iface_hint->br_name);
            free(iface_hint->br_type);
            free(iface_hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    error = add_internal_flows(ofproto);

    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    return error;
}

static int
add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
                       const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
{
    struct match match;
    int error;
    struct rule *rule;

    match_init_catchall(&match);
    match_set_reg(&match, 0, id);

    error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
                                           &rule);
    *rulep = error ? NULL : rule_dpif_cast(rule);

    return error;
}

static int
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;
    struct rule *unused_rulep OVS_UNUSED;
    struct ofpact_resubmit *resubmit;
    struct match match;
    int error;
    int id;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    id = 1;

    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_NO_MATCH;
    ofpact_pad(&ofpacts);

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->miss_rule);
    if (error) {
        return error;
    }

    ofpbuf_clear(&ofpacts);
    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->no_packet_in_rule);
    if (error) {
        return error;
    }

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->drop_frags_rule);
    if (error) {
        return error;
    }

    /* Continue non-recirculation rule lookups from table 0.
     *
     * (priority=2), recirc=0, actions=resubmit(, 0)
     */
    resubmit = ofpact_put_RESUBMIT(&ofpacts);
    resubmit->in_port = OFPP_IN_PORT;
    resubmit->table_id = 0;

    match_init_catchall(&match);
    match_set_recirc_id(&match, 0);

    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
                                           &unused_rulep);
    if (error) {
        return error;
    }

    /* Drop any runaway recirc rule lookups.  Recirc_id has to be
     * non-zero when reaching this rule.
     *
     * (priority=1), *, actions=drop
     */
    ofpbuf_clear(&ofpacts);
    match_init_catchall(&match);
    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
                                           &unused_rulep);

    return error;
}

static void
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofproto_packet_in *pin, *next_pin;
    struct rule_dpif *rule;
    struct oftable *table;
    struct list pins;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_remove_ofproto(ofproto);
    xlate_txn_commit();

    /* Ensure that the upcall processing threads have no remaining references
     * to the ofproto or anything in it. */
    udpif_synchronize(ofproto->backer->udpif);

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);

    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        CLS_FOR_EACH_SAFE (rule, up.cr, &table->cls) {
            ofproto_rule_delete(&ofproto->up, &rule->up);
        }
    }

    guarded_list_pop_all(&ofproto->pins, &pins);
    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
        list_remove(&pin->list_node);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }
    guarded_list_destroy(&ofproto->pins);

    mbridge_unref(ofproto->mbridge);

    netflow_unref(ofproto->netflow);
    dpif_sflow_unref(ofproto->sflow);
    dpif_ipfix_unref(ofproto->ipfix);
    hmap_destroy(&ofproto->bundles);
    mac_learning_unref(ofproto->ml);
    mcast_snooping_unref(ofproto->ms);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    sset_destroy(&ofproto->ports);
    sset_destroy(&ofproto->ghost_ports);
    sset_destroy(&ofproto->port_poll_set);

    ovs_mutex_destroy(&ofproto->stats_mutex);
    ovs_mutex_destroy(&ofproto->vsp_mutex);

    seq_destroy(ofproto->pins_seq);

    close_dpif_backer(ofproto->backer);
}

static int
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    uint64_t new_seq, new_dump_seq;

    if (mbridge_need_revalidate(ofproto->mbridge)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
        mcast_snooping_mdb_flush(ofproto->ms);
    }

    /* Always update ofproto->pins_seqno to avoid frequent wakeups during
     * flow restore.  Even though nothing is processed during flow restore,
     * all queued 'pins' will be handled immediately when flow restore
     * completes. */
1423     ofproto->pins_seqno = seq_read(ofproto->pins_seq);
1424
1425     /* Do not perform any periodic activity required by 'ofproto' while
1426      * waiting for flow restore to complete. */
1427     if (!ofproto_get_flow_restore_wait()) {
1428         struct ofproto_packet_in *pin, *next_pin;
1429         struct list pins;
1430
1431         guarded_list_pop_all(&ofproto->pins, &pins);
1432         LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
1433             connmgr_send_packet_in(ofproto->up.connmgr, pin);
1434             list_remove(&pin->list_node);
1435             free(CONST_CAST(void *, pin->up.packet));
1436             free(pin);
1437         }
1438     }
1439
1440     if (ofproto->netflow) {
1441         netflow_run(ofproto->netflow);
1442     }
1443     if (ofproto->sflow) {
1444         dpif_sflow_run(ofproto->sflow);
1445     }
1446     if (ofproto->ipfix) {
1447         dpif_ipfix_run(ofproto->ipfix);
1448     }
1449
1450     new_seq = seq_read(connectivity_seq_get());
1451     if (ofproto->change_seq != new_seq) {
1452         struct ofport_dpif *ofport;
1453
1454         HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1455             port_run(ofport);
1456         }
1457
1458         ofproto->change_seq = new_seq;
1459     }
1460     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1461         struct ofbundle *bundle;
1462
1463         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1464             bundle_run(bundle);
1465         }
1466     }
1467
1468     stp_run(ofproto);
1469     rstp_run(ofproto);
1470     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
1471     if (mac_learning_run(ofproto->ml)) {
1472         ofproto->backer->need_revalidate = REV_MAC_LEARNING;
1473     }
1474     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1475
1476     if (mcast_snooping_run(ofproto->ms)) {
1477         ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
1478     }
1479
1480     new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
1481     if (ofproto->dump_seq != new_dump_seq) {
1482         struct rule *rule, *next_rule;
1483
1484         /* We know stats are relatively fresh, so now is a good time to do some
1485          * periodic work. */
1486         ofproto->dump_seq = new_dump_seq;
1487
1488         /* Expire OpenFlow flows whose idle_timeout or hard_timeout
1489          * has passed. */
1490         ovs_mutex_lock(&ofproto_mutex);
1491         LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
1492                             &ofproto->up.expirable) {
1493             rule_expire(rule_dpif_cast(rule));
1494         }
1495         ovs_mutex_unlock(&ofproto_mutex);
1496
1497         /* All outstanding data in existing flows has been accounted, so it's a
1498          * good time to do bond rebalancing. */
1499         if (ofproto->has_bonded_bundles) {
1500             struct ofbundle *bundle;
1501
1502             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1503                 if (bundle->bond) {
1504                     bond_rebalance(bundle->bond);
1505                 }
1506             }
1507         }
1508     }
1509
1510     return 0;
1511 }
1512
1513 static void
1514 wait(struct ofproto *ofproto_)
1515 {
1516     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1517
1518     if (ofproto_get_flow_restore_wait()) {
1519         return;
1520     }
1521
1522     if (ofproto->sflow) {
1523         dpif_sflow_wait(ofproto->sflow);
1524     }
1525     if (ofproto->ipfix) {
1526         dpif_ipfix_wait(ofproto->ipfix);
1527     }
1528     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1529         struct ofbundle *bundle;
1530
1531         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1532             bundle_wait(bundle);
1533         }
1534     }
1535     if (ofproto->netflow) {
1536         netflow_wait(ofproto->netflow);
1537     }
1538     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
1539     mac_learning_wait(ofproto->ml);
1540     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1541     mcast_snooping_wait(ofproto->ms);
1542     stp_wait(ofproto);
1543     if (ofproto->backer->need_revalidate) {
1544         /* Shouldn't happen, but if it does just go around again. */
1545         VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
1546         poll_immediate_wake();
1547     }
1548
1549     seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
1550     seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
1551 }
1552
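/* Adds memory usage statistics for the udpif backing datapath 'type', if
 * such a backer exists, to 'usage'. */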
1553 static void
1554 type_get_memory_usage(const char *type, struct simap *usage)
1555 {
1556     struct dpif_backer *backer;
1557
1558     backer = shash_find_data(&all_dpif_backers, type);
1559     if (backer) {
1560         udpif_get_memory_usage(backer->udpif, usage);
1561     }
1562 }
1563
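/* Deletes every datapath flow installed through this ofproto's backer. */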
1564 static void
1565 flush(struct ofproto *ofproto_)
1566 {
1567     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1568     struct dpif_backer *backer = ofproto->backer;
1569
1570     if (backer) {
1571         udpif_flush(backer->udpif);
1572     }
1573 }
1574
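/* Fills in table features, naming the table "classifier", and, when 'stats'
 * is nonnull, fills in each table's matched and lookup counts from its
 * relaxed atomic counters. */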
1575 static void
1576 query_tables(struct ofproto *ofproto,
1577              struct ofputil_table_features *features,
1578              struct ofputil_table_stats *stats)
1579 {
1580     strcpy(features->name, "classifier");
1581
1582     if (stats) {
1583         int i;
1584
1585         for (i = 0; i < ofproto->n_tables; i++) {
1586             unsigned long missed, matched;
1587
1588             atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
1589             atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
1590
1591             stats[i].matched_count = matched;
1592             stats[i].lookup_count = matched + missed;
1593         }
1594     }
1595 }
1596
1597 static struct ofport *
1598 port_alloc(void)
1599 {
1600     struct ofport_dpif *port = xmalloc(sizeof *port);
1601     return &port->up;
1602 }
1603
1604 static void
1605 port_dealloc(struct ofport *port_)
1606 {
1607     struct ofport_dpif *port = ofport_dpif_cast(port_);
1608     free(port);
1609 }
1610
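/* Initializes the dpif-level state of 'port_' (CFM, BFD, STP, RSTP, tunnel,
 * QoS, and patch-peer state) and hooks the port up to the datapath.  Patch
 * ports are handled specially because they have no datapath port. */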
1611 static int
1612 port_construct(struct ofport *port_)
1613 {
1614     struct ofport_dpif *port = ofport_dpif_cast(port_);
1615     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1616     const struct netdev *netdev = port->up.netdev;
1617     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1618     struct dpif_port dpif_port;
1619     int error;
1620
1621     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1622     port->bundle = NULL;
1623     port->cfm = NULL;
1624     port->bfd = NULL;
1625     port->may_enable = false;
1626     port->stp_port = NULL;
1627     port->stp_state = STP_DISABLED;
1628     port->rstp_port = NULL;
1629     port->rstp_state = RSTP_DISABLED;
1630     port->is_tunnel = false;
1631     port->peer = NULL;
1632     port->qdscp = NULL;
1633     port->n_qdscp = 0;
1634     port->realdev_ofp_port = 0;
1635     port->vlandev_vid = 0;
1636     port->carrier_seq = netdev_get_carrier_resets(netdev);
1637     port->is_layer3 = netdev_vport_is_layer3(netdev);
1638
1639     if (netdev_vport_is_patch(netdev)) {
1640         /* By bailing out here, we don't submit the port to the sFlow module
1641          * to be considered for counter polling export.  This is correct
1642          * because the patch port represents an interface that sFlow considers
1643          * to be "internal" to the switch as a whole, and therefore not a
1644          * candidate for counter polling. */
1645         port->odp_port = ODPP_NONE;
1646         ofport_update_peer(port);
1647         return 0;
1648     }
1649
1650     error = dpif_port_query_by_name(ofproto->backer->dpif,
1651                                     netdev_vport_get_dpif_port(netdev, namebuf,
1652                                                                sizeof namebuf),
1653                                     &dpif_port);
1654     if (error) {
1655         return error;
1656     }
1657
1658     port->odp_port = dpif_port.port_no;
1659
1660     if (netdev_get_tunnel_config(netdev)) {
1661         tnl_port_add(port, port->up.netdev, port->odp_port);
1662         port->is_tunnel = true;
1663         if (ofproto->ipfix) {
1664             dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
1665         }
1666     } else {
1667         /* Sanity-check that a mapping doesn't already exist.  This
1668          * shouldn't happen for non-tunnel ports. */
1669         if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
1670             VLOG_ERR("port %s already has an OpenFlow port number",
1671                      dpif_port.name);
1672             dpif_port_destroy(&dpif_port);
1673             return EBUSY;
1674         }
1675
1676         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1677         hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
1678                     hash_odp_port(port->odp_port));
1679         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1680     }
1681     dpif_port_destroy(&dpif_port);
1682
1683     if (ofproto->sflow) {
1684         dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
1685     }
1686
1687     return 0;
1688 }
1689
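/* Undoes port_construct(): removes 'port_' from the xlate module, from the
 * datapath if the underlying device still exists, and from every ancillary
 * subsystem it was registered with. */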
1690 static void
1691 port_destruct(struct ofport *port_)
1692 {
1693     struct ofport_dpif *port = ofport_dpif_cast(port_);
1694     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1695     const char *devname = netdev_get_name(port->up.netdev);
1696     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1697     const char *dp_port_name;
1698
1699     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1700     xlate_txn_start();
1701     xlate_ofport_remove(port);
1702     xlate_txn_commit();
1703
1704     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
1705                                               sizeof namebuf);
1706     if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
1707         /* The underlying device is still there, so delete it.  This
1708          * happens when the ofproto is being destroyed, since the caller
1709          * assumes that removal of attached ports will happen as part of
1710          * destruction. */
1711         if (!port->is_tunnel) {
1712             dpif_port_del(ofproto->backer->dpif, port->odp_port);
1713         }
1714     }
1715
1716     if (port->peer) {
1717         port->peer->peer = NULL;
1718         port->peer = NULL;
1719     }
1720
1721     if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
1722         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1723         hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
1724         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1725     }
1726
1727     if (port->is_tunnel && ofproto->ipfix) {
1728         dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
1729     }
1730
1731     tnl_port_del(port);
1732     sset_find_and_delete(&ofproto->ports, devname);
1733     sset_find_and_delete(&ofproto->ghost_ports, devname);
1734     bundle_remove(port_);
1735     set_cfm(port_, NULL);
1736     set_bfd(port_, NULL);
1737     if (port->stp_port) {
1738         stp_port_disable(port->stp_port);
1739     }
1740     set_rstp_port(port_, NULL);
1741     if (ofproto->sflow) {
1742         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
1743     }
1744
1745     free(port->qdscp);
1746 }
1747
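/* Propagates a change to 'port_''s netdev to bonding, CFM, BFD,
 * connectivity monitoring, tunneling, and patch-port peering. */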
1748 static void
1749 port_modified(struct ofport *port_)
1750 {
1751     struct ofport_dpif *port = ofport_dpif_cast(port_);
1752
1753     if (port->bundle && port->bundle->bond) {
1754         bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
1755     }
1756
1757     if (port->cfm) {
1758         cfm_set_netdev(port->cfm, port->up.netdev);
1759     }
1760
1761     if (port->bfd) {
1762         bfd_set_netdev(port->bfd, port->up.netdev);
1763     }
1764
1765     ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
1766                                      port->up.pp.hw_addr);
1767
1768     if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
1769                                                 port->odp_port)) {
1770         ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
1771             REV_RECONFIGURE;
1772     }
1773
1774     ofport_update_peer(port);
1775 }
1776
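/* Triggers revalidation if an OpenFlow config bit that affects packet
 * handling changed on 'port_', and updates the bundle's floodability when
 * OFPUTIL_PC_NO_FLOOD flipped. */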
1777 static void
1778 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1779 {
1780     struct ofport_dpif *port = ofport_dpif_cast(port_);
1781     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1782     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1783
1784     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1785                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1786                    OFPUTIL_PC_NO_PACKET_IN)) {
1787         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1788
1789         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1790             bundle_update(port->bundle);
1791         }
1792     }
1793 }
1794
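/* Enables, reconfigures, or disables sFlow on 'ofproto_' according to
 * 'sflow_options' (a null 'sflow_options' disables sFlow). */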
1795 static int
1796 set_sflow(struct ofproto *ofproto_,
1797           const struct ofproto_sflow_options *sflow_options)
1798 {
1799     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1800     struct dpif_sflow *ds = ofproto->sflow;
1801
1802     if (sflow_options) {
1803         if (!ds) {
1804             struct ofport_dpif *ofport;
1805
1806             ds = ofproto->sflow = dpif_sflow_create();
1807             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1808                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1809             }
1810             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1811         }
1812         dpif_sflow_set_options(ds, sflow_options);
1813     } else {
1814         if (ds) {
1815             dpif_sflow_unref(ds);
1816             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1817             ofproto->sflow = NULL;
1818         }
1819     }
1820     return 0;
1821 }
1822
1823 static int
1824 set_ipfix(
1825     struct ofproto *ofproto_,
1826     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1827     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1828     size_t n_flow_exporters_options)
1829 {
1830     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1831     struct dpif_ipfix *di = ofproto->ipfix;
1832     bool has_options = bridge_exporter_options || flow_exporters_options;
1833     bool new_di = false;
1834
1835     if (has_options && !di) {
1836         di = ofproto->ipfix = dpif_ipfix_create();
1837         new_di = true;
1838     }
1839
1840     if (di) {
1841         /* Call set_options in any case to cleanly flush the flow
1842          * caches of any exporters that are about to be destroyed. */
1843         dpif_ipfix_set_options(
1844             di, bridge_exporter_options, flow_exporters_options,
1845             n_flow_exporters_options);
1846
1847         /* Add tunnel ports only when a new ipfix instance is created. */
1848         if (new_di) {
1849             struct ofport_dpif *ofport;
1850             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1851                 if (ofport->is_tunnel) {
1852                     dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
1853                 }
1854             }
1855         }
1856
1857         if (!has_options) {
1858             dpif_ipfix_unref(di);
1859             ofproto->ipfix = NULL;
1860         }
1861     }
1862
1863     return 0;
1864 }
1865
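/* Configures CFM on 'ofport_' from 's', creating or destroying the CFM
 * instance as needed.  A null 's' disables CFM.  Returns 0 on success,
 * EINVAL if 's' cannot be applied. */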
1866 static int
1867 set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
1868 {
1869     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1870     int error = 0;
1871
1872     if (s) {
1873         if (!ofport->cfm) {
1874             struct ofproto_dpif *ofproto;
1875
1876             ofproto = ofproto_dpif_cast(ofport->up.ofproto);
1877             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1878             ofport->cfm = cfm_create(ofport->up.netdev);
1879         }
1880
1881         if (cfm_configure(ofport->cfm, s)) {
1882             error = 0;
1883             goto out;
1884         }
1885
1886         error = EINVAL;
1887     }
1888     cfm_unref(ofport->cfm);
1889     ofport->cfm = NULL;
1890 out:
1891     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1892                                      ofport->up.pp.hw_addr);
1893     return error;
1894 }
1895
1896 static bool
1897 cfm_status_changed(struct ofport *ofport_)
1898 {
1899     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1900
1901     return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
1902 }
1903
1904 static int
1905 get_cfm_status(const struct ofport *ofport_,
1906                struct cfm_status *status)
1907 {
1908     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1909     int ret = 0;
1910
1911     if (ofport->cfm) {
1912         cfm_get_status(ofport->cfm, status);
1913     } else {
1914         ret = ENOENT;
1915     }
1916
1917     return ret;
1918 }
1919
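/* Configures BFD on 'ofport_' from 'cfg', triggering revalidation if the
 * BFD instance changed.  Always returns 0. */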
1920 static int
1921 set_bfd(struct ofport *ofport_, const struct smap *cfg)
1922 {
1923     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
1924     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1925     struct bfd *old;
1926
1927     old = ofport->bfd;
1928     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
1929                                 cfg, ofport->up.netdev);
1930     if (ofport->bfd != old) {
1931         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1932     }
1933     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1934                                      ofport->up.pp.hw_addr);
1935     return 0;
1936 }
1937
1938 static bool
1939 bfd_status_changed(struct ofport *ofport_)
1940 {
1941     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1942
1943     return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
1944 }
1945
1946 static int
1947 get_bfd_status(struct ofport *ofport_, struct smap *smap)
1948 {
1949     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1950     int ret = 0;
1951
1952     if (ofport->bfd) {
1953         bfd_get_status(ofport->bfd, smap);
1954     } else {
1955         ret = ENOENT;
1956     }
1957
1958     return ret;
1959 }
1960 \f
1961 /* Spanning Tree. */
1962
1963 /* Called while rstp_mutex is held. */
1964 static void
1965 rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_)
1966 {
1967     struct ofproto_dpif *ofproto = ofproto_;
1968     struct ofport_dpif *ofport = ofport_;
1969     struct eth_header *eth = ofpbuf_l2(pkt);
1970
1971     netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
1972     if (eth_addr_is_zero(eth->eth_src)) {
1973         VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
1974                      "does not have a configured source MAC address.",
1975                      ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
1976     } else {
1977         ofproto_dpif_send_packet(ofport, pkt);
1978     }
1979     ofpbuf_delete(pkt);
1980 }
1981
1982 static void
1983 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
1984 {
1985     struct ofproto_dpif *ofproto = ofproto_;
1986     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
1987     struct ofport_dpif *ofport;
1988
1989     ofport = stp_port_get_aux(sp);
1990     if (!ofport) {
1991         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
1992                      ofproto->up.name, port_num);
1993     } else {
1994         struct eth_header *eth = ofpbuf_l2(pkt);
1995
1996         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
1997         if (eth_addr_is_zero(eth->eth_src)) {
1998             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
1999                          "with unknown MAC", ofproto->up.name, port_num);
2000         } else {
2001             ofproto_dpif_send_packet(ofport, pkt);
2002         }
2003     }
2004     ofpbuf_delete(pkt);
2005 }
2006
2007 /* Configures RSTP on 'ofproto_' using the settings defined in 's'. */
2008 static void
2009 set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
2010 {
2011     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2012
2013     /* Only revalidate flows if the configuration changed. */
2014     if (!s != !ofproto->rstp) {
2015         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2016     }
2017
2018     if (s) {
2019         if (!ofproto->rstp) {
2020             ofproto->rstp = rstp_create(ofproto_->name, s->address,
2021                                         rstp_send_bpdu_cb, ofproto);
2022             ofproto->rstp_last_tick = time_msec();
2023         }
2024         rstp_set_bridge_address(ofproto->rstp, s->address);
2025         rstp_set_bridge_priority(ofproto->rstp, s->priority);
2026         rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
2027         rstp_set_bridge_force_protocol_version(ofproto->rstp,
2028                                                s->force_protocol_version);
2029         rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
2030         rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
2031         rstp_set_bridge_transmit_hold_count(ofproto->rstp,
2032                                             s->transmit_hold_count);
2033     } else {
2034         struct ofport *ofport;
2035         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2036             set_rstp_port(ofport, NULL);
2037         }
2038         rstp_unref(ofproto->rstp);
2039         ofproto->rstp = NULL;
2040     }
2041 }
2042
2043 static void
2044 get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
2045 {
2046     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2047
2048     if (ofproto->rstp) {
2049         s->enabled = true;
2050         s->root_id = rstp_get_root_id(ofproto->rstp);
2051         s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
2052         s->designated_id = rstp_get_designated_id(ofproto->rstp);
2053         s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
2054         s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
2055         s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
2056     } else {
2057         s->enabled = false;
2058     }
2059 }
2060
2061 static void
2062 update_rstp_port_state(struct ofport_dpif *ofport)
2063 {
2064     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2065     enum rstp_state state;
2066
2067     /* Figure out new state. */
2068     state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
2069         : RSTP_DISABLED;
2070
2071     /* Update state. */
2072     if (ofport->rstp_state != state) {
2073         enum ofputil_port_state of_state;
2074         bool fwd_change;
2075
2076         VLOG_DBG("port %s: RSTP state changed from %s to %s",
2077                  netdev_get_name(ofport->up.netdev),
2078                  rstp_state_name(ofport->rstp_state),
2079                  rstp_state_name(state));
2080         if (rstp_learn_in_state(ofport->rstp_state)
2081                 != rstp_learn_in_state(state)) {
2082             /* XXX: Learning action flows should also be flushed. */
2083             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2084             mac_learning_flush(ofproto->ml);
2085             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2086         }
2087         fwd_change = rstp_forward_in_state(ofport->rstp_state)
2088             != rstp_forward_in_state(state);
2089
2090         ofproto->backer->need_revalidate = REV_RSTP;
2091         ofport->rstp_state = state;
2092
2093         if (fwd_change && ofport->bundle) {
2094             bundle_update(ofport->bundle);
2095         }
2096
2097         /* Update the RSTP state bits in the OpenFlow port description. */
2098         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2099         of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
2100                 : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2101                 : state == RSTP_DISCARDING ? OFPUTIL_PS_STP_LISTEN
2102                 : 0);
2103         ofproto_port_set_state(&ofport->up, of_state);
2104     }
2105 }
2106
2107 static void
2108 rstp_run(struct ofproto_dpif *ofproto)
2109 {
2110     if (ofproto->rstp) {
2111         long long int now = time_msec();
2112         long long int elapsed = now - ofproto->rstp_last_tick;
2113         struct rstp_port *rp;
2114         struct ofport_dpif *ofport;
2115
2116         /* Every second, decrease the values of the timers. */
2117         if (elapsed >= 1000) {
2118             rstp_tick_timers(ofproto->rstp);
2119             ofproto->rstp_last_tick = now;
2120         }
2121         rp = NULL;
2122         while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
2123             update_rstp_port_state(ofport);
2124         }
2125         /* FIXME: This check should be done on-event (i.e., when setting
2126          * p->fdb_flush) and not periodically.
2127          */
2128         if (rstp_check_and_reset_fdb_flush(ofproto->rstp)) {
2129             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2130             /* FIXME: RSTP should be able to flush the entries pertaining to a
2131              * single port, not the whole table.
2132              */
2133             mac_learning_flush(ofproto->ml);
2134             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2135         }
2136     }
2137 }
2138
2139 /* Configures STP on 'ofproto_' using the settings defined in 's'. */
2140 static int
2141 set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
2142 {
2143     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2144
2145     /* Only revalidate flows if the configuration changed. */
2146     if (!s != !ofproto->stp) {
2147         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2148     }
2149
2150     if (s) {
2151         if (!ofproto->stp) {
2152             ofproto->stp = stp_create(ofproto_->name, s->system_id,
2153                                       send_bpdu_cb, ofproto);
2154             ofproto->stp_last_tick = time_msec();
2155         }
2156
2157         stp_set_bridge_id(ofproto->stp, s->system_id);
2158         stp_set_bridge_priority(ofproto->stp, s->priority);
2159         stp_set_hello_time(ofproto->stp, s->hello_time);
2160         stp_set_max_age(ofproto->stp, s->max_age);
2161         stp_set_forward_delay(ofproto->stp, s->fwd_delay);
2162     } else {
2163         struct ofport *ofport;
2164
2165         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2166             set_stp_port(ofport, NULL);
2167         }
2168
2169         stp_unref(ofproto->stp);
2170         ofproto->stp = NULL;
2171     }
2172
2173     return 0;
2174 }
2175
2176 static int
2177 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2178 {
2179     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2180
2181     if (ofproto->stp) {
2182         s->enabled = true;
2183         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2184         s->designated_root = stp_get_designated_root(ofproto->stp);
2185         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2186     } else {
2187         s->enabled = false;
2188     }
2189
2190     return 0;
2191 }
2192
2193 static void
2194 update_stp_port_state(struct ofport_dpif *ofport)
2195 {
2196     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2197     enum stp_state state;
2198
2199     /* Figure out new state. */
2200     state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
2201                              : STP_DISABLED;
2202
2203     /* Update state. */
2204     if (ofport->stp_state != state) {
2205         enum ofputil_port_state of_state;
2206         bool fwd_change;
2207
2208         VLOG_DBG("port %s: STP state changed from %s to %s",
2209                  netdev_get_name(ofport->up.netdev),
2210                  stp_state_name(ofport->stp_state),
2211                  stp_state_name(state));
2212         if (stp_learn_in_state(ofport->stp_state)
2213                 != stp_learn_in_state(state)) {
2214             /* XXX: Learning action flows should also be flushed. */
2215             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2216             mac_learning_flush(ofproto->ml);
2217             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2218             mcast_snooping_mdb_flush(ofproto->ms);
2219         }
2220         fwd_change = stp_forward_in_state(ofport->stp_state)
2221                         != stp_forward_in_state(state);
2222
2223         ofproto->backer->need_revalidate = REV_STP;
2224         ofport->stp_state = state;
2225         ofport->stp_state_entered = time_msec();
2226
2227         if (fwd_change && ofport->bundle) {
2228             bundle_update(ofport->bundle);
2229         }
2230
2231         /* Update the STP state bits in the OpenFlow port description. */
2232         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2233         of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
2234                      : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
2235                      : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2236                      : state == STP_BLOCKING ? OFPUTIL_PS_STP_BLOCK
2237                      : 0);
2238         ofproto_port_set_state(&ofport->up, of_state);
2239     }
2240 }
2241
2242 /* Configures STP on 'ofport_' using the settings defined in 's'.  The
2243  * caller is responsible for assigning STP port numbers and ensuring
2244  * there are no duplicates. */
2245 static int
2246 set_stp_port(struct ofport *ofport_,
2247              const struct ofproto_port_stp_settings *s)
2248 {
2249     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2250     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2251     struct stp_port *sp = ofport->stp_port;
2252
2253     if (!s || !s->enable) {
2254         if (sp) {
2255             ofport->stp_port = NULL;
2256             stp_port_disable(sp);
2257             update_stp_port_state(ofport);
2258         }
2259         return 0;
2260     } else if (sp && stp_port_no(sp) != s->port_num
2261                && ofport == stp_port_get_aux(sp)) {
2262         /* The port-id changed, so disable the old one if it's not
2263          * already in use by another port. */
2264         stp_port_disable(sp);
2265     }
2266
2267     sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
2268
2269     /* Set name before enabling the port so that debugging messages can print
2270      * the name. */
2271     stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
2272     stp_port_enable(sp);
2273
2274     stp_port_set_aux(sp, ofport);
2275     stp_port_set_priority(sp, s->priority);
2276     stp_port_set_path_cost(sp, s->path_cost);
2277
2278     update_stp_port_state(ofport);
2279
2280     return 0;
2281 }
2282
2283 static int
2284 get_stp_port_status(struct ofport *ofport_,
2285                     struct ofproto_port_stp_status *s)
2286 {
2287     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2288     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2289     struct stp_port *sp = ofport->stp_port;
2290
2291     if (!ofproto->stp || !sp) {
2292         s->enabled = false;
2293         return 0;
2294     }
2295
2296     s->enabled = true;
2297     s->port_id = stp_port_get_id(sp);
2298     s->state = stp_port_get_state(sp);
2299     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2300     s->role = stp_port_get_role(sp);
2301
2302     return 0;
2303 }
2304
2305 static int
2306 get_stp_port_stats(struct ofport *ofport_,
2307                    struct ofproto_port_stp_stats *s)
2308 {
2309     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2310     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2311     struct stp_port *sp = ofport->stp_port;
2312
2313     if (!ofproto->stp || !sp) {
2314         s->enabled = false;
2315         return 0;
2316     }
2317
2318     s->enabled = true;
2319     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2320
2321     return 0;
2322 }
2323
2324 static void
2325 stp_run(struct ofproto_dpif *ofproto)
2326 {
2327     if (ofproto->stp) {
2328         long long int now = time_msec();
2329         long long int elapsed = now - ofproto->stp_last_tick;
2330         struct stp_port *sp;
2331
2332         if (elapsed > 0) {
2333             stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
2334             ofproto->stp_last_tick = now;
2335         }
2336         while (stp_get_changed_port(ofproto->stp, &sp)) {
2337             struct ofport_dpif *ofport = stp_port_get_aux(sp);
2338
2339             if (ofport) {
2340                 update_stp_port_state(ofport);
2341             }
2342         }
2343
2344         if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
2345             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2346             mac_learning_flush(ofproto->ml);
2347             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2348             mcast_snooping_mdb_flush(ofproto->ms);
2349         }
2350     }
2351 }
2352
2353 static void
2354 stp_wait(struct ofproto_dpif *ofproto)
2355 {
2356     if (ofproto->stp) {
2357         poll_timer_wait(1000);
2358     }
2359 }
2360
2361 /* Configures RSTP on 'ofport_' using the settings defined in 's'.  The
2362  * caller is responsible for assigning RSTP port numbers and ensuring
2363  * there are no duplicates. */
2364 static void
2365 set_rstp_port(struct ofport *ofport_,
2366               const struct ofproto_port_rstp_settings *s)
2367 {
2368     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2369     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2370     struct rstp_port *rp = ofport->rstp_port;
2371
2372     if (!s || !s->enable) {
2373         if (rp) {
2374             rstp_port_unref(rp);
2375             ofport->rstp_port = NULL;
2376             update_rstp_port_state(ofport);
2377         }
2378         return;
2379     }
2380
2381     /* Check whether we need to add a new port. */
2382     if (!rp) {
2383         rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
2384     }
2385
2386     rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
2387                   s->admin_edge_port, s->auto_edge, s->mcheck, ofport);
2388     update_rstp_port_state(ofport);
2389 }
2390
2391 static void
2392 get_rstp_port_status(struct ofport *ofport_,
2393         struct ofproto_port_rstp_status *s)
2394 {
2395     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2396     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2397     struct rstp_port *rp = ofport->rstp_port;
2398
2399     if (!ofproto->rstp || !rp) {
2400         s->enabled = false;
2401         return;
2402     }
2403
2404     s->enabled = true;
2405     rstp_port_get_status(rp, &s->port_id, &s->state, &s->role, &s->tx_count,
2406                          &s->rx_count, &s->error_count, &s->uptime);
2407 }
2408
2409 \f
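/* Configures the queue-to-DSCP mapping for 'ofport_', revalidating flows
 * only if the mapping actually changed. */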
2410 static int
2411 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2412            size_t n_qdscp)
2413 {
2414     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2415     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2416
2417     if (ofport->n_qdscp != n_qdscp
2418         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2419                               n_qdscp * sizeof *qdscp))) {
2420         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2421         free(ofport->qdscp);
2422         ofport->qdscp = n_qdscp
2423             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2424             : NULL;
2425         ofport->n_qdscp = n_qdscp;
2426     }
2427
2428     return 0;
2429 }
2430 \f
2431 /* Bundles. */
2432
2433 /* Expires all MAC learning entries associated with 'bundle' and forces its
2434  * ofproto to revalidate every flow.
2435  *
2436  * Normally MAC learning entries are removed only from the ofproto associated
2437  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
 * are removed from every ofproto.  When patch ports and SLB bonds are in use
 * and a VM migrates but its gratuitous ARPs are somehow lost, this avoids a
 * MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate with the
 * host from which it migrated.
2442 static void
2443 bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
2444 {
2445     struct ofproto_dpif *ofproto = bundle->ofproto;
2446     struct mac_learning *ml = ofproto->ml;
2447     struct mac_entry *mac, *next_mac;
2448
2449     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2450     ovs_rwlock_wrlock(&ml->rwlock);
2451     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2452         if (mac->port.p == bundle) {
2453             if (all_ofprotos) {
2454                 struct ofproto_dpif *o;
2455
2456                 HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2457                     if (o != ofproto) {
2458                         struct mac_entry *e;
2459
2460                         ovs_rwlock_wrlock(&o->ml->rwlock);
2461                         e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
2462                         if (e) {
2463                             mac_learning_expire(o->ml, e);
2464                         }
2465                         ovs_rwlock_unlock(&o->ml->rwlock);
2466                     }
2467                 }
2468             }
2469
2470             mac_learning_expire(ml, mac);
2471         }
2472     }
2473     ovs_rwlock_unlock(&ml->rwlock);
2474 }
2475
2476 static struct ofbundle *
2477 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2478 {
2479     struct ofbundle *bundle;
2480
2481     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2482                              &ofproto->bundles) {
2483         if (bundle->aux == aux) {
2484             return bundle;
2485         }
2486     }
2487     return NULL;
2488 }
2489
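/* Recomputes whether 'bundle' is floodable: it is not if any member port
 * has flooding disabled, is a layer-3 port, or is not in an STP forwarding
 * state. */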
2490 static void
2491 bundle_update(struct ofbundle *bundle)
2492 {
2493     struct ofport_dpif *port;
2494
2495     bundle->floodable = true;
2496     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2497         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2498             || port->is_layer3
2499             || !stp_forward_in_state(port->stp_state)) {
2500             bundle->floodable = false;
2501             break;
2502         }
2503     }
2504 }
2505
2506 static void
2507 bundle_del_port(struct ofport_dpif *port)
2508 {
2509     struct ofbundle *bundle = port->bundle;
2510
2511     bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2512
2513     list_remove(&port->bundle_node);
2514     port->bundle = NULL;
2515
2516     if (bundle->lacp) {
2517         lacp_slave_unregister(bundle->lacp, port);
2518     }
2519     if (bundle->bond) {
2520         bond_slave_unregister(bundle->bond, port);
2521     }
2522
2523     bundle_update(bundle);
2524 }
2525
2526 static bool
2527 bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
2528                 struct lacp_slave_settings *lacp)
2529 {
2530     struct ofport_dpif *port;
2531
2532     port = get_ofp_port(bundle->ofproto, ofp_port);
2533     if (!port) {
2534         return false;
2535     }
2536
2537     if (port->bundle != bundle) {
2538         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2539         if (port->bundle) {
2540             bundle_remove(&port->up);
2541         }
2542
2543         port->bundle = bundle;
2544         list_push_back(&bundle->ports, &port->bundle_node);
2545         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2546             || port->is_layer3
2547             || !stp_forward_in_state(port->stp_state)) {
2548             bundle->floodable = false;
2549         }
2550     }
2551     if (lacp) {
2552         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2553         lacp_slave_register(bundle->lacp, port, lacp);
2554     }
2555
2556     return true;
2557 }
2558
2559 static void
2560 bundle_destroy(struct ofbundle *bundle)
2561 {
2562     struct ofproto_dpif *ofproto;
2563     struct ofport_dpif *port, *next_port;
2564
2565     if (!bundle) {
2566         return;
2567     }
2568
2569     ofproto = bundle->ofproto;
2570     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
2571
2572     xlate_txn_start();
2573     xlate_bundle_remove(bundle);
2574     xlate_txn_commit();
2575
2576     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2577         bundle_del_port(port);
2578     }
2579
2580     bundle_flush_macs(bundle, true);
2581     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
2582     free(bundle->name);
2583     free(bundle->trunks);
2584     lacp_unref(bundle->lacp);
2585     bond_unref(bundle->bond);
2586     free(bundle);
2587 }
2588
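/* Creates, reconfigures, or (when 's' is null) destroys the bundle
 * identified by 'aux' on 'ofproto_': port membership, LACP, VLAN mode and
 * trunks, and bonding. */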
2589 static int
2590 bundle_set(struct ofproto *ofproto_, void *aux,
2591            const struct ofproto_bundle_settings *s)
2592 {
2593     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2594     bool need_flush = false;
2595     struct ofport_dpif *port;
2596     struct ofbundle *bundle;
2597     unsigned long *trunks;
2598     int vlan;
2599     size_t i;
2600     bool ok;
2601
2602     if (!s) {
2603         bundle_destroy(bundle_lookup(ofproto, aux));
2604         return 0;
2605     }
2606
2607     ovs_assert(s->n_slaves == 1 || s->bond != NULL);
2608     ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));
2609
2610     bundle = bundle_lookup(ofproto, aux);
2611     if (!bundle) {
2612         bundle = xmalloc(sizeof *bundle);
2613
2614         bundle->ofproto = ofproto;
2615         hmap_insert(&ofproto->bundles, &bundle->hmap_node,
2616                     hash_pointer(aux, 0));
2617         bundle->aux = aux;
2618         bundle->name = NULL;
2619
2620         list_init(&bundle->ports);
2621         bundle->vlan_mode = PORT_VLAN_TRUNK;
2622         bundle->vlan = -1;
2623         bundle->trunks = NULL;
2624         bundle->use_priority_tags = s->use_priority_tags;
2625         bundle->lacp = NULL;
2626         bundle->bond = NULL;
2627
2628         bundle->floodable = true;
2629         mbridge_register_bundle(ofproto->mbridge, bundle);
2630     }
2631
2632     if (!bundle->name || strcmp(s->name, bundle->name)) {
2633         free(bundle->name);
2634         bundle->name = xstrdup(s->name);
2635     }
2636
2637     /* LACP. */
2638     if (s->lacp) {
2639         ofproto->lacp_enabled = true;
2640         if (!bundle->lacp) {
2641             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2642             bundle->lacp = lacp_create();
2643         }
2644         lacp_configure(bundle->lacp, s->lacp);
2645     } else {
2646         lacp_unref(bundle->lacp);
2647         bundle->lacp = NULL;
2648     }
2649
2650     /* Update set of ports. */
2651     ok = true;
2652     for (i = 0; i < s->n_slaves; i++) {
2653         if (!bundle_add_port(bundle, s->slaves[i],
2654                              s->lacp ? &s->lacp_slaves[i] : NULL)) {
2655             ok = false;
2656         }
2657     }
2658     if (!ok || list_size(&bundle->ports) != s->n_slaves) {
2659         struct ofport_dpif *next_port;
2660
2661         LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2662             for (i = 0; i < s->n_slaves; i++) {
2663                 if (s->slaves[i] == port->up.ofp_port) {
2664                     goto found;
2665                 }
2666             }
2667
2668             bundle_del_port(port);
2669         found: ;
2670         }
2671     }
2672     ovs_assert(list_size(&bundle->ports) <= s->n_slaves);
2673
2674     if (list_is_empty(&bundle->ports)) {
2675         bundle_destroy(bundle);
2676         return EINVAL;
2677     }
2678
2679     /* Set VLAN tagging mode. */
2680     if (s->vlan_mode != bundle->vlan_mode
2681         || s->use_priority_tags != bundle->use_priority_tags) {
2682         bundle->vlan_mode = s->vlan_mode;
2683         bundle->use_priority_tags = s->use_priority_tags;
2684         need_flush = true;
2685     }
2686
2687     /* Set VLAN tag. */
2688     vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
2689             : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
2690             : 0);
2691     if (vlan != bundle->vlan) {
2692         bundle->vlan = vlan;
2693         need_flush = true;
2694     }
2695
2696     /* Get trunked VLANs. */
2697     switch (s->vlan_mode) {
2698     case PORT_VLAN_ACCESS:
2699         trunks = NULL;
2700         break;
2701
2702     case PORT_VLAN_TRUNK:
2703         trunks = CONST_CAST(unsigned long *, s->trunks);
2704         break;
2705
2706     case PORT_VLAN_NATIVE_UNTAGGED:
2707     case PORT_VLAN_NATIVE_TAGGED:
2708         if (vlan != 0 && (!s->trunks
2709                           || !bitmap_is_set(s->trunks, vlan)
2710                           || bitmap_is_set(s->trunks, 0))) {
2711             /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
2712             if (s->trunks) {
2713                 trunks = bitmap_clone(s->trunks, 4096);
2714             } else {
2715                 trunks = bitmap_allocate1(4096);
2716             }
2717             bitmap_set1(trunks, vlan);
2718             bitmap_set0(trunks, 0);
2719         } else {
2720             trunks = CONST_CAST(unsigned long *, s->trunks);
2721         }
2722         break;
2723
2724     default:
2725         OVS_NOT_REACHED();
2726     }
2727     if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
2728         free(bundle->trunks);
2729         if (trunks == s->trunks) {
2730             bundle->trunks = vlan_bitmap_clone(trunks);
2731         } else {
2732             bundle->trunks = trunks;
2733             trunks = NULL;
2734         }
2735         need_flush = true;
2736     }
2737     if (trunks != s->trunks) {
2738         free(trunks);
2739     }
2740
2741     /* Bonding. */
2742     if (!list_is_short(&bundle->ports)) {
2743         bundle->ofproto->has_bonded_bundles = true;
2744         if (bundle->bond) {
2745             if (bond_reconfigure(bundle->bond, s->bond)) {
2746                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
2747             }
2748         } else {
2749             bundle->bond = bond_create(s->bond, ofproto);
2750             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2751         }
2752
2753         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2754             bond_slave_register(bundle->bond, port,
2755                                 port->up.ofp_port, port->up.netdev);
2756         }
2757     } else {
2758         bond_unref(bundle->bond);
2759         bundle->bond = NULL;
2760     }
2761
2762     /* If we changed something that would affect MAC learning, un-learn
2763      * everything on this port and force flow revalidation. */
2764     if (need_flush) {
2765         bundle_flush_macs(bundle, false);
2766     }
2767
2768     return 0;
2769 }
2770
2771 static void
2772 bundle_remove(struct ofport *port_)
2773 {
2774     struct ofport_dpif *port = ofport_dpif_cast(port_);
2775     struct ofbundle *bundle = port->bundle;
2776
2777     if (bundle) {
2778         bundle_del_port(port);
2779         if (list_is_empty(&bundle->ports)) {
2780             bundle_destroy(bundle);
2781         } else if (list_is_short(&bundle->ports)) {
2782             bond_unref(bundle->bond);
2783             bundle->bond = NULL;
2784         }
2785     }
2786 }
2787
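/* Callback used by the LACP module to transmit 'pdu' on the slave port
 * 'port_': wraps the PDU in an Ethernet frame addressed to the LACP
 * multicast address and sends it. */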
2788 static void
2789 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2790 {
2791     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2792     struct ofport_dpif *port = port_;
2793     uint8_t ea[ETH_ADDR_LEN];
2794     int error;
2795
2796     error = netdev_get_etheraddr(port->up.netdev, ea);
2797     if (!error) {
2798         struct ofpbuf packet;
2799         void *packet_pdu;
2800
2801         ofpbuf_init(&packet, 0);
2802         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2803                                  pdu_size);
2804         memcpy(packet_pdu, pdu, pdu_size);
2805
2806         ofproto_dpif_send_packet(port, &packet);
2807         ofpbuf_uninit(&packet);
2808     } else {
2809         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2810                     "%s (%s)", port->bundle->name,
2811                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2812     }
2813 }
2814
2815 static void
2816 bundle_send_learning_packets(struct ofbundle *bundle)
2817 {
2818     struct ofproto_dpif *ofproto = bundle->ofproto;
2819     struct ofpbuf *learning_packet;
2820     int error, n_packets, n_errors;
2821     struct mac_entry *e;
2822     struct list packets;
2823
2824     list_init(&packets);
2825     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
2826     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
2827         if (e->port.p != bundle) {
2828             void *port_void;
2829
2830             learning_packet = bond_compose_learning_packet(bundle->bond,
2831                                                            e->mac, e->vlan,
2832                                                            &port_void);
2833             /* Temporarily use 'frame' as a private pointer (see below). */
2834             ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
2835             learning_packet->frame = port_void;
2836             list_push_back(&packets, &learning_packet->list_node);
2837         }
2838     }
2839     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2840
2841     error = n_packets = n_errors = 0;
2842     LIST_FOR_EACH (learning_packet, list_node, &packets) {
2843         int ret;
2844         void *port_void = learning_packet->frame;
2845
2846         /* Restore 'frame'. */
2847         learning_packet->frame = ofpbuf_data(learning_packet);
2848         ret = ofproto_dpif_send_packet(port_void, learning_packet);
2849         if (ret) {
2850             error = ret;
2851             n_errors++;
2852         }
2853         n_packets++;
2854     }
2855     ofpbuf_list_delete(&packets);
2856
2857     if (n_errors) {
2858         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2859         VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
2860                      "packets, last error was: %s",
2861                      bundle->name, n_errors, n_packets, ovs_strerror(error));
2862     } else {
2863         VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2864                  bundle->name, n_packets);
2865     }
2866 }
2867
2868 static void
2869 bundle_run(struct ofbundle *bundle)
2870 {
2871     if (bundle->lacp) {
2872         lacp_run(bundle->lacp, send_pdu_cb);
2873     }
2874     if (bundle->bond) {
2875         struct ofport_dpif *port;
2876
2877         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2878             bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
2879         }
2880
2881         if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
2882             bundle->ofproto->backer->need_revalidate = REV_BOND;
2883         }
2884
2885         if (bond_should_send_learning_packets(bundle->bond)) {
2886             bundle_send_learning_packets(bundle);
2887         }
2888     }
2889 }
2890
2891 static void
2892 bundle_wait(struct ofbundle *bundle)
2893 {
2894     if (bundle->lacp) {
2895         lacp_wait(bundle->lacp);
2896     }
2897     if (bundle->bond) {
2898         bond_wait(bundle->bond);
2899     }
2900 }
2901 \f
2902 /* Mirrors. */
2903
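/* Creates, updates, or (when 's' is null) destroys the mirror identified by
 * 'aux' on 'ofproto_'. */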
2904 static int
2905 mirror_set__(struct ofproto *ofproto_, void *aux,
2906              const struct ofproto_mirror_settings *s)
2907 {
2908     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2909     struct ofbundle **srcs, **dsts;
2910     int error;
2911     size_t i;
2912
2913     if (!s) {
2914         mirror_destroy(ofproto->mbridge, aux);
2915         return 0;
2916     }
2917
2918     srcs = xmalloc(s->n_srcs * sizeof *srcs);
2919     dsts = xmalloc(s->n_dsts * sizeof *dsts);
2920
2921     for (i = 0; i < s->n_srcs; i++) {
2922         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
2923     }
2924
2925     for (i = 0; i < s->n_dsts; i++) {
2926         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
2927     }
2928
2929     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
2930                        s->n_dsts, s->src_vlans,
2931                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
2932     free(srcs);
2933     free(dsts);
2934     return error;
2935 }
2936
2937 static int
2938 mirror_get_stats__(struct ofproto *ofproto, void *aux,
2939                    uint64_t *packets, uint64_t *bytes)
2940 {
2941     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
2942                             bytes);
2943 }
2944
2945 static int
2946 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
2947 {
2948     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2949     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2950     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
2951         mac_learning_flush(ofproto->ml);
2952     }
2953     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2954     return 0;
2955 }
2956
2957 static bool
2958 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
2959 {
2960     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2961     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
2962     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
2963 }
2964
2965 static void
2966 forward_bpdu_changed(struct ofproto *ofproto_)
2967 {
2968     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2969     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2970 }
2971
2972 static void
2973 set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
2974                      size_t max_entries)
2975 {
2976     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2977     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2978     mac_learning_set_idle_time(ofproto->ml, idle_time);
2979     mac_learning_set_max_entries(ofproto->ml, max_entries);
2980     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2981 }
2982
2983 /* Configures multicast snooping on 'ofproto_' using the settings
2984  * defined in 's'. */
2985 static int
2986 set_mcast_snooping(struct ofproto *ofproto_,
2987                    const struct ofproto_mcast_snooping_settings *s)
2988 {
2989     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2990
2991     /* Only revalidate flows if the configuration changed. */
2992     if (!s != !ofproto->ms) {
2993         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2994     }
2995
2996     if (s) {
2997         if (!ofproto->ms) {
2998             ofproto->ms = mcast_snooping_create();
2999         }
3000
3001         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3002         mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
3003         mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
3004         if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
3005             ofproto->backer->need_revalidate = REV_RECONFIGURE;
3006         }
3007         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3008     } else {
3009         mcast_snooping_unref(ofproto->ms);
3010         ofproto->ms = NULL;
3011     }
3012
3013     return 0;
3014 }
3015
3016 /* Configures the flood setting of a multicast snooping port on 'ofproto'. */
3017 static int
3018 set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood)
3019 {
3020     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3021     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3022
3023     if (ofproto->ms) {
3024         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3025         mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle,
3026                                       flood);
3027         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3028     }
3029     return 0;
3030 }
3031
3032 \f
3033 /* Ports. */
3034
3035 static struct ofport_dpif *
3036 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
3037 {
3038     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
3039     return ofport ? ofport_dpif_cast(ofport) : NULL;
3040 }
3041
3042 static void
3043 ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
3044                             struct ofproto_port *ofproto_port,
3045                             struct dpif_port *dpif_port)
3046 {
3047     ofproto_port->name = dpif_port->name;
3048     ofproto_port->type = dpif_port->type;
3049     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
3050 }
3051
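/* For a patch port, looks up the peer named by its netdev configuration in
 * every ofproto on the same backer and links the two ports together, first
 * breaking any existing peering. */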
3052 static void
3053 ofport_update_peer(struct ofport_dpif *ofport)
3054 {
3055     const struct ofproto_dpif *ofproto;
3056     struct dpif_backer *backer;
3057     char *peer_name;
3058
3059     if (!netdev_vport_is_patch(ofport->up.netdev)) {
3060         return;
3061     }
3062
3063     backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
3064     backer->need_revalidate = REV_RECONFIGURE;
3065
3066     if (ofport->peer) {
3067         ofport->peer->peer = NULL;
3068         ofport->peer = NULL;
3069     }
3070
3071     peer_name = netdev_vport_patch_peer(ofport->up.netdev);
3072     if (!peer_name) {
3073         return;
3074     }
3075
3076     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
3077         struct ofport *peer_ofport;
3078         struct ofport_dpif *peer;
3079         char *peer_peer;
3080
3081         if (ofproto->backer != backer) {
3082             continue;
3083         }
3084
3085         peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
3086         if (!peer_ofport) {
3087             continue;
3088         }
3089
3090         peer = ofport_dpif_cast(peer_ofport);
3091         peer_peer = netdev_vport_patch_peer(peer->up.netdev);
3092         if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
3093                                  peer_peer)) {
3094             ofport->peer = peer;
3095             ofport->peer->peer = ofport;
3096         }
3097         free(peer_peer);
3098
3099         break;
3100     }
3101     free(peer_name);
3102 }
3103
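/* Recomputes whether 'ofport' may be enabled, folding in carrier state and,
 * when configured, the CFM, BFD, and LACP opinions, and triggers
 * revalidation when the result changes. */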
3104 static void
3105 port_run(struct ofport_dpif *ofport)
3106 {
3107     long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
3108     bool carrier_changed = carrier_seq != ofport->carrier_seq;
3109     bool enable = netdev_get_carrier(ofport->up.netdev);
3110     bool cfm_enable = false;
3111     bool bfd_enable = false;
3112
3113     ofport->carrier_seq = carrier_seq;
3114
3115     if (ofport->cfm) {
3116         int cfm_opup = cfm_get_opup(ofport->cfm);
3117
3118         cfm_enable = !cfm_get_fault(ofport->cfm);
3119
3120         if (cfm_opup >= 0) {
3121             cfm_enable = cfm_enable && cfm_opup;
3122         }
3123     }
3124
3125     if (ofport->bfd) {
3126         bfd_enable = bfd_forwarding(ofport->bfd);
3127     }
3128
3129     if (ofport->bfd || ofport->cfm) {
3130         enable = enable && (cfm_enable || bfd_enable);
3131     }
3132
3133     if (ofport->bundle) {
3134         enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
3135         if (carrier_changed) {
3136             lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
3137         }
3138     }
3139
3140     if (ofport->may_enable != enable) {
3141         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3142
3143         ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
3144
3145         if (ofport->rstp_port) {
3146             rstp_port_set_mac_operational(ofport->rstp_port, enable);
3147         }
3148     }
3149
3150     ofport->may_enable = enable;
3151 }
3152
3153 static int
3154 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
3155                    struct ofproto_port *ofproto_port)
3156 {
3157     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3158     struct dpif_port dpif_port;
3159     int error;
3160
3161     if (sset_contains(&ofproto->ghost_ports, devname)) {
3162         const char *type = netdev_get_type_from_name(devname);
3163
3164         /* We may be called before ofproto->up.port_by_name is populated with
3165          * the appropriate ofport.  For this reason, we must get the name and
3166          * type from the netdev layer directly. */
3167         if (type) {
3168             const struct ofport *ofport;
3169
3170             ofport = shash_find_data(&ofproto->up.port_by_name, devname);
3171             ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
3172             ofproto_port->name = xstrdup(devname);
3173             ofproto_port->type = xstrdup(type);
3174             return 0;
3175         }
3176         return ENODEV;
3177     }
3178
3179     if (!sset_contains(&ofproto->ports, devname)) {
3180         return ENODEV;
3181     }
3182     error = dpif_port_query_by_name(ofproto->backer->dpif,
3183                                     devname, &dpif_port);
3184     if (!error) {
3185         ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
3186     }
3187     return error;
3188 }
3189
3190 static int
3191 port_add(struct ofproto *ofproto_, struct netdev *netdev)
3192 {
3193     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3194     const char *devname = netdev_get_name(netdev);
3195     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
3196     const char *dp_port_name;
3197
3198     if (netdev_vport_is_patch(netdev)) {
3199         sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
3200         return 0;
3201     }
3202
3203     dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
3204     if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
3205         odp_port_t port_no = ODPP_NONE;
3206         int error;
3207
3208         error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
3209         if (error) {
3210             return error;
3211         }
3212         if (netdev_get_tunnel_config(netdev)) {
3213             simap_put(&ofproto->backer->tnl_backers,
3214                       dp_port_name, odp_to_u32(port_no));
3215         }
3216     }
3217
3218     if (netdev_get_tunnel_config(netdev)) {
3219         sset_add(&ofproto->ghost_ports, devname);
3220     } else {
3221         sset_add(&ofproto->ports, devname);
3222     }
3223     return 0;
3224 }
3225
3226 static int
3227 port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
3228 {
3229     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3230     struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
3231     int error = 0;
3232
3233     if (!ofport) {
3234         return 0;
3235     }
3236
3237     sset_find_and_delete(&ofproto->ghost_ports,
3238                          netdev_get_name(ofport->up.netdev));
3239     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3240     if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
3241         error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
3242         if (!error) {
3243             /* The caller is going to close ofport->up.netdev.  If this is a
3244              * bonded port, then the bond is using that netdev, so remove it
3245              * from the bond.  The client will need to reconfigure everything
3246              * after deleting ports, at which point the slave will be re-added. */
3247             bundle_remove(&ofport->up);
3248         }
3249     }
3250     return error;
3251 }
3252
3253 static int
3254 port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
3255 {
3256     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3257     int error;
3258
3259     error = netdev_get_stats(ofport->up.netdev, stats);
3260
3261     if (!error && ofport_->ofp_port == OFPP_LOCAL) {
3262         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3263
3264         ovs_mutex_lock(&ofproto->stats_mutex);
3265         /* ofproto->stats.tx_packets represents packets that we created
3266          * internally and sent to some port (e.g. packets sent with
3267          * ofproto_dpif_send_packet()).  Account for them as if they had
3268          * come from OFPP_LOCAL and got forwarded. */
3269
3270         if (stats->rx_packets != UINT64_MAX) {
3271             stats->rx_packets += ofproto->stats.tx_packets;
3272         }
3273
3274         if (stats->rx_bytes != UINT64_MAX) {
3275             stats->rx_bytes += ofproto->stats.tx_bytes;
3276         }
3277
3278         /* ofproto->stats.rx_packets represents packets that were received on
3279          * some port and we processed internally and dropped (e.g. STP).
3280          * Account for them as if they had been forwarded to OFPP_LOCAL. */
3281
3282         if (stats->tx_packets != UINT64_MAX) {
3283             stats->tx_packets += ofproto->stats.rx_packets;
3284         }
3285
3286         if (stats->tx_bytes != UINT64_MAX) {
3287             stats->tx_bytes += ofproto->stats.rx_bytes;
3288         }
3289         ovs_mutex_unlock(&ofproto->stats_mutex);
3290     }
3291
3292     return error;
3293 }
3294
3295 struct port_dump_state {
3296     uint32_t bucket;
3297     uint32_t offset;
3298     bool ghost;
3299
3300     struct ofproto_port port;
3301     bool has_port;
3302 };
3303
3304 static int
3305 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3306 {
3307     *statep = xzalloc(sizeof(struct port_dump_state));
3308     return 0;
3309 }
3310
3311 static int
3312 port_dump_next(const struct ofproto *ofproto_, void *state_,
3313                struct ofproto_port *port)
3314 {
3315     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3316     struct port_dump_state *state = state_;
3317     const struct sset *sset;
3318     struct sset_node *node;
3319
3320     if (state->has_port) {
3321         ofproto_port_destroy(&state->port);
3322         state->has_port = false;
3323     }
3324     sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
3325     while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
3326         int error;
3327
3328         error = port_query_by_name(ofproto_, node->name, &state->port);
3329         if (!error) {
3330             *port = state->port;
3331             state->has_port = true;
3332             return 0;
3333         } else if (error != ENODEV) {
3334             return error;
3335         }
3336     }
3337
3338     if (!state->ghost) {
3339         state->ghost = true;
3340         state->bucket = 0;
3341         state->offset = 0;
3342         return port_dump_next(ofproto_, state_, port);
3343     }
3344
3345     return EOF;
3346 }
3347
3348 static int
3349 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
3350 {
3351     struct port_dump_state *state = state_;
3352
3353     if (state->has_port) {
3354         ofproto_port_destroy(&state->port);
3355     }
3356     free(state);
3357     return 0;
3358 }
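
/* A minimal sketch (hypothetical caller; real callers go through the generic
 * ofproto layer) of the dump protocol implemented by the three callbacks
 * above:
 *
 *     void *state;
 *     struct ofproto_port port;
 *
 *     port_dump_start(ofproto, &state);
 *     while (!port_dump_next(ofproto, state, &port)) {
 *         // 'port' remains owned by 'state'; do not destroy it here.
 *         printf("%s\n", port.name);
 *     }
 *     port_dump_done(ofproto, state);   // Frees 'state' and any held port.
 */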
3359
3360 static int
3361 port_poll(const struct ofproto *ofproto_, char **devnamep)
3362 {
3363     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3364
3365     if (ofproto->port_poll_errno) {
3366         int error = ofproto->port_poll_errno;
3367         ofproto->port_poll_errno = 0;
3368         return error;
3369     }
3370
3371     if (sset_is_empty(&ofproto->port_poll_set)) {
3372         return EAGAIN;
3373     }
3374
3375     *devnamep = sset_pop(&ofproto->port_poll_set);
3376     return 0;
3377 }
3378
3379 static void
3380 port_poll_wait(const struct ofproto *ofproto_)
3381 {
3382     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3383     dpif_port_poll_wait(ofproto->backer->dpif);
3384 }
3385
3386 static int
3387 port_is_lacp_current(const struct ofport *ofport_)
3388 {
3389     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3390     return (ofport->bundle && ofport->bundle->lacp
3391             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3392             : -1);
3393 }
3394 \f
3395 /* If 'rule' is an OpenFlow rule that has expired according to OpenFlow rules,
3396  * then deletes it entirely. */
3397 static void
3398 rule_expire(struct rule_dpif *rule)
3399     OVS_REQUIRES(ofproto_mutex)
3400 {
3401     uint16_t hard_timeout, idle_timeout;
3402     long long int now = time_msec();
3403     int reason = -1;
3404
3405     hard_timeout = rule->up.hard_timeout;
3406     idle_timeout = rule->up.idle_timeout;
3407
3408     /* Has 'rule' expired? */
3409     if (hard_timeout) {
3410         long long int modified;
3411
3412         ovs_mutex_lock(&rule->up.mutex);
3413         modified = rule->up.modified;
3414         ovs_mutex_unlock(&rule->up.mutex);
3415
3416         if (now > modified + hard_timeout * 1000) {
3417             reason = OFPRR_HARD_TIMEOUT;
3418         }
3419     }
3420
3421     if (reason < 0 && idle_timeout) {
3422         long long int used;
3423
3424         ovs_mutex_lock(&rule->stats_mutex);
3425         used = rule->stats.used;
3426         ovs_mutex_unlock(&rule->stats_mutex);
3427
3428         if (now > used + idle_timeout * 1000) {
3429             reason = OFPRR_IDLE_TIMEOUT;
3430         }
3431     }
3432
3433     if (reason >= 0) {
3434         COVERAGE_INC(ofproto_dpif_expired);
3435         ofproto_rule_expire(&rule->up, reason);
3436     }
3437 }
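
/* A worked example of the arithmetic above: with hard_timeout=10 and
 * modified=T, the rule expires once time_msec() > T + 10 * 1000.  The idle
 * timeout is compared against stats.used the same way; when both have
 * passed, OFPRR_HARD_TIMEOUT wins because it is tested first. */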
3438
3439 /* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
3440  * 'flow' must reflect the data in 'packet'. */
3441 int
3442 ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
3443                              const struct flow *flow,
3444                              struct rule_dpif *rule,
3445                              const struct ofpact *ofpacts, size_t ofpacts_len,
3446                              struct ofpbuf *packet)
3447 {
3448     struct dpif_flow_stats stats;
3449     struct xlate_out xout;
3450     struct xlate_in xin;
3451     ofp_port_t in_port;
3452     struct dpif_execute execute;
3453     int error;
3454
3455     ovs_assert((rule != NULL) != (ofpacts != NULL));
3456
3457     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
3458
3459     if (rule) {
3460         rule_dpif_credit_stats(rule, &stats);
3461     }
3462
3463     xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
3464                   stats.tcp_flags, packet);
3465     xin.ofpacts = ofpacts;
3466     xin.ofpacts_len = ofpacts_len;
3467     xin.resubmit_stats = &stats;
3468     xlate_actions(&xin, &xout);
3469
3470     execute.actions = ofpbuf_data(xout.odp_actions);
3471     execute.actions_len = ofpbuf_size(xout.odp_actions);
3472     execute.packet = packet;
3473     execute.md = pkt_metadata_from_flow(flow);
3474     execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
3475
3476     /* Fix up in_port. */
3477     in_port = flow->in_port.ofp_port;
3478     if (in_port == OFPP_NONE) {
3479         in_port = OFPP_LOCAL;
3480     }
3481     execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
3482
3483     error = dpif_execute(ofproto->backer->dpif, &execute);
3484
3485     xlate_out_uninit(&xout);
3486
3487     return error;
3488 }
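
/* Usage sketch: per the assertion above, exactly one of 'rule' and 'ofpacts'
 * must be nonnull.  The two in-tree patterns (see rule_dpif_execute() and
 * packet_out() below) look like:
 *
 *     // Execute a rule's actions on 'packet', whose headers match 'flow'.
 *     ofproto_dpif_execute_actions(ofproto, &flow, rule, NULL, 0, packet);
 *
 *     // Execute an explicit action list instead of a rule.
 *     ofproto_dpif_execute_actions(ofproto, &flow, NULL, ofpacts,
 *                                  ofpacts_len, packet);
 */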
3489
3490 void
3491 rule_dpif_credit_stats(struct rule_dpif *rule,
3492                        const struct dpif_flow_stats *stats)
3493 {
3494     ovs_mutex_lock(&rule->stats_mutex);
3495     rule->stats.n_packets += stats->n_packets;
3496     rule->stats.n_bytes += stats->n_bytes;
3497     rule->stats.used = MAX(rule->stats.used, stats->used);
3498     ovs_mutex_unlock(&rule->stats_mutex);
3499 }
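
/* Example (a sketch; callers usually obtain 'stats' from the datapath or
 * from dpif_flow_stats_extract(), as ofproto_dpif_execute_actions() does):
 *
 *     struct dpif_flow_stats stats = {
 *         .n_packets = 1,
 *         .n_bytes = 64,
 *         .used = time_msec(),
 *     };
 *     rule_dpif_credit_stats(rule, &stats);
 */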
3500
3501 ovs_be64
3502 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
3503     OVS_REQUIRES(rule->up.mutex)
3504 {
3505     return rule->up.flow_cookie;
3506 }
3507
3508 void
3509 rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
3510                      uint16_t hard_timeout)
3511 {
3512     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
3513 }
3514
3515 /* Returns 'rule''s actions.  The caller owns a reference on the returned
3516  * actions and must eventually release it (with rule_actions_unref()) to avoid
3517  * a memory leak. */
3518 const struct rule_actions *
3519 rule_dpif_get_actions(const struct rule_dpif *rule)
3520 {
3521     return rule_get_actions(&rule->up);
3522 }
3523
3524 /* Sets 'rule''s recirculation id. */
3525 static void
3526 rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
3527     OVS_REQUIRES(rule->up.mutex)
3528 {
3529     ovs_assert(!rule->recirc_id);
3530     rule->recirc_id = id;
3531 }
3532
3533 /* Returns 'rule''s recirculation id. */
3534 uint32_t
3535 rule_dpif_get_recirc_id(struct rule_dpif *rule)
3536     OVS_REQUIRES(rule->up.mutex)
3537 {
3538     if (!rule->recirc_id) {
3539         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3540
3541         rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
3542     }
3543     return rule->recirc_id;
3544 }
3545
3546 /* Sets 'rule''s recirculation id. */
3547 void
3548 rule_set_recirc_id(struct rule *rule_, uint32_t id)
3549 {
3550     struct rule_dpif *rule = rule_dpif_cast(rule_);
3551
3552     ovs_mutex_lock(&rule->up.mutex);
3553     rule_dpif_set_recirc_id(rule, id);
3554     ovs_mutex_unlock(&rule->up.mutex);
3555 }
3556
3557 /* Look up 'flow' in table 0 of 'ofproto''s classifier.
3558  * If 'wc' is non-null, sets the fields that were relevant as part of
3559  * the lookup. Returns the table_id where a match or miss occurred.
3560  *
3561  * The return value will be zero unless there was a miss and
3562  * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
3563  * where misses occur.
3564  *
3565  * The rule is returned in '*rule', which is valid at least until the next
3566  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3567  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3568  * on it before this returns. */
3569 uint8_t
3570 rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
3571                  struct flow_wildcards *wc, struct rule_dpif **rule,
3572                  bool take_ref, const struct dpif_flow_stats *stats)
3573 {
3574     enum rule_dpif_lookup_verdict verdict;
3575     enum ofputil_port_config config = 0;
3576     uint8_t table_id;
3577
3578     if (ofproto_dpif_get_enable_recirc(ofproto)) {
3579         /* Always exactly match recirc_id since datapath supports
3580          * recirculation.  */
3581         if (wc) {
3582             wc->masks.recirc_id = UINT32_MAX;
3583         }
3584
3585         /* Start the lookup in the internal table for post-recirculation
3586          * flows or packets.  We could also simply send everything,
3587          * including normal flows or packets, to the internal table; they
3588          * would not match any post-recirculation rules except the 'catch
3589          * all' rule that resubmits them to table 0.
3590          *
3591          * As an optimization, we send normal flows and packets to table 0
3592          * directly, saving one table lookup.  */
3593         table_id = flow->recirc_id ? TBL_INTERNAL : 0;
3594     } else {
3595         table_id = 0;
3596     }
3597
3598     verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true,
3599                                           &table_id, rule, take_ref, stats);
3600
3601     switch (verdict) {
3602     case RULE_DPIF_LOOKUP_VERDICT_MATCH:
3603         return table_id;
3604     case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: {
3605         struct ofport_dpif *port;
3606
3607         port = get_ofp_port(ofproto, flow->in_port.ofp_port);
3608         if (!port) {
3609             VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
3610                          flow->in_port.ofp_port);
3611         }
3612         config = port ? port->up.pp.config : 0;
3613         break;
3614     }
3615     case RULE_DPIF_LOOKUP_VERDICT_DROP:
3616         config = OFPUTIL_PC_NO_PACKET_IN;
3617         break;
3618     case RULE_DPIF_LOOKUP_VERDICT_DEFAULT:
3619         if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
3620             config = OFPUTIL_PC_NO_PACKET_IN;
3621         }
3622         break;
3623     default:
3624         OVS_NOT_REACHED();
3625     }
3626
3627     choose_miss_rule(config, ofproto->miss_rule,
3628                      ofproto->no_packet_in_rule, rule, take_ref);
3629     return table_id;
3630 }
3631
3632 /* The returned rule is valid at least until the next RCU quiescent period.
3633  * If the '*rule' needs to stay around longer, a non-zero 'take_ref' must be
3634  * passed in to cause a reference to be taken on it before this returns. */
3635 static struct rule_dpif *
3636 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
3637                           const struct flow *flow, struct flow_wildcards *wc,
3638                           bool take_ref)
3639 {
3640     struct classifier *cls = &ofproto->up.tables[table_id].cls;
3641     const struct cls_rule *cls_rule;
3642     struct rule_dpif *rule;
3643     struct flow ofpc_normal_flow;
3644
3645     if (ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
3646         /* We always unwildcard dl_type and nw_frag (for IP), so they
3647          * need not be unwildcarded here. */
3648
3649         if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
3650             if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
3651                 /* We must pretend that transport ports are unavailable. */
3652                 ofpc_normal_flow = *flow;
3653                 ofpc_normal_flow.tp_src = htons(0);
3654                 ofpc_normal_flow.tp_dst = htons(0);
3655                 flow = &ofpc_normal_flow;
3656             } else {
3657                 /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
3658                  * Use the drop_frags_rule (which cannot disappear). */
3659                 cls_rule = &ofproto->drop_frags_rule->up.cr;
3660                 rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3661                 if (take_ref) {
3662                     rule_dpif_ref(rule);
3663                 }
3664                 return rule;
3665             }
3666         }
3667     }
3668
3669     do {
3670         cls_rule = classifier_lookup(cls, flow, wc);
3671
3672         rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3673
3674         /* Try again if the rule was released before we got the reference. */
3675     } while (rule && take_ref && !rule_dpif_try_ref(rule));
3676
3677     return rule;
3678 }
3679
3680 /* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
3681  * Stores the rule that was found in '*rule', or NULL if none was found.
3682  * Updates 'wc', if nonnull, to reflect the fields that were used during the
3683  * lookup.
3684  *
3685  * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
3686  * if none is found then the table miss configuration for that table is
3687  * honored, which can result in additional lookups in other OpenFlow tables.
3688  * In this case the function updates '*table_id' to reflect the final OpenFlow
3689  * table that was searched.
3690  *
3691  * If 'honor_table_miss' is false, then only one table lookup occurs, in
3692  * '*table_id'.
3693  *
3694  * Returns:
3695  *
3696  *    - RULE_DPIF_LOOKUP_VERDICT_MATCH if a rule (in '*rule') was found.
3697  *
3698  *    - RULE_DPIF_LOOKUP_VERDICT_CONTROLLER if no rule was found and either:
3699  *      + 'honor_table_miss' is false, or
3700  *      + a table miss configuration specified that the packet should be
3701  *        sent to the controller in this case.
3702  *
3703  *    - RULE_DPIF_LOOKUP_VERDICT_DROP if no rule was found, 'honor_table_miss'
3704  *      is true and a table miss configuration specified that the packet
3705  *      should be dropped in this case.
3706  *
3707  *    - RULE_DPIF_LOOKUP_VERDICT_DEFAULT if no rule was found,
3708  *      'honor_table_miss' is true and a table miss configuration has
3709  *      not been specified in this case.
3710  *
3711  * The rule is returned in '*rule', which is valid at least until the next
3712  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3713  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3714  * on it before this returns. */
3715 enum rule_dpif_lookup_verdict
3716 rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
3717                             const struct flow *flow,
3718                             struct flow_wildcards *wc,
3719                             bool honor_table_miss,
3720                             uint8_t *table_id, struct rule_dpif **rule,
3721                             bool take_ref, const struct dpif_flow_stats *stats)
3722 {
3723     uint8_t next_id;
3724
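    /* The unusual loop increment below intentionally skips TBL_INTERNAL,
     * which holds only internal (post-recirculation) rules: after the
     * ordinary 'next_id++', the second expression bumps 'next_id' one more
     * step whenever it lands exactly on TBL_INTERNAL. */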
3725     for (next_id = *table_id;
3726          next_id < ofproto->up.n_tables;
3727          next_id++, next_id += (next_id == TBL_INTERNAL))
3728     {
3729         *table_id = next_id;
3730         *rule = rule_dpif_lookup_in_table(ofproto, *table_id, flow, wc,
3731                                           take_ref);
3732         if (stats) {
3733             struct oftable *tbl = &ofproto->up.tables[next_id];
3734             unsigned long orig;
3735
3736             atomic_add_relaxed(*rule ? &tbl->n_matched : &tbl->n_missed,
3737                                stats->n_packets, &orig);
3738         }
3739         if (*rule) {
3740             return RULE_DPIF_LOOKUP_VERDICT_MATCH;
3741         } else if (!honor_table_miss) {
3742             return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
3743         } else {
3744             switch (ofproto_table_get_miss_config(&ofproto->up, *table_id)) {
3745             case OFPUTIL_TABLE_MISS_CONTINUE:
3746                 break;
3747
3748             case OFPUTIL_TABLE_MISS_CONTROLLER:
3749                 return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
3750
3751             case OFPUTIL_TABLE_MISS_DROP:
3752                 return RULE_DPIF_LOOKUP_VERDICT_DROP;
3753
3754             case OFPUTIL_TABLE_MISS_DEFAULT:
3755                 return RULE_DPIF_LOOKUP_VERDICT_DEFAULT;
3756             }
3757         }
3758     }
3759
3760     return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
3761 }
3762
3763 /* Given a port configuration (specified as zero if there's no port), chooses
3764  * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
3765  * flow table miss.
3766  *
3767  * The rule is returned in '*rule', which is valid at least until the next
3768  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3769  * a reference must be taken on it (rule_dpif_ref()).
3770  */
3771 void
3772 choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
3773                  struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule,
3774                  bool take_ref)
3775 {
3776     *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
3777     if (take_ref) {
3778         rule_dpif_ref(*rule);
3779     }
3780 }
3781
3782 static void
3783 complete_operation(struct rule_dpif *rule)
3784     OVS_REQUIRES(ofproto_mutex)
3785 {
3786     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3787
3788     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3789 }
3790
3791 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
3792 {
3793     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
3794 }
3795
3796 static struct rule *
3797 rule_alloc(void)
3798 {
3799     struct rule_dpif *rule = xmalloc(sizeof *rule);
3800     return &rule->up;
3801 }
3802
3803 static void
3804 rule_dealloc(struct rule *rule_)
3805 {
3806     struct rule_dpif *rule = rule_dpif_cast(rule_);
3807     free(rule);
3808 }
3809
3810 static enum ofperr
3811 rule_construct(struct rule *rule_)
3812     OVS_NO_THREAD_SAFETY_ANALYSIS
3813 {
3814     struct rule_dpif *rule = rule_dpif_cast(rule_);
3815     ovs_mutex_init_adaptive(&rule->stats_mutex);
3816     rule->stats.n_packets = 0;
3817     rule->stats.n_bytes = 0;
3818     rule->stats.used = rule->up.modified;
3819     rule->recirc_id = 0;
3820
3821     return 0;
3822 }
3823
3824 static enum ofperr
3825 rule_insert(struct rule *rule_)
3826     OVS_REQUIRES(ofproto_mutex)
3827 {
3828     struct rule_dpif *rule = rule_dpif_cast(rule_);
3829     complete_operation(rule);
3830     return 0;
3831 }
3832
3833 static void
3834 rule_delete(struct rule *rule_)
3835     OVS_REQUIRES(ofproto_mutex)
3836 {
3837     struct rule_dpif *rule = rule_dpif_cast(rule_);
3838     complete_operation(rule);
3839 }
3840
3841 static void
3842 rule_destruct(struct rule *rule_)
3843 {
3844     struct rule_dpif *rule = rule_dpif_cast(rule_);
3845
3846     ovs_mutex_destroy(&rule->stats_mutex);
3847     if (rule->recirc_id) {
3848         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3849
3850         ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
3851     }
3852 }
3853
3854 static void
3855 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
3856                long long int *used)
3857 {
3858     struct rule_dpif *rule = rule_dpif_cast(rule_);
3859
3860     ovs_mutex_lock(&rule->stats_mutex);
3861     *packets = rule->stats.n_packets;
3862     *bytes = rule->stats.n_bytes;
3863     *used = rule->stats.used;
3864     ovs_mutex_unlock(&rule->stats_mutex);
3865 }
3866
3867 static void
3868 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
3869                   struct ofpbuf *packet)
3870 {
3871     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3872
3873     ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
3874 }
3875
3876 static enum ofperr
3877 rule_execute(struct rule *rule, const struct flow *flow,
3878              struct ofpbuf *packet)
3879 {
3880     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
3881     ofpbuf_delete(packet);
3882     return 0;
3883 }
3884
3885 static void
3886 rule_modify_actions(struct rule *rule_, bool reset_counters)
3887     OVS_REQUIRES(ofproto_mutex)
3888 {
3889     struct rule_dpif *rule = rule_dpif_cast(rule_);
3890
3891     if (reset_counters) {
3892         ovs_mutex_lock(&rule->stats_mutex);
3893         rule->stats.n_packets = 0;
3894         rule->stats.n_bytes = 0;
3895         ovs_mutex_unlock(&rule->stats_mutex);
3896     }
3897
3898     complete_operation(rule);
3899 }
3900
3901 static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
3902 {
3903     return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
3904 }
3905
3906 static struct ofgroup *
3907 group_alloc(void)
3908 {
3909     struct group_dpif *group = xzalloc(sizeof *group);
3910     return &group->up;
3911 }
3912
3913 static void
3914 group_dealloc(struct ofgroup *group_)
3915 {
3916     struct group_dpif *group = group_dpif_cast(group_);
3917     free(group);
3918 }
3919
3920 static void
3921 group_construct_stats(struct group_dpif *group)
3922     OVS_REQUIRES(group->stats_mutex)
3923 {
3924     struct ofputil_bucket *bucket;
3925     const struct list *buckets;
3926
3927     group->packet_count = 0;
3928     group->byte_count = 0;
3929
3930     group_dpif_get_buckets(group, &buckets);
3931     LIST_FOR_EACH (bucket, list_node, buckets) {
3932         bucket->stats.packet_count = 0;
3933         bucket->stats.byte_count = 0;
3934     }
3935 }
3936
3937 void
3938 group_dpif_credit_stats(struct group_dpif *group,
3939                         struct ofputil_bucket *bucket,
3940                         const struct dpif_flow_stats *stats)
3941 {
3942     ovs_mutex_lock(&group->stats_mutex);
3943     group->packet_count += stats->n_packets;
3944     group->byte_count += stats->n_bytes;
3945     if (bucket) {
3946         bucket->stats.packet_count += stats->n_packets;
3947         bucket->stats.byte_count += stats->n_bytes;
3948     } else { /* Credit to all buckets. */
3949         const struct list *buckets;
3950
3951         group_dpif_get_buckets(group, &buckets);
3952         LIST_FOR_EACH (bucket, list_node, buckets) {
3953             bucket->stats.packet_count += stats->n_packets;
3954             bucket->stats.byte_count += stats->n_bytes;
3955         }
3956     }
3957     ovs_mutex_unlock(&group->stats_mutex);
3958 }
3959
3960 static enum ofperr
3961 group_construct(struct ofgroup *group_)
3962 {
3963     struct group_dpif *group = group_dpif_cast(group_);
3964     const struct ofputil_bucket *bucket;
3965
3966     /* Prevent group chaining because our locking structure makes it hard to
3967      * implement in a deadlock-free way.  (See xlate_group_resource_check().) */
3968     LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
3969         const struct ofpact *a;
3970
3971         OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
3972             if (a->type == OFPACT_GROUP) {
3973                 return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
3974             }
3975         }
3976     }
3977
3978     ovs_mutex_init_adaptive(&group->stats_mutex);
3979     ovs_mutex_lock(&group->stats_mutex);
3980     group_construct_stats(group);
3981     ovs_mutex_unlock(&group->stats_mutex);
3982     return 0;
3983 }
3984
3985 static void
3986 group_destruct(struct ofgroup *group_)
3987 {
3988     struct group_dpif *group = group_dpif_cast(group_);
3989     ovs_mutex_destroy(&group->stats_mutex);
3990 }
3991
3992 static enum ofperr
3993 group_modify(struct ofgroup *group_)
3994 {
3995     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
3996
3997     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3998
3999     return 0;
4000 }
4001
4002 static enum ofperr
4003 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
4004 {
4005     struct group_dpif *group = group_dpif_cast(group_);
4006     struct ofputil_bucket *bucket;
4007     const struct list *buckets;
4008     struct bucket_counter *bucket_stats;
4009
4010     ovs_mutex_lock(&group->stats_mutex);
4011     ogs->packet_count = group->packet_count;
4012     ogs->byte_count = group->byte_count;
4013
4014     group_dpif_get_buckets(group, &buckets);
4015     bucket_stats = ogs->bucket_stats;
4016     LIST_FOR_EACH (bucket, list_node, buckets) {
4017         bucket_stats->packet_count = bucket->stats.packet_count;
4018         bucket_stats->byte_count = bucket->stats.byte_count;
4019         bucket_stats++;
4020     }
4021     ovs_mutex_unlock(&group->stats_mutex);
4022
4023     return 0;
4024 }
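
/* Note: 'ogs->bucket_stats' must already point to an array with one
 * struct bucket_counter per bucket in the group; the loop above fills it in
 * bucket-list order.  Sizing that array is the caller's job; this function
 * does not bounds-check it. */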
4025
4026 /* If the group exists, this function increments the group's reference count.
4027  *
4028  * Make sure to call group_dpif_unref() after no longer needing to maintain
4029  * a reference to the group. */
4030 bool
4031 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
4032                   struct group_dpif **group)
4033 {
4034     struct ofgroup *ofgroup;
4035     bool found;
4036
4037     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
4038     *group = found ?  group_dpif_cast(ofgroup) : NULL;
4039
4040     return found;
4041 }
4042
4043 void
4044 group_dpif_get_buckets(const struct group_dpif *group,
4045                        const struct list **buckets)
4046 {
4047     *buckets = &group->up.buckets;
4048 }
4049
4050 enum ofp11_group_type
4051 group_dpif_get_type(const struct group_dpif *group)
4052 {
4053     return group->up.type;
4054 }
4055 \f
4056 /* Sends 'packet' out 'ofport'.
4057  * May modify 'packet'.
4058  * Returns 0 if successful, otherwise a positive errno value. */
4059 int
4060 ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4061 {
4062     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4063     int error;
4064
4065     error = xlate_send_packet(ofport, packet);
4066
4067     ovs_mutex_lock(&ofproto->stats_mutex);
4068     ofproto->stats.tx_packets++;
4069     ofproto->stats.tx_bytes += ofpbuf_size(packet);
4070     ovs_mutex_unlock(&ofproto->stats_mutex);
4071     return error;
4072 }
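
/* A minimal sketch of sending a handcrafted packet out a port (hypothetical
 * caller; 'flow' describes the headers to synthesize):
 *
 *     struct ofpbuf *packet = ofpbuf_new(0);
 *
 *     flow_compose(packet, &flow);       // Build packet data from 'flow'.
 *     ofproto_dpif_send_packet(ofport, packet);
 *     ofpbuf_delete(packet);             // 'packet' may have been modified.
 */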
4073 \f
4074 static bool
4075 set_frag_handling(struct ofproto *ofproto_,
4076                   enum ofp_config_flags frag_handling)
4077 {
4078     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4079     if (frag_handling != OFPC_FRAG_REASM) {
4080         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4081         return true;
4082     } else {
4083         return false;
4084     }
4085 }
4086
4087 static enum ofperr
4088 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
4089            const struct flow *flow,
4090            const struct ofpact *ofpacts, size_t ofpacts_len)
4091 {
4092     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4093
4094     ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
4095                                  ofpacts_len, packet);
4096     return 0;
4097 }
4098 \f
4099 /* NetFlow. */
4100
4101 static int
4102 set_netflow(struct ofproto *ofproto_,
4103             const struct netflow_options *netflow_options)
4104 {
4105     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4106
4107     if (netflow_options) {
4108         if (!ofproto->netflow) {
4109             ofproto->netflow = netflow_create();
4110             ofproto->backer->need_revalidate = REV_RECONFIGURE;
4111         }
4112         return netflow_set_options(ofproto->netflow, netflow_options);
4113     } else if (ofproto->netflow) {
4114         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4115         netflow_unref(ofproto->netflow);
4116         ofproto->netflow = NULL;
4117     }
4118
4119     return 0;
4120 }
4121
4122 static void
4123 get_netflow_ids(const struct ofproto *ofproto_,
4124                 uint8_t *engine_type, uint8_t *engine_id)
4125 {
4126     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4127
4128     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
4129 }
4130 \f
4131 static struct ofproto_dpif *
4132 ofproto_dpif_lookup(const char *name)
4133 {
4134     struct ofproto_dpif *ofproto;
4135
4136     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
4137                              hash_string(name, 0), &all_ofproto_dpifs) {
4138         if (!strcmp(ofproto->up.name, name)) {
4139             return ofproto;
4140         }
4141     }
4142     return NULL;
4143 }
4144
4145 static void
4146 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
4147                           const char *argv[], void *aux OVS_UNUSED)
4148 {
4149     struct ofproto_dpif *ofproto;
4150
4151     if (argc > 1) {
4152         ofproto = ofproto_dpif_lookup(argv[1]);
4153         if (!ofproto) {
4154             unixctl_command_reply_error(conn, "no such bridge");
4155             return;
4156         }
4157         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4158         mac_learning_flush(ofproto->ml);
4159         ovs_rwlock_unlock(&ofproto->ml->rwlock);
4160     } else {
4161         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4162             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4163             mac_learning_flush(ofproto->ml);
4164             ovs_rwlock_unlock(&ofproto->ml->rwlock);
4165         }
4166     }
4167
4168     unixctl_command_reply(conn, "table successfully flushed");
4169 }
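
/* This handler is registered with unixctl elsewhere in this file; a typical
 * invocation looks like (illustrative):
 *
 *     $ ovs-appctl fdb/flush          # flush the MAC table of every bridge
 *     $ ovs-appctl fdb/flush br0      # flush a single bridge
 */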
4170
4171 static void
4172 ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
4173                                      const char *argv[], void *aux OVS_UNUSED)
4174 {
4175     struct ofproto_dpif *ofproto;
4176
4177     if (argc > 1) {
4178         ofproto = ofproto_dpif_lookup(argv[1]);
4179         if (!ofproto) {
4180             unixctl_command_reply_error(conn, "no such bridge");
4181             return;
4182         }
4183
4184         if (!mcast_snooping_enabled(ofproto->ms)) {
4185             unixctl_command_reply_error(conn, "multicast snooping is disabled");
4186             return;
4187         }
4188         mcast_snooping_mdb_flush(ofproto->ms);
4189     } else {
4190         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4191             if (!mcast_snooping_enabled(ofproto->ms)) {
4192                 continue;
4193             }
4194             mcast_snooping_mdb_flush(ofproto->ms);
4195         }
4196     }
4197
4198     unixctl_command_reply(conn, "table successfully flushed");
4199 }
4200
4201 static struct ofport_dpif *
4202 ofbundle_get_a_port(const struct ofbundle *bundle)
4203 {
4204     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
4205                         bundle_node);
4206 }
4207
4208 static void
4209 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4210                          const char *argv[], void *aux OVS_UNUSED)
4211 {
4212     struct ds ds = DS_EMPTY_INITIALIZER;
4213     const struct ofproto_dpif *ofproto;
4214     const struct mac_entry *e;
4215
4216     ofproto = ofproto_dpif_lookup(argv[1]);
4217     if (!ofproto) {
4218         unixctl_command_reply_error(conn, "no such bridge");
4219         return;
4220     }
4221
4222     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
4223     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
4224     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
4225         struct ofbundle *bundle = e->port.p;
4226         char name[OFP_MAX_PORT_NAME_LEN];
4227
4228         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4229                                name, sizeof name);
4230         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
4231                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
4232                       mac_entry_age(ofproto->ml, e));
4233     }
4234     ovs_rwlock_unlock(&ofproto->ml->rwlock);
4235     unixctl_command_reply(conn, ds_cstr(&ds));
4236     ds_destroy(&ds);
4237 }
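
/* Example of the table this prints (illustrative values):
 *
 *      port  VLAN  MAC                Age
 *         1     0  50:54:00:00:00:05    2
 *         2    10  50:54:00:00:00:07   13
 */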
4238
4239 static void
4240 ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
4241                                     int argc OVS_UNUSED,
4242                                     const char *argv[],
4243                                     void *aux OVS_UNUSED)
4244 {
4245     struct ds ds = DS_EMPTY_INITIALIZER;
4246     const struct ofproto_dpif *ofproto;
4247     const struct ofbundle *bundle;
4248     const struct mcast_group *grp;
4249     struct mcast_group_bundle *b;
4250     struct mcast_mrouter_bundle *mrouter;
4251
4252     ofproto = ofproto_dpif_lookup(argv[1]);
4253     if (!ofproto) {
4254         unixctl_command_reply_error(conn, "no such bridge");
4255         return;
4256     }
4257
4258     if (!mcast_snooping_enabled(ofproto->ms)) {
4259         unixctl_command_reply_error(conn, "multicast snooping is disabled");
4260         return;
4261     }
4262
4263     ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
4264     ovs_rwlock_rdlock(&ofproto->ms->rwlock);
4265     LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
4266         LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
4267             char name[OFP_MAX_PORT_NAME_LEN];
4268
4269             bundle = b->port;
4270             ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4271                                    name, sizeof name);
4272             ds_put_format(&ds, "%5s  %4d  "IP_FMT"         %3d\n",
4273                           name, grp->vlan, IP_ARGS(grp->ip4),
4274                           mcast_bundle_age(ofproto->ms, b));
4275         }
4276     }
4277
4278     /* Ports connected to multicast routers. */
4279     LIST_FOR_EACH (mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
4280         char name[OFP_MAX_PORT_NAME_LEN];
4281
4282         bundle = mrouter->port;
4283         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4284                                name, sizeof name);
4285         ds_put_format(&ds, "%5s  %4d  querier             %3d\n",
4286                       name, mrouter->vlan,
4287                       mcast_mrouter_age(ofproto->ms, mrouter));
4288     }
4289     ovs_rwlock_unlock(&ofproto->ms->rwlock);
4290     unixctl_command_reply(conn, ds_cstr(&ds));
4291     ds_destroy(&ds);
4292 }
4293
4294 struct trace_ctx {
4295     struct xlate_out xout;
4296     struct xlate_in xin;
4297     const struct flow *key;
4298     struct flow flow;
4299     struct flow_wildcards wc;
4300     struct ds *result;
4301 };
4302
4303 static void
4304 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
4305 {
4306     const struct rule_actions *actions;
4307     ovs_be64 cookie;
4308
4309     ds_put_char_multiple(result, '\t', level);
4310     if (!rule) {
4311         ds_put_cstr(result, "No match\n");
4312         return;
4313     }
4314
4315     ovs_mutex_lock(&rule->up.mutex);
4316     cookie = rule->up.flow_cookie;
4317     ovs_mutex_unlock(&rule->up.mutex);
4318
4319     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
4320                   rule ? rule->up.table_id : 0, ntohll(cookie));
4321     cls_rule_format(&rule->up.cr, result);
4322     ds_put_char(result, '\n');
4323
4324     actions = rule_dpif_get_actions(rule);
4325
4326     ds_put_char_multiple(result, '\t', level);
4327     ds_put_cstr(result, "OpenFlow actions=");
4328     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
4329     ds_put_char(result, '\n');
4330 }
4331
4332 static void
4333 trace_format_flow(struct ds *result, int level, const char *title,
4334                   struct trace_ctx *trace)
4335 {
4336     ds_put_char_multiple(result, '\t', level);
4337     ds_put_format(result, "%s: ", title);
4338     /* Do not report unchanged flows for resubmits. */
4339     if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
4340         || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
4341         ds_put_cstr(result, "unchanged");
4342     } else {
4343         flow_format(result, &trace->xin.flow);
4344         trace->flow = trace->xin.flow;
4345     }
4346     ds_put_char(result, '\n');
4347 }
4348
4349 static void
4350 trace_format_regs(struct ds *result, int level, const char *title,
4351                   struct trace_ctx *trace)
4352 {
4353     size_t i;
4354
4355     ds_put_char_multiple(result, '\t', level);
4356     ds_put_format(result, "%s:", title);
4357     for (i = 0; i < FLOW_N_REGS; i++) {
4358         ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
4359     }
4360     ds_put_char(result, '\n');
4361 }
4362
4363 static void
4364 trace_format_odp(struct ds *result, int level, const char *title,
4365                  struct trace_ctx *trace)
4366 {
4367     struct ofpbuf *odp_actions = trace->xout.odp_actions;
4368
4369     ds_put_char_multiple(result, '\t', level);
4370     ds_put_format(result, "%s: ", title);
4371     format_odp_actions(result, ofpbuf_data(odp_actions),
4372                                ofpbuf_size(odp_actions));
4373     ds_put_char(result, '\n');
4374 }
4375
4376 static void
4377 trace_format_megaflow(struct ds *result, int level, const char *title,
4378                       struct trace_ctx *trace)
4379 {
4380     struct match match;
4381
4382     ds_put_char_multiple(result, '\t', level);
4383     ds_put_format(result, "%s: ", title);
4384     flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
4385     match_init(&match, trace->key, &trace->wc);
4386     match_format(&match, result, OFP_DEFAULT_PRIORITY);
4387     ds_put_char(result, '\n');
4388 }
4389
4390 static void
4391 trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
4392 {
4393     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4394     struct ds *result = trace->result;
4395
4396     ds_put_char(result, '\n');
4397     trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
4398     trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
4399     trace_format_odp(result,  recurse + 1, "Resubmitted  odp", trace);
4400     trace_format_megaflow(result, recurse + 1, "Resubmitted megaflow", trace);
4401     trace_format_rule(result, recurse + 1, rule);
4402 }
4403
4404 static void
4405 trace_report(struct xlate_in *xin, const char *s, int recurse)
4406 {
4407     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4408     struct ds *result = trace->result;
4409
4410     ds_put_char_multiple(result, '\t', recurse);
4411     ds_put_cstr(result, s);
4412     ds_put_char(result, '\n');
4413 }
4414
4415 /* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
4416  * forms are supported:
4417  *
4418  *     - [dpname] odp_flow [-generate | packet]
4419  *     - bridge br_flow [-generate | packet]
4420  *
4421  * On success, initializes '*ofprotop' and 'flow' and returns NULL.  On failure
4422  * returns a nonnull malloced error message. */
4423 static char * WARN_UNUSED_RESULT
4424 parse_flow_and_packet(int argc, const char *argv[],
4425                       struct ofproto_dpif **ofprotop, struct flow *flow,
4426                       struct ofpbuf **packetp)
4427 {
4428     const struct dpif_backer *backer = NULL;
4429     const char *error = NULL;
4430     char *m_err = NULL;
4431     struct simap port_names = SIMAP_INITIALIZER(&port_names);
4432     struct ofpbuf *packet;
4433     struct ofpbuf odp_key;
4434     struct ofpbuf odp_mask;
4435
4436     ofpbuf_init(&odp_key, 0);
4437     ofpbuf_init(&odp_mask, 0);
4438
4439     /* Handle "-generate" or a hex string as the last argument. */
4440     if (!strcmp(argv[argc - 1], "-generate")) {
4441         packet = ofpbuf_new(0);
4442         argc--;
4443     } else {
4444         error = eth_from_hex(argv[argc - 1], &packet);
4445         if (!error) {
4446             argc--;
4447         } else if (argc == 4) {
4448             /* The 3-argument form must end in "-generate" or a hex string. */
4449             goto exit;
4450         }
4451         error = NULL;
4452     }
4453
4454     /* odp_flow can have its in_port specified as a name instead of a port
4455      * number.  We do not yet know whether a given flow is an odp_flow or a
4456      * br_flow, but to test whether it is an odp_flow, via
4457      * odp_flow_from_string(), we need a simap from name to port number. */
4458     if (argc == 3) {
4459         const char *dp_type;
4460         if (!strncmp(argv[1], "ovs-", 4)) {
4461             dp_type = argv[1] + 4;
4462         } else {
4463             dp_type = argv[1];
4464         }
4465         backer = shash_find_data(&all_dpif_backers, dp_type);
4466     } else if (argc == 2) {
4467         struct shash_node *node;
4468         if (shash_count(&all_dpif_backers) == 1) {
4469             node = shash_first(&all_dpif_backers);
4470             backer = node->data;
4471         }
4472     } else {
4473         error = "Syntax error";
4474         goto exit;
4475     }
4476     if (backer && backer->dpif) {
4477         struct dpif_port dpif_port;
4478         struct dpif_port_dump port_dump;
4479         DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
4480             simap_put(&port_names, dpif_port.name,
4481                       odp_to_u32(dpif_port.port_no));
4482         }
4483     }
4484
4485     /* Parse the flow and determine whether a datapath or
4486      * bridge is specified.  If odp_flow_from_string() returns 0,
4487      * the flow is an odp_flow.  If parse_ofp_exact_flow() returns
4488      * NULL, the flow is a br_flow. */
4489     if (!odp_flow_from_string(argv[argc - 1], &port_names,
4490                               &odp_key, &odp_mask)) {
4491         if (!backer) {
4492             error = "Cannot find the datapath";
4493             goto exit;
4494         }
4495
4496         if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
4497                                  flow) == ODP_FIT_ERROR) {
4498             error = "Failed to parse flow key";
4499             goto exit;
4500         }
4501
4502         *ofprotop = xlate_lookup_ofproto(backer, flow,
4503                                          &flow->in_port.ofp_port);
4504         if (*ofprotop == NULL) {
4505             error = "Invalid datapath flow";
4506             goto exit;
4507         }
4508
4509         vsp_adjust_flow(*ofprotop, flow, NULL);
4510
4511     } else {
4512         char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);
4513
4514         if (err) {
4515             m_err = xasprintf("Bad flow syntax: %s", err);
4516             free(err);
4517             goto exit;
4518         } else {
4519             if (argc != 3) {
4520                 error = "Must specify bridge name";
4521                 goto exit;
4522             }
4523
4524             *ofprotop = ofproto_dpif_lookup(argv[1]);
4525             if (!*ofprotop) {
4526                 error = "Unknown bridge name";
4527                 goto exit;
4528             }
4529         }
4530     }
4531
4532     /* Generate a packet, if requested. */
4533     if (packet) {
4534         if (!ofpbuf_size(packet)) {
4535             flow_compose(packet, flow);
4536         } else {
4537             struct pkt_metadata md = pkt_metadata_from_flow(flow);
4538
4539             /* Use the metadata from the flow and the packet argument
4540              * to reconstruct the flow. */
4541             flow_extract(packet, &md, flow);
4542         }
4543     }
4544
4545 exit:
4546     if (error && !m_err) {
4547         m_err = xstrdup(error);
4548     }
4549     if (m_err) {
4550         ofpbuf_delete(packet);
4551         packet = NULL;
4552     }
4553     *packetp = packet;
4554     ofpbuf_uninit(&odp_key);
4555     ofpbuf_uninit(&odp_mask);
4556     simap_destroy(&port_names);
4557     return m_err;
4558 }
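
/* Illustrative argument vectors this accepts (values are hypothetical):
 *
 *     br_flow form:  { "br0", "in_port=1,dl_type=0x0800", "-generate" }
 *     odp_flow form: { "ovs-system", "in_port(1),eth_type(0x0800)" }
 *
 * The first names a bridge and an OpenFlow-style flow and asks for a
 * generated packet; the second names a datapath and a datapath-style flow
 * with no packet supplied. */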
4559
4560 static void
4561 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
4562                       void *aux OVS_UNUSED)
4563 {
4564     struct ofproto_dpif *ofproto;
4565     struct ofpbuf *packet;
4566     char *error;
4567     struct flow flow;
4568
4569     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4570     if (!error) {
4571         struct ds result;
4572
4573         ds_init(&result);
4574         ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
4575         unixctl_command_reply(conn, ds_cstr(&result));
4576         ds_destroy(&result);
4577         ofpbuf_delete(packet);
4578     } else {
4579         unixctl_command_reply_error(conn, error);
4580         free(error);
4581     }
4582 }
4583
4584 static void
4585 ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
4586                               const char *argv[], void *aux OVS_UNUSED)
4587 {
4588     enum ofputil_protocol usable_protocols;
4589     struct ofproto_dpif *ofproto;
4590     bool enforce_consistency;
4591     struct ofpbuf ofpacts;
4592     struct ofpbuf *packet;
4593     struct ds result;
4594     struct flow flow;
4595     uint16_t in_port;
4596
4597     /* Three kinds of error return values! */
4598     enum ofperr retval;
4599     char *error;
4600
4601     packet = NULL;
4602     ds_init(&result);
4603     ofpbuf_init(&ofpacts, 0);
4604
4605     /* Parse actions. */
4606     error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
4607     if (error) {
4608         unixctl_command_reply_error(conn, error);
4609         free(error);
4610         goto exit;
4611     }
4612
4613     /* OpenFlow 1.1 and later suggest that the switch enforces certain forms of
4614      * consistency between the flow and the actions.  With -consistent, we
4615      * enforce consistency even for a flow supported in OpenFlow 1.0. */
4616     if (!strcmp(argv[1], "-consistent")) {
4617         enforce_consistency = true;
4618         argv++;
4619         argc--;
4620     } else {
4621         enforce_consistency = false;
4622     }
4623
4624     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4625     if (error) {
4626         unixctl_command_reply_error(conn, error);
4627         free(error);
4628         goto exit;
4629     }
4630
4631     /* Do the same checks as handle_packet_out() in ofproto.c.
4632      *
4633      * We pass a 'table_id' of 0 to ofproto_check_ofpacts(), which isn't
4634      * strictly correct because these actions aren't in any table, but it's OK
4635      * because 'table_id' is used only to check goto_table instructions, and
4636      * packet-outs take a list of actions, which therefore can't include
4637      * instructions.
4638      *
4639      * We skip the "meter" check here because meter is an instruction, not an
4640      * action, and thus cannot appear in ofpacts. */
4641     in_port = ofp_to_u16(flow.in_port.ofp_port);
4642     if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
4643         unixctl_command_reply_error(conn, "invalid in_port");
4644         goto exit;
4645     }
4646     if (enforce_consistency) {
4647         retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
4648                                            &flow, u16_to_ofp(ofproto->up.max_ports),
4649                                            0, 0, usable_protocols);
4650     } else {
4651         retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
4652                                u16_to_ofp(ofproto->up.max_ports), 0, 0,
4653                                &usable_protocols);
4654     }
4655
4656     if (retval) {
4657         ds_clear(&result);
4658         ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
4659         unixctl_command_reply_error(conn, ds_cstr(&result));
4660         goto exit;
4661     }
4662
4663     ofproto_trace(ofproto, &flow, packet,
4664                   ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
4665     unixctl_command_reply(conn, ds_cstr(&result));
4666
4667 exit:
4668     ds_destroy(&result);
4669     ofpbuf_delete(packet);
4670     ofpbuf_uninit(&ofpacts);
4671 }
4672
4673 /* Implements a "trace" through 'ofproto''s flow table, appending a textual
4674  * description of the results to 'ds'.
4675  *
4676  * The trace follows a packet with the specified 'flow' through the flow
4677  * table.  'packet' may be nonnull to trace an actual packet, with consequent
4678  * side effects (if it is nonnull then its flow must be 'flow').
4679  *
4680  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
4681  * trace, otherwise the actions are determined by a flow table lookup. */
4682 static void
4683 ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
4684               const struct ofpbuf *packet,
4685               const struct ofpact ofpacts[], size_t ofpacts_len,
4686               struct ds *ds)
4687 {
4688     struct rule_dpif *rule;
4689     struct trace_ctx trace;
4690
4691     ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
4692     ds_put_cstr(ds, "Flow: ");
4693     flow_format(ds, flow);
4694     ds_put_char(ds, '\n');
4695
4696     flow_wildcards_init_catchall(&trace.wc);
4697     if (ofpacts) {
4698         rule = NULL;
4699     } else {
4700         rule_dpif_lookup(ofproto, flow, &trace.wc, &rule, false, NULL);
4701
4702         trace_format_rule(ds, 0, rule);
4703         if (rule == ofproto->miss_rule) {
4704             ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
4705         } else if (rule == ofproto->no_packet_in_rule) {
4706             ds_put_cstr(ds, "\nNo match, packets dropped because "
4707                         "OFPPC_NO_PACKET_IN is set on in_port.\n");
4708         } else if (rule == ofproto->drop_frags_rule) {
4709             ds_put_cstr(ds, "\nPackets dropped because they are IP fragments "
4710                         "and the fragment handling mode is \"drop\".\n");
4711         }
4712     }
4713
4714     if (rule || ofpacts) {
4715         trace.result = ds;
4716         trace.key = flow; /* Original flow key, used for megaflow. */
4717         trace.flow = *flow; /* May be modified by actions. */
4718         xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, rule,
4719                       ntohs(flow->tcp_flags), packet);
4720         if (ofpacts) {
4721             trace.xin.ofpacts = ofpacts;
4722             trace.xin.ofpacts_len = ofpacts_len;
4723         }
4724         trace.xin.resubmit_hook = trace_resubmit;
4725         trace.xin.report_hook = trace_report;
4726
4727         xlate_actions(&trace.xin, &trace.xout);
4728
4729         ds_put_char(ds, '\n');
4730         trace_format_flow(ds, 0, "Final flow", &trace);
4731         trace_format_megaflow(ds, 0, "Megaflow", &trace);
4732
4733         ds_put_cstr(ds, "Datapath actions: ");
4734         format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions),
4735                            ofpbuf_size(trace.xout.odp_actions));
4736
4737         if (trace.xout.slow) {
4738             enum slow_path_reason slow;
4739
4740             ds_put_cstr(ds, "\nThis flow is handled by the userspace "
4741                         "slow path because it:");
4742
4743             slow = trace.xout.slow;
4744             while (slow) {
4745                 enum slow_path_reason bit = rightmost_1bit(slow);
4746
4747                 ds_put_format(ds, "\n\t- %s.",
4748                               slow_path_reason_to_explanation(bit));
4749
4750                 slow &= ~bit;
4751             }
4752         }
4753
4754         xlate_out_uninit(&trace.xout);
4755     }
4756 }
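
/* Example (illustrative sketch): to trace a flow through the flow table
 * without supplying explicit actions, pass a null 'packet' and null
 * 'ofpacts':
 *
 *     struct ds result = DS_EMPTY_INITIALIZER;
 *
 *     ofproto_trace(ofproto, &flow, NULL, NULL, 0, &result);
 *     ...use ds_cstr(&result)...
 *     ds_destroy(&result);
 *
 * This mirrors how the "ofproto/trace" unixctl command, registered in
 * ofproto_dpif_unixctl_init() below, drives this function. */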
4757
4758 /* Stores the current ofprotos in 'ofproto_shash' and returns a sorted list
4759  * of the 'ofproto_shash' nodes.  It is the responsibility of the caller to
4760  * destroy 'ofproto_shash' and free the returned value. */
4761 static const struct shash_node **
4762 get_ofprotos(struct shash *ofproto_shash)
4763 {
4764     const struct ofproto_dpif *ofproto;
4765
4766     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4767         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
4768         shash_add_nocopy(ofproto_shash, name, ofproto);
4769     }
4770
4771     return shash_sort(ofproto_shash);
4772 }
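
/* Sketch of the expected calling pattern, as used by the unixctl handlers
 * below:
 *
 *     struct shash ofproto_shash;
 *     const struct shash_node **sorted;
 *     size_t i;
 *
 *     shash_init(&ofproto_shash);
 *     sorted = get_ofprotos(&ofproto_shash);
 *     for (i = 0; i < shash_count(&ofproto_shash); i++) {
 *         ...use sorted[i]->name and sorted[i]->data...
 *     }
 *     shash_destroy(&ofproto_shash);
 *     free(sorted);
 */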
4773
4774 static void
4775 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
4776                               const char *argv[] OVS_UNUSED,
4777                               void *aux OVS_UNUSED)
4778 {
4779     struct ds ds = DS_EMPTY_INITIALIZER;
4780     struct shash ofproto_shash;
4781     const struct shash_node **sorted_ofprotos;
4782     int i;
4783
4784     shash_init(&ofproto_shash);
4785     sorted_ofprotos = get_ofprotos(&ofproto_shash);
4786     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4787         const struct shash_node *node = sorted_ofprotos[i];
4788         ds_put_format(&ds, "%s\n", node->name);
4789     }
4790
4791     shash_destroy(&ofproto_shash);
4792     free(sorted_ofprotos);
4793
4794     unixctl_command_reply(conn, ds_cstr(&ds));
4795     ds_destroy(&ds);
4796 }
4797
4798 static void
4799 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
4800 {
4801     const struct shash_node **ofprotos;
4802     struct dpif_dp_stats dp_stats;
4803     struct shash ofproto_shash;
4804     size_t i;
4805
4806     dpif_get_dp_stats(backer->dpif, &dp_stats);
4807
4808     ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
4809                   dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
4810
4811     shash_init(&ofproto_shash);
4812     ofprotos = get_ofprotos(&ofproto_shash);
4813     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4814         struct ofproto_dpif *ofproto = ofprotos[i]->data;
4815         const struct shash_node **ports;
4816         size_t j;
4817
4818         if (ofproto->backer != backer) {
4819             continue;
4820         }
4821
4822         ds_put_format(ds, "\t%s:\n", ofproto->up.name);
4823
4824         ports = shash_sort(&ofproto->up.port_by_name);
4825         for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
4826             const struct shash_node *node = ports[j];
4827             struct ofport *ofport = node->data;
4828             struct smap config;
4829             odp_port_t odp_port;
4830
4831             ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
4832                           ofport->ofp_port);
4833
4834             odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
4835             if (odp_port != ODPP_NONE) {
4836                 ds_put_format(ds, "%"PRIu32":", odp_port);
4837             } else {
4838                 ds_put_cstr(ds, "none:");
4839             }
4840
4841             ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));
4842
4843             smap_init(&config);
4844             if (!netdev_get_config(ofport->netdev, &config)) {
4845                 const struct smap_node **nodes;
4846                 size_t i;
4847
4848                 nodes = smap_sort(&config);
4849                 for (i = 0; i < smap_count(&config); i++) {
4850                     const struct smap_node *node = nodes[i];
4851                     ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
4852                                   node->key, node->value);
4853                 }
4854                 free(nodes);
4855             }
4856             smap_destroy(&config);
4857
4858             ds_put_char(ds, ')');
4859             ds_put_char(ds, '\n');
4860         }
4861         free(ports);
4862     }
4863     shash_destroy(&ofproto_shash);
4864     free(ofprotos);
4865 }
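
/* Example "dpif/show" output for a single backer (names and counters are
 * hypothetical; the layout follows the format strings above):
 *
 *     system@ovs-system: hit:1234 missed:56
 *             br0:
 *                     br0 65534/1: (internal)
 *                     eth0 1/2: (system)
 */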
4866
4867 static void
4868 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4869                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
4870 {
4871     struct ds ds = DS_EMPTY_INITIALIZER;
4872     const struct shash_node **backers;
4873     int i;
4874
4875     backers = shash_sort(&all_dpif_backers);
4876     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
4877         dpif_show_backer(backers[i]->data, &ds);
4878     }
4879     free(backers);
4880
4881     unixctl_command_reply(conn, ds_cstr(&ds));
4882     ds_destroy(&ds);
4883 }
4884
4885 static void
4886 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
4887                                 int argc OVS_UNUSED, const char *argv[],
4888                                 void *aux OVS_UNUSED)
4889 {
4890     const struct ofproto_dpif *ofproto;
4891
4892     struct ds ds = DS_EMPTY_INITIALIZER;
4893     bool verbosity = false;
4894
4895     struct dpif_port dpif_port;
4896     struct dpif_port_dump port_dump;
4897     struct hmap portno_names;
4898
4899     struct dpif_flow_dump *flow_dump;
4900     struct dpif_flow_dump_thread *flow_dump_thread;
4901     struct dpif_flow f;
4902     int error;
4903
4904     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
4905     if (!ofproto) {
4906         unixctl_command_reply_error(conn, "no such bridge");
4907         return;
4908     }
4909
4910     if (argc > 2 && !strcmp(argv[1], "-m")) {
4911         verbosity = true;
4912     }
4913
4914     hmap_init(&portno_names);
4915     DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
4916         odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
4917     }
4918
4919     ds_init(&ds);
4920     flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
4921     flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
4922     while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
4923         struct flow flow;
4924
4925         if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
4926             || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
4927             continue;
4928         }
4929
4930         odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
4931                         &portno_names, &ds, verbosity);
4932         ds_put_cstr(&ds, ", ");
4933         dpif_flow_stats_format(&f.stats, &ds);
4934         ds_put_cstr(&ds, ", actions:");
4935         format_odp_actions(&ds, f.actions, f.actions_len);
4936         ds_put_char(&ds, '\n');
4937     }
4938     dpif_flow_dump_thread_destroy(flow_dump_thread);
4939     error = dpif_flow_dump_destroy(flow_dump);
4940
4941     if (error) {
4942         ds_clear(&ds);
4943         ds_put_format(&ds, "dpif/dump-flows failed: %s", ovs_strerror(error));
4944         unixctl_command_reply_error(conn, ds_cstr(&ds));
4945     } else {
4946         unixctl_command_reply(conn, ds_cstr(&ds));
4947     }
4948     odp_portno_names_destroy(&portno_names);
4949     hmap_destroy(&portno_names);
4950     ds_destroy(&ds);
4951 }
4952
4953 static void
4954 ofproto_dpif_unixctl_init(void)
4955 {
4956     static bool registered;
4957     if (registered) {
4958         return;
4959     }
4960     registered = true;
4961
4962     unixctl_command_register(
4963         "ofproto/trace",
4964         "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
4965         1, 3, ofproto_unixctl_trace, NULL);
4966     unixctl_command_register(
4967         "ofproto/trace-packet-out",
4968         "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
4969         2, 6, ofproto_unixctl_trace_actions, NULL);
4970     unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
4971                              ofproto_unixctl_fdb_flush, NULL);
4972     unixctl_command_register("fdb/show", "bridge", 1, 1,
4973                              ofproto_unixctl_fdb_show, NULL);
4974     unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
4975                              ofproto_unixctl_mcast_snooping_flush, NULL);
4976     unixctl_command_register("mdb/show", "bridge", 1, 1,
4977                              ofproto_unixctl_mcast_snooping_show, NULL);
4978     unixctl_command_register("dpif/dump-dps", "", 0, 0,
4979                              ofproto_unixctl_dpif_dump_dps, NULL);
4980     unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
4981                              NULL);
4982     unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
4983                              ofproto_unixctl_dpif_dump_flows, NULL);
4984 }
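
/* All of the commands registered above are invoked through ovs-appctl, for
 * example (bridge name hypothetical):
 *
 *     ovs-appctl dpif/dump-dps
 *     ovs-appctl dpif/show
 *     ovs-appctl dpif/dump-flows -m br0
 *     ovs-appctl fdb/show br0
 */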
4985
4986 /* Returns true if 'table_id' designates the table used for internal rules,
4987  * false otherwise. */
4988 bool
4989 table_is_internal(uint8_t table_id)
4990 {
4991     return table_id == TBL_INTERNAL;
4992 }
4993 \f
4994 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10).
4995  *
4996  * This is deprecated.  It is only for compatibility with broken device drivers
4997  * in old versions of Linux that do not properly support VLANs when VLAN
4998  * devices are not used.  When broken device drivers are no longer in
4999  * widespread use, we will delete these interfaces. */
5000
5001 static int
5002 set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
5003 {
5004     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
5005     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
5006
5007     if (realdev_ofp_port == ofport->realdev_ofp_port
5008         && vid == ofport->vlandev_vid) {
5009         return 0;
5010     }
5011
5012     ofproto->backer->need_revalidate = REV_RECONFIGURE;
5013
5014     if (ofport->realdev_ofp_port) {
5015         vsp_remove(ofport);
5016     }
5017     if (realdev_ofp_port && ofport->bundle) {
5018         /* vlandevs are enslaved to their realdevs, so they are not allowed
5019          * to be part of a bundle themselves. */
5020         bundle_set(ofport_->ofproto, ofport->bundle, NULL);
5021     }
5022
5023     ofport->realdev_ofp_port = realdev_ofp_port;
5024     ofport->vlandev_vid = vid;
5025
5026     if (realdev_ofp_port) {
5027         vsp_add(ofport, realdev_ofp_port, vid);
5028     }
5029
5030     return 0;
5031 }
5032
5033 static uint32_t
5034 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
5035 {
5036     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
5037 }
5038
5039 bool
5040 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
5041     OVS_EXCLUDED(ofproto->vsp_mutex)
5042 {
5043     /* hmap_is_empty is thread safe. */
5044     return !hmap_is_empty(&ofproto->realdev_vid_map);
5045 }
5046
5048 static ofp_port_t
5049 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
5050                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5051     OVS_REQUIRES(ofproto->vsp_mutex)
5052 {
5053     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
5054         int vid = vlan_tci_to_vid(vlan_tci);
5055         const struct vlan_splinter *vsp;
5056
5057         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
5058                                  hash_realdev_vid(realdev_ofp_port, vid),
5059                                  &ofproto->realdev_vid_map) {
5060             if (vsp->realdev_ofp_port == realdev_ofp_port
5061                 && vsp->vid == vid) {
5062                 return vsp->vlandev_ofp_port;
5063             }
5064         }
5065     }
5066     return realdev_ofp_port;
5067 }
5068
5069 /* Returns the OFP port number of the Linux VLAN device that corresponds to
5070  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
5071  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
5072  * 'vlan_tci' 9, it would return the port number of eth0.9.
5073  *
5074  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
5075  * function just returns its 'realdev_ofp_port' argument. */
5076 ofp_port_t
5077 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
5078                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5079     OVS_EXCLUDED(ofproto->vsp_mutex)
5080 {
5081     ofp_port_t ret;
5082
5083     /* hmap_is_empty() is thread safe; see if we can return immediately. */
5084     if (hmap_is_empty(&ofproto->realdev_vid_map)) {
5085         return realdev_ofp_port;
5086     }
5087     ovs_mutex_lock(&ofproto->vsp_mutex);
5088     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
5089     ovs_mutex_unlock(&ofproto->vsp_mutex);
5090     return ret;
5091 }
5092
5093 static struct vlan_splinter *
5094 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
5095 {
5096     struct vlan_splinter *vsp;
5097
5098     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
5099                              hash_ofp_port(vlandev_ofp_port),
5100                              &ofproto->vlandev_map) {
5101         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
5102             return vsp;
5103         }
5104     }
5105
5106     return NULL;
5107 }
5108
5109 /* Returns the OpenFlow port number of the "real" device underlying the Linux
5110  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
5111  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
5112  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
5113  * eth0 and store 9 in '*vid'.
5114  *
5115  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
5116  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
5117  * always does. */
5118 static ofp_port_t
5119 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
5120                        ofp_port_t vlandev_ofp_port, int *vid)
5121     OVS_REQUIRES(ofproto->vsp_mutex)
5122 {
5123     if (!hmap_is_empty(&ofproto->vlandev_map)) {
5124         const struct vlan_splinter *vsp;
5125
5126         vsp = vlandev_find(ofproto, vlandev_ofp_port);
5127         if (vsp) {
5128             if (vid) {
5129                 *vid = vsp->vid;
5130             }
5131             return vsp->realdev_ofp_port;
5132         }
5133     }
5134     return 0;
5135 }
5136
5137 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
5138  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
5139  * 'flow->in_port' to the "real" device backing the VLAN device, sets
5140  * 'flow->vlan_tci' to the VLAN VID, pushes the appropriate VLAN header onto
5141  * 'packet' (if 'packet' is nonnull), and returns true.  Otherwise (which is
5142  * always the case unless VLAN splinters are enabled), returns false without
5143  * making any changes. */
5144 bool
5145 vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
5146                 struct ofpbuf *packet)
5147     OVS_EXCLUDED(ofproto->vsp_mutex)
5148 {
5149     ofp_port_t realdev;
5150     int vid;
5151
5152     /* hmap_is_empty is thread safe. */
5153     if (hmap_is_empty(&ofproto->vlandev_map)) {
5154         return false;
5155     }
5156
5157     ovs_mutex_lock(&ofproto->vsp_mutex);
5158     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
5159     ovs_mutex_unlock(&ofproto->vsp_mutex);
5160     if (!realdev) {
5161         return false;
5162     }
5163
5164     /* Cause the flow to be processed as if it came in on the real device with
5165      * the VLAN device's VLAN ID. */
5166     flow->in_port.ofp_port = realdev;
5167     flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
5168
5169     if (packet) {
5170         /* Make the packet resemble the flow, so that it gets sent to an
5171          * OpenFlow controller properly, so that it looks correct for sFlow,
5172          * and so that flow_extract() will get the correct vlan_tci if it is
5173          * called on 'packet'. */
5174         eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
5175     }
5176
5177     return true;
5178 }
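
/* Sketch of the intended use of vsp_adjust_flow() (illustrative): after
 * extracting 'flow' from a received 'packet',
 *
 *     if (vsp_adjust_flow(ofproto, &flow, packet)) {
 *         ...'flow->in_port' now names the real device and both 'flow'
 *         and 'packet' carry the VLAN device's 802.1Q tag...
 *     }
 */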
5179
5180 static void
5181 vsp_remove(struct ofport_dpif *port)
5182 {
5183     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5184     struct vlan_splinter *vsp;
5185
5186     ovs_mutex_lock(&ofproto->vsp_mutex);
5187     vsp = vlandev_find(ofproto, port->up.ofp_port);
5188     if (vsp) {
5189         hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
5190         hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
5191         free(vsp);
5192
5193         port->realdev_ofp_port = 0;
5194     } else {
5195         VLOG_ERR("missing vlan device record");
5196     }
5197     ovs_mutex_unlock(&ofproto->vsp_mutex);
5198 }
5199
5200 static void
5201 vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
5202 {
5203     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5204
5205     ovs_mutex_lock(&ofproto->vsp_mutex);
5206     if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
5207         && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
5208             == realdev_ofp_port)) {
5209         struct vlan_splinter *vsp;
5210
5211         vsp = xmalloc(sizeof *vsp);
5212         vsp->realdev_ofp_port = realdev_ofp_port;
5213         vsp->vlandev_ofp_port = port->up.ofp_port;
5214         vsp->vid = vid;
5215
5216         port->realdev_ofp_port = realdev_ofp_port;
5217
5218         hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
5219                     hash_ofp_port(port->up.ofp_port));
5220         hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
5221                     hash_realdev_vid(realdev_ofp_port, vid));
5222     } else {
5223         VLOG_ERR("duplicate vlan device record");
5224     }
5225     ovs_mutex_unlock(&ofproto->vsp_mutex);
5226 }
5227
5228 static odp_port_t
5229 ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
5230 {
5231     const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
5232     return ofport ? ofport->odp_port : ODPP_NONE;
5233 }
5234
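/* Returns the ofport_dpif in 'backer' that corresponds to datapath port
 * number 'odp_port', or NULL if there is none.  Takes 'backer''s
 * odp_to_ofport_lock for reading. */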
5235 struct ofport_dpif *
5236 odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
5237 {
5238     struct ofport_dpif *port;
5239
5240     ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
5241     HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
5242                              &backer->odp_to_ofport_map) {
5243         if (port->odp_port == odp_port) {
5244             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5245             return port;
5246         }
5247     }
5248
5249     ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5250     return NULL;
5251 }
5252
5253 static ofp_port_t
5254 odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
5255 {
5256     struct ofport_dpif *port;
5257
5258     port = odp_port_to_ofport(ofproto->backer, odp_port);
5259     if (port && &ofproto->up == port->up.ofproto) {
5260         return port->up.ofp_port;
5261     } else {
5262         return OFPP_NONE;
5263     }
5264 }
5265
5266 uint32_t
5267 ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
5268 {
5269     struct dpif_backer *backer = ofproto->backer;
5270
5271     return recirc_id_alloc(backer->rid_pool);
5272 }
5273
5274 void
5275 ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
5276 {
5277     struct dpif_backer *backer = ofproto->backer;
5278
5279     recirc_id_free(backer->rid_pool, recirc_id);
5280 }
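
/* Recirculation IDs come from the backer-wide pool, so an ID allocated here
 * must eventually be returned through the same ofproto, e.g. (sketch):
 *
 *     uint32_t id = ofproto_dpif_alloc_recirc_id(ofproto);
 *     ...reference 'id' from datapath flows...
 *     ofproto_dpif_free_recirc_id(ofproto, id);
 */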
5281
5282 int
5283 ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
5284                                const struct match *match, int priority,
5285                                uint16_t idle_timeout,
5286                                const struct ofpbuf *ofpacts,
5287                                struct rule **rulep)
5288 {
5289     struct ofputil_flow_mod fm;
5290     struct rule_dpif *rule;
5291     int error;
5292
5293     fm.match = *match;
5294     fm.priority = priority;
5295     fm.new_cookie = htonll(0);
5296     fm.cookie = htonll(0);
5297     fm.cookie_mask = htonll(0);
5298     fm.modify_cookie = false;
5299     fm.table_id = TBL_INTERNAL;
5300     fm.command = OFPFC_ADD;
5301     fm.idle_timeout = idle_timeout;
5302     fm.hard_timeout = 0;
5303     fm.buffer_id = 0;
5304     fm.out_port = 0;
5305     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5306     fm.ofpacts = ofpbuf_data(ofpacts);
5307     fm.ofpacts_len = ofpbuf_size(ofpacts);
5308
5309     error = ofproto_flow_mod(&ofproto->up, &fm);
5310     if (error) {
5311         VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
5312                     ofperr_to_string(error));
5313         *rulep = NULL;
5314         return error;
5315     }
5316
5317     rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
5318                                      &fm.match.wc, false);
5319     if (rule) {
5320         *rulep = &rule->up;
5321     } else {
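        /* The flow was just added above, so the lookup must find it. */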
5322         OVS_NOT_REACHED();
5323     }
5324     return 0;
5325 }
5326
5327 int
5328 ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
5329                                   struct match *match, int priority)
5330 {
5331     struct ofputil_flow_mod fm;
5332     int error;
5333
5334     fm.match = *match;
5335     fm.priority = priority;
5336     fm.new_cookie = htonll(0);
5337     fm.cookie = htonll(0);
5338     fm.cookie_mask = htonll(0);
5339     fm.modify_cookie = false;
5340     fm.table_id = TBL_INTERNAL;
5341     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5342     fm.command = OFPFC_DELETE_STRICT;
5343
5344     error = ofproto_flow_mod(&ofproto->up, &fm);
5345     if (error) {
5346         VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
5347                     ofperr_to_string(error));
5348         return error;
5349     }
5350
5351     return 0;
5352 }
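
/* Sketch of a caller pairing the two functions above (match, priority, and
 * actions are illustrative):
 *
 *     struct ofpbuf ofpacts;
 *     struct match match;
 *     struct rule *rule;
 *
 *     ofpbuf_init(&ofpacts, 0);
 *     match_init_catchall(&match);
 *     ...append actions to 'ofpacts' and narrow down 'match'...
 *     if (!ofproto_dpif_add_internal_flow(ofproto, &match, 100, 0,
 *                                         &ofpacts, &rule)) {
 *         ...
 *         ofproto_dpif_delete_internal_flow(ofproto, &match, 100);
 *     }
 *     ofpbuf_uninit(&ofpacts);
 */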
5353
5354 const struct ofproto_class ofproto_dpif_class = {
5355     init,
5356     enumerate_types,
5357     enumerate_names,
5358     del,
5359     port_open_type,
5360     type_run,
5361     type_wait,
5362     alloc,
5363     construct,
5364     destruct,
5365     dealloc,
5366     run,
5367     wait,
5368     NULL,                       /* get_memory_usage. */
5369     type_get_memory_usage,
5370     flush,
5371     query_tables,
5372     port_alloc,
5373     port_construct,
5374     port_destruct,
5375     port_dealloc,
5376     port_modified,
5377     port_reconfigured,
5378     port_query_by_name,
5379     port_add,
5380     port_del,
5381     port_get_stats,
5382     port_dump_start,
5383     port_dump_next,
5384     port_dump_done,
5385     port_poll,
5386     port_poll_wait,
5387     port_is_lacp_current,
5388     NULL,                       /* rule_choose_table */
5389     rule_alloc,
5390     rule_construct,
5391     rule_insert,
5392     rule_delete,
5393     rule_destruct,
5394     rule_dealloc,
5395     rule_get_stats,
5396     rule_execute,
5397     NULL,                       /* rule_premodify_actions */
5398     rule_modify_actions,
5399     set_frag_handling,
5400     packet_out,
5401     set_netflow,
5402     get_netflow_ids,
5403     set_sflow,
5404     set_ipfix,
5405     set_cfm,
5406     cfm_status_changed,
5407     get_cfm_status,
5408     set_bfd,
5409     bfd_status_changed,
5410     get_bfd_status,
5411     set_stp,
5412     get_stp_status,
5413     set_stp_port,
5414     get_stp_port_status,
5415     get_stp_port_stats,
5416     set_rstp,
5417     get_rstp_status,
5418     set_rstp_port,
5419     get_rstp_port_status,
5420     set_queues,
5421     bundle_set,
5422     bundle_remove,
5423     mirror_set__,
5424     mirror_get_stats__,
5425     set_flood_vlans,
5426     is_mirror_output_bundle,
5427     forward_bpdu_changed,
5428     set_mac_table_config,
5429     set_mcast_snooping,
5430     set_mcast_snooping_port,
5431     set_realdev,
5432     NULL,                       /* meter_get_features */
5433     NULL,                       /* meter_set */
5434     NULL,                       /* meter_get */
5435     NULL,                       /* meter_del */
5436     group_alloc,                /* group_alloc */
5437     group_construct,            /* group_construct */
5438     group_destruct,             /* group_destruct */
5439     group_dealloc,              /* group_dealloc */
5440     group_modify,               /* group_modify */
5441     group_get_stats,            /* group_get_stats */
5442 };