dpif: Index flows using unique identifiers.
[cascardo/ovs.git] / ofproto / ofproto-dpif.c
1 /*
2  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <config.h>
18
19 #include "ofproto/ofproto-dpif.h"
20 #include "ofproto/ofproto-provider.h"
21
22 #include <errno.h>
23
24 #include "bfd.h"
25 #include "bond.h"
26 #include "bundle.h"
27 #include "byte-order.h"
28 #include "connectivity.h"
29 #include "connmgr.h"
30 #include "coverage.h"
31 #include "cfm.h"
32 #include "dpif.h"
33 #include "dynamic-string.h"
34 #include "fail-open.h"
35 #include "guarded-list.h"
36 #include "hmapx.h"
37 #include "lacp.h"
38 #include "learn.h"
39 #include "mac-learning.h"
40 #include "mcast-snooping.h"
41 #include "meta-flow.h"
42 #include "multipath.h"
43 #include "netdev-vport.h"
44 #include "netdev.h"
45 #include "netlink.h"
46 #include "nx-match.h"
47 #include "odp-util.h"
48 #include "odp-execute.h"
49 #include "ofp-util.h"
50 #include "ofpbuf.h"
51 #include "ofp-actions.h"
52 #include "ofp-parse.h"
53 #include "ofp-print.h"
54 #include "ofproto-dpif-ipfix.h"
55 #include "ofproto-dpif-mirror.h"
56 #include "ofproto-dpif-monitor.h"
57 #include "ofproto-dpif-rid.h"
58 #include "ofproto-dpif-sflow.h"
59 #include "ofproto-dpif-upcall.h"
60 #include "ofproto-dpif-xlate.h"
61 #include "poll-loop.h"
62 #include "ovs-router.h"
63 #include "seq.h"
64 #include "simap.h"
65 #include "smap.h"
66 #include "timer.h"
67 #include "tunnel.h"
68 #include "unaligned.h"
69 #include "unixctl.h"
70 #include "vlan-bitmap.h"
71 #include "vlog.h"
72
73 VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
74
75 COVERAGE_DEFINE(ofproto_dpif_expired);
76 COVERAGE_DEFINE(packet_in_overflow);
77
78 struct flow_miss;
79
struct rule_dpif {
    struct rule up;             /* Base class; must be first (see RULE_CAST
                                 * BUILD_ASSERT_DECL below). */

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;  /* Guards 'stats'. */
    struct dpif_flow_stats stats OVS_GUARDED;

    /* If non-zero then the recirculation id that has
     * been allocated for use with this rule.
     * The recirculation id and associated internal flow should
     * be freed when the rule is freed */
    uint32_t recirc_id;
};
96
97 /* RULE_CAST() depends on this. */
98 BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
99
100 static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
101                            long long int *used);
102 static struct rule_dpif *rule_dpif_cast(const struct rule *);
103 static void rule_expire(struct rule_dpif *);
104
struct group_dpif {
    struct ofgroup up;          /* Base class. */

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;       /* Guards the two counters below. */
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
};
116
/* A set of one or more ports treated as a unit for VLAN, LACP, and bonding
 * purposes (a bundle with more than one port is a bond). */
struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
};
136
137 static void bundle_remove(struct ofport *);
138 static void bundle_update(struct ofbundle *);
139 static void bundle_destroy(struct ofbundle *);
140 static void bundle_del_port(struct ofport_dpif *);
141 static void bundle_run(struct ofbundle *);
142 static void bundle_wait(struct ofbundle *);
143 static void bundle_flush_macs(struct ofbundle *, bool);
144 static void bundle_move(struct ofbundle *, struct ofbundle *);
145
146 static void stp_run(struct ofproto_dpif *ofproto);
147 static void stp_wait(struct ofproto_dpif *ofproto);
148 static int set_stp_port(struct ofport *,
149                         const struct ofproto_port_stp_settings *);
150
151 static void rstp_run(struct ofproto_dpif *ofproto);
152 static void set_rstp_port(struct ofport *,
153                          const struct ofproto_port_rstp_settings *);
154
/* An OpenFlow port within a datapath-based ("dpif") ofproto. */
struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;               /* Base class. */

    odp_port_t odp_port;        /* Datapath port number. */
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    bool is_layer3;             /* This is a layer 3 port. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;  /* When 'stp_state' was last changed. */

    /* Rapid Spanning Tree. */
    struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
    enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;  /* Array of queue-to-DSCP entries. */
    size_t n_qdscp;                    /* Number of entries in 'qdscp'. */

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
    int vlandev_vid;
};
192
193 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
194  *
195  * This is deprecated.  It is only for compatibility with broken device drivers
196  * in old versions of Linux that do not properly support VLANs when VLAN
197  * devices are not used.  When broken device drivers are no longer in
198  * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node; /* In ofproto's "realdev_vid_map". */
    struct hmap_node vlandev_node;     /* In ofproto's "vlandev_map". */
    ofp_port_t realdev_ofp_port;       /* Underlying real device's port. */
    ofp_port_t vlandev_ofp_port;       /* VLAN device's port (e.g. eth0.10). */
    int vid;                           /* VLAN ID. */
};
206
207 static void vsp_remove(struct ofport_dpif *);
208 static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);
209
210 static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
211                                        ofp_port_t);
212
213 static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
214                                        odp_port_t);
215
216 static struct ofport_dpif *
217 ofport_dpif_cast(const struct ofport *ofport)
218 {
219     return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
220 }
221
222 static void port_run(struct ofport_dpif *);
223 static int set_bfd(struct ofport *, const struct smap *);
224 static int set_cfm(struct ofport *, const struct cfm_settings *);
225 static void ofport_update_peer(struct ofport_dpif *);
226
/* Reasons that we might need to revalidate every datapath flow, and
 * corresponding coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed.  (Starts at 1
                                * so that zero means "no revalidation"). */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_RSTP,                  /* RSTP port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
};
245 COVERAGE_DEFINE(rev_reconfigure);
246 COVERAGE_DEFINE(rev_stp);
247 COVERAGE_DEFINE(rev_rstp);
248 COVERAGE_DEFINE(rev_bond);
249 COVERAGE_DEFINE(rev_port_toggled);
250 COVERAGE_DEFINE(rev_flow_table);
251 COVERAGE_DEFINE(rev_mac_learning);
252 COVERAGE_DEFINE(rev_mcast_snooping);
253
/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
    char *type;                 /* Datapath type; key in 'all_dpif_backers'. */
    int refcount;               /* Number of ofprotos sharing this backer. */
    struct dpif *dpif;          /* Datapath connection. */
    struct udpif *udpif;        /* Upcall handler for 'dpif'. */

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    enum revalidate_reason need_revalidate; /* Revalidate all flows. */

    bool recv_set_enable; /* Enables or disables receiving packets. */

    /* Recirculation. */
    struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
    bool enable_recirc;   /* True if the datapath supports recirculation */

    /* True if the datapath supports variable-length
     * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
     * False if the datapath supports only 8-byte (or shorter) userdata. */
    bool variable_length_userdata;

    /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET
     * actions. */
    bool masked_set_action;

    /* Maximum number of MPLS label stack entries that the datapath supports
     * in a match */
    size_t max_mpls_depth;

    /* Version string of the datapath stored in OVSDB. */
    char *dp_version_string;

    /* True if the datapath supports tnl_push and pop actions. */
    bool enable_tnl_push_pop;
    struct atomic_count tnl_count;  /* Number of tunnel ports in use. */
};
294
295 /* All existing ofproto_backer instances, indexed by ofproto->up.type. */
296 static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
297
/* A bridge based on a "dpif" datapath. */
struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;                       /* Base class. */
    struct dpif_backer *backer; /* Datapath backer, shared among all
                                 * ofprotos of the same datapath type. */

    uint64_t dump_seq; /* Last read of udpif_dump_seq(). */

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    struct mcast_snooping *ms;
    bool has_bonded_bundles;
    bool lacp_enabled;
    struct mbridge *mbridge;

    struct ovs_mutex stats_mutex;          /* Guards 'stats'. */
    struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
                                            * consumed in userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* Rapid Spanning Tree. */
    struct rstp *rstp;
    long long int rstp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    /* Ports. */
    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */
    uint64_t change_seq;           /* Connectivity status changes. */

    /* Work queues. */
    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
    struct seq *pins_seq;          /* For notifying 'pins' reception. */
    uint64_t pins_seqno;
};
350
351 /* All existing ofproto_dpif instances, indexed by ->up.name. */
352 static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
353
354 static bool ofproto_use_tnl_push_pop = true;
355 static void ofproto_unixctl_init(void);
356
/* Returns the ofproto_dpif that contains 'ofproto'.  Asserts that 'ofproto'
 * really is a dpif-based ofproto before downcasting. */
static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}
363
/* Returns the maximum number of MPLS label stack entries that 'ofproto''s
 * datapath supports in a match. */
size_t
ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->max_mpls_depth;
}
369
/* Returns true if 'ofproto''s datapath supports recirculation. */
bool
ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->enable_recirc;
}
375
376 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
377                                         ofp_port_t ofp_port);
378 static void ofproto_trace(struct ofproto_dpif *, struct flow *,
379                           const struct ofpbuf *packet,
380                           const struct ofpact[], size_t ofpacts_len,
381                           struct ds *);
382
383 /* Global variables. */
384 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
385
386 /* Initial mappings of port to bridge mappings. */
387 static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
388
/* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
 * it.  (Thin wrapper that forwards to the base ofproto layer.) */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ofproto_flow_mod(&ofproto->up, fm);
}
397
/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->packet. */
void
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                            struct ofproto_packet_in *pin)
{
    /* The queue is capped at 1024 entries; on overflow, drop and free the
     * packet-in rather than letting the queue grow without bound. */
    if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
        COVERAGE_INC(packet_in_overflow);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }

    /* Wakes up main thread for packet-in I/O. */
    seq_change(ofproto->pins_seq);
}
413
/* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the
 * packet rather than to send the packet to the controller.
 *
 * This function returns true to indicate that a packet_in message
 * for a "table-miss" should be sent to at least one controller, false
 * otherwise.  (The original comment said "false" for both outcomes,
 * which contradicted itself and the return value below.) */
bool
ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
{
    return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
}
425 \f
426 /* Factory functions. */
427
428 static void
429 init(const struct shash *iface_hints)
430 {
431     struct shash_node *node;
432
433     /* Make a local copy, since we don't own 'iface_hints' elements. */
434     SHASH_FOR_EACH(node, iface_hints) {
435         const struct iface_hint *orig_hint = node->data;
436         struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
437
438         new_hint->br_name = xstrdup(orig_hint->br_name);
439         new_hint->br_type = xstrdup(orig_hint->br_type);
440         new_hint->ofp_port = orig_hint->ofp_port;
441
442         shash_add(&init_ofp_ports, node->name, new_hint);
443     }
444 }
445
/* Adds to 'types' the set of datapath types supported by this build. */
static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}
451
452 static int
453 enumerate_names(const char *type, struct sset *names)
454 {
455     struct ofproto_dpif *ofproto;
456
457     sset_clear(names);
458     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
459         if (strcmp(type, ofproto->up.type)) {
460             continue;
461         }
462         sset_add(names, ofproto->up.name);
463     }
464
465     return 0;
466 }
467
/* Deletes the datapath named 'name' of type 'type'.  Returns 0 on success,
 * otherwise a positive errno value from dpif_open() or dpif_delete(). */
static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error = dpif_open(name, type, &dpif);

    if (error) {
        return error;
    }

    error = dpif_delete(dpif);
    dpif_close(dpif);
    return error;
}
481 \f
/* Thin wrapper around dpif_port_open_type(): maps ('datapath_type',
 * 'port_type') to the type with which the port should be opened. */
static const char *
port_open_type(const char *datapath_type, const char *port_type)
{
    return dpif_port_open_type(datapath_type, port_type);
}
487
488 /* Type functions. */
489
490 static void process_dpif_port_changes(struct dpif_backer *);
491 static void process_dpif_all_ports_changed(struct dpif_backer *);
492 static void process_dpif_port_change(struct dpif_backer *,
493                                      const char *devname);
494 static void process_dpif_port_error(struct dpif_backer *, int error);
495
496 static struct ofproto_dpif *
497 lookup_ofproto_dpif_by_port_name(const char *name)
498 {
499     struct ofproto_dpif *ofproto;
500
501     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
502         if (sset_contains(&ofproto->ports, name)) {
503             return ofproto;
504         }
505     }
506
507     return NULL;
508 }
509
/* Performs periodic activity required by all the ofprotos whose backer has
 * datapath type 'type': runs the dpif and upcall handler, garbage-collects
 * tunnel backing ports, and pushes configuration into the xlate module when
 * revalidation is needed.  Returns 0 on success, otherwise a positive errno
 * value. */
static int
type_run(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return 0;
    }


    if (dpif_run(backer->dpif)) {
        backer->need_revalidate = REV_RECONFIGURE;
    }

    udpif_run(backer->udpif);

    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
        int error;

        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
        if (error) {
            VLOG_ERR("Failed to enable receiving packets in dpif.");
            return error;
        }
        /* Start from a clean slate: any flows installed while we were not
         * receiving packets are flushed and everything is revalidated. */
        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);

    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection.  Move all known tunnel backers
         * aside, re-add the ones still referenced by some tunnel port, and
         * delete whatever is left over at the end. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {
                continue;
            }

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
                const char *dp_port;

                if (!iter->is_tunnel) {
                    continue;
                }

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);
                if (node) {
                    /* Still in use: move it back into 'tnl_backers'. */
                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);
                } else {
                    node = simap_find(&backer->tnl_backers, dp_port);
                    if (!node) {
                        /* New tunnel backer: add the port to the datapath. */
                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,
                                           &odp_port)) {
                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);
                        }
                    }
                }

                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,
                                         iter->odp_port,
                                         ovs_native_tunneling_is_on(ofproto), dp_port)) {
                    backer->need_revalidate = REV_RECONFIGURE;
                }
            }
        }

        /* Whatever remains in 'tmp_backers' is no longer referenced by any
         * tunnel port, so delete those datapath ports. */
        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);

        /* Account the reason for this revalidation in coverage counters. */
        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
        case REV_STP:            COVERAGE_INC(rev_stp);            break;
        case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
        case REV_BOND:           COVERAGE_INC(rev_bond);           break;
        case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
        case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
        case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
        case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
        }
        backer->need_revalidate = 0;

        /* Push the current configuration of every ofproto on this backer
         * (bridge, bundles, ports) into the xlate module, in one
         * transaction per ofproto. */
        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *ofport;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {
                continue;
            }

            xlate_txn_start();
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->ml,
                              ofproto->stp, ofproto->rstp, ofproto->ms,
                              ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
                              ofproto->netflow,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
                              ofproto->backer->enable_recirc,
                              ofproto->backer->variable_length_userdata,
                              ofproto->backer->max_mpls_depth,
                              ofproto->backer->masked_set_action);

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,
                                 bundle->floodable);
            }

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)
                    : -1;
                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->rstp_port, ofport->qdscp,
                                 ofport->n_qdscp, ofport->up.pp.config,
                                 ofport->up.pp.state, ofport->is_tunnel,
                                 ofport->may_enable);
            }
            xlate_txn_commit();
        }

        udpif_revalidate(backer->udpif);
    }

    process_dpif_port_changes(backer);

    return 0;
}
675
676 /* Check for and handle port changes in 'backer''s dpif. */
677 static void
678 process_dpif_port_changes(struct dpif_backer *backer)
679 {
680     for (;;) {
681         char *devname;
682         int error;
683
684         error = dpif_port_poll(backer->dpif, &devname);
685         switch (error) {
686         case EAGAIN:
687             return;
688
689         case ENOBUFS:
690             process_dpif_all_ports_changed(backer);
691             break;
692
693         case 0:
694             process_dpif_port_change(backer, devname);
695             free(devname);
696             break;
697
698         default:
699             process_dpif_port_error(backer, error);
700             break;
701         }
702     }
703 }
704
705 static void
706 process_dpif_all_ports_changed(struct dpif_backer *backer)
707 {
708     struct ofproto_dpif *ofproto;
709     struct dpif_port dpif_port;
710     struct dpif_port_dump dump;
711     struct sset devnames;
712     const char *devname;
713
714     sset_init(&devnames);
715     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
716         if (ofproto->backer == backer) {
717             struct ofport *ofport;
718
719             HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
720                 sset_add(&devnames, netdev_get_name(ofport->netdev));
721             }
722         }
723     }
724     DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
725         sset_add(&devnames, dpif_port.name);
726     }
727
728     SSET_FOR_EACH (devname, &devnames) {
729         process_dpif_port_change(backer, devname);
730     }
731     sset_destroy(&devnames);
732 }
733
/* Handles a possible addition, removal, or renumbering of the datapath port
 * named 'devname' in 'backer'. */
static void
process_dpif_port_change(struct dpif_backer *backer, const char *devname)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port port;

    /* Don't report on the datapath's device. */
    if (!strcmp(devname, dpif_base_name(backer->dpif))) {
        return;
    }

    /* Ports that back tunnels are managed internally; skip them too. */
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
                   &all_ofproto_dpifs) {
        if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
            return;
        }
    }

    ofproto = lookup_ofproto_dpif_by_port_name(devname);
    if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
        /* The port was removed.  If we know the datapath,
         * report it through poll_set().  If we don't, it may be
         * notifying us of a removal we initiated, so ignore it.
         * If there's a pending ENOBUFS, let it stand, since
         * everything will be reevaluated. */
        if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
            sset_add(&ofproto->port_poll_set, devname);
            ofproto->port_poll_errno = 0;
        }
    } else if (!ofproto) {
        /* The port was added, but we don't know with which
         * ofproto we should associate it.  Delete it. */
        dpif_port_del(backer->dpif, port.port_no);
    } else {
        struct ofport_dpif *ofport;

        ofport = ofport_dpif_cast(shash_find_data(
                                      &ofproto->up.port_by_name, devname));
        if (ofport
            && ofport->odp_port != port.port_no
            && !odp_port_to_ofport(backer, port.port_no))
        {
            /* 'ofport''s datapath port number has changed from
             * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
             * structures to match. */
            ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
            hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
            ofport->odp_port = port.port_no;
            hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
                        hash_odp_port(port.port_no));
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            backer->need_revalidate = REV_RECONFIGURE;
        }
    }
    /* NOTE(review): when dpif_port_query_by_name() fails above, 'port' may
     * never have been written before this call -- confirm that the query
     * initializes 'port' on every error path. */
    dpif_port_destroy(&port);
}
790
791 /* Propagate 'error' to all ofprotos based on 'backer'. */
792 static void
793 process_dpif_port_error(struct dpif_backer *backer, int error)
794 {
795     struct ofproto_dpif *ofproto;
796
797     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
798         if (ofproto->backer == backer) {
799             sset_clear(&ofproto->port_poll_set);
800             ofproto->port_poll_errno = error;
801         }
802     }
803 }
804
805 static void
806 type_wait(const char *type)
807 {
808     struct dpif_backer *backer;
809
810     backer = shash_find_data(&all_dpif_backers, type);
811     if (!backer) {
812         /* This is not necessarily a problem, since backers are only
813          * created on demand. */
814         return;
815     }
816
817     dpif_wait(backer->dpif);
818 }
819 \f
820 /* Basic life-cycle. */
821
822 static int add_internal_flows(struct ofproto_dpif *);
823
824 static struct ofproto *
825 alloc(void)
826 {
827     struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
828     return &ofproto->up;
829 }
830
/* Frees the ofproto_dpif that embeds 'ofproto_' (counterpart to alloc()). */
static void
dealloc(struct ofproto *ofproto_)
{
    free(ofproto_dpif_cast(ofproto_));
}
837
/* Releases one reference to 'backer'.  When the final reference is dropped,
 * destroys all of the backer's state and closes its dpif connection. */
static void
close_dpif_backer(struct dpif_backer *backer)
{
    ovs_assert(backer->refcount > 0);

    if (--backer->refcount) {
        /* Other ofprotos still share this backer. */
        return;
    }

    udpif_destroy(backer->udpif);

    simap_destroy(&backer->tnl_backers);
    ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
    hmap_destroy(&backer->odp_to_ofport_map);
    /* Unregister before freeing 'backer->type', which is the shash key. */
    shash_find_and_delete(&all_dpif_backers, backer->type);
    recirc_id_pool_destroy(backer->rid_pool);
    free(backer->type);
    free(backer->dp_version_string);
    dpif_close(backer->dpif);
    free(backer);
}
859
/* Datapath port slated for removal from datapath. */
struct odp_garbage {
    struct list list_node;      /* Element in 'garbage_list' built by
                                 * open_dpif_backer(). */
    odp_port_t odp_port;        /* Datapath port number to delete. */
};
865
866 static bool check_variable_length_userdata(struct dpif_backer *backer);
867 static size_t check_max_mpls_depth(struct dpif_backer *backer);
868 static bool check_recirc(struct dpif_backer *backer);
869 static bool check_masked_set_action(struct dpif_backer *backer);
870
/* Finds or creates the dpif backer for datapaths of 'type', storing it in
 * '*backerp'.  If a backer for 'type' already exists it just gains a
 * reference.  Otherwise this deletes any stale datapaths of the same type,
 * creates a fresh one, probes it for optional features, and starts the
 * upcall machinery.  Returns 0 on success, a positive errno value on
 * failure. */
static int
open_dpif_backer(const char *type, struct dpif_backer **backerp)
{
    struct dpif_backer *backer;
    struct dpif_port_dump port_dump;
    struct dpif_port port;
    struct shash_node *node;
    struct list garbage_list;
    struct odp_garbage *garbage, *next;

    struct sset names;
    char *backer_name;
    const char *name;
    int error;

    /* Reuse an existing backer for this datapath type, if there is one. */
    backer = shash_find_data(&all_dpif_backers, type);
    if (backer) {
        backer->refcount++;
        *backerp = backer;
        return 0;
    }

    backer_name = xasprintf("ovs-%s", type);

    /* Remove any existing datapaths, since we assume we're the only
     * userspace controlling the datapath. */
    sset_init(&names);
    dp_enumerate_names(type, &names);
    SSET_FOR_EACH(name, &names) {
        struct dpif *old_dpif;

        /* Don't remove our backer if it exists. */
        if (!strcmp(name, backer_name)) {
            continue;
        }

        if (dpif_open(name, type, &old_dpif)) {
            VLOG_WARN("couldn't open old datapath %s to remove it", name);
        } else {
            dpif_delete(old_dpif);
            dpif_close(old_dpif);
        }
    }
    sset_destroy(&names);

    backer = xmalloc(sizeof *backer);

    error = dpif_create_and_open(backer_name, type, &backer->dpif);
    free(backer_name);
    if (error) {
        VLOG_ERR("failed to open datapath of type %s: %s", type,
                 ovs_strerror(error));
        free(backer);
        return error;
    }
    backer->udpif = udpif_create(backer, backer->dpif);

    backer->type = xstrdup(type);
    backer->refcount = 1;
    hmap_init(&backer->odp_to_ofport_map);
    ovs_rwlock_init(&backer->odp_to_ofport_lock);
    backer->need_revalidate = 0;
    simap_init(&backer->tnl_backers);
    /* Hold off on receiving upcalls while flow restore is in progress. */
    backer->recv_set_enable = !ofproto_get_flow_restore_wait();
    *backerp = backer;

    if (backer->recv_set_enable) {
        dpif_flow_flush(backer->dpif);
    }

    /* Loop through the ports already on the datapath and remove any
     * that we don't need anymore. */
    list_init(&garbage_list);
    dpif_port_dump_start(&port_dump, backer->dpif);
    while (dpif_port_dump_next(&port_dump, &port)) {
        node = shash_find(&init_ofp_ports, port.name);
        if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
            garbage = xmalloc(sizeof *garbage);
            garbage->odp_port = port.port_no;
            list_push_front(&garbage_list, &garbage->list_node);
        }
    }
    dpif_port_dump_done(&port_dump);

    LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
        dpif_port_del(backer->dpif, garbage->odp_port);
        list_remove(&garbage->list_node);
        free(garbage);
    }

    shash_add(&all_dpif_backers, type, backer);

    /* Probe the new datapath for optional features. */
    backer->enable_recirc = check_recirc(backer);
    backer->max_mpls_depth = check_max_mpls_depth(backer);
    backer->masked_set_action = check_masked_set_action(backer);
    backer->rid_pool = recirc_id_pool_create();

    backer->enable_tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif);
    atomic_count_init(&backer->tnl_count, 0);

    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
    if (error) {
        VLOG_ERR("failed to listen on datapath of type %s: %s",
                 type, ovs_strerror(error));
        close_dpif_backer(backer);
        return error;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    /* This check fails if performed before udpif threads have been set,
     * as the kernel module checks that the 'pid' in userspace action
     * is non-zero. */
    backer->variable_length_userdata = check_variable_length_userdata(backer);
    backer->dp_version_string = dpif_get_dp_version(backer->dpif);

    return error;
}
991
992 bool
993 ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto)
994 {
995     return ofproto_use_tnl_push_pop && ofproto->backer->enable_tnl_push_pop &&
996            atomic_count_get(&ofproto->backer->tnl_count);
997 }
998
999 /* Tests whether 'backer''s datapath supports recirculation.  Only newer
1000  * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
1001  * features on older datapaths that don't support this feature.
1002  *
1003  * Returns false if 'backer' definitely does not support recirculation, true if
1004  * it seems to support recirculation or if at least the error we get is
1005  * ambiguous. */
1006 static bool
1007 check_recirc(struct dpif_backer *backer)
1008 {
1009     struct flow flow;
1010     struct odputil_keybuf keybuf;
1011     struct ofpbuf key;
1012     int error;
1013     bool enable_recirc = false;
1014
1015     memset(&flow, 0, sizeof flow);
1016     flow.recirc_id = 1;
1017     flow.dp_hash = 1;
1018
1019     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
1020     odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
1021
1022     error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
1023                           ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
1024                           0, NULL, NULL);
1025     if (error && error != EEXIST) {
1026         if (error != EINVAL) {
1027             VLOG_WARN("%s: Reciculation flow probe failed (%s)",
1028                       dpif_name(backer->dpif), ovs_strerror(error));
1029         }
1030         goto done;
1031     }
1032
1033     error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
1034                           NULL, NULL);
1035     if (error) {
1036         VLOG_WARN("%s: failed to delete recirculation feature probe flow",
1037                   dpif_name(backer->dpif));
1038     }
1039
1040     enable_recirc = true;
1041
1042 done:
1043     if (enable_recirc) {
1044         VLOG_INFO("%s: Datapath supports recirculation",
1045                   dpif_name(backer->dpif));
1046     } else {
1047         VLOG_INFO("%s: Datapath does not support recirculation",
1048                   dpif_name(backer->dpif));
1049     }
1050
1051     return enable_recirc;
1052 }
1053
1054 /* Tests whether 'backer''s datapath supports variable-length
1055  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
1056  * to disable some features on older datapaths that don't support this
1057  * feature.
1058  *
1059  * Returns false if 'backer' definitely does not support variable-length
1060  * userdata, true if it seems to support them or if at least the error we get
1061  * is ambiguous. */
1062 static bool
1063 check_variable_length_userdata(struct dpif_backer *backer)
1064 {
1065     struct eth_header *eth;
1066     struct ofpbuf actions;
1067     struct dpif_execute execute;
1068     struct ofpbuf packet;
1069     size_t start;
1070     int error;
1071
1072     /* Compose a userspace action that will cause an ERANGE error on older
1073      * datapaths that don't support variable-length userdata.
1074      *
1075      * We really test for using userdata longer than 8 bytes, but older
1076      * datapaths accepted these, silently truncating the userdata to 8 bytes.
1077      * The same older datapaths rejected userdata shorter than 8 bytes, so we
1078      * test for that instead as a proxy for longer userdata support. */
1079     ofpbuf_init(&actions, 64);
1080     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
1081     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
1082                    dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
1083     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
1084     nl_msg_end_nested(&actions, start);
1085
1086     /* Compose a dummy ethernet packet. */
1087     ofpbuf_init(&packet, ETH_HEADER_LEN);
1088     eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
1089     eth->eth_type = htons(0x1234);
1090
1091     /* Execute the actions.  On older datapaths this fails with ERANGE, on
1092      * newer datapaths it succeeds. */
1093     execute.actions = ofpbuf_data(&actions);
1094     execute.actions_len = ofpbuf_size(&actions);
1095     execute.packet = &packet;
1096     execute.md = PKT_METADATA_INITIALIZER(0);
1097     execute.needs_help = false;
1098     execute.probe = true;
1099
1100     error = dpif_execute(backer->dpif, &execute);
1101
1102     ofpbuf_uninit(&packet);
1103     ofpbuf_uninit(&actions);
1104
1105     switch (error) {
1106     case 0:
1107         return true;
1108
1109     case ERANGE:
1110         /* Variable-length userdata is not supported. */
1111         VLOG_WARN("%s: datapath does not support variable-length userdata "
1112                   "feature (needs Linux 3.10+ or kernel module from OVS "
1113                   "1..11+).  The NXAST_SAMPLE action will be ignored.",
1114                   dpif_name(backer->dpif));
1115         return false;
1116
1117     default:
1118         /* Something odd happened.  We're not sure whether variable-length
1119          * userdata is supported.  Default to "yes". */
1120         VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
1121                   dpif_name(backer->dpif), ovs_strerror(error));
1122         return true;
1123     }
1124 }
1125
/* Tests the MPLS label stack depth supported by 'backer''s datapath.
 *
 * Returns the number of elements in a struct flow's mpls_lse field
 * if the datapath supports at least that many entries in an
 * MPLS label stack.
 * Otherwise returns the number of MPLS push actions supported by
 * the datapath. */
static size_t
check_max_mpls_depth(struct dpif_backer *backer)
{
    struct flow flow;
    int n;

    /* Probe with successively deeper MPLS stacks until a flow put is
     * rejected or FLOW_MAX_MPLS_LABELS is reached. */
    for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
        struct odputil_keybuf keybuf;
        struct ofpbuf key;
        int error;

        memset(&flow, 0, sizeof flow);
        flow.dl_type = htons(ETH_TYPE_MPLS);
        flow_set_mpls_bos(&flow, n, 1);

        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
        odp_flow_key_from_flow(&key, &flow, NULL, 0, false);

        /* DPIF_FP_PROBE marks this as a feature probe so failures are not
         * treated as real errors by the dpif layer. */
        error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
                              ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0,
                              NULL, 0, NULL, NULL);
        if (error && error != EEXIST) {
            /* EINVAL is the expected "too deep" answer; log anything else. */
            if (error != EINVAL) {
                VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
                          dpif_name(backer->dpif), ovs_strerror(error));
            }
            break;
        }

        /* Clean up the probe flow. */
        error = dpif_flow_del(backer->dpif, ofpbuf_data(&key),
                              ofpbuf_size(&key), NULL, NULL);
        if (error) {
            VLOG_WARN("%s: failed to delete MPLS feature probe flow",
                      dpif_name(backer->dpif));
        }
    }

    VLOG_INFO("%s: MPLS label stack length probed as %d",
              dpif_name(backer->dpif), n);
    return n;
}
1174
/* Tests whether 'backer''s datapath supports masked data in
 * OVS_ACTION_ATTR_SET actions.  We need to disable some features on older
 * datapaths that don't support this feature.
 *
 * Returns true if the probe execution succeeds, false otherwise. */
static bool
check_masked_set_action(struct dpif_backer *backer)
{
    struct eth_header *eth;
    struct ofpbuf actions;
    struct dpif_execute execute;
    struct ofpbuf packet;
    int error;
    struct ovs_key_ethernet key, mask;

    /* Compose a set action that will cause an EINVAL error on older
     * datapaths that don't support masked set actions.
     * Avoid using a full mask, as it could be translated to a non-masked
     * set action instead. */
    ofpbuf_init(&actions, 64);
    memset(&key, 0x53, sizeof key);
    memset(&mask, 0x7f, sizeof mask);
    commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
                             sizeof key);

    /* Compose a dummy ethernet packet. */
    ofpbuf_init(&packet, ETH_HEADER_LEN);
    eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
    eth->eth_type = htons(0x1234);

    /* Execute the actions.  On older datapaths this fails with EINVAL, on
     * newer datapaths it succeeds. */
    execute.actions = ofpbuf_data(&actions);
    execute.actions_len = ofpbuf_size(&actions);
    execute.packet = &packet;
    execute.md = PKT_METADATA_INITIALIZER(0);
    execute.needs_help = false;
    execute.probe = true;   /* Mark as a feature probe. */

    error = dpif_execute(backer->dpif, &execute);

    ofpbuf_uninit(&packet);
    ofpbuf_uninit(&actions);

    if (error) {
        /* Masked set action is not supported. */
        VLOG_INFO("%s: datapath does not support masked set action feature.",
                  dpif_name(backer->dpif));
    }
    return !error;
}
1224
/* ofproto provider "construct" implementation.  Opens (or takes a reference
 * on) the dpif backer for the bridge's datapath type, initializes all
 * ofproto-dpif sub-module state, claims any datapath ports recorded in
 * 'init_ofp_ports' for this bridge, registers the ofproto in
 * 'all_ofproto_dpifs', and installs the hidden internal flows.  Returns 0 on
 * success or a positive errno value. */
static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;
    int error;

    error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
    if (error) {
        return error;
    }

    /* Initialize per-bridge state.  Optional features (netflow, sflow,
     * ipfix, stp, rstp, mcast snooping) start out disabled/NULL. */
    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->ipfix = NULL;
    ofproto->stp = NULL;
    ofproto->rstp = NULL;
    ofproto->dump_seq = 0;
    hmap_init(&ofproto->bundles);
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    ofproto->ms = NULL;
    ofproto->mbridge = mbridge_create();
    ofproto->has_bonded_bundles = false;
    ofproto->lacp_enabled = false;
    ofproto_tunnel_init();
    ovs_mutex_init_adaptive(&ofproto->stats_mutex);
    ovs_mutex_init(&ofproto->vsp_mutex);

    guarded_list_init(&ofproto->pins);

    ofproto_unixctl_init();

    hmap_init(&ofproto->vlandev_map);
    hmap_init(&ofproto->realdev_vid_map);

    sset_init(&ofproto->ports);
    sset_init(&ofproto->ghost_ports);
    sset_init(&ofproto->port_poll_set);
    ofproto->port_poll_errno = 0;
    ofproto->change_seq = 0;
    ofproto->pins_seq = seq_create();
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);


    /* Consume the iface hints recorded for this bridge at startup, adding
     * ports that already exist in the datapath. */
    SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
            /* Check if the datapath already has this port. */
            if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                sset_add(&ofproto->ports, node->name);
            }

            free(iface_hint->br_name);
            free(iface_hint->br_type);
            free(iface_hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    error = add_internal_flows(ofproto);

    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    return error;
}
1296
1297 static int
1298 add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
1299                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
1300 {
1301     struct match match;
1302     int error;
1303     struct rule *rule;
1304
1305     match_init_catchall(&match);
1306     match_set_reg(&match, 0, id);
1307
1308     error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
1309                                            &rule);
1310     *rulep = error ? NULL : rule_dpif_cast(rule);
1311
1312     return error;
1313 }
1314
/* Installs the hidden internal flows that ofproto-dpif relies on: three
 * register-matched "miss" flows (miss, no-packet-in, drop-frags) and two
 * recirculation rules in the internal table.  Returns 0 on success or the
 * first positive errno encountered. */
static int
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;
    struct rule *unused_rulep OVS_UNUSED;
    struct ofpact_resubmit *resubmit;
    struct match match;
    int error;
    int id;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    id = 1;

    /* miss_rule: send unmatched packets to the controller. */
    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_NO_MATCH;
    ofpact_pad(&ofpacts);

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->miss_rule);
    if (error) {
        return error;
    }

    /* no_packet_in_rule and drop_frags_rule both have empty actions,
     * i.e. they drop matching packets. */
    ofpbuf_clear(&ofpacts);
    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->no_packet_in_rule);
    if (error) {
        return error;
    }

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->drop_frags_rule);
    if (error) {
        return error;
    }

    /* Continue non-recirculation rule lookups from table 0.
     *
     * (priority=2), recirc=0, actions=resubmit(, 0)
     */
    resubmit = ofpact_put_RESUBMIT(&ofpacts);
    resubmit->in_port = OFPP_IN_PORT;
    resubmit->table_id = 0;

    match_init_catchall(&match);
    match_set_recirc_id(&match, 0);

    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
                                           &unused_rulep);
    if (error) {
        return error;
    }

    /* Drop any run away recirc rule lookups. Recirc_id has to be
     * non-zero when reaching this rule.
     *
     * (priority=1), *, actions=drop
     */
    ofpbuf_clear(&ofpacts);
    match_init_catchall(&match);
    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
                                           &unused_rulep);

    return error;
}
1384
/* ofproto provider "destruct" implementation: undoes construct().  Removes
 * the ofproto from translation state and the global registry, deletes all of
 * its rules, drains queued packet-ins, tears down every sub-module, and
 * finally releases the dpif backer reference. */
static void
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofproto_packet_in *pin, *next_pin;
    struct rule_dpif *rule;
    struct oftable *table;
    struct list pins;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_remove_ofproto(ofproto);
    xlate_txn_commit();

    /* Ensure that the upcall processing threads have no remaining references
     * to the ofproto or anything in it. */
    udpif_synchronize(ofproto->backer->udpif);

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);

    /* Delete every remaining rule in every table. */
    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        CLS_FOR_EACH (rule, up.cr, &table->cls) {
            ofproto_rule_delete(&ofproto->up, &rule->up);
        }
    }

    /* Free any packet-ins still queued for the controller. */
    guarded_list_pop_all(&ofproto->pins, &pins);
    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
        list_remove(&pin->list_node);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }
    guarded_list_destroy(&ofproto->pins);

    mbridge_unref(ofproto->mbridge);

    netflow_unref(ofproto->netflow);
    dpif_sflow_unref(ofproto->sflow);
    dpif_ipfix_unref(ofproto->ipfix);
    hmap_destroy(&ofproto->bundles);
    mac_learning_unref(ofproto->ml);
    mcast_snooping_unref(ofproto->ms);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    sset_destroy(&ofproto->ports);
    sset_destroy(&ofproto->ghost_ports);
    sset_destroy(&ofproto->port_poll_set);

    ovs_mutex_destroy(&ofproto->stats_mutex);
    ovs_mutex_destroy(&ofproto->vsp_mutex);

    seq_destroy(ofproto->pins_seq);

    close_dpif_backer(ofproto->backer);
}
1442
/* ofproto provider "run" implementation: performs the bridge's periodic
 * work.  Delivers queued packet-ins, runs each enabled sub-module (netflow,
 * sflow, ipfix, bundles, stp/rstp, MAC learning, mcast snooping), and, when
 * the udpif dump sequence advances, expires timed-out rules and rebalances
 * bonds.  Always returns 0. */
static int
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    uint64_t new_seq, new_dump_seq;

    /* A mirror reconfiguration invalidates learned state. */
    if (mbridge_need_revalidate(ofproto->mbridge)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
        mcast_snooping_mdb_flush(ofproto->ms);
    }

    /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
     * flow restore.  Even though nothing is processed during flow restore,
     * all queued 'pins' will be handled immediately when flow restore
     * completes. */
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);

    /* Do not perform any periodic activity required by 'ofproto' while
     * waiting for flow restore to complete. */
    if (!ofproto_get_flow_restore_wait()) {
        struct ofproto_packet_in *pin, *next_pin;
        struct list pins;

        /* Forward queued packet-ins to the controller, then free them. */
        guarded_list_pop_all(&ofproto->pins, &pins);
        LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
            connmgr_send_packet_in(ofproto->up.connmgr, pin);
            list_remove(&pin->list_node);
            free(CONST_CAST(void *, pin->up.packet));
            free(pin);
        }
    }

    if (ofproto->netflow) {
        netflow_run(ofproto->netflow);
    }
    if (ofproto->sflow) {
        dpif_sflow_run(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_run(ofproto->ipfix);
    }

    /* Run per-port work only when connectivity has changed since the last
     * pass. */
    new_seq = seq_read(connectivity_seq_get());
    if (ofproto->change_seq != new_seq) {
        struct ofport_dpif *ofport;

        HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
            port_run(ofport);
        }

        ofproto->change_seq = new_seq;
    }
    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            bundle_run(bundle);
        }
    }

    stp_run(ofproto);
    rstp_run(ofproto);
    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    if (mac_learning_run(ofproto->ml)) {
        ofproto->backer->need_revalidate = REV_MAC_LEARNING;
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    if (mcast_snooping_run(ofproto->ms)) {
        ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
    }

    new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
    if (ofproto->dump_seq != new_dump_seq) {
        struct rule *rule, *next_rule;

        /* We know stats are relatively fresh, so now is a good time to do some
         * periodic work. */
        ofproto->dump_seq = new_dump_seq;

        /* Expire OpenFlow flows whose idle_timeout or hard_timeout
         * has passed. */
        ovs_mutex_lock(&ofproto_mutex);
        LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
                            &ofproto->up.expirable) {
            rule_expire(rule_dpif_cast(rule));
        }
        ovs_mutex_unlock(&ofproto_mutex);

        /* All outstanding data in existing flows has been accounted, so it's a
         * good time to do bond rebalancing. */
        if (ofproto->has_bonded_bundles) {
            struct ofbundle *bundle;

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                if (bundle->bond) {
                    bond_rebalance(bundle->bond);
                }
            }
        }
    }
    return 0;
}
1549
/* ofproto provider "wait" implementation: arranges for the poll loop to wake
 * up when run() has work to do.  Does nothing while flow restore is in
 * progress, mirroring the early return in run(). */
static void
wait(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    if (ofproto_get_flow_restore_wait()) {
        return;
    }

    /* Let each enabled sub-module register its own wakeup conditions. */
    if (ofproto->sflow) {
        dpif_sflow_wait(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_wait(ofproto->ipfix);
    }
    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            bundle_wait(bundle);
        }
    }
    if (ofproto->netflow) {
        netflow_wait(ofproto->netflow);
    }
    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
    mac_learning_wait(ofproto->ml);
    ovs_rwlock_unlock(&ofproto->ml->rwlock);
    mcast_snooping_wait(ofproto->ms);
    stp_wait(ofproto);
    if (ofproto->backer->need_revalidate) {
        /* Shouldn't happen, but if it does just go around again. */
        VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
        poll_immediate_wake();
    }

    /* Wake up when new flow stats or packet-ins arrive. */
    seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
    seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
}
1589
1590 static void
1591 type_get_memory_usage(const char *type, struct simap *usage)
1592 {
1593     struct dpif_backer *backer;
1594
1595     backer = shash_find_data(&all_dpif_backers, type);
1596     if (backer) {
1597         udpif_get_memory_usage(backer->udpif, usage);
1598     }
1599 }
1600
1601 static void
1602 flush(struct ofproto *ofproto_)
1603 {
1604     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1605     struct dpif_backer *backer = ofproto->backer;
1606
1607     if (backer) {
1608         udpif_flush(backer->udpif);
1609     }
1610 }
1611
1612 static void
1613 query_tables(struct ofproto *ofproto,
1614              struct ofputil_table_features *features,
1615              struct ofputil_table_stats *stats)
1616 {
1617     strcpy(features->name, "classifier");
1618
1619     if (stats) {
1620         int i;
1621
1622         for (i = 0; i < ofproto->n_tables; i++) {
1623             unsigned long missed, matched;
1624
1625             atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
1626             atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
1627
1628             stats[i].matched_count = matched;
1629             stats[i].lookup_count = matched + missed;
1630         }
1631     }
1632 }
1633
1634 static struct ofport *
1635 port_alloc(void)
1636 {
1637     struct ofport_dpif *port = xmalloc(sizeof *port);
1638     return &port->up;
1639 }
1640
/* ofproto provider "port_dealloc" implementation: releases the storage that
 * port_alloc() obtained for 'port_'. */
static void
port_dealloc(struct ofport *port_)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(port_);

    free(ofport);
}
1647
/* ofproto provider "port_construct" implementation: initializes the
 * ofproto-dpif-specific parts of 'port_' and wires the port into the
 * backer's odp-to-ofport map (non-tunnel ports), tunnel bookkeeping (tunnel
 * ports), and sFlow.  Patch ports short-circuit with no datapath port.
 * Returns 0 on success or a positive errno value. */
static int
port_construct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const struct netdev *netdev = port->up.netdev;
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    struct dpif_port dpif_port;
    int error;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    /* Start from a clean slate; the features below are configured later. */
    port->bundle = NULL;
    port->cfm = NULL;
    port->bfd = NULL;
    port->may_enable = false;
    port->stp_port = NULL;
    port->stp_state = STP_DISABLED;
    port->rstp_port = NULL;
    port->rstp_state = RSTP_DISABLED;
    port->is_tunnel = false;
    port->peer = NULL;
    port->qdscp = NULL;
    port->n_qdscp = 0;
    port->realdev_ofp_port = 0;
    port->vlandev_vid = 0;
    port->carrier_seq = netdev_get_carrier_resets(netdev);
    port->is_layer3 = netdev_vport_is_layer3(netdev);

    if (netdev_vport_is_patch(netdev)) {
        /* By bailing out here, we don't submit the port to the sFlow module
         * to be considered for counter polling export.  This is correct
         * because the patch port represents an interface that sFlow considers
         * to be "internal" to the switch as a whole, and therefore not an
         * candidate for counter polling. */
        port->odp_port = ODPP_NONE;
        ofport_update_peer(port);
        return 0;
    }

    /* Look up the datapath port number for this netdev. */
    error = dpif_port_query_by_name(ofproto->backer->dpif,
                                    netdev_vport_get_dpif_port(netdev, namebuf,
                                                               sizeof namebuf),
                                    &dpif_port);
    if (error) {
        return error;
    }

    port->odp_port = dpif_port.port_no;

    if (netdev_get_tunnel_config(netdev)) {
        atomic_count_inc(&ofproto->backer->tnl_count);
        tnl_port_add(port, port->up.netdev, port->odp_port,
                     ovs_native_tunneling_is_on(ofproto), namebuf);
        port->is_tunnel = true;
        if (ofproto->ipfix) {
           dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
        }
    } else {
        /* Sanity-check that a mapping doesn't already exist.  This
         * shouldn't happen for non-tunnel ports. */
        if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
            VLOG_ERR("port %s already has an OpenFlow port number",
                     dpif_port.name);
            dpif_port_destroy(&dpif_port);
            return EBUSY;
        }

        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
                    hash_odp_port(port->odp_port));
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }
    dpif_port_destroy(&dpif_port);

    if (ofproto->sflow) {
        dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
    }

    return 0;
}
1728
/* ofproto provider "port_destruct" implementation: undoes port_construct().
 * Removes the port from translation state, the datapath (when it still
 * exists there), the odp-to-ofport map, tunnel and ipfix bookkeeping, and
 * every per-port feature (bundle, cfm, bfd, stp, rstp, sflow). */
static void
port_destruct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const char *devname = netdev_get_name(port->up.netdev);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_ofport_remove(port);
    xlate_txn_commit();

    dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
                                              sizeof namebuf);
    if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
        /* The underlying device is still there, so delete it.  This
         * happens when the ofproto is being destroyed, since the caller
         * assumes that removal of attached ports will happen as part of
         * destruction. */
        if (!port->is_tunnel) {
            dpif_port_del(ofproto->backer->dpif, port->odp_port);
        }
    }

    /* Break the patch-port peer link, if any. */
    if (port->peer) {
        port->peer->peer = NULL;
        port->peer = NULL;
    }

    /* Only non-tunnel ports with a real datapath port number were inserted
     * into the odp-to-ofport map by port_construct(). */
    if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }

    if (port->is_tunnel) {
        atomic_count_dec(&ofproto->backer->tnl_count);
    }

    if (port->is_tunnel && ofproto->ipfix) {
       dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
    }

    /* Detach the port from every per-port feature. */
    tnl_port_del(port);
    sset_find_and_delete(&ofproto->ports, devname);
    sset_find_and_delete(&ofproto->ghost_ports, devname);
    bundle_remove(port_);
    set_cfm(port_, NULL);
    set_bfd(port_, NULL);
    if (port->stp_port) {
        stp_port_disable(port->stp_port);
    }
    set_rstp_port(port_, NULL);
    if (ofproto->sflow) {
        dpif_sflow_del_port(ofproto->sflow, port->odp_port);
    }

    free(port->qdscp);
}
1790
1791 static void
1792 port_modified(struct ofport *port_)
1793 {
1794     struct ofport_dpif *port = ofport_dpif_cast(port_);
1795     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1796     struct netdev *netdev = port->up.netdev;
1797
1798     if (port->bundle && port->bundle->bond) {
1799         bond_slave_set_netdev(port->bundle->bond, port, netdev);
1800     }
1801
1802     if (port->cfm) {
1803         cfm_set_netdev(port->cfm, netdev);
1804     }
1805
1806     if (port->bfd) {
1807         bfd_set_netdev(port->bfd, netdev);
1808     }
1809
1810     ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
1811                                      port->up.pp.hw_addr);
1812
1813     netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1814
1815     if (port->is_tunnel) {
1816         struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1817
1818         if (tnl_port_reconfigure(port, netdev, port->odp_port,
1819                                  ovs_native_tunneling_is_on(ofproto), namebuf)) {
1820             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1821         }
1822     }
1823
1824     ofport_update_peer(port);
1825 }
1826
1827 static void
1828 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1829 {
1830     struct ofport_dpif *port = ofport_dpif_cast(port_);
1831     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1832     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1833
1834     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1835                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1836                    OFPUTIL_PC_NO_PACKET_IN)) {
1837         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1838
1839         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1840             bundle_update(port->bundle);
1841         }
1842     }
1843 }
1844
1845 static int
1846 set_sflow(struct ofproto *ofproto_,
1847           const struct ofproto_sflow_options *sflow_options)
1848 {
1849     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1850     struct dpif_sflow *ds = ofproto->sflow;
1851
1852     if (sflow_options) {
1853         if (!ds) {
1854             struct ofport_dpif *ofport;
1855
1856             ds = ofproto->sflow = dpif_sflow_create();
1857             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1858                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1859             }
1860             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1861         }
1862         dpif_sflow_set_options(ds, sflow_options);
1863     } else {
1864         if (ds) {
1865             dpif_sflow_unref(ds);
1866             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1867             ofproto->sflow = NULL;
1868         }
1869     }
1870     return 0;
1871 }
1872
1873 static int
1874 set_ipfix(
1875     struct ofproto *ofproto_,
1876     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1877     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1878     size_t n_flow_exporters_options)
1879 {
1880     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1881     struct dpif_ipfix *di = ofproto->ipfix;
1882     bool has_options = bridge_exporter_options || flow_exporters_options;
1883     bool new_di = false;
1884
1885     if (has_options && !di) {
1886         di = ofproto->ipfix = dpif_ipfix_create();
1887         new_di = true;
1888     }
1889
1890     if (di) {
1891         /* Call set_options in any case to cleanly flush the flow
1892          * caches in the last exporters that are to be destroyed. */
1893         dpif_ipfix_set_options(
1894             di, bridge_exporter_options, flow_exporters_options,
1895             n_flow_exporters_options);
1896
1897         /* Add tunnel ports only when a new ipfix created */
1898         if (new_di == true) {
1899             struct ofport_dpif *ofport;
1900             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1901                 if (ofport->is_tunnel == true) {
1902                     dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
1903                 }
1904             }
1905         }
1906
1907         if (!has_options) {
1908             dpif_ipfix_unref(di);
1909             ofproto->ipfix = NULL;
1910         }
1911     }
1912
1913     return 0;
1914 }
1915
/* Configures CFM on 'ofport_' from 's', or disables CFM if 's' is NULL.
 *
 * Returns 0 on success, EINVAL if 's' could not be applied.  Note the
 * control flow: a successful configuration jumps to 'out', while both the
 * disable path (s == NULL) and the failure path fall through to destroy the
 * cfm object. */
static int
set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    int error = 0;

    if (s) {
        if (!ofport->cfm) {
            struct ofproto_dpif *ofproto;

            /* Creating a cfm instance changes the translation, so all flows
             * must be revalidated. */
            ofproto = ofproto_dpif_cast(ofport->up.ofproto);
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
            ofport->cfm = cfm_create(ofport->up.netdev);
        }

        if (cfm_configure(ofport->cfm, s)) {
            error = 0;
            goto out;
        }

        /* Configuration failed; fall through to destroy the cfm. */
        error = EINVAL;
    }
    cfm_unref(ofport->cfm);
    ofport->cfm = NULL;
out:
    ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
                                     ofport->up.pp.hw_addr);
    return error;
}
1945
1946 static bool
1947 cfm_status_changed(struct ofport *ofport_)
1948 {
1949     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1950
1951     return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
1952 }
1953
1954 static int
1955 get_cfm_status(const struct ofport *ofport_,
1956                struct cfm_status *status)
1957 {
1958     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1959     int ret = 0;
1960
1961     if (ofport->cfm) {
1962         cfm_get_status(ofport->cfm, status);
1963     } else {
1964         ret = ENOENT;
1965     }
1966
1967     return ret;
1968 }
1969
1970 static int
1971 set_bfd(struct ofport *ofport_, const struct smap *cfg)
1972 {
1973     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
1974     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1975     struct bfd *old;
1976
1977     old = ofport->bfd;
1978     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
1979                                 cfg, ofport->up.netdev);
1980     if (ofport->bfd != old) {
1981         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1982     }
1983     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1984                                      ofport->up.pp.hw_addr);
1985     return 0;
1986 }
1987
1988 static bool
1989 bfd_status_changed(struct ofport *ofport_)
1990 {
1991     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1992
1993     return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
1994 }
1995
1996 static int
1997 get_bfd_status(struct ofport *ofport_, struct smap *smap)
1998 {
1999     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2000     int ret = 0;
2001
2002     if (ofport->bfd) {
2003         bfd_get_status(ofport->bfd, smap);
2004     } else {
2005         ret = ENOENT;
2006     }
2007
2008     return ret;
2009 }
2010 \f
2011 /* Spanning Tree. */
2012
2013 /* Called while rstp_mutex is held. */
2014 static void
2015 rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_)
2016 {
2017     struct ofproto_dpif *ofproto = ofproto_;
2018     struct ofport_dpif *ofport = ofport_;
2019     struct eth_header *eth = ofpbuf_l2(pkt);
2020
2021     netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2022     if (eth_addr_is_zero(eth->eth_src)) {
2023         VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
2024                      "does not have a configured source MAC address.",
2025                      ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
2026     } else {
2027         ofproto_dpif_send_packet(ofport, pkt);
2028     }
2029     ofpbuf_delete(pkt);
2030 }
2031
2032 static void
2033 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
2034 {
2035     struct ofproto_dpif *ofproto = ofproto_;
2036     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
2037     struct ofport_dpif *ofport;
2038
2039     ofport = stp_port_get_aux(sp);
2040     if (!ofport) {
2041         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
2042                      ofproto->up.name, port_num);
2043     } else {
2044         struct eth_header *eth = ofpbuf_l2(pkt);
2045
2046         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2047         if (eth_addr_is_zero(eth->eth_src)) {
2048             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
2049                          "with unknown MAC", ofproto->up.name, port_num);
2050         } else {
2051             ofproto_dpif_send_packet(ofport, pkt);
2052         }
2053     }
2054     ofpbuf_delete(pkt);
2055 }
2056
/* Configure RSTP on 'ofproto_' using the settings defined in 's'.
 * A NULL 's' disables RSTP, detaching every port and releasing the
 * bridge-wide rstp object. */
static void
set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    /* Only revalidate flows if the configuration changed. */
    if (!s != !ofproto->rstp) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
    }

    if (s) {
        /* Create the bridge RSTP instance on first enablement, then
         * (re)apply every tunable from 's'. */
        if (!ofproto->rstp) {
            ofproto->rstp = rstp_create(ofproto_->name, s->address,
                                        rstp_send_bpdu_cb, ofproto);
            ofproto->rstp_last_tick = time_msec();
        }
        rstp_set_bridge_address(ofproto->rstp, s->address);
        rstp_set_bridge_priority(ofproto->rstp, s->priority);
        rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
        rstp_set_bridge_force_protocol_version(ofproto->rstp,
                                               s->force_protocol_version);
        rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
        rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
        rstp_set_bridge_transmit_hold_count(ofproto->rstp,
                                            s->transmit_hold_count);
    } else {
        /* Disable: detach every port first, then drop our reference. */
        struct ofport *ofport;
        HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
            set_rstp_port(ofport, NULL);
        }
        rstp_unref(ofproto->rstp);
        ofproto->rstp = NULL;
    }
}
2092
2093 static void
2094 get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
2095 {
2096     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2097
2098     if (ofproto->rstp) {
2099         s->enabled = true;
2100         s->root_id = rstp_get_root_id(ofproto->rstp);
2101         s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
2102         s->designated_id = rstp_get_designated_id(ofproto->rstp);
2103         s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
2104         s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
2105         s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
2106     } else {
2107         s->enabled = false;
2108     }
2109 }
2110
/* Synchronizes 'ofport''s cached RSTP state with the rstp module, flushing
 * learned MACs, revalidating flows, and updating the OpenFlow port state
 * bits as needed. */
static void
update_rstp_port_state(struct ofport_dpif *ofport)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    enum rstp_state state;

    /* Figure out new state. */
    state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
        : RSTP_DISABLED;

    /* Update state. */
    if (ofport->rstp_state != state) {
        enum ofputil_port_state of_state;
        bool fwd_change;

        VLOG_DBG("port %s: RSTP state changed from %s to %s",
                 netdev_get_name(ofport->up.netdev),
                 rstp_state_name(ofport->rstp_state),
                 rstp_state_name(state));

        /* If learning ability changed, flush the MACs learned on this
         * bundle -- unless a root shift is in progress and this port is the
         * old root, in which case rstp_run() migrates the entries instead. */
        if (rstp_learn_in_state(ofport->rstp_state)
            != rstp_learn_in_state(state)) {
            /* XXX: Learning action flows should also be flushed. */
            if (ofport->bundle) {
                if (!rstp_shift_root_learned_address(ofproto->rstp)
                    || rstp_get_old_root_aux(ofproto->rstp) != ofport) {
                    bundle_flush_macs(ofport->bundle, false);
                }
            }
        }
        fwd_change = rstp_forward_in_state(ofport->rstp_state)
            != rstp_forward_in_state(state);

        ofproto->backer->need_revalidate = REV_RSTP;
        ofport->rstp_state = state;

        /* A forwarding change may alter the bundle's floodability. */
        if (fwd_change && ofport->bundle) {
            bundle_update(ofport->bundle);
        }

        /* Update the RSTP state bits in the OpenFlow port description. */
        of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
        of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
                : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
                : state == RSTP_DISCARDING ?  OFPUTIL_PS_STP_LISTEN
                : 0);
        ofproto_port_set_state(&ofport->up, of_state);
    }
}
2160
/* Runs periodic RSTP processing for 'ofproto': ticks the protocol timers,
 * propagates port state changes, handles FDB flush requests, and migrates
 * learned MACs when the root port moved. */
static void
rstp_run(struct ofproto_dpif *ofproto)
{
    if (ofproto->rstp) {
        long long int now = time_msec();
        long long int elapsed = now - ofproto->rstp_last_tick;
        struct rstp_port *rp;
        struct ofport_dpif *ofport;

        /* Every second, decrease the values of the timers. */
        if (elapsed >= 1000) {
            rstp_tick_timers(ofproto->rstp);
            ofproto->rstp_last_tick = now;
        }
        /* Propagate any port state transitions that the rstp module
         * accumulated since the last run. */
        rp = NULL;
        while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
            update_rstp_port_state(ofport);
        }
        rp = NULL;
        ofport = NULL;
        /* FIXME: This check should be done on-event (i.e., when setting
         * p->fdb_flush) and not periodically.
         */
        while ((ofport = rstp_check_and_reset_fdb_flush(ofproto->rstp, &rp))) {
            /* Skip the flush for the old root port during a root shift;
             * its addresses are moved wholesale below instead. */
            if (!rstp_shift_root_learned_address(ofproto->rstp)
                || rstp_get_old_root_aux(ofproto->rstp) != ofport) {
                bundle_flush_macs(ofport->bundle, false);
            }
        }

        /* On a root shift, move the MAC entries learned on the old root's
         * bundle over to the new root's bundle. */
        if (rstp_shift_root_learned_address(ofproto->rstp)) {
            bundle_move(((struct ofport_dpif *)rstp_get_old_root_aux(ofproto->rstp))->bundle,
                        ((struct ofport_dpif *)rstp_get_new_root_aux(ofproto->rstp))->bundle);
            rstp_reset_root_changed(ofproto->rstp);
        }
    }
}
2198
/* Configures STP on 'ofproto_' using the settings defined in 's'.
 * A NULL 's' disables STP, detaching every port and releasing the
 * bridge-wide stp object.  Returns 0 (always succeeds). */
static int
set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    /* Only revalidate flows if the configuration changed. */
    if (!s != !ofproto->stp) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
    }

    if (s) {
        /* Create the bridge STP instance on first enablement, then
         * (re)apply every tunable from 's'. */
        if (!ofproto->stp) {
            ofproto->stp = stp_create(ofproto_->name, s->system_id,
                                      send_bpdu_cb, ofproto);
            ofproto->stp_last_tick = time_msec();
        }

        stp_set_bridge_id(ofproto->stp, s->system_id);
        stp_set_bridge_priority(ofproto->stp, s->priority);
        stp_set_hello_time(ofproto->stp, s->hello_time);
        stp_set_max_age(ofproto->stp, s->max_age);
        stp_set_forward_delay(ofproto->stp, s->fwd_delay);
    }  else {
        /* Disable: detach every port first, then drop our reference. */
        struct ofport *ofport;

        HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
            set_stp_port(ofport, NULL);
        }

        stp_unref(ofproto->stp);
        ofproto->stp = NULL;
    }

    return 0;
}
2235
2236 static int
2237 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2238 {
2239     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2240
2241     if (ofproto->stp) {
2242         s->enabled = true;
2243         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2244         s->designated_root = stp_get_designated_root(ofproto->stp);
2245         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2246     } else {
2247         s->enabled = false;
2248     }
2249
2250     return 0;
2251 }
2252
/* Synchronizes 'ofport''s cached STP state with the stp module, flushing
 * MAC-learning and multicast-snooping tables, revalidating flows, and
 * updating the OpenFlow port state bits as needed. */
static void
update_stp_port_state(struct ofport_dpif *ofport)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    enum stp_state state;

    /* Figure out new state. */
    state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
                             : STP_DISABLED;

    /* Update state. */
    if (ofport->stp_state != state) {
        enum ofputil_port_state of_state;
        bool fwd_change;

        VLOG_DBG("port %s: STP state changed from %s to %s",
                 netdev_get_name(ofport->up.netdev),
                 stp_state_name(ofport->stp_state),
                 stp_state_name(state));
        /* A change in learning ability invalidates everything learned so
         * far: flush the MAC table and multicast snooping database. */
        if (stp_learn_in_state(ofport->stp_state)
                != stp_learn_in_state(state)) {
            /* xxx Learning action flows should also be flushed. */
            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
            mac_learning_flush(ofproto->ml);
            ovs_rwlock_unlock(&ofproto->ml->rwlock);
            mcast_snooping_mdb_flush(ofproto->ms);
        }
        fwd_change = stp_forward_in_state(ofport->stp_state)
                        != stp_forward_in_state(state);

        ofproto->backer->need_revalidate = REV_STP;
        ofport->stp_state = state;
        ofport->stp_state_entered = time_msec();

        /* A forwarding change may alter the bundle's floodability. */
        if (fwd_change && ofport->bundle) {
            bundle_update(ofport->bundle);
        }

        /* Update the STP state bits in the OpenFlow port description. */
        of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
        of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
                     : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
                     : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
                     : state == STP_BLOCKING ?  OFPUTIL_PS_STP_BLOCK
                     : 0);
        ofproto_port_set_state(&ofport->up, of_state);
    }
}
2301
/* Configures STP on 'ofport_' using the settings defined in 's'.  The
 * caller is responsible for assigning STP port numbers and ensuring
 * there are no duplicates.  A NULL or disabled 's' detaches the port from
 * STP.  Returns 0 (always succeeds). */
static int
set_stp_port(struct ofport *ofport_,
             const struct ofproto_port_stp_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct stp_port *sp = ofport->stp_port;

    if (!s || !s->enable) {
        if (sp) {
            /* Detach and let update_stp_port_state() see STP_DISABLED. */
            ofport->stp_port = NULL;
            stp_port_disable(sp);
            update_stp_port_state(ofport);
        }
        return 0;
    } else if (sp && stp_port_no(sp) != s->port_num
               && ofport == stp_port_get_aux(sp)) {
        /* The port-id changed, so disable the old one if it's not
         * already in use by another port. */
        stp_port_disable(sp);
    }

    sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);

    /* Set name before enabling the port so that debugging messages can print
     * the name. */
    stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
    stp_port_enable(sp);

    stp_port_set_aux(sp, ofport);
    stp_port_set_priority(sp, s->priority);
    stp_port_set_path_cost(sp, s->path_cost);

    update_stp_port_state(ofport);

    return 0;
}
2342
2343 static int
2344 get_stp_port_status(struct ofport *ofport_,
2345                     struct ofproto_port_stp_status *s)
2346 {
2347     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2348     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2349     struct stp_port *sp = ofport->stp_port;
2350
2351     if (!ofproto->stp || !sp) {
2352         s->enabled = false;
2353         return 0;
2354     }
2355
2356     s->enabled = true;
2357     s->port_id = stp_port_get_id(sp);
2358     s->state = stp_port_get_state(sp);
2359     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2360     s->role = stp_port_get_role(sp);
2361
2362     return 0;
2363 }
2364
2365 static int
2366 get_stp_port_stats(struct ofport *ofport_,
2367                    struct ofproto_port_stp_stats *s)
2368 {
2369     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2370     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2371     struct stp_port *sp = ofport->stp_port;
2372
2373     if (!ofproto->stp || !sp) {
2374         s->enabled = false;
2375         return 0;
2376     }
2377
2378     s->enabled = true;
2379     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2380
2381     return 0;
2382 }
2383
/* Runs periodic STP processing for 'ofproto': ticks the protocol timers,
 * propagates port state changes, and flushes the MAC-learning and
 * multicast-snooping tables when the protocol requests it. */
static void
stp_run(struct ofproto_dpif *ofproto)
{
    if (ofproto->stp) {
        long long int now = time_msec();
        long long int elapsed = now - ofproto->stp_last_tick;
        struct stp_port *sp;

        if (elapsed > 0) {
            /* stp_tick() takes an int; clamp very long gaps to INT_MAX. */
            stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
            ofproto->stp_last_tick = now;
        }
        /* Propagate any port state transitions that the stp module
         * accumulated since the last run. */
        while (stp_get_changed_port(ofproto->stp, &sp)) {
            struct ofport_dpif *ofport = stp_port_get_aux(sp);

            if (ofport) {
                update_stp_port_state(ofport);
            }
        }

        /* A topology change requires relearning: flush the MAC table and
         * the multicast snooping database. */
        if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
            ovs_rwlock_wrlock(&ofproto->ml->rwlock);
            mac_learning_flush(ofproto->ml);
            ovs_rwlock_unlock(&ofproto->ml->rwlock);
            mcast_snooping_mdb_flush(ofproto->ms);
        }
    }
}
2412
2413 static void
2414 stp_wait(struct ofproto_dpif *ofproto)
2415 {
2416     if (ofproto->stp) {
2417         poll_timer_wait(1000);
2418     }
2419 }
2420
/* Configures RSTP on 'ofport_' using the settings defined in 's'.  The
 * caller is responsible for assigning RSTP port numbers and ensuring
 * there are no duplicates.  A NULL or disabled 's' detaches the port from
 * RSTP. */
static void
set_rstp_port(struct ofport *ofport_,
              const struct ofproto_port_rstp_settings *s)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
    struct rstp_port *rp = ofport->rstp_port;

    if (!s || !s->enable) {
        if (rp) {
            /* Drop our reference and let update_rstp_port_state() see
             * RSTP_DISABLED. */
            rstp_port_unref(rp);
            ofport->rstp_port = NULL;
            update_rstp_port_state(ofport);
        }
        return;
    }

    /* Check if need to add a new port. */
    if (!rp) {
        rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
    }

    rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
                  s->admin_edge_port, s->auto_edge,
                  s->admin_p2p_mac_state, s->admin_port_state, s->mcheck,
                  ofport);
    update_rstp_port_state(ofport);
    /* Synchronize operational status. */
    rstp_port_set_mac_operational(rp, ofport->may_enable);
}
2454
2455 static void
2456 get_rstp_port_status(struct ofport *ofport_,
2457         struct ofproto_port_rstp_status *s)
2458 {
2459     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2460     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2461     struct rstp_port *rp = ofport->rstp_port;
2462
2463     if (!ofproto->rstp || !rp) {
2464         s->enabled = false;
2465         return;
2466     }
2467
2468     s->enabled = true;
2469     rstp_port_get_status(rp, &s->port_id, &s->state, &s->role,
2470                          &s->designated_bridge_id, &s->designated_port_id,
2471                          &s->designated_path_cost, &s->tx_count,
2472                          &s->rx_count, &s->error_count, &s->uptime);
2473 }
2474
2475 \f
2476 static int
2477 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2478            size_t n_qdscp)
2479 {
2480     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2481     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2482
2483     if (ofport->n_qdscp != n_qdscp
2484         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2485                               n_qdscp * sizeof *qdscp))) {
2486         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2487         free(ofport->qdscp);
2488         ofport->qdscp = n_qdscp
2489             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2490             : NULL;
2491         ofport->n_qdscp = n_qdscp;
2492     }
2493
2494     return 0;
2495 }
2496 \f
2497 /* Bundles. */
2498
2499 /* Expires all MAC learning entries associated with 'bundle' and forces its
2500  * ofproto to revalidate every flow.
2501  *
2502  * Normally MAC learning entries are removed only from the ofproto associated
2503  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
2504  * are removed from every ofproto.  When patch ports and SLB bonds are in use
2505  * and a VM migration happens and the gratuitous ARPs are somehow lost, this
2506  * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
2507  * with the host from which it migrated. */
static void
bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
{
    struct ofproto_dpif *ofproto = bundle->ofproto;
    struct mac_learning *ml = ofproto->ml;
    struct mac_entry *mac, *next_mac;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    /* SAFE iteration: mac_learning_expire() removes entries from the LRU
     * list we are walking. */
    ovs_rwlock_wrlock(&ml->rwlock);
    LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
        if (mac->port.p == bundle) {
            if (all_ofprotos) {
                /* Expire the same MAC/VLAN entry from every other ofproto
                 * as well (see the function comment above for why). */
                struct ofproto_dpif *o;

                HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
                    if (o != ofproto) {
                        struct mac_entry *e;

                        /* Each ofproto's MAC table has its own lock; our
                         * lock on 'ml' does not cover 'o->ml'. */
                        ovs_rwlock_wrlock(&o->ml->rwlock);
                        e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
                        if (e) {
                            mac_learning_expire(o->ml, e);
                        }
                        ovs_rwlock_unlock(&o->ml->rwlock);
                    }
                }
            }

            mac_learning_expire(ml, mac);
        }
    }
    ovs_rwlock_unlock(&ml->rwlock);
}
2541
2542 static void
2543 bundle_move(struct ofbundle *old, struct ofbundle *new)
2544 {
2545     struct ofproto_dpif *ofproto = old->ofproto;
2546     struct mac_learning *ml = ofproto->ml;
2547     struct mac_entry *mac, *next_mac;
2548
2549     ovs_assert(new->ofproto == old->ofproto);
2550
2551     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2552     ovs_rwlock_wrlock(&ml->rwlock);
2553     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2554         if (mac->port.p == old) {
2555             mac->port.p = new;
2556         }
2557     }
2558     ovs_rwlock_unlock(&ml->rwlock);
2559 }
2560
2561 static struct ofbundle *
2562 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2563 {
2564     struct ofbundle *bundle;
2565
2566     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2567                              &ofproto->bundles) {
2568         if (bundle->aux == aux) {
2569             return bundle;
2570         }
2571     }
2572     return NULL;
2573 }
2574
2575 static void
2576 bundle_update(struct ofbundle *bundle)
2577 {
2578     struct ofport_dpif *port;
2579
2580     bundle->floodable = true;
2581     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2582         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2583             || port->is_layer3
2584             || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
2585             || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
2586             bundle->floodable = false;
2587             break;
2588         }
2589     }
2590 }
2591
/* Removes 'port' from its bundle, unregistering it from the bundle's LACP
 * and bond modules and recomputing the bundle's floodability. */
static void
bundle_del_port(struct ofport_dpif *port)
{
    struct ofbundle *bundle = port->bundle;

    bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;

    list_remove(&port->bundle_node);
    port->bundle = NULL;

    if (bundle->lacp) {
        lacp_slave_unregister(bundle->lacp, port);
    }
    if (bundle->bond) {
        bond_slave_unregister(bundle->bond, port);
    }

    /* The departing port may have been the one keeping floodability off. */
    bundle_update(bundle);
}
2611
/* Adds the port numbered 'ofp_port' to 'bundle', moving it out of any other
 * bundle first, and registers it with LACP if 'lacp' is nonnull.
 *
 * Returns false if no such port exists, true otherwise. */
static bool
bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
                struct lacp_slave_settings *lacp)
{
    struct ofport_dpif *port;

    port = get_ofp_port(bundle->ofproto, ofp_port);
    if (!port) {
        return false;
    }

    if (port->bundle != bundle) {
        bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
        if (port->bundle) {
            bundle_remove(&port->up);
        }

        port->bundle = bundle;
        list_push_back(&bundle->ports, &port->bundle_node);
        /* The new member can only clear floodability, never set it; a full
         * recompute happens via bundle_update() elsewhere. */
        if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
            || port->is_layer3
            || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
            || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
            bundle->floodable = false;
        }
    }
    if (lacp) {
        bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
        lacp_slave_register(bundle->lacp, port, lacp);
    }

    return true;
}
2645
/* Destroys 'bundle', detaching all of its ports, flushing learned MACs for
 * it, and releasing all associated state (mirror registration, xlate state,
 * LACP, bond).  A null 'bundle' is a no-op. */
static void
bundle_destroy(struct ofbundle *bundle)
{
    struct ofproto_dpif *ofproto;
    struct ofport_dpif *port, *next_port;

    if (!bundle) {
        return;
    }

    ofproto = bundle->ofproto;
    mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);

    /* Remove the bundle from the translation module inside a transaction so
     * concurrent translations see a consistent view. */
    xlate_txn_start();
    xlate_bundle_remove(bundle);
    xlate_txn_commit();

    /* SAFE variant: bundle_del_port() unlinks 'port' from the list. */
    LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
        bundle_del_port(port);
    }

    bundle_flush_macs(bundle, true);
    hmap_remove(&ofproto->bundles, &bundle->hmap_node);
    free(bundle->name);
    free(bundle->trunks);
    lacp_unref(bundle->lacp);
    bond_unref(bundle->bond);
    free(bundle);
}
2675
/* ofproto class callback: creates, reconfigures, or (when 's' is null)
 * destroys the bundle identified by client pointer 'aux' on 'ofproto_'.
 * Applies the name, LACP, member-port set, VLAN mode/tag/trunks, and bonding
 * configuration from 's'.  Returns 0 on success, EINVAL if none of the
 * requested slave ports exist. */
static int
bundle_set(struct ofproto *ofproto_, void *aux,
           const struct ofproto_bundle_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    bool need_flush = false;        /* MAC-learning flush required? */
    struct ofport_dpif *port;
    struct ofbundle *bundle;
    unsigned long *trunks;          /* 4096-bit VLAN bitmap, possibly owned. */
    int vlan;
    size_t i;
    bool ok;

    if (!s) {
        /* Null settings mean "delete this bundle". */
        bundle_destroy(bundle_lookup(ofproto, aux));
        return 0;
    }

    ovs_assert(s->n_slaves == 1 || s->bond != NULL);
    ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));

    bundle = bundle_lookup(ofproto, aux);
    if (!bundle) {
        /* First configuration of this bundle: create it with defaults. */
        bundle = xmalloc(sizeof *bundle);

        bundle->ofproto = ofproto;
        hmap_insert(&ofproto->bundles, &bundle->hmap_node,
                    hash_pointer(aux, 0));
        bundle->aux = aux;
        bundle->name = NULL;

        list_init(&bundle->ports);
        bundle->vlan_mode = PORT_VLAN_TRUNK;
        bundle->vlan = -1;
        bundle->trunks = NULL;
        bundle->use_priority_tags = s->use_priority_tags;
        bundle->lacp = NULL;
        bundle->bond = NULL;

        bundle->floodable = true;
        mbridge_register_bundle(ofproto->mbridge, bundle);
    }

    if (!bundle->name || strcmp(s->name, bundle->name)) {
        free(bundle->name);
        bundle->name = xstrdup(s->name);
    }

    /* LACP. */
    if (s->lacp) {
        ofproto->lacp_enabled = true;
        if (!bundle->lacp) {
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
            bundle->lacp = lacp_create();
        }
        lacp_configure(bundle->lacp, s->lacp);
    } else {
        lacp_unref(bundle->lacp);
        bundle->lacp = NULL;
    }

    /* Update set of ports. */
    ok = true;
    for (i = 0; i < s->n_slaves; i++) {
        if (!bundle_add_port(bundle, s->slaves[i],
                             s->lacp ? &s->lacp_slaves[i] : NULL)) {
            ok = false;
        }
    }
    if (!ok || list_size(&bundle->ports) != s->n_slaves) {
        /* The bundle contains ports that are not in 's': drop them. */
        struct ofport_dpif *next_port;

        LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
            for (i = 0; i < s->n_slaves; i++) {
                if (s->slaves[i] == port->up.ofp_port) {
                    goto found;
                }
            }

            bundle_del_port(port);
        found: ;
        }
    }
    ovs_assert(list_size(&bundle->ports) <= s->n_slaves);

    if (list_is_empty(&bundle->ports)) {
        /* None of the requested ports exist; a bundle cannot be empty. */
        bundle_destroy(bundle);
        return EINVAL;
    }

    /* Set VLAN tagging mode */
    if (s->vlan_mode != bundle->vlan_mode
        || s->use_priority_tags != bundle->use_priority_tags) {
        bundle->vlan_mode = s->vlan_mode;
        bundle->use_priority_tags = s->use_priority_tags;
        need_flush = true;
    }

    /* Set VLAN tag.  Trunk ports carry no tag; out-of-range tags fall back
     * to VLAN 0. */
    vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
            : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
            : 0);
    if (vlan != bundle->vlan) {
        bundle->vlan = vlan;
        need_flush = true;
    }

    /* Get trunked VLANs. */
    switch (s->vlan_mode) {
    case PORT_VLAN_ACCESS:
        trunks = NULL;
        break;

    case PORT_VLAN_TRUNK:
        trunks = CONST_CAST(unsigned long *, s->trunks);
        break;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        if (vlan != 0 && (!s->trunks
                          || !bitmap_is_set(s->trunks, vlan)
                          || bitmap_is_set(s->trunks, 0))) {
            /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
            if (s->trunks) {
                trunks = bitmap_clone(s->trunks, 4096);
            } else {
                trunks = bitmap_allocate1(4096);
            }
            bitmap_set1(trunks, vlan);
            bitmap_set0(trunks, 0);
        } else {
            trunks = CONST_CAST(unsigned long *, s->trunks);
        }
        break;

    default:
        OVS_NOT_REACHED();
    }
    if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
        free(bundle->trunks);
        if (trunks == s->trunks) {
            /* Caller owns 's->trunks'; keep our own copy. */
            bundle->trunks = vlan_bitmap_clone(trunks);
        } else {
            /* We allocated 'trunks' above; transfer ownership to bundle. */
            bundle->trunks = trunks;
            trunks = NULL;
        }
        need_flush = true;
    }
    if (trunks != s->trunks) {
        /* Free any copy we made that the bundle did not take over. */
        free(trunks);
    }

    /* Bonding. */
    if (!list_is_short(&bundle->ports)) {
        /* Two or more members: this bundle is a bond. */
        bundle->ofproto->has_bonded_bundles = true;
        if (bundle->bond) {
            if (bond_reconfigure(bundle->bond, s->bond)) {
                ofproto->backer->need_revalidate = REV_RECONFIGURE;
            }
        } else {
            bundle->bond = bond_create(s->bond, ofproto);
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
        }

        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_register(bundle->bond, port,
                                port->up.ofp_port, port->up.netdev);
        }
    } else {
        bond_unref(bundle->bond);
        bundle->bond = NULL;
    }

    /* If we changed something that would affect MAC learning, un-learn
     * everything on this port and force flow revalidation. */
    if (need_flush) {
        bundle_flush_macs(bundle, false);
    }

    return 0;
}
2857
2858 static void
2859 bundle_remove(struct ofport *port_)
2860 {
2861     struct ofport_dpif *port = ofport_dpif_cast(port_);
2862     struct ofbundle *bundle = port->bundle;
2863
2864     if (bundle) {
2865         bundle_del_port(port);
2866         if (list_is_empty(&bundle->ports)) {
2867             bundle_destroy(bundle);
2868         } else if (list_is_short(&bundle->ports)) {
2869             bond_unref(bundle->bond);
2870             bundle->bond = NULL;
2871         }
2872     }
2873 }
2874
2875 static void
2876 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2877 {
2878     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2879     struct ofport_dpif *port = port_;
2880     uint8_t ea[ETH_ADDR_LEN];
2881     int error;
2882
2883     error = netdev_get_etheraddr(port->up.netdev, ea);
2884     if (!error) {
2885         struct ofpbuf packet;
2886         void *packet_pdu;
2887
2888         ofpbuf_init(&packet, 0);
2889         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2890                                  pdu_size);
2891         memcpy(packet_pdu, pdu, pdu_size);
2892
2893         ofproto_dpif_send_packet(port, &packet);
2894         ofpbuf_uninit(&packet);
2895     } else {
2896         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2897                     "%s (%s)", port->bundle->name,
2898                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2899     }
2900 }
2901
/* Sends a gratuitous learning packet on 'bundle''s bond for every MAC entry
 * learned on some other bundle, so peers re-learn those addresses on the
 * active bond member. */
static void
bundle_send_learning_packets(struct ofbundle *bundle)
{
    struct ofproto_dpif *ofproto = bundle->ofproto;
    struct ofpbuf *learning_packet;
    int error, n_packets, n_errors;
    struct mac_entry *e;
    struct list packets;

    /* Phase 1: compose all packets while holding the MAC table read lock,
     * but do not send yet -- sending may be slow and must not be done under
     * the lock. */
    list_init(&packets);
    ovs_rwlock_rdlock(&ofproto->ml->rwlock);
    LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
        if (e->port.p != bundle) {
            void *port_void;

            learning_packet = bond_compose_learning_packet(bundle->bond,
                                                           e->mac, e->vlan,
                                                           &port_void);
            /* Temporarily use 'frame' as a private pointer (see below). */
            ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
            learning_packet->frame = port_void;
            list_push_back(&packets, &learning_packet->list_node);
        }
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    /* Phase 2: send the composed packets, recovering the output port that
     * was smuggled through 'frame' above. */
    error = n_packets = n_errors = 0;
    LIST_FOR_EACH (learning_packet, list_node, &packets) {
        int ret;
        void *port_void = learning_packet->frame;

        /* Restore 'frame'. */
        learning_packet->frame = ofpbuf_data(learning_packet);
        ret = ofproto_dpif_send_packet(port_void, learning_packet);
        if (ret) {
            error = ret;        /* Remember only the most recent error. */
            n_errors++;
        }
        n_packets++;
    }
    ofpbuf_list_delete(&packets);

    if (n_errors) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
                     "packets, last error was: %s",
                     bundle->name, n_errors, n_packets, ovs_strerror(error));
    } else {
        VLOG_DBG("bond %s: sent %d gratuitous learning packets",
                 bundle->name, n_packets);
    }
}
2954
/* Performs periodic processing for 'bundle': runs LACP, propagates each
 * member's enable state into the bond, and triggers learning packets or
 * revalidation as the bond requests. */
static void
bundle_run(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_run(bundle->lacp, send_pdu_cb);
    }
    if (bundle->bond) {
        struct ofport_dpif *port;

        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
        }

        /* bond_run() returns true when the active member changed. */
        if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
            bundle->ofproto->backer->need_revalidate = REV_BOND;
        }

        if (bond_should_send_learning_packets(bundle->bond)) {
            bundle_send_learning_packets(bundle);
        }
    }
}
2977
/* Arranges for the poll loop to wake up when 'bundle''s LACP or bond state
 * machines need attention. */
static void
bundle_wait(struct ofbundle *bundle)
{
    if (bundle->lacp) {
        lacp_wait(bundle->lacp);
    }
    if (bundle->bond) {
        bond_wait(bundle->bond);
    }
}
2988 \f
2989 /* Mirrors. */
2990
2991 static int
2992 mirror_set__(struct ofproto *ofproto_, void *aux,
2993              const struct ofproto_mirror_settings *s)
2994 {
2995     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2996     struct ofbundle **srcs, **dsts;
2997     int error;
2998     size_t i;
2999
3000     if (!s) {
3001         mirror_destroy(ofproto->mbridge, aux);
3002         return 0;
3003     }
3004
3005     srcs = xmalloc(s->n_srcs * sizeof *srcs);
3006     dsts = xmalloc(s->n_dsts * sizeof *dsts);
3007
3008     for (i = 0; i < s->n_srcs; i++) {
3009         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
3010     }
3011
3012     for (i = 0; i < s->n_dsts; i++) {
3013         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
3014     }
3015
3016     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
3017                        s->n_dsts, s->src_vlans,
3018                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
3019     free(srcs);
3020     free(dsts);
3021     return error;
3022 }
3023
3024 static int
3025 mirror_get_stats__(struct ofproto *ofproto, void *aux,
3026                    uint64_t *packets, uint64_t *bytes)
3027 {
3028     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
3029                             bytes);
3030 }
3031
3032 static int
3033 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
3034 {
3035     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3036     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
3037     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
3038         mac_learning_flush(ofproto->ml);
3039     }
3040     ovs_rwlock_unlock(&ofproto->ml->rwlock);
3041     return 0;
3042 }
3043
3044 static bool
3045 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
3046 {
3047     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3048     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3049     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
3050 }
3051
3052 static void
3053 forward_bpdu_changed(struct ofproto *ofproto_)
3054 {
3055     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3056     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3057 }
3058
/* ofproto class callback: sets the MAC learning table's idle timeout to
 * 'idle_time' seconds and caps it at 'max_entries' entries. */
static void
set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
                     size_t max_entries)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    mac_learning_set_idle_time(ofproto->ml, idle_time);
    mac_learning_set_max_entries(ofproto->ml, max_entries);
    ovs_rwlock_unlock(&ofproto->ml->rwlock);
}
3069
/* Configures multicast snooping on 'ofproto_' using the settings
 * defined in 's', creating the snooping table on first enable and
 * destroying it when 's' is null. */
static int
set_mcast_snooping(struct ofproto *ofproto_,
                   const struct ofproto_mcast_snooping_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);

    /* Only revalidate flows if the configuration changed. */
    if (!s != !ofproto->ms) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
    }

    if (s) {
        if (!ofproto->ms) {
            ofproto->ms = mcast_snooping_create();
        }

        ovs_rwlock_wrlock(&ofproto->ms->rwlock);
        mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
        mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
        /* Changing unregistered-flood behavior affects datapath flows. */
        if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
        }
        ovs_rwlock_unlock(&ofproto->ms->rwlock);
    } else {
        mcast_snooping_unref(ofproto->ms);
        ofproto->ms = NULL;
    }

    return 0;
}
3102
3103 /* Configures multicast snooping port's flood setting on 'ofproto'. */
3104 static int
3105 set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood)
3106 {
3107     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3108     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3109
3110     if (ofproto->ms) {
3111         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3112         mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle,
3113                                       flood);
3114         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3115     }
3116     return 0;
3117 }
3118
3119 \f
3120 /* Ports. */
3121
3122 static struct ofport_dpif *
3123 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
3124 {
3125     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
3126     return ofport ? ofport_dpif_cast(ofport) : NULL;
3127 }
3128
/* Initializes 'ofproto_port' from 'dpif_port', taking over ownership of
 * 'dpif_port''s 'name' and 'type' strings (the caller must not free them
 * separately afterwards). */
static void
ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
                            struct ofproto_port *ofproto_port,
                            struct dpif_port *dpif_port)
{
    ofproto_port->name = dpif_port->name;
    ofproto_port->type = dpif_port->type;
    ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
}
3138
/* Updates the patch-port peer relationship for 'ofport': clears any stale
 * peer pointer, then searches all ofprotos on the same backer for a patch
 * port whose configured peer name points back at 'ofport', and links the two
 * symmetrically.  No-op for non-patch ports. */
static void
ofport_update_peer(struct ofport_dpif *ofport)
{
    const struct ofproto_dpif *ofproto;
    struct dpif_backer *backer;
    char *peer_name;

    if (!netdev_vport_is_patch(ofport->up.netdev)) {
        return;
    }

    backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
    backer->need_revalidate = REV_RECONFIGURE;

    /* Unlink both directions of any existing pairing. */
    if (ofport->peer) {
        ofport->peer->peer = NULL;
        ofport->peer = NULL;
    }

    peer_name = netdev_vport_patch_peer(ofport->up.netdev);
    if (!peer_name) {
        return;
    }

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        struct ofport *peer_ofport;
        struct ofport_dpif *peer;
        char *peer_peer;

        /* Patch ports can only be paired within the same backer. */
        if (ofproto->backer != backer) {
            continue;
        }

        peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
        if (!peer_ofport) {
            continue;
        }

        /* Only pair up if the candidate's peer name refers back to us. */
        peer = ofport_dpif_cast(peer_ofport);
        peer_peer = netdev_vport_patch_peer(peer->up.netdev);
        if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
                                 peer_peer)) {
            ofport->peer = peer;
            ofport->peer->peer = ofport;
        }
        free(peer_peer);

        break;
    }
    free(peer_name);
}
3190
/* Performs periodic processing for 'ofport': combines carrier, CFM, BFD and
 * LACP status into a single "may enable" flag and triggers revalidation when
 * that flag changes. */
static void
port_run(struct ofport_dpif *ofport)
{
    long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
    bool carrier_changed = carrier_seq != ofport->carrier_seq;
    bool enable = netdev_get_carrier(ofport->up.netdev);
    bool cfm_enable = false;
    bool bfd_enable = false;

    ofport->carrier_seq = carrier_seq;

    if (ofport->cfm) {
        /* cfm_get_opup() returns -1 when operational state is unknown. */
        int cfm_opup = cfm_get_opup(ofport->cfm);

        cfm_enable = !cfm_get_fault(ofport->cfm);

        if (cfm_opup >= 0) {
            cfm_enable = cfm_enable && cfm_opup;
        }
    }

    if (ofport->bfd) {
        bfd_enable = bfd_forwarding(ofport->bfd);
    }

    /* When both CFM and BFD are configured, either one reporting healthy is
     * sufficient. */
    if (ofport->bfd || ofport->cfm) {
        enable = enable && (cfm_enable || bfd_enable);
    }

    if (ofport->bundle) {
        enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
        if (carrier_changed) {
            lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
        }
    }

    if (ofport->may_enable != enable) {
        struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);

        ofproto->backer->need_revalidate = REV_PORT_TOGGLED;

        if (ofport->rstp_port) {
            rstp_port_set_mac_operational(ofport->rstp_port, enable);
        }
    }

    ofport->may_enable = enable;
}
3239
/* ofproto class callback: fills '*ofproto_port' with information about the
 * port named 'devname'.  Ghost ports (patch and tunnel ports with no real
 * datapath port) are answered from the netdev layer; real ports are queried
 * from the datapath.  Returns 0 on success, ENODEV if no such port exists,
 * or another positive errno value. */
static int
port_query_by_name(const struct ofproto *ofproto_, const char *devname,
                   struct ofproto_port *ofproto_port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct dpif_port dpif_port;
    int error;

    if (sset_contains(&ofproto->ghost_ports, devname)) {
        const char *type = netdev_get_type_from_name(devname);

        /* We may be called before ofproto->up.port_by_name is populated with
         * the appropriate ofport.  For this reason, we must get the name and
         * type from the netdev layer directly. */
        if (type) {
            const struct ofport *ofport;

            ofport = shash_find_data(&ofproto->up.port_by_name, devname);
            ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
            ofproto_port->name = xstrdup(devname);
            ofproto_port->type = xstrdup(type);
            return 0;
        }
        return ENODEV;
    }

    if (!sset_contains(&ofproto->ports, devname)) {
        return ENODEV;
    }
    error = dpif_port_query_by_name(ofproto->backer->dpif,
                                    devname, &dpif_port);
    if (!error) {
        /* Transfers ownership of dpif_port's strings to 'ofproto_port'. */
        ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
    }
    return error;
}
3276
/* ofproto class callback: adds 'netdev' as a port of 'ofproto_'.  Patch
 * ports are tracked only as ghost ports; other ports are added to the
 * backer's datapath (unless already present there).  Returns 0 on success or
 * a positive errno value. */
static int
port_add(struct ofproto *ofproto_, struct netdev *netdev)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    const char *devname = netdev_get_name(netdev);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;

    if (netdev_vport_is_patch(netdev)) {
        /* Patch ports never exist in the datapath. */
        sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
        return 0;
    }

    dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
    if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
        odp_port_t port_no = ODPP_NONE;
        int error;

        error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
        if (error) {
            return error;
        }
        if (netdev_get_tunnel_config(netdev)) {
            /* Remember the datapath port so it can be shared by all bridges
             * on this backer. */
            simap_put(&ofproto->backer->tnl_backers,
                      dp_port_name, odp_to_u32(port_no));
        }
    }

    if (netdev_get_tunnel_config(netdev)) {
        sset_add(&ofproto->ghost_ports, devname);
    } else {
        sset_add(&ofproto->ports, devname);
    }
    return 0;
}
3312
/* ofproto class callback: deletes the port with OpenFlow port number
 * 'ofp_port' from 'ofproto_', removing its datapath port when it has one.
 * Returns 0 on success (including when the port does not exist) or a
 * positive errno value from the datapath. */
static int
port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
    int error = 0;

    if (!ofport) {
        return 0;
    }

    sset_find_and_delete(&ofproto->ghost_ports,
                         netdev_get_name(ofport->up.netdev));
    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    /* Tunnel and patch ports have no backing datapath port to delete. */
    if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
        error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
        if (!error) {
            /* The caller is going to close ofport->up.netdev.  If this is a
             * bonded port, then the bond is using that netdev, so remove it
             * from the bond.  The client will need to reconfigure everything
             * after deleting ports, so then the slave will get re-added. */
            bundle_remove(&ofport->up);
        }
    }
    return error;
}
3339
/* ofproto class callback: retrieves netdev statistics for 'ofport_' into
 * '*stats'.  For the local port, folds in packets that the bridge itself
 * generated or consumed so that they appear to have traversed OFPP_LOCAL.
 * Returns 0 on success or a positive errno value. */
static int
port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
{
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
    int error;

    error = netdev_get_stats(ofport->up.netdev, stats);

    if (!error && ofport_->ofp_port == OFPP_LOCAL) {
        struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);

        ovs_mutex_lock(&ofproto->stats_mutex);
        /* ofproto->stats.tx_packets represents packets that we created
         * internally and sent to some port (e.g. packets sent with
         * ofproto_dpif_send_packet()).  Account for them as if they had
         * come from OFPP_LOCAL and got forwarded. */

        /* UINT64_MAX marks a counter the netdev does not support. */
        if (stats->rx_packets != UINT64_MAX) {
            stats->rx_packets += ofproto->stats.tx_packets;
        }

        if (stats->rx_bytes != UINT64_MAX) {
            stats->rx_bytes += ofproto->stats.tx_bytes;
        }

        /* ofproto->stats.rx_packets represents packets that were received on
         * some port and we processed internally and dropped (e.g. STP).
         * Account for them as if they had been forwarded to OFPP_LOCAL. */

        if (stats->tx_packets != UINT64_MAX) {
            stats->tx_packets += ofproto->stats.rx_packets;
        }

        if (stats->tx_bytes != UINT64_MAX) {
            stats->tx_bytes += ofproto->stats.rx_bytes;
        }
        ovs_mutex_unlock(&ofproto->stats_mutex);
    }

    return error;
}
3381
3382 static int
3383 port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats)
3384 {
3385     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3386     if (ofport->bundle && ofport->bundle->lacp) {
3387         if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) {
3388             return 0;
3389         }
3390     }
3391     return -1;
3392 }
3393
/* State carried across port_dump_next() calls for a port dump. */
struct port_dump_state {
    uint32_t bucket;            /* sset iteration position (bucket). */
    uint32_t offset;            /* sset iteration position (offset). */
    bool ghost;                 /* Dumping ghost_ports (vs. real ports)? */

    struct ofproto_port port;   /* Last port returned, owned by the dump. */
    bool has_port;              /* Is 'port' valid (and to be destroyed)? */
};
3402
3403 static int
3404 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3405 {
3406     *statep = xzalloc(sizeof(struct port_dump_state));
3407     return 0;
3408 }
3409
/* ofproto class callback: retrieves the next port in the dump into '*port'.
 * Iterates the real ports first, then (by recursing once) the ghost ports.
 * Returns 0 on success, EOF when the dump is complete, or a positive errno
 * value on failure. */
static int
port_dump_next(const struct ofproto *ofproto_, void *state_,
               struct ofproto_port *port)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct port_dump_state *state = state_;
    const struct sset *sset;
    struct sset_node *node;

    /* Release the port returned by the previous call. */
    if (state->has_port) {
        ofproto_port_destroy(&state->port);
        state->has_port = false;
    }
    sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
    while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
        int error;

        error = port_query_by_name(ofproto_, node->name, &state->port);
        if (!error) {
            *port = state->port;
            state->has_port = true;
            return 0;
        } else if (error != ENODEV) {
            return error;
        }
        /* ENODEV: the port disappeared while dumping; skip it. */
    }

    if (!state->ghost) {
        /* Real ports exhausted; restart the scan over ghost ports. */
        state->ghost = true;
        state->bucket = 0;
        state->offset = 0;
        return port_dump_next(ofproto_, state_, port);
    }

    return EOF;
}
3446
/* ofproto class callback: completes a port dump, freeing the state allocated
 * by port_dump_start(). */
static int
port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
{
    struct port_dump_state *state = state_;

    if (state->has_port) {
        ofproto_port_destroy(&state->port);
    }
    free(state);
    return 0;
}
3458
3459 static int
3460 port_poll(const struct ofproto *ofproto_, char **devnamep)
3461 {
3462     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3463
3464     if (ofproto->port_poll_errno) {
3465         int error = ofproto->port_poll_errno;
3466         ofproto->port_poll_errno = 0;
3467         return error;
3468     }
3469
3470     if (sset_is_empty(&ofproto->port_poll_set)) {
3471         return EAGAIN;
3472     }
3473
3474     *devnamep = sset_pop(&ofproto->port_poll_set);
3475     return 0;
3476 }
3477
3478 static void
3479 port_poll_wait(const struct ofproto *ofproto_)
3480 {
3481     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3482     dpif_port_poll_wait(ofproto->backer->dpif);
3483 }
3484
3485 static int
3486 port_is_lacp_current(const struct ofport *ofport_)
3487 {
3488     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3489     return (ofport->bundle && ofport->bundle->lacp
3490             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3491             : -1);
3492 }
3493 \f
/* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
 * then delete it entirely. */
static void
rule_expire(struct rule_dpif *rule)
    OVS_REQUIRES(ofproto_mutex)
{
    uint16_t hard_timeout, idle_timeout;
    long long int now = time_msec();
    int reason = -1;                /* -1 means "not expired". */

    hard_timeout = rule->up.hard_timeout;
    idle_timeout = rule->up.idle_timeout;

    /* Has 'rule' expired? */
    if (hard_timeout) {
        long long int modified;

        /* 'modified' is protected by the rule's own mutex. */
        ovs_mutex_lock(&rule->up.mutex);
        modified = rule->up.modified;
        ovs_mutex_unlock(&rule->up.mutex);

        if (now > modified + hard_timeout * 1000) {
            reason = OFPRR_HARD_TIMEOUT;
        }
    }

    /* Hard timeout takes precedence; check idle timeout only otherwise. */
    if (reason < 0 && idle_timeout) {
        long long int used;

        ovs_mutex_lock(&rule->stats_mutex);
        used = rule->stats.used;
        ovs_mutex_unlock(&rule->stats_mutex);

        if (now > used + idle_timeout * 1000) {
            reason = OFPRR_IDLE_TIMEOUT;
        }
    }

    if (reason >= 0) {
        COVERAGE_INC(ofproto_dpif_expired);
        ofproto_rule_expire(&rule->up, reason);
    }
}
3537
/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
 * 'flow' must reflect the data in 'packet'.  Exactly one of 'rule' and
 * 'ofpacts' must be nonnull.  Credits statistics to 'rule' when given.
 * Returns 0 on success or a positive errno value from dpif_execute(). */
int
ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
                             const struct flow *flow,
                             struct rule_dpif *rule,
                             const struct ofpact *ofpacts, size_t ofpacts_len,
                             struct ofpbuf *packet)
{
    struct dpif_flow_stats stats;
    struct xlate_out xout;
    struct xlate_in xin;
    ofp_port_t in_port;
    struct dpif_execute execute;
    int error;

    ovs_assert((rule != NULL) != (ofpacts != NULL));

    dpif_flow_stats_extract(flow, packet, time_msec(), &stats);

    if (rule) {
        rule_dpif_credit_stats(rule, &stats);
    }

    /* Translate the OpenFlow actions into datapath actions. */
    xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
                  stats.tcp_flags, packet);
    xin.ofpacts = ofpacts;
    xin.ofpacts_len = ofpacts_len;
    xin.resubmit_stats = &stats;
    xlate_actions(&xin, &xout);

    execute.actions = ofpbuf_data(xout.odp_actions);
    execute.actions_len = ofpbuf_size(xout.odp_actions);

    execute.packet = packet;
    execute.md = pkt_metadata_from_flow(flow);
    /* SLOW_ACTION translations need userspace help to execute. */
    execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
    execute.probe = false;

    /* Fix up in_port. */
    in_port = flow->in_port.ofp_port;
    if (in_port == OFPP_NONE) {
        in_port = OFPP_LOCAL;
    }
    execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);

    error = dpif_execute(ofproto->backer->dpif, &execute);

    xlate_out_uninit(&xout);

    return error;
}
3590
3591 void
3592 rule_dpif_credit_stats(struct rule_dpif *rule,
3593                        const struct dpif_flow_stats *stats)
3594 {
3595     ovs_mutex_lock(&rule->stats_mutex);
3596     rule->stats.n_packets += stats->n_packets;
3597     rule->stats.n_bytes += stats->n_bytes;
3598     rule->stats.used = MAX(rule->stats.used, stats->used);
3599     ovs_mutex_unlock(&rule->stats_mutex);
3600 }
3601
3602 ovs_be64
3603 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
3604     OVS_REQUIRES(rule->up.mutex)
3605 {
3606     return rule->up.flow_cookie;
3607 }
3608
3609 void
3610 rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
3611                      uint16_t hard_timeout)
3612 {
3613     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
3614 }
3615
3616 /* Returns 'rule''s actions.  The returned actions are RCU-protected, and can
3617  * be read until the calling thread quiesces. */
3618 const struct rule_actions *
3619 rule_dpif_get_actions(const struct rule_dpif *rule)
3620 {
3621     return rule_get_actions(&rule->up);
3622 }
3623
3624 /* Sets 'rule''s recirculation id. */
3625 static void
3626 rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
3627     OVS_REQUIRES(rule->up.mutex)
3628 {
3629     ovs_assert(!rule->recirc_id);
3630     rule->recirc_id = id;
3631 }
3632
3633 /* Returns 'rule''s recirculation id. */
3634 uint32_t
3635 rule_dpif_get_recirc_id(struct rule_dpif *rule)
3636     OVS_REQUIRES(rule->up.mutex)
3637 {
3638     if (!rule->recirc_id) {
3639         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3640
3641         rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
3642     }
3643     return rule->recirc_id;
3644 }
3645
3646 /* Sets 'rule''s recirculation id. */
3647 void
3648 rule_set_recirc_id(struct rule *rule_, uint32_t id)
3649 {
3650     struct rule_dpif *rule = rule_dpif_cast(rule_);
3651
3652     ovs_mutex_lock(&rule->up.mutex);
3653     rule_dpif_set_recirc_id(rule, id);
3654     ovs_mutex_unlock(&rule->up.mutex);
3655 }
3656
3657 /* Lookup 'flow' in table 0 of 'ofproto''s classifier.
3658  * If 'wc' is non-null, sets the fields that were relevant as part of
3659  * the lookup. Returns the table id where a match or miss occurred via
3660  * 'table_id'.  This will be zero unless there was a miss and
3661  * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
3662  * where misses occur, or TBL_INTERNAL if the rule has a non-zero
3663  * recirculation ID, and a match was found in the internal table, or if
3664  * there was no match and one of the special rules (drop_frags_rule,
3665  * miss_rule, or no_packet_in_rule) was returned.
3666  *
3667  * The return value is the found rule, which is valid at least until the next
3668  * RCU quiescent period.  If the rule needs to stay around longer,
3669  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3670  * on it before this returns. */
3671 struct rule_dpif *
3672 rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
3673                  struct flow_wildcards *wc, bool take_ref,
3674                  const struct dpif_flow_stats *stats, uint8_t *table_id)
3675 {
3676     *table_id = 0;
3677
3678     if (ofproto_dpif_get_enable_recirc(ofproto)) {
3679         /* Always exactly match recirc_id since datapath supports
3680          * recirculation.  */
3681         if (wc) {
3682             wc->masks.recirc_id = UINT32_MAX;
3683         }
3684         if (flow->recirc_id) {
3685             /* Start looking up from internal table for post recirculation
3686              * flows or packets. */
3687             *table_id = TBL_INTERNAL;
3688         }
3689     }
3690
3691     return rule_dpif_lookup_from_table(ofproto, flow, wc, take_ref, stats,
3692                                        table_id, flow->in_port.ofp_port, true,
3693                                        true);
3694 }
3695
3696 /* The returned rule (if any) is valid at least until the next RCU quiescent
3697  * period.  If the rule needs to stay around longer, a non-zero 'take_ref'
3698  * must be passed in to cause a reference to be taken on it. */
3699 static struct rule_dpif *
3700 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
3701                           const struct flow *flow, struct flow_wildcards *wc,
3702                           bool take_ref)
3703 {
3704     struct classifier *cls = &ofproto->up.tables[table_id].cls;
3705     const struct cls_rule *cls_rule;
3706     struct rule_dpif *rule;
3707
3708     do {
3709         cls_rule = classifier_lookup(cls, flow, wc);
3710
3711         rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3712
3713         /* Try again if the rule was released before we get the reference. */
3714     } while (rule && take_ref && !rule_dpif_try_ref(rule));
3715
3716     return rule;
3717 }
3718
/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
 * Returns the rule that was found, which may be one of the special rules
 * according to packet miss handling.  If 'may_packet_in' is false, returning of
 * the miss_rule (which issues packet ins for the controller) is avoided.
 * Updates 'wc', if nonnull, to reflect the fields that were used during the
 * lookup.
 *
 * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
 * if none is found then the table miss configuration for that table is
 * honored, which can result in additional lookups in other OpenFlow tables.
 * In this case the function updates '*table_id' to reflect the final OpenFlow
 * table that was searched.
 *
 * If 'honor_table_miss' is false, then only one table lookup occurs, in
 * '*table_id'.
 *
 * The rule is returned in '*rule', which is valid at least until the next
 * RCU quiescent period.  If the '*rule' needs to stay around longer,
 * a non-zero 'take_ref' must be passed in to cause a reference to be taken
 * on it before this returns.
 *
 * 'in_port' allows the lookup to take place as if the in port had the value
 * 'in_port'.  This is needed for resubmit action support. */
struct rule_dpif *
rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow,
                            struct flow_wildcards *wc, bool take_ref,
                            const struct dpif_flow_stats *stats,
                            uint8_t *table_id, ofp_port_t in_port,
                            bool may_packet_in, bool honor_table_miss)
{
    /* Fields of 'flow' that are overwritten below are saved here so that they
     * can be restored before returning. */
    ovs_be16 old_tp_src = flow->tp_src, old_tp_dst = flow->tp_dst;
    ofp_port_t old_in_port = flow->in_port.ofp_port;
    enum ofputil_table_miss miss_config;
    struct rule_dpif *rule;
    uint8_t next_id;

    /* We always unwildcard nw_frag (for IP), so they
     * need not be unwildcarded here. */
    if (flow->nw_frag & FLOW_NW_FRAG_ANY
        && ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
        if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
            /* We must pretend that transport ports are unavailable. */
            flow->tp_src = htons(0);
            flow->tp_dst = htons(0);
        } else {
            /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
             * Use the drop_frags_rule (which cannot disappear). */
            rule = ofproto->drop_frags_rule;
            if (take_ref) {
                rule_dpif_ref(rule);
            }
            if (stats) {
                struct oftable *tbl = &ofproto->up.tables[*table_id];
                unsigned long orig;

                atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig);
            }
            return rule;
        }
    }

    /* Look up a flow with 'in_port' as the input port.  Then restore the
     * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
     * have surprising behavior). */
    flow->in_port.ofp_port = in_port;

    /* Our current implementation depends on n_tables == N_TABLES, and
     * TBL_INTERNAL being the last table. */
    BUILD_ASSERT_DECL(N_TABLES == TBL_INTERNAL + 1);

    miss_config = OFPUTIL_TABLE_MISS_CONTINUE;

    /* Walk the tables (skipping TBL_INTERNAL) until a match is found or the
     * table-miss configuration stops the search. */
    for (next_id = *table_id;
         next_id < ofproto->up.n_tables;
         next_id++, next_id += (next_id == TBL_INTERNAL))
    {
        *table_id = next_id;
        rule = rule_dpif_lookup_in_table(ofproto, next_id, flow, wc, take_ref);
        if (stats) {
            struct oftable *tbl = &ofproto->up.tables[next_id];
            unsigned long orig;

            /* Update the per-table match/miss counter. */
            atomic_add_relaxed(rule ? &tbl->n_matched : &tbl->n_missed,
                               stats->n_packets, &orig);
        }
        if (rule) {
            goto out;   /* Match. */
        }
        if (honor_table_miss) {
            miss_config = ofproto_table_get_miss_config(&ofproto->up,
                                                        *table_id);
            if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE) {
                continue;
            }
        }
        break;
    }
    /* Miss. */
    rule = ofproto->no_packet_in_rule;
    if (may_packet_in) {
        if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE
            || miss_config == OFPUTIL_TABLE_MISS_CONTROLLER) {
            struct ofport_dpif *port;

            port = get_ofp_port(ofproto, old_in_port);
            if (!port) {
                VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
                             old_in_port);
            } else if (!(port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN)) {
                rule = ofproto->miss_rule;
            }
        } else if (miss_config == OFPUTIL_TABLE_MISS_DEFAULT &&
                   connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
            rule = ofproto->miss_rule;
        }
    }
    if (take_ref) {
        rule_dpif_ref(rule);
    }
out:
    /* Restore port numbers, as they may have been modified above. */
    flow->tp_src = old_tp_src;
    flow->tp_dst = old_tp_dst;
    /* Restore the old in port. */
    flow->in_port.ofp_port = old_in_port;

    return rule;
}
3847
3848 static void
3849 complete_operation(struct rule_dpif *rule)
3850     OVS_REQUIRES(ofproto_mutex)
3851 {
3852     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3853
3854     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3855 }
3856
3857 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
3858 {
3859     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
3860 }
3861
3862 static struct rule *
3863 rule_alloc(void)
3864 {
3865     struct rule_dpif *rule = xmalloc(sizeof *rule);
3866     return &rule->up;
3867 }
3868
/* Frees the storage obtained by rule_alloc(). */
static void
rule_dealloc(struct rule *rule_)
{
    free(rule_dpif_cast(rule_));
}
3875
3876 static enum ofperr
3877 rule_construct(struct rule *rule_)
3878     OVS_NO_THREAD_SAFETY_ANALYSIS
3879 {
3880     struct rule_dpif *rule = rule_dpif_cast(rule_);
3881     ovs_mutex_init_adaptive(&rule->stats_mutex);
3882     rule->stats.n_packets = 0;
3883     rule->stats.n_bytes = 0;
3884     rule->stats.used = rule->up.modified;
3885     rule->recirc_id = 0;
3886
3887     return 0;
3888 }
3889
3890 static enum ofperr
3891 rule_insert(struct rule *rule_)
3892     OVS_REQUIRES(ofproto_mutex)
3893 {
3894     struct rule_dpif *rule = rule_dpif_cast(rule_);
3895     complete_operation(rule);
3896     return 0;
3897 }
3898
3899 static void
3900 rule_delete(struct rule *rule_)
3901     OVS_REQUIRES(ofproto_mutex)
3902 {
3903     struct rule_dpif *rule = rule_dpif_cast(rule_);
3904     complete_operation(rule);
3905 }
3906
3907 static void
3908 rule_destruct(struct rule *rule_)
3909 {
3910     struct rule_dpif *rule = rule_dpif_cast(rule_);
3911
3912     ovs_mutex_destroy(&rule->stats_mutex);
3913     if (rule->recirc_id) {
3914         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3915
3916         ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
3917     }
3918 }
3919
3920 static void
3921 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
3922                long long int *used)
3923 {
3924     struct rule_dpif *rule = rule_dpif_cast(rule_);
3925
3926     ovs_mutex_lock(&rule->stats_mutex);
3927     *packets = rule->stats.n_packets;
3928     *bytes = rule->stats.n_bytes;
3929     *used = rule->stats.used;
3930     ovs_mutex_unlock(&rule->stats_mutex);
3931 }
3932
3933 static void
3934 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
3935                   struct ofpbuf *packet)
3936 {
3937     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3938
3939     ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
3940 }
3941
3942 static enum ofperr
3943 rule_execute(struct rule *rule, const struct flow *flow,
3944              struct ofpbuf *packet)
3945 {
3946     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
3947     ofpbuf_delete(packet);
3948     return 0;
3949 }
3950
3951 static void
3952 rule_modify_actions(struct rule *rule_, bool reset_counters)
3953     OVS_REQUIRES(ofproto_mutex)
3954 {
3955     struct rule_dpif *rule = rule_dpif_cast(rule_);
3956
3957     if (reset_counters) {
3958         ovs_mutex_lock(&rule->stats_mutex);
3959         rule->stats.n_packets = 0;
3960         rule->stats.n_bytes = 0;
3961         ovs_mutex_unlock(&rule->stats_mutex);
3962     }
3963
3964     complete_operation(rule);
3965 }
3966
3967 static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
3968 {
3969     return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
3970 }
3971
3972 static struct ofgroup *
3973 group_alloc(void)
3974 {
3975     struct group_dpif *group = xzalloc(sizeof *group);
3976     return &group->up;
3977 }
3978
/* Frees the storage obtained by group_alloc(). */
static void
group_dealloc(struct ofgroup *group_)
{
    free(group_dpif_cast(group_));
}
3985
3986 static void
3987 group_construct_stats(struct group_dpif *group)
3988     OVS_REQUIRES(group->stats_mutex)
3989 {
3990     struct ofputil_bucket *bucket;
3991     const struct list *buckets;
3992
3993     group->packet_count = 0;
3994     group->byte_count = 0;
3995
3996     group_dpif_get_buckets(group, &buckets);
3997     LIST_FOR_EACH (bucket, list_node, buckets) {
3998         bucket->stats.packet_count = 0;
3999         bucket->stats.byte_count = 0;
4000     }
4001 }
4002
4003 void
4004 group_dpif_credit_stats(struct group_dpif *group,
4005                         struct ofputil_bucket *bucket,
4006                         const struct dpif_flow_stats *stats)
4007 {
4008     ovs_mutex_lock(&group->stats_mutex);
4009     group->packet_count += stats->n_packets;
4010     group->byte_count += stats->n_bytes;
4011     if (bucket) {
4012         bucket->stats.packet_count += stats->n_packets;
4013         bucket->stats.byte_count += stats->n_bytes;
4014     } else { /* Credit to all buckets */
4015         const struct list *buckets;
4016
4017         group_dpif_get_buckets(group, &buckets);
4018         LIST_FOR_EACH (bucket, list_node, buckets) {
4019             bucket->stats.packet_count += stats->n_packets;
4020             bucket->stats.byte_count += stats->n_bytes;
4021         }
4022     }
4023     ovs_mutex_unlock(&group->stats_mutex);
4024 }
4025
/* Initializes the dpif-private parts of a newly added group: validates its
 * buckets and zeroes its statistics.  Returns 0 on success or an OpenFlow
 * error. */
static enum ofperr
group_construct(struct ofgroup *group_)
{
    struct group_dpif *group = group_dpif_cast(group_);
    const struct ofputil_bucket *bucket;

    /* Prevent group chaining because our locking structure makes it hard to
     * implement deadlock-free.  (See xlate_group_resource_check().) */
    LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
        const struct ofpact *a;

        OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
            if (a->type == OFPACT_GROUP) {
                /* Reject any bucket whose actions forward to another group. */
                return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
            }
        }
    }

    /* Zero the group and per-bucket counters under the new mutex. */
    ovs_mutex_init_adaptive(&group->stats_mutex);
    ovs_mutex_lock(&group->stats_mutex);
    group_construct_stats(group);
    ovs_mutex_unlock(&group->stats_mutex);
    return 0;
}
4050
4051 static void
4052 group_destruct(struct ofgroup *group_)
4053 {
4054     struct group_dpif *group = group_dpif_cast(group_);
4055     ovs_mutex_destroy(&group->stats_mutex);
4056 }
4057
4058 static enum ofperr
4059 group_modify(struct ofgroup *group_)
4060 {
4061     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
4062
4063     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
4064
4065     return 0;
4066 }
4067
4068 static enum ofperr
4069 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
4070 {
4071     struct group_dpif *group = group_dpif_cast(group_);
4072     struct ofputil_bucket *bucket;
4073     const struct list *buckets;
4074     struct bucket_counter *bucket_stats;
4075
4076     ovs_mutex_lock(&group->stats_mutex);
4077     ogs->packet_count = group->packet_count;
4078     ogs->byte_count = group->byte_count;
4079
4080     group_dpif_get_buckets(group, &buckets);
4081     bucket_stats = ogs->bucket_stats;
4082     LIST_FOR_EACH (bucket, list_node, buckets) {
4083         bucket_stats->packet_count = bucket->stats.packet_count;
4084         bucket_stats->byte_count = bucket->stats.byte_count;
4085         bucket_stats++;
4086     }
4087     ovs_mutex_unlock(&group->stats_mutex);
4088
4089     return 0;
4090 }
4091
4092 /* If the group exists, this function increments the groups's reference count.
4093  *
4094  * Make sure to call group_dpif_unref() after no longer needing to maintain
4095  * a reference to the group. */
4096 bool
4097 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
4098                   struct group_dpif **group)
4099 {
4100     struct ofgroup *ofgroup;
4101     bool found;
4102
4103     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
4104     *group = found ?  group_dpif_cast(ofgroup) : NULL;
4105
4106     return found;
4107 }
4108
4109 void
4110 group_dpif_get_buckets(const struct group_dpif *group,
4111                        const struct list **buckets)
4112 {
4113     *buckets = &group->up.buckets;
4114 }
4115
4116 enum ofp11_group_type
4117 group_dpif_get_type(const struct group_dpif *group)
4118 {
4119     return group->up.type;
4120 }
4121 \f
4122 /* Sends 'packet' out 'ofport'.
4123  * May modify 'packet'.
4124  * Returns 0 if successful, otherwise a positive errno value. */
4125 int
4126 ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4127 {
4128     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4129     int error;
4130
4131     error = xlate_send_packet(ofport, packet);
4132
4133     ovs_mutex_lock(&ofproto->stats_mutex);
4134     ofproto->stats.tx_packets++;
4135     ofproto->stats.tx_bytes += ofpbuf_size(packet);
4136     ovs_mutex_unlock(&ofproto->stats_mutex);
4137     return error;
4138 }
4139 \f
4140 /* Return the version string of the datapath that backs up
4141  * this 'ofproto'.
4142  */
4143 static const char *
4144 get_datapath_version(const struct ofproto *ofproto_)
4145 {
4146     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4147
4148     return ofproto->backer->dp_version_string;
4149 }
4150
4151 static bool
4152 set_frag_handling(struct ofproto *ofproto_,
4153                   enum ofp_config_flags frag_handling)
4154 {
4155     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4156     if (frag_handling != OFPC_FRAG_REASM) {
4157         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4158         return true;
4159     } else {
4160         return false;
4161     }
4162 }
4163
4164 static enum ofperr
4165 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
4166            const struct flow *flow,
4167            const struct ofpact *ofpacts, size_t ofpacts_len)
4168 {
4169     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4170
4171     ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
4172                                  ofpacts_len, packet);
4173     return 0;
4174 }
4175 \f
4176 /* NetFlow. */
4177
4178 static int
4179 set_netflow(struct ofproto *ofproto_,
4180             const struct netflow_options *netflow_options)
4181 {
4182     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4183
4184     if (netflow_options) {
4185         if (!ofproto->netflow) {
4186             ofproto->netflow = netflow_create();
4187             ofproto->backer->need_revalidate = REV_RECONFIGURE;
4188         }
4189         return netflow_set_options(ofproto->netflow, netflow_options);
4190     } else if (ofproto->netflow) {
4191         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4192         netflow_unref(ofproto->netflow);
4193         ofproto->netflow = NULL;
4194     }
4195
4196     return 0;
4197 }
4198
4199 static void
4200 get_netflow_ids(const struct ofproto *ofproto_,
4201                 uint8_t *engine_type, uint8_t *engine_id)
4202 {
4203     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4204
4205     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
4206 }
4207 \f
4208 static struct ofproto_dpif *
4209 ofproto_dpif_lookup(const char *name)
4210 {
4211     struct ofproto_dpif *ofproto;
4212
4213     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
4214                              hash_string(name, 0), &all_ofproto_dpifs) {
4215         if (!strcmp(ofproto->up.name, name)) {
4216             return ofproto;
4217         }
4218     }
4219     return NULL;
4220 }
4221
4222 static void
4223 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
4224                           const char *argv[], void *aux OVS_UNUSED)
4225 {
4226     struct ofproto_dpif *ofproto;
4227
4228     if (argc > 1) {
4229         ofproto = ofproto_dpif_lookup(argv[1]);
4230         if (!ofproto) {
4231             unixctl_command_reply_error(conn, "no such bridge");
4232             return;
4233         }
4234         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4235         mac_learning_flush(ofproto->ml);
4236         ovs_rwlock_unlock(&ofproto->ml->rwlock);
4237     } else {
4238         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4239             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4240             mac_learning_flush(ofproto->ml);
4241             ovs_rwlock_unlock(&ofproto->ml->rwlock);
4242         }
4243     }
4244
4245     unixctl_command_reply(conn, "table successfully flushed");
4246 }
4247
4248 static void
4249 ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
4250                                      const char *argv[], void *aux OVS_UNUSED)
4251 {
4252     struct ofproto_dpif *ofproto;
4253
4254     if (argc > 1) {
4255         ofproto = ofproto_dpif_lookup(argv[1]);
4256         if (!ofproto) {
4257             unixctl_command_reply_error(conn, "no such bridge");
4258             return;
4259         }
4260
4261         if (!mcast_snooping_enabled(ofproto->ms)) {
4262             unixctl_command_reply_error(conn, "multicast snooping is disabled");
4263             return;
4264         }
4265         mcast_snooping_mdb_flush(ofproto->ms);
4266     } else {
4267         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4268             if (!mcast_snooping_enabled(ofproto->ms)) {
4269                 continue;
4270             }
4271             mcast_snooping_mdb_flush(ofproto->ms);
4272         }
4273     }
4274
4275     unixctl_command_reply(conn, "table successfully flushed");
4276 }
4277
4278 static struct ofport_dpif *
4279 ofbundle_get_a_port(const struct ofbundle *bundle)
4280 {
4281     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
4282                         bundle_node);
4283 }
4284
4285 static void
4286 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4287                          const char *argv[], void *aux OVS_UNUSED)
4288 {
4289     struct ds ds = DS_EMPTY_INITIALIZER;
4290     const struct ofproto_dpif *ofproto;
4291     const struct mac_entry *e;
4292
4293     ofproto = ofproto_dpif_lookup(argv[1]);
4294     if (!ofproto) {
4295         unixctl_command_reply_error(conn, "no such bridge");
4296         return;
4297     }
4298
4299     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
4300     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
4301     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
4302         struct ofbundle *bundle = e->port.p;
4303         char name[OFP_MAX_PORT_NAME_LEN];
4304
4305         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4306                                name, sizeof name);
4307         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
4308                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
4309                       mac_entry_age(ofproto->ml, e));
4310     }
4311     ovs_rwlock_unlock(&ofproto->ml->rwlock);
4312     unixctl_command_reply(conn, ds_cstr(&ds));
4313     ds_destroy(&ds);
4314 }
4315
4316 static void
4317 ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
4318                                     int argc OVS_UNUSED,
4319                                     const char *argv[],
4320                                     void *aux OVS_UNUSED)
4321 {
4322     struct ds ds = DS_EMPTY_INITIALIZER;
4323     const struct ofproto_dpif *ofproto;
4324     const struct ofbundle *bundle;
4325     const struct mcast_group *grp;
4326     struct mcast_group_bundle *b;
4327     struct mcast_mrouter_bundle *mrouter;
4328
4329     ofproto = ofproto_dpif_lookup(argv[1]);
4330     if (!ofproto) {
4331         unixctl_command_reply_error(conn, "no such bridge");
4332         return;
4333     }
4334
4335     if (!mcast_snooping_enabled(ofproto->ms)) {
4336         unixctl_command_reply_error(conn, "multicast snooping is disabled");
4337         return;
4338     }
4339
4340     ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
4341     ovs_rwlock_rdlock(&ofproto->ms->rwlock);
4342     LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
4343         LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
4344             char name[OFP_MAX_PORT_NAME_LEN];
4345
4346             bundle = b->port;
4347             ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4348                                    name, sizeof name);
4349             ds_put_format(&ds, "%5s  %4d  "IP_FMT"         %3d\n",
4350                           name, grp->vlan, IP_ARGS(grp->ip4),
4351                           mcast_bundle_age(ofproto->ms, b));
4352         }
4353     }
4354
4355     /* ports connected to multicast routers */
4356     LIST_FOR_EACH(mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
4357         char name[OFP_MAX_PORT_NAME_LEN];
4358
4359         bundle = mrouter->port;
4360         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4361                                name, sizeof name);
4362             ds_put_format(&ds, "%5s  %4d  querier             %3d\n",
4363                       name, mrouter->vlan,
4364                       mcast_mrouter_age(ofproto->ms, mrouter));
4365     }
4366     ovs_rwlock_unlock(&ofproto->ms->rwlock);
4367     unixctl_command_reply(conn, ds_cstr(&ds));
4368     ds_destroy(&ds);
4369 }
4370
/* State carried through an ofproto/trace translation so that the xlate
 * callbacks below can report intermediate results. */
struct trace_ctx {
    struct xlate_out xout;      /* Translation output. */
    struct xlate_in xin;        /* Translation input. */
    const struct flow *key;     /* The flow the trace started from. */
    struct flow flow;           /* Last flow printed, to suppress repeats. */
    struct flow_wildcards wc;   /* Wildcards accumulated across resubmits. */
    struct ds *result;          /* Accumulated trace output text. */
};
4379
4380 static void
4381 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
4382 {
4383     const struct rule_actions *actions;
4384     ovs_be64 cookie;
4385
4386     ds_put_char_multiple(result, '\t', level);
4387     if (!rule) {
4388         ds_put_cstr(result, "No match\n");
4389         return;
4390     }
4391
4392     ovs_mutex_lock(&rule->up.mutex);
4393     cookie = rule->up.flow_cookie;
4394     ovs_mutex_unlock(&rule->up.mutex);
4395
4396     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
4397                   rule ? rule->up.table_id : 0, ntohll(cookie));
4398     cls_rule_format(&rule->up.cr, result);
4399     ds_put_char(result, '\n');
4400
4401     actions = rule_dpif_get_actions(rule);
4402
4403     ds_put_char_multiple(result, '\t', level);
4404     ds_put_cstr(result, "OpenFlow actions=");
4405     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
4406     ds_put_char(result, '\n');
4407 }
4408
4409 static void
4410 trace_format_flow(struct ds *result, int level, const char *title,
4411                   struct trace_ctx *trace)
4412 {
4413     ds_put_char_multiple(result, '\t', level);
4414     ds_put_format(result, "%s: ", title);
4415     /* Do not report unchanged flows for resubmits. */
4416     if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
4417         || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
4418         ds_put_cstr(result, "unchanged");
4419     } else {
4420         flow_format(result, &trace->xin.flow);
4421         trace->flow = trace->xin.flow;
4422     }
4423     ds_put_char(result, '\n');
4424 }
4425
4426 static void
4427 trace_format_regs(struct ds *result, int level, const char *title,
4428                   struct trace_ctx *trace)
4429 {
4430     size_t i;
4431
4432     ds_put_char_multiple(result, '\t', level);
4433     ds_put_format(result, "%s:", title);
4434     for (i = 0; i < FLOW_N_REGS; i++) {
4435         ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
4436     }
4437     ds_put_char(result, '\n');
4438 }
4439
4440 static void
4441 trace_format_odp(struct ds *result, int level, const char *title,
4442                  struct trace_ctx *trace)
4443 {
4444     struct ofpbuf *odp_actions = trace->xout.odp_actions;
4445
4446     ds_put_char_multiple(result, '\t', level);
4447     ds_put_format(result, "%s: ", title);
4448     format_odp_actions(result, ofpbuf_data(odp_actions),
4449                                ofpbuf_size(odp_actions));
4450     ds_put_char(result, '\n');
4451 }
4452
4453 static void
4454 trace_format_megaflow(struct ds *result, int level, const char *title,
4455                       struct trace_ctx *trace)
4456 {
4457     struct match match;
4458
4459     ds_put_char_multiple(result, '\t', level);
4460     ds_put_format(result, "%s: ", title);
4461     flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
4462     match_init(&match, trace->key, &trace->wc);
4463     match_format(&match, result, OFP_DEFAULT_PRIORITY);
4464     ds_put_char(result, '\n');
4465 }
4466
4467 static void trace_report(struct xlate_in *xin, const char *s, int recurse);
4468
/* xlate resubmit hook: invoked for each rule lookup performed during
 * translation.  Appends a description of the matched rule (and, on nested
 * resubmits, the current flow/regs/odp/megaflow state) to the trace output.
 *
 * 'recurse' is the resubmit depth: 0 for the initial lookup, greater for
 * resubmits; it doubles as the indentation level for the output. */
static void
trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
{
    struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
    struct ds *result = trace->result;

    if (!recurse) {
        /* At top level, explain hits on the special internal rules, whose
         * purpose would otherwise not be apparent from the trace. */
        if (rule == xin->ofproto->miss_rule) {
            trace_report(xin, "No match, flow generates \"packet in\"s.",
                         recurse);
        } else if (rule == xin->ofproto->no_packet_in_rule) {
            trace_report(xin, "No match, packets dropped because "
                         "OFPPC_NO_PACKET_IN is set on in_port.", recurse);
        } else if (rule == xin->ofproto->drop_frags_rule) {
            trace_report(xin, "Packets dropped because they are IP "
                         "fragments and the fragment handling mode is "
                         "\"drop\".", recurse);
        }
    }

    ds_put_char(result, '\n');
    if (recurse) {
        trace_format_flow(result, recurse, "Resubmitted flow", trace);
        trace_format_regs(result, recurse, "Resubmitted regs", trace);
        trace_format_odp(result,  recurse, "Resubmitted  odp", trace);
        trace_format_megaflow(result, recurse, "Resubmitted megaflow", trace);
    }
    trace_format_rule(result, recurse, rule);
}
4498
4499 static void
4500 trace_report(struct xlate_in *xin, const char *s, int recurse)
4501 {
4502     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4503     struct ds *result = trace->result;
4504
4505     ds_put_char_multiple(result, '\t', recurse);
4506     ds_put_cstr(result, s);
4507     ds_put_char(result, '\n');
4508 }
4509
/* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
 * forms are supported:
 *
 *     - [dpname] odp_flow [-generate | packet]
 *     - bridge br_flow [-generate | packet]
 *
 * On success, initializes '*ofprotop' and 'flow' and returns NULL.  On failure
 * returns a nonnull malloced error message.
 *
 * '*packetp' is always initialized: on success the caller owns it (it may be
 * null if no packet was requested) and must free it with ofpbuf_delete(); on
 * failure it is set to NULL. */
static char * WARN_UNUSED_RESULT
parse_flow_and_packet(int argc, const char *argv[],
                      struct ofproto_dpif **ofprotop, struct flow *flow,
                      struct ofpbuf **packetp)
{
    const struct dpif_backer *backer = NULL;
    const char *error = NULL;   /* Static error message, copied at 'exit'. */
    char *m_err = NULL;         /* Malloced error message, returned as-is. */
    struct simap port_names = SIMAP_INITIALIZER(&port_names);
    struct ofpbuf *packet;
    struct ofpbuf odp_key;
    struct ofpbuf odp_mask;

    ofpbuf_init(&odp_key, 0);
    ofpbuf_init(&odp_mask, 0);

    /* Handle "-generate" or a hex string as the last argument. */
    if (!strcmp(argv[argc - 1], "-generate")) {
        packet = ofpbuf_new(0);
        argc--;
    } else {
        error = eth_from_hex(argv[argc - 1], &packet);
        if (!error) {
            argc--;
        } else if (argc == 4) {
            /* The 3-argument form must end in "-generate' or a hex string. */
            goto exit;
        }
        /* Not necessarily an error: the last argument may simply be the flow
         * string itself rather than a hex packet. */
        error = NULL;
    }

    /* odp_flow can have its in_port specified as a name instead of port no.
     * We do not yet know whether a given flow is a odp_flow or a br_flow.
     * But, to know whether a flow is odp_flow through odp_flow_from_string(),
     * we need to create a simap of name to port no. */
    if (argc == 3) {
        const char *dp_type;
        /* Strip the "ovs-" prefix that datapath names may carry. */
        if (!strncmp(argv[1], "ovs-", 4)) {
            dp_type = argv[1] + 4;
        } else {
            dp_type = argv[1];
        }
        backer = shash_find_data(&all_dpif_backers, dp_type);
    } else if (argc == 2) {
        /* No datapath named: usable only when there is exactly one backer. */
        struct shash_node *node;
        if (shash_count(&all_dpif_backers) == 1) {
            node = shash_first(&all_dpif_backers);
            backer = node->data;
        }
    } else {
        error = "Syntax error";
        goto exit;
    }
    if (backer && backer->dpif) {
        struct dpif_port dpif_port;
        struct dpif_port_dump port_dump;
        DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
            simap_put(&port_names, dpif_port.name,
                      odp_to_u32(dpif_port.port_no));
        }
    }

    /* Parse the flow and determine whether a datapath or
     * bridge is specified. If function odp_flow_key_from_string()
     * returns 0, the flow is a odp_flow. If function
     * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
    if (!odp_flow_from_string(argv[argc - 1], &port_names,
                              &odp_key, &odp_mask)) {
        if (!backer) {
            error = "Cannot find the datapath";
            goto exit;
        }

        if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
                                 flow) == ODP_FIT_ERROR) {
            error = "Failed to parse flow key";
            goto exit;
        }

        /* Map the datapath flow to the bridge that owns its input port. */
        *ofprotop = xlate_lookup_ofproto(backer, flow,
                                         &flow->in_port.ofp_port);
        if (*ofprotop == NULL) {
            error = "Invalid datapath flow";
            goto exit;
        }

        vsp_adjust_flow(*ofprotop, flow, NULL);

    } else {
        char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);

        if (err) {
            m_err = xasprintf("Bad flow syntax: %s", err);
            free(err);
            goto exit;
        } else {
            /* br_flow form requires an explicit bridge name. */
            if (argc != 3) {
                error = "Must specify bridge name";
                goto exit;
            }

            *ofprotop = ofproto_dpif_lookup(argv[1]);
            if (!*ofprotop) {
                error = "Unknown bridge name";
                goto exit;
            }
        }
    }

    /* Generate a packet, if requested. */
    if (packet) {
        if (!ofpbuf_size(packet)) {
            /* "-generate": synthesize a packet matching 'flow'. */
            flow_compose(packet, flow);
        } else {
            struct pkt_metadata md = pkt_metadata_from_flow(flow);

            /* Use the metadata from the flow and the packet argument
             * to reconstruct the flow. */
            flow_extract(packet, &md, flow);
        }
    }

exit:
    /* Prefer the detailed malloced message; otherwise copy static 'error'. */
    if (error && !m_err) {
        m_err = xstrdup(error);
    }
    if (m_err) {
        ofpbuf_delete(packet);
        packet = NULL;
    }
    *packetp = packet;
    ofpbuf_uninit(&odp_key);
    ofpbuf_uninit(&odp_mask);
    simap_destroy(&port_names);
    return m_err;
}
4654
4655 static void
4656 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
4657                       void *aux OVS_UNUSED)
4658 {
4659     struct ofproto_dpif *ofproto;
4660     struct ofpbuf *packet;
4661     char *error;
4662     struct flow flow;
4663
4664     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4665     if (!error) {
4666         struct ds result;
4667
4668         ds_init(&result);
4669         ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
4670         unixctl_command_reply(conn, ds_cstr(&result));
4671         ds_destroy(&result);
4672         ofpbuf_delete(packet);
4673     } else {
4674         unixctl_command_reply_error(conn, error);
4675         free(error);
4676     }
4677 }
4678
/* Handler for "ofproto/trace-packet-out": parses an explicit action list
 * (the last argument) plus a flow and optional packet, validates the actions
 * the same way handle_packet_out() would, then traces the flow with those
 * actions instead of a flow table lookup. */
static void
ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
                              const char *argv[], void *aux OVS_UNUSED)
{
    enum ofputil_protocol usable_protocols;
    struct ofproto_dpif *ofproto;
    bool enforce_consistency;
    struct ofpbuf ofpacts;
    struct ofpbuf *packet;
    struct ds result;
    struct flow flow;
    uint16_t in_port;

    /* Three kinds of error return values! */
    enum ofperr retval;         /* From ofpacts_check*(). */
    char *error;                /* Malloced, from the parsers. */

    packet = NULL;
    ds_init(&result);
    ofpbuf_init(&ofpacts, 0);

    /* Parse actions.  They are always the last argument; consume it so the
     * remaining arguments form a plain flow-and-packet specification. */
    error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
    if (error) {
        unixctl_command_reply_error(conn, error);
        free(error);
        goto exit;
    }

    /* OpenFlow 1.1 and later suggest that the switch enforces certain forms of
     * consistency between the flow and the actions.  With -consistent, we
     * enforce consistency even for a flow supported in OpenFlow 1.0. */
    if (!strcmp(argv[1], "-consistent")) {
        enforce_consistency = true;
        argv++;
        argc--;
    } else {
        enforce_consistency = false;
    }

    error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
    if (error) {
        unixctl_command_reply_error(conn, error);
        free(error);
        goto exit;
    }

    /* Do the same checks as handle_packet_out() in ofproto.c.
     *
     * We pass a 'table_id' of 0 to ofpacts_check(), which isn't
     * strictly correct because these actions aren't in any table, but it's OK
     * because 'table_id' is used only to check goto_table instructions, but
     * packet-outs take a list of actions and therefore it can't include
     * instructions.
     *
     * We skip the "meter" check here because meter is an instruction, not an
     * action, and thus cannot appear in ofpacts. */
    in_port = ofp_to_u16(flow.in_port.ofp_port);
    if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
        unixctl_command_reply_error(conn, "invalid in_port");
        goto exit;
    }
    if (enforce_consistency) {
        retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
                                           &flow, u16_to_ofp(ofproto->up.max_ports),
                                           0, 0, usable_protocols);
    } else {
        retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
                               u16_to_ofp(ofproto->up.max_ports), 0, 0,
                               &usable_protocols);
    }

    if (retval) {
        ds_clear(&result);
        ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
        unixctl_command_reply_error(conn, ds_cstr(&result));
        goto exit;
    }

    ofproto_trace(ofproto, &flow, packet,
                  ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
    unixctl_command_reply(conn, ds_cstr(&result));

exit:
    ds_destroy(&result);
    ofpbuf_delete(packet);
    ofpbuf_uninit(&ofpacts);
}
4767
/* Implements a "trace" through 'ofproto''s flow table, appending a textual
 * description of the results to 'ds'.
 *
 * The trace follows a packet with the specified 'flow' through the flow
 * table.  'packet' may be nonnull to trace an actual packet, with consequent
 * side effects (if it is nonnull then its flow must be 'flow').
 *
 * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
 * trace, otherwise the actions are determined by a flow table lookup. */
static void
ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
              const struct ofpbuf *packet,
              const struct ofpact ofpacts[], size_t ofpacts_len,
              struct ds *ds)
{
    struct trace_ctx trace;

    ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
    ds_put_cstr(ds, "Flow: ");
    flow_format(ds, flow);
    ds_put_char(ds, '\n');

    /* Start from a catchall so the megaflow narrows only where translation
     * actually looked at the flow. */
    flow_wildcards_init_catchall(&trace.wc);

    trace.result = ds;
    trace.key = flow; /* Original flow key, used for megaflow. */
    trace.flow = *flow; /* May be modified by actions. */
    xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, NULL,
                  ntohs(flow->tcp_flags), packet);
    trace.xin.ofpacts = ofpacts;
    trace.xin.ofpacts_len = ofpacts_len;
    /* Hooks that append per-lookup detail to 'ds' during translation. */
    trace.xin.resubmit_hook = trace_resubmit;
    trace.xin.report_hook = trace_report;

    xlate_actions(&trace.xin, &trace.xout);

    ds_put_char(ds, '\n');
    trace_format_flow(ds, 0, "Final flow", &trace);
    trace_format_megaflow(ds, 0, "Megaflow", &trace);

    ds_put_cstr(ds, "Datapath actions: ");
    format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions),
                       ofpbuf_size(trace.xout.odp_actions));

    if (trace.xout.slow) {
        enum slow_path_reason slow;

        ds_put_cstr(ds, "\nThis flow is handled by the userspace "
                    "slow path because it:");

        /* 'slow' is a bitmap of reasons; explain each set bit. */
        slow = trace.xout.slow;
        while (slow) {
            enum slow_path_reason bit = rightmost_1bit(slow);

            ds_put_format(ds, "\n\t- %s.",
                          slow_path_reason_to_explanation(bit));

            slow &= ~bit;
        }
    }

    xlate_out_uninit(&trace.xout);
}
4831
4832 /* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
4833  * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
4834  * to destroy 'ofproto_shash' and free the returned value. */
4835 static const struct shash_node **
4836 get_ofprotos(struct shash *ofproto_shash)
4837 {
4838     const struct ofproto_dpif *ofproto;
4839
4840     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4841         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
4842         shash_add_nocopy(ofproto_shash, name, ofproto);
4843     }
4844
4845     return shash_sort(ofproto_shash);
4846 }
4847
4848 static void
4849 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
4850                               const char *argv[] OVS_UNUSED,
4851                               void *aux OVS_UNUSED)
4852 {
4853     struct ds ds = DS_EMPTY_INITIALIZER;
4854     struct shash ofproto_shash;
4855     const struct shash_node **sorted_ofprotos;
4856     int i;
4857
4858     shash_init(&ofproto_shash);
4859     sorted_ofprotos = get_ofprotos(&ofproto_shash);
4860     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4861         const struct shash_node *node = sorted_ofprotos[i];
4862         ds_put_format(&ds, "%s\n", node->name);
4863     }
4864
4865     shash_destroy(&ofproto_shash);
4866     free(sorted_ofprotos);
4867
4868     unixctl_command_reply(conn, ds_cstr(&ds));
4869     ds_destroy(&ds);
4870 }
4871
/* Appends to 'ds' a description of 'backer': its datapath-level hit/miss
 * statistics, then each bridge built on it with that bridge's ports, their
 * OpenFlow and datapath port numbers, netdev type, and configuration. */
static void
dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
{
    const struct shash_node **ofprotos;
    struct dpif_dp_stats dp_stats;
    struct shash ofproto_shash;
    size_t i;

    dpif_get_dp_stats(backer->dpif, &dp_stats);

    ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
                  dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);

    shash_init(&ofproto_shash);
    ofprotos = get_ofprotos(&ofproto_shash);
    for (i = 0; i < shash_count(&ofproto_shash); i++) {
        struct ofproto_dpif *ofproto = ofprotos[i]->data;
        const struct shash_node **ports;
        size_t j;

        /* Only report bridges that live on this backer. */
        if (ofproto->backer != backer) {
            continue;
        }

        ds_put_format(ds, "\t%s:\n", ofproto->up.name);

        ports = shash_sort(&ofproto->up.port_by_name);
        for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
            const struct shash_node *node = ports[j];
            struct ofport *ofport = node->data;
            struct smap config;
            odp_port_t odp_port;

            ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
                          ofport->ofp_port);

            odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
            if (odp_port != ODPP_NONE) {
                ds_put_format(ds, "%"PRIu32":", odp_port);
            } else {
                /* Port has no datapath counterpart. */
                ds_put_cstr(ds, "none:");
            }

            ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));

            smap_init(&config);
            if (!netdev_get_config(ofport->netdev, &config)) {
                const struct smap_node **nodes;
                size_t i;    /* NOTE: intentionally shadows the outer 'i'. */

                nodes = smap_sort(&config);
                for (i = 0; i < smap_count(&config); i++) {
                    const struct smap_node *node = nodes[i];
                    /* ':' introduces the first key=value, ',' the rest. */
                    ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
                                  node->key, node->value);
                }
                free(nodes);
            }
            smap_destroy(&config);

            ds_put_char(ds, ')');
            ds_put_char(ds, '\n');
        }
        free(ports);
    }
    shash_destroy(&ofproto_shash);
    free(ofprotos);
}
4940
4941 static void
4942 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4943                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
4944 {
4945     struct ds ds = DS_EMPTY_INITIALIZER;
4946     const struct shash_node **backers;
4947     int i;
4948
4949     backers = shash_sort(&all_dpif_backers);
4950     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
4951         dpif_show_backer(backers[i]->data, &ds);
4952     }
4953     free(backers);
4954
4955     unixctl_command_reply(conn, ds_cstr(&ds));
4956     ds_destroy(&ds);
4957 }
4958
4959 static void
4960 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
4961                                 int argc OVS_UNUSED, const char *argv[],
4962                                 void *aux OVS_UNUSED)
4963 {
4964     const struct ofproto_dpif *ofproto;
4965
4966     struct ds ds = DS_EMPTY_INITIALIZER;
4967     bool verbosity = false;
4968
4969     struct dpif_port dpif_port;
4970     struct dpif_port_dump port_dump;
4971     struct hmap portno_names;
4972
4973     struct dpif_flow_dump *flow_dump;
4974     struct dpif_flow_dump_thread *flow_dump_thread;
4975     struct dpif_flow f;
4976     int error;
4977
4978     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
4979     if (!ofproto) {
4980         unixctl_command_reply_error(conn, "no such bridge");
4981         return;
4982     }
4983
4984     if (argc > 2 && !strcmp(argv[1], "-m")) {
4985         verbosity = true;
4986     }
4987
4988     hmap_init(&portno_names);
4989     DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
4990         odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
4991     }
4992
4993     ds_init(&ds);
4994     flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
4995     flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
4996     while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
4997         struct flow flow;
4998
4999         if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
5000             || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
5001             continue;
5002         }
5003
5004         if (verbosity) {
5005             odp_format_ufid(&f.ufid, &ds);
5006             ds_put_cstr(&ds, " ");
5007         }
5008         odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
5009                         &portno_names, &ds, verbosity);
5010         ds_put_cstr(&ds, ", ");
5011         dpif_flow_stats_format(&f.stats, &ds);
5012         ds_put_cstr(&ds, ", actions:");
5013         format_odp_actions(&ds, f.actions, f.actions_len);
5014         ds_put_char(&ds, '\n');
5015     }
5016     dpif_flow_dump_thread_destroy(flow_dump_thread);
5017     error = dpif_flow_dump_destroy(flow_dump);
5018
5019     if (error) {
5020         ds_clear(&ds);
5021         ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno));
5022         unixctl_command_reply_error(conn, ds_cstr(&ds));
5023     } else {
5024         unixctl_command_reply(conn, ds_cstr(&ds));
5025     }
5026     odp_portno_names_destroy(&portno_names);
5027     hmap_destroy(&portno_names);
5028     ds_destroy(&ds);
5029 }
5030
5031 static void
5032 ofproto_revalidate_all_backers(void)
5033 {
5034     const struct shash_node **backers;
5035     int i;
5036
5037     backers = shash_sort(&all_dpif_backers);
5038     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
5039         struct dpif_backer *backer = backers[i]->data;
5040         backer->need_revalidate = REV_RECONFIGURE;
5041     }
5042     free(backers);
5043 }
5044
5045 static void
5046 disable_tnl_push_pop(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED,
5047                      const char *argv[], void *aux OVS_UNUSED)
5048 {
5049     if (!strcasecmp(argv[1], "off")) {
5050         ofproto_use_tnl_push_pop = false;
5051         unixctl_command_reply(conn, "Tunnel push-pop off");
5052         ofproto_revalidate_all_backers();
5053     } else if (!strcasecmp(argv[1], "on")) {
5054         ofproto_use_tnl_push_pop = true;
5055         unixctl_command_reply(conn, "Tunnel push-pop on");
5056         ofproto_revalidate_all_backers();
5057     }
5058 }
5059
/* Registers this module's unixctl (appctl) commands.  Idempotent: only the
 * first call has any effect. */
static void
ofproto_unixctl_init(void)
{
    static bool registered;
    if (registered) {
        return;
    }
    registered = true;

    /* Flow tracing. */
    unixctl_command_register(
        "ofproto/trace",
        "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
        1, 3, ofproto_unixctl_trace, NULL);
    unixctl_command_register(
        "ofproto/trace-packet-out",
        "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
        2, 6, ofproto_unixctl_trace_actions, NULL);
    /* MAC-learning and multicast-snooping tables. */
    unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
                             ofproto_unixctl_fdb_flush, NULL);
    unixctl_command_register("fdb/show", "bridge", 1, 1,
                             ofproto_unixctl_fdb_show, NULL);
    unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
                             ofproto_unixctl_mcast_snooping_flush, NULL);
    unixctl_command_register("mdb/show", "bridge", 1, 1,
                             ofproto_unixctl_mcast_snooping_show, NULL);
    /* Datapath introspection. */
    unixctl_command_register("dpif/dump-dps", "", 0, 0,
                             ofproto_unixctl_dpif_dump_dps, NULL);
    unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
                             NULL);
    unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
                             ofproto_unixctl_dpif_dump_flows, NULL);

    unixctl_command_register("ofproto/tnl-push-pop", "[on]|[off]", 1, 1,
                             disable_tnl_push_pop, NULL);
}
5095
5096 /* Returns true if 'table' is the table used for internal rules,
5097  * false otherwise. */
5098 bool
5099 table_is_internal(uint8_t table_id)
5100 {
5101     return table_id == TBL_INTERNAL;
5102 }
5103 \f
5104 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
5105  *
5106  * This is deprecated.  It is only for compatibility with broken device drivers
5107  * in old versions of Linux that do not properly support VLANs when VLAN
5108  * devices are not used.  When broken device drivers are no longer in
5109  * widespread use, we will delete these interfaces. */
5110
/* Configures 'ofport_' as a Linux VLAN device: 'realdev_ofp_port' is the
 * OpenFlow port of the underlying "real" device and 'vid' its VLAN ID.
 * A 'realdev_ofp_port' of 0 clears any existing association.  Always
 * returns 0. */
static int
set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);

    /* No-op if the association is unchanged. */
    if (realdev_ofp_port == ofport->realdev_ofp_port
        && vid == ofport->vlandev_vid) {
        return 0;
    }

    ofproto->backer->need_revalidate = REV_RECONFIGURE;

    /* Remove any old record before installing the new one. */
    if (ofport->realdev_ofp_port) {
        vsp_remove(ofport);
    }
    if (realdev_ofp_port && ofport->bundle) {
        /* vlandevs are enslaved to their realdevs, so they are not allowed to
         * themselves be part of a bundle. */
        bundle_set(ofport_->ofproto, ofport->bundle, NULL);
    }

    ofport->realdev_ofp_port = realdev_ofp_port;
    ofport->vlandev_vid = vid;

    if (realdev_ofp_port) {
        vsp_add(ofport, realdev_ofp_port, vid);
    }

    return 0;
}
5142
5143 static uint32_t
5144 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
5145 {
5146     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
5147 }
5148
5149 bool
5150 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
5151     OVS_EXCLUDED(ofproto->vsp_mutex)
5152 {
5153     /* hmap_is_empty is thread safe. */
5154     return !hmap_is_empty(&ofproto->realdev_vid_map);
5155 }
5156
5157
5158 static ofp_port_t
5159 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
5160                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5161     OVS_REQUIRES(ofproto->vsp_mutex)
5162 {
5163     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
5164         int vid = vlan_tci_to_vid(vlan_tci);
5165         const struct vlan_splinter *vsp;
5166
5167         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
5168                                  hash_realdev_vid(realdev_ofp_port, vid),
5169                                  &ofproto->realdev_vid_map) {
5170             if (vsp->realdev_ofp_port == realdev_ofp_port
5171                 && vsp->vid == vid) {
5172                 return vsp->vlandev_ofp_port;
5173             }
5174         }
5175     }
5176     return realdev_ofp_port;
5177 }
5178
5179 /* Returns the OFP port number of the Linux VLAN device that corresponds to
5180  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
5181  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
5182  * 'vlan_tci' 9, it would return the port number of eth0.9.
5183  *
5184  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
5185  * function just returns its 'realdev_ofp_port' argument. */
5186 ofp_port_t
5187 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
5188                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5189     OVS_EXCLUDED(ofproto->vsp_mutex)
5190 {
5191     ofp_port_t ret;
5192
5193     /* hmap_is_empty is thread safe, see if we can return immediately. */
5194     if (hmap_is_empty(&ofproto->realdev_vid_map)) {
5195         return realdev_ofp_port;
5196     }
5197     ovs_mutex_lock(&ofproto->vsp_mutex);
5198     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
5199     ovs_mutex_unlock(&ofproto->vsp_mutex);
5200     return ret;
5201 }
5202
5203 static struct vlan_splinter *
5204 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
5205 {
5206     struct vlan_splinter *vsp;
5207
5208     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
5209                              hash_ofp_port(vlandev_ofp_port),
5210                              &ofproto->vlandev_map) {
5211         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
5212             return vsp;
5213         }
5214     }
5215
5216     return NULL;
5217 }
5218
5219 /* Returns the OpenFlow port number of the "real" device underlying the Linux
5220  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
5221  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
5222  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
5223  * eth0 and store 9 in '*vid'.
5224  *
5225  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
5226  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
5227  * always does.*/
5228 static ofp_port_t
5229 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
5230                        ofp_port_t vlandev_ofp_port, int *vid)
5231     OVS_REQUIRES(ofproto->vsp_mutex)
5232 {
5233     if (!hmap_is_empty(&ofproto->vlandev_map)) {
5234         const struct vlan_splinter *vsp;
5235
5236         vsp = vlandev_find(ofproto, vlandev_ofp_port);
5237         if (vsp) {
5238             if (vid) {
5239                 *vid = vsp->vid;
5240             }
5241             return vsp->realdev_ofp_port;
5242         }
5243     }
5244     return 0;
5245 }
5246
5247 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
5248  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
5249  * 'flow->in_port' to the "real" device backing the VLAN device, sets
5250  * 'flow->vlan_tci' to the VLAN VID, and returns true.  Optionally pushes the
5251  * appropriate VLAN on 'packet' if provided.  Otherwise (which is always the
5252  * case unless VLAN splinters are enabled), returns false without making any
5253  * changes. */
bool
vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
                struct ofpbuf *packet)
    OVS_EXCLUDED(ofproto->vsp_mutex)
{
    ofp_port_t realdev;
    int vid;

    /* hmap_is_empty is thread safe. */
    if (hmap_is_empty(&ofproto->vlandev_map)) {
        return false;
    }

    ovs_mutex_lock(&ofproto->vsp_mutex);
    realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
    ovs_mutex_unlock(&ofproto->vsp_mutex);
    if (!realdev) {
        /* 'flow->in_port' is not a Linux VLAN device; nothing to adjust. */
        return false;
    }

    /* Cause the flow to be processed as if it came in on the real device with
     * the VLAN device's VLAN ID. */
    flow->in_port.ofp_port = realdev;
    flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);

    if (packet) {
        /* Make the packet resemble the flow, so that it gets sent to an
         * OpenFlow controller properly, so that it looks correct for sFlow,
         * and so that flow_extract() will get the correct vlan_tci if it is
         * called on 'packet'. */
        eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
    }

    return true;
}
5289
5290 static void
5291 vsp_remove(struct ofport_dpif *port)
5292 {
5293     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5294     struct vlan_splinter *vsp;
5295
5296     ovs_mutex_lock(&ofproto->vsp_mutex);
5297     vsp = vlandev_find(ofproto, port->up.ofp_port);
5298     if (vsp) {
5299         hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
5300         hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
5301         free(vsp);
5302
5303         port->realdev_ofp_port = 0;
5304     } else {
5305         VLOG_ERR("missing vlan device record");
5306     }
5307     ovs_mutex_unlock(&ofproto->vsp_mutex);
5308 }
5309
/* Registers 'port' as a Linux VLAN device on top of 'realdev_ofp_port' with
 * VLAN ID 'vid', inserting the record into both lookup maps.  Logs an error
 * instead if a conflicting record already exists for either direction of the
 * mapping. */
static void
vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);

    ovs_mutex_lock(&ofproto->vsp_mutex);
    /* Proceed only if 'port' has no realdev yet and (realdev, vid) does not
     * already map to some other VLAN device. */
    if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
        && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
            == realdev_ofp_port)) {
        struct vlan_splinter *vsp;

        vsp = xmalloc(sizeof *vsp);
        vsp->realdev_ofp_port = realdev_ofp_port;
        vsp->vlandev_ofp_port = port->up.ofp_port;
        vsp->vid = vid;

        port->realdev_ofp_port = realdev_ofp_port;

        /* Index the record both by VLAN device and by (realdev, vid). */
        hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
                    hash_ofp_port(port->up.ofp_port));
        hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
                    hash_realdev_vid(realdev_ofp_port, vid));
    } else {
        VLOG_ERR("duplicate vlan device record");
    }
    ovs_mutex_unlock(&ofproto->vsp_mutex);
}
5337
5338 static odp_port_t
5339 ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
5340 {
5341     const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
5342     return ofport ? ofport->odp_port : ODPP_NONE;
5343 }
5344
5345 struct ofport_dpif *
5346 odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
5347 {
5348     struct ofport_dpif *port;
5349
5350     ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
5351     HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
5352                              &backer->odp_to_ofport_map) {
5353         if (port->odp_port == odp_port) {
5354             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5355             return port;
5356         }
5357     }
5358
5359     ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5360     return NULL;
5361 }
5362
5363 static ofp_port_t
5364 odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
5365 {
5366     struct ofport_dpif *port;
5367
5368     port = odp_port_to_ofport(ofproto->backer, odp_port);
5369     if (port && &ofproto->up == port->up.ofproto) {
5370         return port->up.ofp_port;
5371     } else {
5372         return OFPP_NONE;
5373     }
5374 }
5375
5376 uint32_t
5377 ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
5378 {
5379     struct dpif_backer *backer = ofproto->backer;
5380
5381     return  recirc_id_alloc(backer->rid_pool);
5382 }
5383
5384 void
5385 ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
5386 {
5387     struct dpif_backer *backer = ofproto->backer;
5388
5389     recirc_id_free(backer->rid_pool, recirc_id);
5390 }
5391
5392 int
5393 ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
5394                                const struct match *match, int priority,
5395                                uint16_t idle_timeout,
5396                                const struct ofpbuf *ofpacts,
5397                                struct rule **rulep)
5398 {
5399     struct ofputil_flow_mod fm;
5400     struct rule_dpif *rule;
5401     int error;
5402
5403     fm.match = *match;
5404     fm.priority = priority;
5405     fm.new_cookie = htonll(0);
5406     fm.cookie = htonll(0);
5407     fm.cookie_mask = htonll(0);
5408     fm.modify_cookie = false;
5409     fm.table_id = TBL_INTERNAL;
5410     fm.command = OFPFC_ADD;
5411     fm.idle_timeout = idle_timeout;
5412     fm.hard_timeout = 0;
5413     fm.importance = 0;
5414     fm.buffer_id = 0;
5415     fm.out_port = 0;
5416     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5417     fm.ofpacts = ofpbuf_data(ofpacts);
5418     fm.ofpacts_len = ofpbuf_size(ofpacts);
5419
5420     error = ofproto_flow_mod(&ofproto->up, &fm);
5421     if (error) {
5422         VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
5423                     ofperr_to_string(error));
5424         *rulep = NULL;
5425         return error;
5426     }
5427
5428     rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
5429                                      &fm.match.wc, false);
5430     if (rule) {
5431         *rulep = &rule->up;
5432     } else {
5433         OVS_NOT_REACHED();
5434     }
5435     return 0;
5436 }
5437
5438 int
5439 ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
5440                                   struct match *match, int priority)
5441 {
5442     struct ofputil_flow_mod fm;
5443     int error;
5444
5445     fm.match = *match;
5446     fm.priority = priority;
5447     fm.new_cookie = htonll(0);
5448     fm.cookie = htonll(0);
5449     fm.cookie_mask = htonll(0);
5450     fm.modify_cookie = false;
5451     fm.table_id = TBL_INTERNAL;
5452     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5453     fm.command = OFPFC_DELETE_STRICT;
5454
5455     error = ofproto_flow_mod(&ofproto->up, &fm);
5456     if (error) {
5457         VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
5458                     ofperr_to_string(error));
5459         return error;
5460     }
5461
5462     return 0;
5463 }
5464
/* The dpif provider's vtable: a POSITIONAL initializer for "struct
 * ofproto_class" (declared in ofproto/ofproto-provider.h), so the entry
 * order must match that struct's member order exactly -- never reorder
 * these.  A NULL entry means this provider leaves the corresponding
 * optional hook unimplemented; the trailing comment on each NULL names
 * the member it stands for. */
const struct ofproto_class ofproto_dpif_class = {
    /* Type-level and ofproto lifecycle functions. */
    init,
    enumerate_types,
    enumerate_names,
    del,
    port_open_type,
    type_run,
    type_wait,
    alloc,
    construct,
    destruct,
    dealloc,
    run,
    wait,
    NULL,                       /* get_memory_usage. */
    type_get_memory_usage,
    flush,
    query_tables,
    /* Port functions. */
    port_alloc,
    port_construct,
    port_destruct,
    port_dealloc,
    port_modified,
    port_reconfigured,
    port_query_by_name,
    port_add,
    port_del,
    port_get_stats,
    port_dump_start,
    port_dump_next,
    port_dump_done,
    port_poll,
    port_poll_wait,
    port_is_lacp_current,
    port_get_lacp_stats,
    /* Rule functions. */
    NULL,                       /* rule_choose_table */
    rule_alloc,
    rule_construct,
    rule_insert,
    rule_delete,
    rule_destruct,
    rule_dealloc,
    rule_get_stats,
    rule_execute,
    NULL,                       /* rule_premodify_actions */
    rule_modify_actions,
    set_frag_handling,
    packet_out,
    /* Monitoring: NetFlow, sFlow, IPFIX, CFM, BFD. */
    set_netflow,
    get_netflow_ids,
    set_sflow,
    set_ipfix,
    set_cfm,
    cfm_status_changed,
    get_cfm_status,
    set_bfd,
    bfd_status_changed,
    get_bfd_status,
    /* Spanning tree: STP and RSTP. */
    set_stp,
    get_stp_status,
    set_stp_port,
    get_stp_port_status,
    get_stp_port_stats,
    set_rstp,
    get_rstp_status,
    set_rstp_port,
    get_rstp_port_status,
    /* QoS, bundles, mirrors, MAC learning, multicast snooping. */
    set_queues,
    bundle_set,
    bundle_remove,
    mirror_set__,
    mirror_get_stats__,
    set_flood_vlans,
    is_mirror_output_bundle,
    forward_bpdu_changed,
    set_mac_table_config,
    set_mcast_snooping,
    set_mcast_snooping_port,
    set_realdev,
    /* Meters are not implemented by this provider. */
    NULL,                       /* meter_get_features */
    NULL,                       /* meter_set */
    NULL,                       /* meter_get */
    NULL,                       /* meter_del */
    /* OpenFlow group functions. */
    group_alloc,                /* group_alloc */
    group_construct,            /* group_construct */
    group_destruct,             /* group_destruct */
    group_dealloc,              /* group_dealloc */
    group_modify,               /* group_modify */
    group_get_stats,            /* group_get_stats */
    get_datapath_version,       /* get_datapath_version */
};