/*
 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"

#include <errno.h>

#include "bfd.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "connectivity.h"
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "guarded-list.h"
#include "hmapx.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "mcast-snooping.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netdev.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-util.h"
#include "odp-execute.h"
#include "ofp-util.h"
#include "ofpbuf.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-monitor.h"
#include "ofproto-dpif-rid.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
#include "ovs-router.h"
#include "seq.h"
#include "simap.h"
#include "smap.h"
#include "timer.h"
#include "tunnel.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
#include "vlog.h"

VLOG_DEFINE_THIS_MODULE(ofproto_dpif);

COVERAGE_DEFINE(ofproto_dpif_expired);
COVERAGE_DEFINE(packet_in_overflow);

struct flow_miss;

struct rule_dpif {
    struct rule up;

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;
    struct dpif_flow_stats stats OVS_GUARDED;

    /* If non-zero, the recirculation ID that has been allocated for use
     * with this rule.  The recirculation ID and the associated internal
     * flow should be freed when the rule is freed. */
    uint32_t recirc_id;
};

/* RULE_CAST() depends on this. */
BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
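
/* A sketch of why the zero-offset assertion above matters: with 'up' as the
 * first member, converting between "struct rule *" and "struct rule_dpif *"
 * is a plain pointer cast.  rule_dpif_cast() below does the conversion
 * safely with CONTAINER_OF(), which here degenerates to the kind of raw
 * cast that a macro such as RULE_CAST() can rely on, e.g.:
 *
 *     #define RULE_CAST(RULE) ((struct rule_dpif *) (RULE))
 *
 * If 'up' ever stopped being the first member, the BUILD_ASSERT_DECL()
 * would fail at compile time instead of letting casts silently misbehave. */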

static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
                           long long int *used);
static struct rule_dpif *rule_dpif_cast(const struct rule *);
static void rule_expire(struct rule_dpif *);

struct group_dpif {
    struct ofgroup up;

    /* These statistics:
     *
     *   - Do include packets and bytes from datapath flows which have not
     *   recently been processed by a revalidator. */
    struct ovs_mutex stats_mutex;
    uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
    uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
};

struct ofbundle {
    struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
    struct ofproto_dpif *ofproto; /* Owning ofproto. */
    void *aux;                  /* Key supplied by ofproto's client. */
    char *name;                 /* Identifier for log messages. */

    /* Configuration. */
    struct list ports;          /* Contains "struct ofport"s. */
    enum port_vlan_mode vlan_mode; /* VLAN mode. */
    int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
    unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
                                 * NULL if all VLANs are trunked. */
    struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
    struct bond *bond;          /* Nonnull iff more than one port. */
    bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */

    /* Status. */
    bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
};

static void bundle_remove(struct ofport *);
static void bundle_update(struct ofbundle *);
static void bundle_destroy(struct ofbundle *);
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);
static void bundle_flush_macs(struct ofbundle *, bool);

static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
                        const struct ofproto_port_stp_settings *);

static void rstp_run(struct ofproto_dpif *ofproto);
static void set_rstp_port(struct ofport *,
                         const struct ofproto_port_rstp_settings *);

struct ofport_dpif {
    struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
    struct ofport up;

    odp_port_t odp_port;
    struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
    struct list bundle_node;    /* In struct ofbundle's "ports" list. */
    struct cfm *cfm;            /* Connectivity Fault Management, if any. */
    struct bfd *bfd;            /* BFD, if any. */
    bool may_enable;            /* May be enabled in bonds. */
    bool is_tunnel;             /* This port is a tunnel. */
    bool is_layer3;             /* This is a layer 3 port. */
    long long int carrier_seq;  /* Carrier status changes. */
    struct ofport_dpif *peer;   /* Peer if patch port. */

    /* Spanning tree. */
    struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
    enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
    long long int stp_state_entered;

    /* Rapid Spanning Tree. */
    struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
    enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */

    /* Queue to DSCP mapping. */
    struct ofproto_port_queue *qdscp;
    size_t n_qdscp;

    /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
     *
     * This is deprecated.  It is only for compatibility with broken device
     * drivers in old versions of Linux that do not properly support VLANs when
     * VLAN devices are not used.  When broken device drivers are no longer in
     * widespread use, we will delete these interfaces. */
    ofp_port_t realdev_ofp_port;
    int vlandev_vid;
};

/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
 *
 * This is deprecated.  It is only for compatibility with broken device drivers
 * in old versions of Linux that do not properly support VLANs when VLAN
 * devices are not used.  When broken device drivers are no longer in
 * widespread use, we will delete these interfaces. */
struct vlan_splinter {
    struct hmap_node realdev_vid_node;
    struct hmap_node vlandev_node;
    ofp_port_t realdev_ofp_port;
    ofp_port_t vlandev_ofp_port;
    int vid;
};

static void vsp_remove(struct ofport_dpif *);
static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);

static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
                                       ofp_port_t);

static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
                                       odp_port_t);

static struct ofport_dpif *
ofport_dpif_cast(const struct ofport *ofport)
{
    return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
}

static void port_run(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
static void ofport_update_peer(struct ofport_dpif *);

/* Reasons that we might need to revalidate every datapath flow, and
 * corresponding coverage counters.
 *
 * A value of 0 means that there is no need to revalidate.
 *
 * It would be nice to have some cleaner way to integrate with coverage
 * counters, but with only a few reasons I guess this is good enough for
 * now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_RSTP,                  /* RSTP port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ... */
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_rstp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_mcast_snooping);
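
/* A short illustrative sketch of how these reasons are used (the real
 * instances appear later in this file): a subsystem that notices a change
 * records the reason, e.g. in run() below:
 *
 *     if (mac_learning_run(ofproto->ml)) {
 *         ofproto->backer->need_revalidate = REV_MAC_LEARNING;
 *     }
 *
 * and type_run() then bumps the matching coverage counter, clears
 * 'need_revalidate', and triggers a full revalidation of the installed
 * datapath flows via udpif_revalidate(). */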

/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
    char *type;
    int refcount;
    struct dpif *dpif;
    struct udpif *udpif;

    struct ovs_rwlock odp_to_ofport_lock;
    struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */

    struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */

    enum revalidate_reason need_revalidate; /* Revalidate all flows. */

    bool recv_set_enable; /* Enables or disables receiving packets. */

    /* Recirculation. */
    struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
    bool enable_recirc;   /* True if the datapath supports recirculation. */

    /* True if the datapath supports variable-length
     * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
     * False if the datapath supports only 8-byte (or shorter) userdata. */
    bool variable_length_userdata;

    /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET
     * actions. */
    bool masked_set_action;

    /* Maximum number of MPLS label stack entries that the datapath supports
     * in a match. */
    size_t max_mpls_depth;

    /* Version string of the datapath stored in OVSDB. */
    char *dp_version_string;

    /* True if the datapath supports tnl_push and pop actions. */
    bool enable_tnl_push_pop;
    struct atomic_count tnl_count;
};
/* All existing dpif_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);

struct ofproto_dpif {
    struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
    struct ofproto up;
    struct dpif_backer *backer;

    uint64_t dump_seq; /* Last read of udpif_dump_seq(). */

    /* Special OpenFlow rules. */
    struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
    struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
    struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */

    /* Bridging. */
    struct netflow *netflow;
    struct dpif_sflow *sflow;
    struct dpif_ipfix *ipfix;
    struct hmap bundles;        /* Contains "struct ofbundle"s. */
    struct mac_learning *ml;
    struct mcast_snooping *ms;
    bool has_bonded_bundles;
    bool lacp_enabled;
    struct mbridge *mbridge;

    struct ovs_mutex stats_mutex;
    struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
                                            * consumed in userspace. */

    /* Spanning tree. */
    struct stp *stp;
    long long int stp_last_tick;

    /* Rapid Spanning Tree. */
    struct rstp *rstp;
    long long int rstp_last_tick;

    /* VLAN splinters. */
    struct ovs_mutex vsp_mutex;
    struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
    struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */

    /* Ports. */
    struct sset ports;             /* Set of standard port names. */
    struct sset ghost_ports;       /* Ports with no datapath port. */
    struct sset port_poll_set;     /* Queued names for port_poll() reply. */
    int port_poll_errno;           /* Last errno for port_poll() reply. */
    uint64_t change_seq;           /* Connectivity status changes. */

    /* Work queues. */
    struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
    struct seq *pins_seq;          /* For notifying 'pins' reception. */
    uint64_t pins_seqno;
};

/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);

static bool ofproto_use_tnl_push_pop = true;
static void ofproto_unixctl_init(void);

static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
{
    ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
    return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}

size_t
ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->max_mpls_depth;
}

bool
ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
{
    return ofproto->backer->enable_recirc;
}

static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                        ofp_port_t ofp_port);
static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                          const struct ofpbuf *packet,
                          const struct ofpact[], size_t ofpacts_len,
                          struct ds *);

/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

/* Initial mappings of port names to bridges. */
static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);

/* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
 * it. */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
                      struct ofputil_flow_mod *fm)
{
    ofproto_flow_mod(&ofproto->up, fm);
}
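
/* A hedged sketch of a typical caller, illustrating the ownership contract
 * above (how 'fm' is populated is elided; ofp-parse.h offers helpers for
 * building one from a flow string):
 *
 *     struct ofputil_flow_mod fm;
 *     ...populate fm.match, fm.command, fm.ofpacts, fm.ofpacts_len...
 *     ofproto_dpif_flow_mod(ofproto, &fm);
 *     free(fm.ofpacts);        (the caller still owns 'fm' and its actions)
 */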

/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
 * Takes ownership of 'pin' and pin->up.packet. */
void
ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
                            struct ofproto_packet_in *pin)
{
    if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
        COVERAGE_INC(packet_in_overflow);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }

    /* Wakes up main thread for packet-in I/O. */
    seq_change(ofproto->pins_seq);
}
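
/* A hedged sketch of the producer side (field setup abbreviated; the real
 * producers live in the xlate code).  Ownership of 'pin' and of
 * pin->up.packet passes to the call whether or not the queue accepts it:
 *
 *     struct ofproto_packet_in *pin = xmalloc(sizeof *pin);
 *     pin->up.packet = xmemdup(ofpbuf_data(packet), ofpbuf_size(packet));
 *     pin->up.packet_len = ofpbuf_size(packet);
 *     ...fill in the remaining pin->up fields...
 *     ofproto_dpif_send_packet_in(ofproto, pin);
 *
 * The consumer is run() below, which drains the guarded list and hands each
 * pin to connmgr_send_packet_in(); wait() sleeps on 'pins_seq', so the
 * seq_change() above wakes the main thread. */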

/* The default "table-miss" behaviour for OpenFlow 1.3+ is to drop the
 * packet rather than to send the packet to the controller.
 *
 * This function returns true to indicate that a packet_in message
 * for a "table-miss" should be sent to at least one controller, and
 * false otherwise. */
bool
ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
{
    return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
}
\f
/* Factory functions. */

static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }
}

static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}

static int
enumerate_names(const char *type, struct sset *names)
{
    struct ofproto_dpif *ofproto;

    sset_clear(names);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (strcmp(type, ofproto->up.type)) {
            continue;
        }
        sset_add(names, ofproto->up.name);
    }

    return 0;
}

static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error;

    error = dpif_open(name, type, &dpif);
    if (!error) {
        error = dpif_delete(dpif);
        dpif_close(dpif);
    }
    return error;
}
\f
static const char *
port_open_type(const char *datapath_type, const char *port_type)
{
    return dpif_port_open_type(datapath_type, port_type);
}

/* Type functions. */

static void process_dpif_port_changes(struct dpif_backer *);
static void process_dpif_all_ports_changed(struct dpif_backer *);
static void process_dpif_port_change(struct dpif_backer *,
                                     const char *devname);
static void process_dpif_port_error(struct dpif_backer *, int error);

static struct ofproto_dpif *
lookup_ofproto_dpif_by_port_name(const char *name)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (sset_contains(&ofproto->ports, name)) {
            return ofproto;
        }
    }

    return NULL;
}

static int
type_run(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return 0;
    }

    if (dpif_run(backer->dpif)) {
        backer->need_revalidate = REV_RECONFIGURE;
    }

    udpif_run(backer->udpif);

    /* If vswitchd started with other_config:flow_restore_wait set as "true",
     * and the configuration has now changed to "false", enable receiving
     * packets from the datapath. */
    if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
        int error;

        backer->recv_set_enable = true;

        error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
        if (error) {
            VLOG_ERR("Failed to enable receiving packets in dpif.");
            return error;
        }
        dpif_flow_flush(backer->dpif);
        backer->need_revalidate = REV_RECONFIGURE;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);

    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
        struct simap tmp_backers;

        /* Handle tunnel garbage collection. */
        simap_init(&tmp_backers);
        simap_swap(&backer->tnl_backers, &tmp_backers);

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *iter;

            if (backer != ofproto->backer) {
                continue;
            }

            HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
                char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
                const char *dp_port;

                if (!iter->is_tunnel) {
                    continue;
                }

                dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
                                                     namebuf, sizeof namebuf);
                node = simap_find(&tmp_backers, dp_port);
                if (node) {
                    simap_put(&backer->tnl_backers, dp_port, node->data);
                    simap_delete(&tmp_backers, node);
                    node = simap_find(&backer->tnl_backers, dp_port);
                } else {
                    node = simap_find(&backer->tnl_backers, dp_port);
                    if (!node) {
                        odp_port_t odp_port = ODPP_NONE;

                        if (!dpif_port_add(backer->dpif, iter->up.netdev,
                                           &odp_port)) {
                            simap_put(&backer->tnl_backers, dp_port,
                                      odp_to_u32(odp_port));
                            node = simap_find(&backer->tnl_backers, dp_port);
                        }
                    }
                }

                iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
                if (tnl_port_reconfigure(iter, iter->up.netdev,
                                         iter->odp_port,
                                         ovs_native_tunneling_is_on(ofproto), dp_port)) {
                    backer->need_revalidate = REV_RECONFIGURE;
                }
            }
        }

        SIMAP_FOR_EACH (node, &tmp_backers) {
            dpif_port_del(backer->dpif, u32_to_odp(node->data));
        }
        simap_destroy(&tmp_backers);

        switch (backer->need_revalidate) {
        case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
        case REV_STP:            COVERAGE_INC(rev_stp);            break;
        case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
        case REV_BOND:           COVERAGE_INC(rev_bond);           break;
        case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
        case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
        case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
        case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
        }
        backer->need_revalidate = 0;

        HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
            struct ofport_dpif *ofport;
            struct ofbundle *bundle;

            if (ofproto->backer != backer) {
                continue;
            }

            xlate_txn_start();
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->ml,
                              ofproto->stp, ofproto->rstp, ofproto->ms,
                              ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
                              ofproto->netflow,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
                              ofproto->backer->enable_recirc,
                              ofproto->backer->variable_length_userdata,
                              ofproto->backer->max_mpls_depth,
                              ofproto->backer->masked_set_action);

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                xlate_bundle_set(ofproto, bundle, bundle->name,
                                 bundle->vlan_mode, bundle->vlan,
                                 bundle->trunks, bundle->use_priority_tags,
                                 bundle->bond, bundle->lacp,
                                 bundle->floodable);
            }

            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                int stp_port = ofport->stp_port
                    ? stp_port_no(ofport->stp_port)
                    : -1;
                xlate_ofport_set(ofproto, ofport->bundle, ofport,
                                 ofport->up.ofp_port, ofport->odp_port,
                                 ofport->up.netdev, ofport->cfm,
                                 ofport->bfd, ofport->peer, stp_port,
                                 ofport->rstp_port, ofport->qdscp,
                                 ofport->n_qdscp, ofport->up.pp.config,
                                 ofport->up.pp.state, ofport->is_tunnel,
                                 ofport->may_enable);
            }
            xlate_txn_commit();
        }

        udpif_revalidate(backer->udpif);
    }

    process_dpif_port_changes(backer);

    return 0;
}

/* Check for and handle port changes in 'backer''s dpif. */
static void
process_dpif_port_changes(struct dpif_backer *backer)
{
    for (;;) {
        char *devname;
        int error;

        error = dpif_port_poll(backer->dpif, &devname);
        switch (error) {
        case EAGAIN:
            return;

        case ENOBUFS:
            process_dpif_all_ports_changed(backer);
            break;

        case 0:
            process_dpif_port_change(backer, devname);
            free(devname);
            break;

        default:
            process_dpif_port_error(backer, error);
            break;
        }
    }
}

static void
process_dpif_all_ports_changed(struct dpif_backer *backer)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port dpif_port;
    struct dpif_port_dump dump;
    struct sset devnames;
    const char *devname;

    sset_init(&devnames);
    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            struct ofport *ofport;

            HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
                sset_add(&devnames, netdev_get_name(ofport->netdev));
            }
        }
    }
    DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
        sset_add(&devnames, dpif_port.name);
    }

    SSET_FOR_EACH (devname, &devnames) {
        process_dpif_port_change(backer, devname);
    }
    sset_destroy(&devnames);
}

static void
process_dpif_port_change(struct dpif_backer *backer, const char *devname)
{
    struct ofproto_dpif *ofproto;
    struct dpif_port port;

    /* Don't report on the datapath's device. */
    if (!strcmp(devname, dpif_base_name(backer->dpif))) {
        return;
    }

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
                   &all_ofproto_dpifs) {
        if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
            return;
        }
    }

    ofproto = lookup_ofproto_dpif_by_port_name(devname);
    if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
        /* The port was removed.  If we know the datapath,
         * report it through poll_set().  If we don't, it may be
         * notifying us of a removal we initiated, so ignore it.
         * If there's a pending ENOBUFS, let it stand, since
         * everything will be reevaluated. */
        if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
            sset_add(&ofproto->port_poll_set, devname);
            ofproto->port_poll_errno = 0;
        }
    } else if (!ofproto) {
        /* The port was added, but we don't know with which
         * ofproto we should associate it.  Delete it. */
        dpif_port_del(backer->dpif, port.port_no);
    } else {
        struct ofport_dpif *ofport;

        ofport = ofport_dpif_cast(shash_find_data(
                                      &ofproto->up.port_by_name, devname));
        if (ofport
            && ofport->odp_port != port.port_no
            && !odp_port_to_ofport(backer, port.port_no))
        {
            /* 'ofport''s datapath port number has changed from
             * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
             * structures to match. */
            ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
            hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
            ofport->odp_port = port.port_no;
            hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
                        hash_odp_port(port.port_no));
            ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
            backer->need_revalidate = REV_RECONFIGURE;
        }
    }
    dpif_port_destroy(&port);
}

/* Propagate 'error' to all ofprotos based on 'backer'. */
static void
process_dpif_port_error(struct dpif_backer *backer, int error)
{
    struct ofproto_dpif *ofproto;

    HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
        if (ofproto->backer == backer) {
            sset_clear(&ofproto->port_poll_set);
            ofproto->port_poll_errno = error;
        }
    }
}

static void
type_wait(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* This is not necessarily a problem, since backers are only
         * created on demand. */
        return;
    }

    dpif_wait(backer->dpif);
}
\f
/* Basic life-cycle. */

static int add_internal_flows(struct ofproto_dpif *);

static struct ofproto *
alloc(void)
{
    struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
    return &ofproto->up;
}

static void
dealloc(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    free(ofproto);
}

static void
close_dpif_backer(struct dpif_backer *backer)
{
    ovs_assert(backer->refcount > 0);

    if (--backer->refcount) {
        return;
    }

    udpif_destroy(backer->udpif);

    simap_destroy(&backer->tnl_backers);
    ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
    hmap_destroy(&backer->odp_to_ofport_map);
    shash_find_and_delete(&all_dpif_backers, backer->type);
    recirc_id_pool_destroy(backer->rid_pool);
    free(backer->type);
    free(backer->dp_version_string);
    dpif_close(backer->dpif);
    free(backer);
}

/* Datapath port slated for removal from datapath. */
struct odp_garbage {
    struct list list_node;
    odp_port_t odp_port;
};

static bool check_variable_length_userdata(struct dpif_backer *backer);
static size_t check_max_mpls_depth(struct dpif_backer *backer);
static bool check_recirc(struct dpif_backer *backer);
static bool check_masked_set_action(struct dpif_backer *backer);

static int
open_dpif_backer(const char *type, struct dpif_backer **backerp)
{
    struct dpif_backer *backer;
    struct dpif_port_dump port_dump;
    struct dpif_port port;
    struct shash_node *node;
    struct list garbage_list;
    struct odp_garbage *garbage, *next;

    struct sset names;
    char *backer_name;
    const char *name;
    int error;

    backer = shash_find_data(&all_dpif_backers, type);
    if (backer) {
        backer->refcount++;
        *backerp = backer;
        return 0;
    }

    backer_name = xasprintf("ovs-%s", type);

    /* Remove any existing datapaths, since we assume we're the only
     * userspace controlling the datapath. */
    sset_init(&names);
    dp_enumerate_names(type, &names);
    SSET_FOR_EACH(name, &names) {
        struct dpif *old_dpif;

        /* Don't remove our backer if it exists. */
        if (!strcmp(name, backer_name)) {
            continue;
        }

        if (dpif_open(name, type, &old_dpif)) {
            VLOG_WARN("couldn't open old datapath %s to remove it", name);
        } else {
            dpif_delete(old_dpif);
            dpif_close(old_dpif);
        }
    }
    sset_destroy(&names);

    backer = xmalloc(sizeof *backer);

    error = dpif_create_and_open(backer_name, type, &backer->dpif);
    free(backer_name);
    if (error) {
        VLOG_ERR("failed to open datapath of type %s: %s", type,
                 ovs_strerror(error));
        free(backer);
        return error;
    }
    backer->udpif = udpif_create(backer, backer->dpif);

    backer->type = xstrdup(type);
    backer->refcount = 1;
    hmap_init(&backer->odp_to_ofport_map);
    ovs_rwlock_init(&backer->odp_to_ofport_lock);
    backer->need_revalidate = 0;
    simap_init(&backer->tnl_backers);
    backer->recv_set_enable = !ofproto_get_flow_restore_wait();
    *backerp = backer;

    if (backer->recv_set_enable) {
        dpif_flow_flush(backer->dpif);
    }

    /* Loop through the ports already on the datapath and remove any
     * that we don't need anymore. */
    list_init(&garbage_list);
    dpif_port_dump_start(&port_dump, backer->dpif);
    while (dpif_port_dump_next(&port_dump, &port)) {
        node = shash_find(&init_ofp_ports, port.name);
        if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
            garbage = xmalloc(sizeof *garbage);
            garbage->odp_port = port.port_no;
            list_push_front(&garbage_list, &garbage->list_node);
        }
    }
    dpif_port_dump_done(&port_dump);

    LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
        dpif_port_del(backer->dpif, garbage->odp_port);
        list_remove(&garbage->list_node);
        free(garbage);
    }

    shash_add(&all_dpif_backers, type, backer);

    backer->enable_recirc = check_recirc(backer);
    backer->max_mpls_depth = check_max_mpls_depth(backer);
    backer->masked_set_action = check_masked_set_action(backer);
    backer->rid_pool = recirc_id_pool_create();

    backer->enable_tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif);
    atomic_count_init(&backer->tnl_count, 0);

    error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
    if (error) {
        VLOG_ERR("failed to listen on datapath of type %s: %s",
                 type, ovs_strerror(error));
        close_dpif_backer(backer);
        return error;
    }

    if (backer->recv_set_enable) {
        udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
    }

    /* This check fails if performed before udpif threads have been set,
     * as the kernel module checks that the 'pid' in userspace action
     * is non-zero. */
    backer->variable_length_userdata = check_variable_length_userdata(backer);
    backer->dp_version_string = dpif_get_dp_version(backer->dpif);

    return error;
}

bool
ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto)
{
    return ofproto_use_tnl_push_pop && ofproto->backer->enable_tnl_push_pop &&
           atomic_count_get(&ofproto->backer->tnl_count);
}

/* Tests whether 'backer''s datapath supports recirculation.  Only newer
 * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
 * features on older datapaths that don't support this feature.
 *
 * Returns false if 'backer' definitely does not support recirculation, true if
 * it seems to support recirculation or if at least the error we get is
 * ambiguous. */
static bool
check_recirc(struct dpif_backer *backer)
{
    struct flow flow;
    struct odputil_keybuf keybuf;
    struct ofpbuf key;
    int error;
    bool enable_recirc = false;

    memset(&flow, 0, sizeof flow);
    flow.recirc_id = 1;
    flow.dp_hash = 1;

    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
    odp_flow_key_from_flow(&key, &flow, NULL, 0, true);

    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
                          ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
                          0, NULL);
    if (error && error != EEXIST) {
        if (error != EINVAL) {
            VLOG_WARN("%s: Recirculation flow probe failed (%s)",
                      dpif_name(backer->dpif), ovs_strerror(error));
        }
        goto done;
    }

    error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
                          NULL);
    if (error) {
        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
                  dpif_name(backer->dpif));
    }

    enable_recirc = true;

done:
    if (enable_recirc) {
        VLOG_INFO("%s: Datapath supports recirculation",
                  dpif_name(backer->dpif));
    } else {
        VLOG_INFO("%s: Datapath does not support recirculation",
                  dpif_name(backer->dpif));
    }

    return enable_recirc;
}

/* Tests whether 'backer''s datapath supports variable-length
 * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
 * to disable some features on older datapaths that don't support this
 * feature.
 *
 * Returns false if 'backer' definitely does not support variable-length
 * userdata, true if it seems to support them or if at least the error we get
 * is ambiguous. */
static bool
check_variable_length_userdata(struct dpif_backer *backer)
{
    struct eth_header *eth;
    struct ofpbuf actions;
    struct dpif_execute execute;
    struct ofpbuf packet;
    size_t start;
    int error;

    /* Compose a userspace action that will cause an ERANGE error on older
     * datapaths that don't support variable-length userdata.
     *
     * We really test for using userdata longer than 8 bytes, but older
     * datapaths accepted these, silently truncating the userdata to 8 bytes.
     * The same older datapaths rejected userdata shorter than 8 bytes, so we
     * test for that instead as a proxy for longer userdata support. */
    ofpbuf_init(&actions, 64);
    start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
    nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
    nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
    nl_msg_end_nested(&actions, start);

    /* Compose a dummy ethernet packet. */
    ofpbuf_init(&packet, ETH_HEADER_LEN);
    eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
    eth->eth_type = htons(0x1234);

    /* Execute the actions.  On older datapaths this fails with ERANGE, on
     * newer datapaths it succeeds. */
    execute.actions = ofpbuf_data(&actions);
    execute.actions_len = ofpbuf_size(&actions);
    execute.packet = &packet;
    execute.md = PKT_METADATA_INITIALIZER(0);
    execute.needs_help = false;
    execute.probe = true;

    error = dpif_execute(backer->dpif, &execute);

    ofpbuf_uninit(&packet);
    ofpbuf_uninit(&actions);

    switch (error) {
    case 0:
        return true;

    case ERANGE:
        /* Variable-length userdata is not supported. */
        VLOG_WARN("%s: datapath does not support variable-length userdata "
                  "feature (needs Linux 3.10+ or kernel module from OVS "
                  "1.11+).  The NXAST_SAMPLE action will be ignored.",
                  dpif_name(backer->dpif));
        return false;

    default:
        /* Something odd happened.  We're not sure whether variable-length
         * userdata is supported.  Default to "yes". */
        VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
                  dpif_name(backer->dpif), ovs_strerror(error));
        return true;
    }
}

/* Tests the MPLS label stack depth supported by 'backer''s datapath.
 *
 * Returns the number of elements in a struct flow's mpls_lse field
 * if the datapath supports at least that many entries in an
 * MPLS label stack.
 * Otherwise returns the number of MPLS push actions supported by
 * the datapath. */
static size_t
check_max_mpls_depth(struct dpif_backer *backer)
{
    struct flow flow;
    int n;

    for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
        struct odputil_keybuf keybuf;
        struct ofpbuf key;
        int error;

        memset(&flow, 0, sizeof flow);
        flow.dl_type = htons(ETH_TYPE_MPLS);
        flow_set_mpls_bos(&flow, n, 1);

        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
        odp_flow_key_from_flow(&key, &flow, NULL, 0, false);

        error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
                              ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0,
                              NULL, 0, NULL);
        if (error && error != EEXIST) {
            if (error != EINVAL) {
                VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
                          dpif_name(backer->dpif), ovs_strerror(error));
            }
            break;
        }

        error = dpif_flow_del(backer->dpif, ofpbuf_data(&key),
                              ofpbuf_size(&key), NULL);
        if (error) {
            VLOG_WARN("%s: failed to delete MPLS feature probe flow",
                      dpif_name(backer->dpif));
        }
    }

    VLOG_INFO("%s: MPLS label stack length probed as %d",
              dpif_name(backer->dpif), n);
    return n;
}

/* Tests whether 'backer''s datapath supports masked data in
 * OVS_ACTION_ATTR_SET actions.  We need to disable some features on older
 * datapaths that don't support this feature. */
static bool
check_masked_set_action(struct dpif_backer *backer)
{
    struct eth_header *eth;
    struct ofpbuf actions;
    struct dpif_execute execute;
    struct ofpbuf packet;
    int error;
    struct ovs_key_ethernet key, mask;

    /* Compose a set action that will cause an EINVAL error on older
     * datapaths that don't support masked set actions.
     * Avoid using a full mask, as it could be translated to a non-masked
     * set action instead. */
    ofpbuf_init(&actions, 64);
    memset(&key, 0x53, sizeof key);
    memset(&mask, 0x7f, sizeof mask);
    commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
                             sizeof key);

    /* Compose a dummy ethernet packet. */
    ofpbuf_init(&packet, ETH_HEADER_LEN);
    eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
    eth->eth_type = htons(0x1234);

    /* Execute the actions.  On older datapaths this fails with EINVAL, on
     * newer datapaths it succeeds. */
    execute.actions = ofpbuf_data(&actions);
    execute.actions_len = ofpbuf_size(&actions);
    execute.packet = &packet;
    execute.md = PKT_METADATA_INITIALIZER(0);
    execute.needs_help = false;
    execute.probe = true;

    error = dpif_execute(backer->dpif, &execute);

    ofpbuf_uninit(&packet);
    ofpbuf_uninit(&actions);

    if (error) {
        /* Masked set action is not supported. */
        VLOG_INFO("%s: datapath does not support masked set action feature.",
                  dpif_name(backer->dpif));
    }
    return !error;
}

static int
construct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct shash_node *node, *next;
    int error;

    error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
    if (error) {
        return error;
    }

    ofproto->netflow = NULL;
    ofproto->sflow = NULL;
    ofproto->ipfix = NULL;
    ofproto->stp = NULL;
    ofproto->rstp = NULL;
    ofproto->dump_seq = 0;
    hmap_init(&ofproto->bundles);
    ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
    ofproto->ms = NULL;
    ofproto->mbridge = mbridge_create();
    ofproto->has_bonded_bundles = false;
    ofproto->lacp_enabled = false;
    ofproto_tunnel_init();
    ovs_mutex_init_adaptive(&ofproto->stats_mutex);
    ovs_mutex_init(&ofproto->vsp_mutex);

    guarded_list_init(&ofproto->pins);

    ofproto_unixctl_init();
    ovs_router_unixctl_register();

    hmap_init(&ofproto->vlandev_map);
    hmap_init(&ofproto->realdev_vid_map);

    sset_init(&ofproto->ports);
    sset_init(&ofproto->ghost_ports);
    sset_init(&ofproto->port_poll_set);
    ofproto->port_poll_errno = 0;
    ofproto->change_seq = 0;
    ofproto->pins_seq = seq_create();
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);

    SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
        struct iface_hint *iface_hint = node->data;

        if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
            /* Check if the datapath already has this port. */
            if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
                sset_add(&ofproto->ports, node->name);
            }

            free(iface_hint->br_name);
            free(iface_hint->br_type);
            free(iface_hint);
            shash_delete(&init_ofp_ports, node);
        }
    }

    hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
                hash_string(ofproto->up.name, 0));
    memset(&ofproto->stats, 0, sizeof ofproto->stats);

    ofproto_init_tables(ofproto_, N_TABLES);
    error = add_internal_flows(ofproto);

    ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;

    return error;
}

static int
add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
                  const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
{
    struct match match;
    int error;
    struct rule *rule;

    match_init_catchall(&match);
    match_set_reg(&match, 0, id);

    error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
                                           &rule);
    *rulep = error ? NULL : rule_dpif_cast(rule);

    return error;
}

static int
add_internal_flows(struct ofproto_dpif *ofproto)
{
    struct ofpact_controller *controller;
    uint64_t ofpacts_stub[128 / 8];
    struct ofpbuf ofpacts;
    struct rule *unused_rulep OVS_UNUSED;
    struct ofpact_resubmit *resubmit;
    struct match match;
    int error;
    int id;

    ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
    id = 1;

    controller = ofpact_put_CONTROLLER(&ofpacts);
    controller->max_len = UINT16_MAX;
    controller->controller_id = 0;
    controller->reason = OFPR_NO_MATCH;
    ofpact_pad(&ofpacts);

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->miss_rule);
    if (error) {
        return error;
    }

    ofpbuf_clear(&ofpacts);
    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->no_packet_in_rule);
    if (error) {
        return error;
    }

    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
                                   &ofproto->drop_frags_rule);
    if (error) {
        return error;
    }

    /* Continue non-recirculation rule lookups from table 0.
     *
     * (priority=2), recirc=0, actions=resubmit(, 0)
     */
    resubmit = ofpact_put_RESUBMIT(&ofpacts);
    resubmit->in_port = OFPP_IN_PORT;
    resubmit->table_id = 0;

    match_init_catchall(&match);
    match_set_recirc_id(&match, 0);

    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
                                           &unused_rulep);
    if (error) {
        return error;
    }

    /* Drop any runaway recirc rule lookups.  Recirc_id has to be
     * non-zero when reaching this rule.
     *
     * (priority=1), *, actions=drop
     */
    ofpbuf_clear(&ofpacts);
    match_init_catchall(&match);
    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
                                           &unused_rulep);

    return error;
}

static void
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofproto_packet_in *pin, *next_pin;
    struct rule_dpif *rule;
    struct oftable *table;
    struct list pins;

    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_remove_ofproto(ofproto);
    xlate_txn_commit();

    /* Ensure that the upcall processing threads have no remaining references
     * to the ofproto or anything in it. */
    udpif_synchronize(ofproto->backer->udpif);

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);

    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        CLS_FOR_EACH_SAFE (rule, up.cr, &table->cls) {
            ofproto_rule_delete(&ofproto->up, &rule->up);
        }
    }

    guarded_list_pop_all(&ofproto->pins, &pins);
    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
        list_remove(&pin->list_node);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }
    guarded_list_destroy(&ofproto->pins);

    mbridge_unref(ofproto->mbridge);

    netflow_unref(ofproto->netflow);
    dpif_sflow_unref(ofproto->sflow);
    dpif_ipfix_unref(ofproto->ipfix);
    hmap_destroy(&ofproto->bundles);
    mac_learning_unref(ofproto->ml);
    mcast_snooping_unref(ofproto->ms);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    sset_destroy(&ofproto->ports);
    sset_destroy(&ofproto->ghost_ports);
    sset_destroy(&ofproto->port_poll_set);

    ovs_mutex_destroy(&ofproto->stats_mutex);
    ovs_mutex_destroy(&ofproto->vsp_mutex);

    seq_destroy(ofproto->pins_seq);

    close_dpif_backer(ofproto->backer);
}

static int
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    uint64_t new_seq, new_dump_seq;

    if (mbridge_need_revalidate(ofproto->mbridge)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
        mcast_snooping_mdb_flush(ofproto->ms);
    }

    /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
     * flow restore.  Even though nothing is processed during flow restore,
     * all queued 'pins' will be handled immediately when flow restore
     * completes. */
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);

    /* Do not perform any periodic activity required by 'ofproto' while
     * waiting for flow restore to complete. */
    if (!ofproto_get_flow_restore_wait()) {
        struct ofproto_packet_in *pin, *next_pin;
        struct list pins;

        guarded_list_pop_all(&ofproto->pins, &pins);
        LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
            connmgr_send_packet_in(ofproto->up.connmgr, pin);
            list_remove(&pin->list_node);
            free(CONST_CAST(void *, pin->up.packet));
            free(pin);
        }
    }

    if (ofproto->netflow) {
        netflow_run(ofproto->netflow);
    }
    if (ofproto->sflow) {
        dpif_sflow_run(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_run(ofproto->ipfix);
    }

    new_seq = seq_read(connectivity_seq_get());
    if (ofproto->change_seq != new_seq) {
        struct ofport_dpif *ofport;

        HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
            port_run(ofport);
        }

        ofproto->change_seq = new_seq;
    }
    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            bundle_run(bundle);
        }
    }

    stp_run(ofproto);
    rstp_run(ofproto);
    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    if (mac_learning_run(ofproto->ml)) {
        ofproto->backer->need_revalidate = REV_MAC_LEARNING;
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    if (mcast_snooping_run(ofproto->ms)) {
        ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
    }

    new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
    if (ofproto->dump_seq != new_dump_seq) {
        struct rule *rule, *next_rule;

        /* We know stats are relatively fresh, so now is a good time to do some
         * periodic work. */
        ofproto->dump_seq = new_dump_seq;

        /* Expire OpenFlow flows whose idle_timeout or hard_timeout
         * has passed. */
        ovs_mutex_lock(&ofproto_mutex);
        LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
                            &ofproto->up.expirable) {
            rule_expire(rule_dpif_cast(rule));
        }
        ovs_mutex_unlock(&ofproto_mutex);

        /* All outstanding data in existing flows has been accounted for, so
         * it's a good time to do bond rebalancing. */
1537         if (ofproto->has_bonded_bundles) {
1538             struct ofbundle *bundle;
1539
1540             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1541                 if (bundle->bond) {
1542                     bond_rebalance(bundle->bond);
1543                 }
1544             }
1545         }
1546     }
1547     return 0;
1548 }
1549
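     /* Arranges for the poll loop to wake up when 'ofproto_' needs attention:
      * sFlow, IPFIX, LACP and bonding, NetFlow, MAC learning, multicast
      * snooping, and STP maintenance, plus pending packet-ins and flow dumps.
      * Does nothing while flow restore is in progress. */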
1550 static void
1551 wait(struct ofproto *ofproto_)
1552 {
1553     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1554
1555     if (ofproto_get_flow_restore_wait()) {
1556         return;
1557     }
1558
1559     if (ofproto->sflow) {
1560         dpif_sflow_wait(ofproto->sflow);
1561     }
1562     if (ofproto->ipfix) {
1563         dpif_ipfix_wait(ofproto->ipfix);
1564     }
1565     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1566         struct ofbundle *bundle;
1567
1568         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1569             bundle_wait(bundle);
1570         }
1571     }
1572     if (ofproto->netflow) {
1573         netflow_wait(ofproto->netflow);
1574     }
1575     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
1576     mac_learning_wait(ofproto->ml);
1577     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1578     mcast_snooping_wait(ofproto->ms);
1579     stp_wait(ofproto);
1580     if (ofproto->backer->need_revalidate) {
1581         /* This shouldn't happen, but if it does, just go around again. */
1582         VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
1583         poll_immediate_wake();
1584     }
1585
1586     seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
1587     seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
1588 }
1589
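     /* Accounts the memory usage of the dpif backer for datapath 'type' in
      * 'usage'. */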
1590 static void
1591 type_get_memory_usage(const char *type, struct simap *usage)
1592 {
1593     struct dpif_backer *backer;
1594
1595     backer = shash_find_data(&all_dpif_backers, type);
1596     if (backer) {
1597         udpif_get_memory_usage(backer->udpif, usage);
1598     }
1599 }
1600
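     /* Deletes all datapath flows installed by the backer of 'ofproto_'. */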
1601 static void
1602 flush(struct ofproto *ofproto_)
1603 {
1604     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1605     struct dpif_backer *backer = ofproto->backer;
1606
1607     if (backer) {
1608         udpif_flush(backer->udpif);
1609     }
1610 }
1611
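     /* Reports flow table features and, when 'stats' is nonnull, per-table
      * lookup and matched packet counts for 'ofproto'. */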
1612 static void
1613 query_tables(struct ofproto *ofproto,
1614              struct ofputil_table_features *features,
1615              struct ofputil_table_stats *stats)
1616 {
1617     strcpy(features->name, "classifier");
1618
1619     if (stats) {
1620         int i;
1621
1622         for (i = 0; i < ofproto->n_tables; i++) {
1623             unsigned long missed, matched;
1624
1625             atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
1626             atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
1627
1628             stats[i].matched_count = matched;
1629             stats[i].lookup_count = matched + missed;
1630         }
1631     }
1632 }
1633
1634 static struct ofport *
1635 port_alloc(void)
1636 {
1637     struct ofport_dpif *port = xmalloc(sizeof *port);
1638     return &port->up;
1639 }
1640
1641 static void
1642 port_dealloc(struct ofport *port_)
1643 {
1644     struct ofport_dpif *port = ofport_dpif_cast(port_);
1645     free(port);
1646 }
1647
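     /* Initializes ofproto-dpif state for new port 'port_': looks up its
      * datapath port number, handles patch and tunnel ports specially, and
      * registers the port with the sFlow and IPFIX modules as appropriate. */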
1648 static int
1649 port_construct(struct ofport *port_)
1650 {
1651     struct ofport_dpif *port = ofport_dpif_cast(port_);
1652     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1653     const struct netdev *netdev = port->up.netdev;
1654     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1655     struct dpif_port dpif_port;
1656     int error;
1657
1658     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1659     port->bundle = NULL;
1660     port->cfm = NULL;
1661     port->bfd = NULL;
1662     port->may_enable = false;
1663     port->stp_port = NULL;
1664     port->stp_state = STP_DISABLED;
1665     port->rstp_port = NULL;
1666     port->rstp_state = RSTP_DISABLED;
1667     port->is_tunnel = false;
1668     port->peer = NULL;
1669     port->qdscp = NULL;
1670     port->n_qdscp = 0;
1671     port->realdev_ofp_port = 0;
1672     port->vlandev_vid = 0;
1673     port->carrier_seq = netdev_get_carrier_resets(netdev);
1674     port->is_layer3 = netdev_vport_is_layer3(netdev);
1675
1676     if (netdev_vport_is_patch(netdev)) {
1677         /* By bailing out here, we don't submit the port to the sFlow module
1678          * to be considered for counter polling export.  This is correct
1679          * because the patch port represents an interface that sFlow considers
1680          * to be "internal" to the switch as a whole, and therefore not a
1681          * candidate for counter polling. */
1682         port->odp_port = ODPP_NONE;
1683         ofport_update_peer(port);
1684         return 0;
1685     }
1686
1687     error = dpif_port_query_by_name(ofproto->backer->dpif,
1688                                     netdev_vport_get_dpif_port(netdev, namebuf,
1689                                                                sizeof namebuf),
1690                                     &dpif_port);
1691     if (error) {
1692         return error;
1693     }
1694
1695     port->odp_port = dpif_port.port_no;
1696
1697     if (netdev_get_tunnel_config(netdev)) {
1698         atomic_count_inc(&ofproto->backer->tnl_count);
1699         tnl_port_add(port, port->up.netdev, port->odp_port,
1700                      ovs_native_tunneling_is_on(ofproto), namebuf);
1701         port->is_tunnel = true;
1702         if (ofproto->ipfix) {
1703             dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
1704         }
1705     } else {
1706         /* Sanity-check that a mapping doesn't already exist.  This
1707          * shouldn't happen for non-tunnel ports. */
1708         if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
1709             VLOG_ERR("port %s already has an OpenFlow port number",
1710                      dpif_port.name);
1711             dpif_port_destroy(&dpif_port);
1712             return EBUSY;
1713         }
1714
1715         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1716         hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
1717                     hash_odp_port(port->odp_port));
1718         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1719     }
1720     dpif_port_destroy(&dpif_port);
1721
1722     if (ofproto->sflow) {
1723         dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
1724     }
1725
1726     return 0;
1727 }
1728
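     /* Tears down ofproto-dpif state for 'port_': removes non-tunnel ports
      * from the datapath when the underlying device still exists, and
      * unregisters the port from the tunnel, bundle, CFM, BFD, STP, RSTP, and
      * sFlow modules. */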
1729 static void
1730 port_destruct(struct ofport *port_)
1731 {
1732     struct ofport_dpif *port = ofport_dpif_cast(port_);
1733     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1734     const char *devname = netdev_get_name(port->up.netdev);
1735     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1736     const char *dp_port_name;
1737
1738     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1739     xlate_txn_start();
1740     xlate_ofport_remove(port);
1741     xlate_txn_commit();
1742
1743     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
1744                                               sizeof namebuf);
1745     if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
1746         /* The underlying device is still there, so delete it.  This
1747          * happens when the ofproto is being destroyed, since the caller
1748          * assumes that removal of attached ports will happen as part of
1749          * destruction. */
1750         if (!port->is_tunnel) {
1751             dpif_port_del(ofproto->backer->dpif, port->odp_port);
1752         }
1753     }
1754
1755     if (port->peer) {
1756         port->peer->peer = NULL;
1757         port->peer = NULL;
1758     }
1759
1760     if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
1761         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1762         hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
1763         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1764     }
1765
1766     if (port->is_tunnel) {
1767         atomic_count_dec(&ofproto->backer->tnl_count);
1768     }
1769
1770     if (port->is_tunnel && ofproto->ipfix) {
1771         dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
1772     }
1773
1774     tnl_port_del(port);
1775     sset_find_and_delete(&ofproto->ports, devname);
1776     sset_find_and_delete(&ofproto->ghost_ports, devname);
1777     bundle_remove(port_);
1778     set_cfm(port_, NULL);
1779     set_bfd(port_, NULL);
1780     if (port->stp_port) {
1781         stp_port_disable(port->stp_port);
1782     }
1783     set_rstp_port(port_, NULL);
1784     if (ofproto->sflow) {
1785         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
1786     }
1787
1788     free(port->qdscp);
1789 }
1790
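     /* Propagates a change to 'port_''s netdev to the bond, CFM, BFD,
      * monitor, and tunnel modules that reference it. */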
1791 static void
1792 port_modified(struct ofport *port_)
1793 {
1794     struct ofport_dpif *port = ofport_dpif_cast(port_);
1795     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1796     struct netdev *netdev = port->up.netdev;
1797
1798     if (port->bundle && port->bundle->bond) {
1799         bond_slave_set_netdev(port->bundle->bond, port, netdev);
1800     }
1801
1802     if (port->cfm) {
1803         cfm_set_netdev(port->cfm, netdev);
1804     }
1805
1806     if (port->bfd) {
1807         bfd_set_netdev(port->bfd, netdev);
1808     }
1809
1810     ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
1811                                      port->up.pp.hw_addr);
1812
1813     netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
1814
1815     if (port->is_tunnel) {
1816         struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1817
1818         if (tnl_port_reconfigure(port, netdev, port->odp_port,
1819                                  ovs_native_tunneling_is_on(ofproto), namebuf)) {
1820             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1821         }
1822     }
1823
1824     ofport_update_peer(port);
1825 }
1826
1827 static void
1828 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1829 {
1830     struct ofport_dpif *port = ofport_dpif_cast(port_);
1831     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1832     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1833
1834     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1835                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1836                    OFPUTIL_PC_NO_PACKET_IN)) {
1837         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1838
1839         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1840             bundle_update(port->bundle);
1841         }
1842     }
1843 }
1844
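     /* Enables, reconfigures, or (if 'sflow_options' is null) disables sFlow
      * on 'ofproto_'. */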
1845 static int
1846 set_sflow(struct ofproto *ofproto_,
1847           const struct ofproto_sflow_options *sflow_options)
1848 {
1849     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1850     struct dpif_sflow *ds = ofproto->sflow;
1851
1852     if (sflow_options) {
1853         if (!ds) {
1854             struct ofport_dpif *ofport;
1855
1856             ds = ofproto->sflow = dpif_sflow_create();
1857             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1858                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1859             }
1860             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1861         }
1862         dpif_sflow_set_options(ds, sflow_options);
1863     } else {
1864         if (ds) {
1865             dpif_sflow_unref(ds);
1866             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1867             ofproto->sflow = NULL;
1868         }
1869     }
1870     return 0;
1871 }
1872
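     /* Enables, reconfigures, or disables IPFIX exporters on 'ofproto_';
      * null options tear the exporters down after flushing their caches. */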
1873 static int
1874 set_ipfix(
1875     struct ofproto *ofproto_,
1876     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1877     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1878     size_t n_flow_exporters_options)
1879 {
1880     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1881     struct dpif_ipfix *di = ofproto->ipfix;
1882     bool has_options = bridge_exporter_options || flow_exporters_options;
1883     bool new_di = false;
1884
1885     if (has_options && !di) {
1886         di = ofproto->ipfix = dpif_ipfix_create();
1887         new_di = true;
1888     }
1889
1890     if (di) {
1891         /* Call set_options in any case to cleanly flush the flow
1892          * caches in the last exporters that are to be destroyed. */
1893         dpif_ipfix_set_options(
1894             di, bridge_exporter_options, flow_exporters_options,
1895             n_flow_exporters_options);
1896
1897         /* Add tunnel ports only when a new ipfix is created. */
1898         if (new_di) {
1899             struct ofport_dpif *ofport;
1900             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1901                 if (ofport->is_tunnel) {
1902                     dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
1903                 }
1904             }
1905         }
1906
1907         if (!has_options) {
1908             dpif_ipfix_unref(di);
1909             ofproto->ipfix = NULL;
1910         }
1911     }
1912
1913     return 0;
1914 }
1915
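     /* Configures 802.1ag CFM on 'ofport_' according to 's', or removes CFM
      * if 's' is null or cannot be applied.  Returns 0 on success, EINVAL if
      * the settings are invalid. */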
1916 static int
1917 set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
1918 {
1919     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1920     int error = 0;
1921
1922     if (s) {
1923         if (!ofport->cfm) {
1924             struct ofproto_dpif *ofproto;
1925
1926             ofproto = ofproto_dpif_cast(ofport->up.ofproto);
1927             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1928             ofport->cfm = cfm_create(ofport->up.netdev);
1929         }
1930
1931         if (cfm_configure(ofport->cfm, s)) {
1932             error = 0;
1933             goto out;
1934         }
1935
1936         error = EINVAL;
1937     }
1938     cfm_unref(ofport->cfm);
1939     ofport->cfm = NULL;
1940 out:
1941     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1942                                      ofport->up.pp.hw_addr);
1943     return error;
1944 }
1945
1946 static bool
1947 cfm_status_changed(struct ofport *ofport_)
1948 {
1949     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1950
1951     return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
1952 }
1953
1954 static int
1955 get_cfm_status(const struct ofport *ofport_,
1956                struct cfm_status *status)
1957 {
1958     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1959     int ret = 0;
1960
1961     if (ofport->cfm) {
1962         cfm_get_status(ofport->cfm, status);
1963     } else {
1964         ret = ENOENT;
1965     }
1966
1967     return ret;
1968 }
1969
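     /* Reconfigures BFD on 'ofport_' according to 'cfg', forcing revalidation
      * if the BFD session changed. */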
1970 static int
1971 set_bfd(struct ofport *ofport_, const struct smap *cfg)
1972 {
1973     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
1974     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1975     struct bfd *old;
1976
1977     old = ofport->bfd;
1978     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
1979                                 cfg, ofport->up.netdev);
1980     if (ofport->bfd != old) {
1981         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1982     }
1983     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1984                                      ofport->up.pp.hw_addr);
1985     return 0;
1986 }
1987
1988 static bool
1989 bfd_status_changed(struct ofport *ofport_)
1990 {
1991     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1992
1993     return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
1994 }
1995
1996 static int
1997 get_bfd_status(struct ofport *ofport_, struct smap *smap)
1998 {
1999     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2000     int ret = 0;
2001
2002     if (ofport->bfd) {
2003         bfd_get_status(ofport->bfd, smap);
2004     } else {
2005         ret = ENOENT;
2006     }
2007
2008     return ret;
2009 }
2010 \f
2011 /* Spanning Tree. */
2012
2013 /* Called while rstp_mutex is held. */
2014 static void
2015 rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_)
2016 {
2017     struct ofproto_dpif *ofproto = ofproto_;
2018     struct ofport_dpif *ofport = ofport_;
2019     struct eth_header *eth = ofpbuf_l2(pkt);
2020
2021     netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2022     if (eth_addr_is_zero(eth->eth_src)) {
2023         VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
2024                      "does not have a configured source MAC address.",
2025                      ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
2026     } else {
2027         ofproto_dpif_send_packet(ofport, pkt);
2028     }
2029     ofpbuf_delete(pkt);
2030 }
2031
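     /* Callback used by the STP module to transmit BPDU 'pkt' on the port
      * with STP port number 'port_num'.  Consumes 'pkt'. */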
2032 static void
2033 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
2034 {
2035     struct ofproto_dpif *ofproto = ofproto_;
2036     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
2037     struct ofport_dpif *ofport;
2038
2039     ofport = stp_port_get_aux(sp);
2040     if (!ofport) {
2041         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
2042                      ofproto->up.name, port_num);
2043     } else {
2044         struct eth_header *eth = ofpbuf_l2(pkt);
2045
2046         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2047         if (eth_addr_is_zero(eth->eth_src)) {
2048             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
2049                          "with unknown MAC", ofproto->up.name, port_num);
2050         } else {
2051             ofproto_dpif_send_packet(ofport, pkt);
2052         }
2053     }
2054     ofpbuf_delete(pkt);
2055 }
2056
2057 /* Configures RSTP on 'ofproto_' using the settings defined in 's'. */
2058 static void
2059 set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
2060 {
2061     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2062
2063     /* Only revalidate flows if the configuration changed. */
2064     if (!s != !ofproto->rstp) {
2065         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2066     }
2067
2068     if (s) {
2069         if (!ofproto->rstp) {
2070             ofproto->rstp = rstp_create(ofproto_->name, s->address,
2071                                         rstp_send_bpdu_cb, ofproto);
2072             ofproto->rstp_last_tick = time_msec();
2073         }
2074         rstp_set_bridge_address(ofproto->rstp, s->address);
2075         rstp_set_bridge_priority(ofproto->rstp, s->priority);
2076         rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
2077         rstp_set_bridge_force_protocol_version(ofproto->rstp,
2078                                                s->force_protocol_version);
2079         rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
2080         rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
2081         rstp_set_bridge_transmit_hold_count(ofproto->rstp,
2082                                             s->transmit_hold_count);
2083     } else {
2084         struct ofport *ofport;
2085         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2086             set_rstp_port(ofport, NULL);
2087         }
2088         rstp_unref(ofproto->rstp);
2089         ofproto->rstp = NULL;
2090     }
2091 }
2092
2093 static void
2094 get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
2095 {
2096     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2097
2098     if (ofproto->rstp) {
2099         s->enabled = true;
2100         s->root_id = rstp_get_root_id(ofproto->rstp);
2101         s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
2102         s->designated_id = rstp_get_designated_id(ofproto->rstp);
2103         s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
2104         s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
2105         s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
2106     } else {
2107         s->enabled = false;
2108     }
2109 }
2110
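     /* Recomputes 'ofport''s RSTP state and, if it changed, flushes learned
      * MACs as needed, schedules revalidation, and updates the OpenFlow port
      * state bits. */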
2111 static void
2112 update_rstp_port_state(struct ofport_dpif *ofport)
2113 {
2114     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2115     enum rstp_state state;
2116
2117     /* Figure out new state. */
2118     state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
2119         : RSTP_DISABLED;
2120
2121     /* Update state. */
2122     if (ofport->rstp_state != state) {
2123         enum ofputil_port_state of_state;
2124         bool fwd_change;
2125
2126         VLOG_DBG("port %s: RSTP state changed from %s to %s",
2127                  netdev_get_name(ofport->up.netdev),
2128                  rstp_state_name(ofport->rstp_state),
2129                  rstp_state_name(state));
2130         if (rstp_learn_in_state(ofport->rstp_state)
2131             != rstp_learn_in_state(state)) {
2132             /* XXX: Learning action flows should also be flushed. */
2133             if (ofport->bundle) {
2134                 bundle_flush_macs(ofport->bundle, false);
2135             }
2136         }
2137         fwd_change = rstp_forward_in_state(ofport->rstp_state)
2138             != rstp_forward_in_state(state);
2139
2140         ofproto->backer->need_revalidate = REV_RSTP;
2141         ofport->rstp_state = state;
2142
2143         if (fwd_change && ofport->bundle) {
2144             bundle_update(ofport->bundle);
2145         }
2146
2147         /* Update the RSTP state bits in the OpenFlow port description. */
2148         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2149         of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
2150                      : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2151                      : state == RSTP_DISCARDING ? OFPUTIL_PS_STP_LISTEN
2152                      : 0);
2153         ofproto_port_set_state(&ofport->up, of_state);
2154     }
2155 }
2156
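     /* Performs periodic RSTP processing for 'ofproto': ticks the protocol
      * timers about once per second and handles port state changes and FDB
      * flush requests. */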
2157 static void
2158 rstp_run(struct ofproto_dpif *ofproto)
2159 {
2160     if (ofproto->rstp) {
2161         long long int now = time_msec();
2162         long long int elapsed = now - ofproto->rstp_last_tick;
2163         struct rstp_port *rp;
2164         struct ofport_dpif *ofport;
2165
2166         /* Every second, decrease the values of the timers. */
2167         if (elapsed >= 1000) {
2168             rstp_tick_timers(ofproto->rstp);
2169             ofproto->rstp_last_tick = now;
2170         }
2171         rp = NULL;
2172         while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
2173             update_rstp_port_state(ofport);
2174         }
2175         rp = NULL;
2176         ofport = NULL;
2177         /* FIXME: This check should be done on-event (i.e., when setting
2178          * p->fdb_flush) and not periodically.
2179          */
2180         while ((ofport = rstp_check_and_reset_fdb_flush(ofproto->rstp, &rp))) {
2181             bundle_flush_macs(ofport->bundle, false);
2182         }
2183     }
2184 }
2185
2186 /* Configures STP on 'ofproto_' using the settings defined in 's'. */
2187 static int
2188 set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
2189 {
2190     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2191
2192     /* Only revalidate flows if the configuration changed. */
2193     if (!s != !ofproto->stp) {
2194         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2195     }
2196
2197     if (s) {
2198         if (!ofproto->stp) {
2199             ofproto->stp = stp_create(ofproto_->name, s->system_id,
2200                                       send_bpdu_cb, ofproto);
2201             ofproto->stp_last_tick = time_msec();
2202         }
2203
2204         stp_set_bridge_id(ofproto->stp, s->system_id);
2205         stp_set_bridge_priority(ofproto->stp, s->priority);
2206         stp_set_hello_time(ofproto->stp, s->hello_time);
2207         stp_set_max_age(ofproto->stp, s->max_age);
2208         stp_set_forward_delay(ofproto->stp, s->fwd_delay);
2209     } else {
2210         struct ofport *ofport;
2211
2212         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2213             set_stp_port(ofport, NULL);
2214         }
2215
2216         stp_unref(ofproto->stp);
2217         ofproto->stp = NULL;
2218     }
2219
2220     return 0;
2221 }
2222
2223 static int
2224 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2225 {
2226     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2227
2228     if (ofproto->stp) {
2229         s->enabled = true;
2230         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2231         s->designated_root = stp_get_designated_root(ofproto->stp);
2232         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2233     } else {
2234         s->enabled = false;
2235     }
2236
2237     return 0;
2238 }
2239
2240 static void
2241 update_stp_port_state(struct ofport_dpif *ofport)
2242 {
2243     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2244     enum stp_state state;
2245
2246     /* Figure out new state. */
2247     state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
2248                              : STP_DISABLED;
2249
2250     /* Update state. */
2251     if (ofport->stp_state != state) {
2252         enum ofputil_port_state of_state;
2253         bool fwd_change;
2254
2255         VLOG_DBG("port %s: STP state changed from %s to %s",
2256                  netdev_get_name(ofport->up.netdev),
2257                  stp_state_name(ofport->stp_state),
2258                  stp_state_name(state));
2259         if (stp_learn_in_state(ofport->stp_state)
2260                 != stp_learn_in_state(state)) {
2261             /* XXX: Learning action flows should also be flushed. */
2262             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2263             mac_learning_flush(ofproto->ml);
2264             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2265             mcast_snooping_mdb_flush(ofproto->ms);
2266         }
2267         fwd_change = stp_forward_in_state(ofport->stp_state)
2268                         != stp_forward_in_state(state);
2269
2270         ofproto->backer->need_revalidate = REV_STP;
2271         ofport->stp_state = state;
2272         ofport->stp_state_entered = time_msec();
2273
2274         if (fwd_change && ofport->bundle) {
2275             bundle_update(ofport->bundle);
2276         }
2277
2278         /* Update the STP state bits in the OpenFlow port description. */
2279         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2280         of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
2281                      : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
2282                      : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2283                      : state == STP_BLOCKING ?  OFPUTIL_PS_STP_BLOCK
2284                      : 0);
2285         ofproto_port_set_state(&ofport->up, of_state);
2286     }
2287 }
2288
2289 /* Configures STP on 'ofport_' using the settings defined in 's'.  The
2290  * caller is responsible for assigning STP port numbers and ensuring
2291  * there are no duplicates. */
2292 static int
2293 set_stp_port(struct ofport *ofport_,
2294              const struct ofproto_port_stp_settings *s)
2295 {
2296     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2297     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2298     struct stp_port *sp = ofport->stp_port;
2299
2300     if (!s || !s->enable) {
2301         if (sp) {
2302             ofport->stp_port = NULL;
2303             stp_port_disable(sp);
2304             update_stp_port_state(ofport);
2305         }
2306         return 0;
2307     } else if (sp && stp_port_no(sp) != s->port_num
2308                && ofport == stp_port_get_aux(sp)) {
2309         /* The port-id changed, so disable the old one if it's not
2310          * already in use by another port. */
2311         stp_port_disable(sp);
2312     }
2313
2314     sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
2315
2316     /* Set name before enabling the port so that debugging messages can print
2317      * the name. */
2318     stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
2319     stp_port_enable(sp);
2320
2321     stp_port_set_aux(sp, ofport);
2322     stp_port_set_priority(sp, s->priority);
2323     stp_port_set_path_cost(sp, s->path_cost);
2324
2325     update_stp_port_state(ofport);
2326
2327     return 0;
2328 }
2329
2330 static int
2331 get_stp_port_status(struct ofport *ofport_,
2332                     struct ofproto_port_stp_status *s)
2333 {
2334     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2335     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2336     struct stp_port *sp = ofport->stp_port;
2337
2338     if (!ofproto->stp || !sp) {
2339         s->enabled = false;
2340         return 0;
2341     }
2342
2343     s->enabled = true;
2344     s->port_id = stp_port_get_id(sp);
2345     s->state = stp_port_get_state(sp);
2346     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2347     s->role = stp_port_get_role(sp);
2348
2349     return 0;
2350 }
2351
2352 static int
2353 get_stp_port_stats(struct ofport *ofport_,
2354                    struct ofproto_port_stp_stats *s)
2355 {
2356     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2357     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2358     struct stp_port *sp = ofport->stp_port;
2359
2360     if (!ofproto->stp || !sp) {
2361         s->enabled = false;
2362         return 0;
2363     }
2364
2365     s->enabled = true;
2366     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2367
2368     return 0;
2369 }
2370
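     /* Performs periodic STP processing for 'ofproto': ticks the protocol
      * timers and handles port state changes and FDB flush requests. */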
2371 static void
2372 stp_run(struct ofproto_dpif *ofproto)
2373 {
2374     if (ofproto->stp) {
2375         long long int now = time_msec();
2376         long long int elapsed = now - ofproto->stp_last_tick;
2377         struct stp_port *sp;
2378
2379         if (elapsed > 0) {
2380             stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
2381             ofproto->stp_last_tick = now;
2382         }
2383         while (stp_get_changed_port(ofproto->stp, &sp)) {
2384             struct ofport_dpif *ofport = stp_port_get_aux(sp);
2385
2386             if (ofport) {
2387                 update_stp_port_state(ofport);
2388             }
2389         }
2390
2391         if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
2392             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2393             mac_learning_flush(ofproto->ml);
2394             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2395             mcast_snooping_mdb_flush(ofproto->ms);
2396         }
2397     }
2398 }
2399
2400 static void
2401 stp_wait(struct ofproto_dpif *ofproto)
2402 {
2403     if (ofproto->stp) {
2404         poll_timer_wait(1000);
2405     }
2406 }
2407
2408 /* Configures RSTP on 'ofport_' using the settings defined in 's'.  The
2409  * caller is responsible for assigning RSTP port numbers and ensuring
2410  * there are no duplicates. */
2411 static void
2412 set_rstp_port(struct ofport *ofport_,
2413               const struct ofproto_port_rstp_settings *s)
2414 {
2415     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2416     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2417     struct rstp_port *rp = ofport->rstp_port;
2418
2419     if (!s || !s->enable) {
2420         if (rp) {
2421             rstp_port_unref(rp);
2422             ofport->rstp_port = NULL;
2423             update_rstp_port_state(ofport);
2424         }
2425         return;
2426     }
2427
2428     /* Check if we need to add a new port. */
2429     if (!rp) {
2430         rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
2431     }
2432
2433     rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
2434                   s->admin_edge_port, s->auto_edge,
2435                   s->admin_p2p_mac_state, s->admin_port_state, s->mcheck,
2436                   ofport);
2437     update_rstp_port_state(ofport);
2438     /* Synchronize operational status. */
2439     rstp_port_set_mac_operational(rp, ofport->may_enable);
2440 }
2441
2442 static void
2443 get_rstp_port_status(struct ofport *ofport_,
2444                      struct ofproto_port_rstp_status *s)
2445 {
2446     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2447     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2448     struct rstp_port *rp = ofport->rstp_port;
2449
2450     if (!ofproto->rstp || !rp) {
2451         s->enabled = false;
2452         return;
2453     }
2454
2455     s->enabled = true;
2456     rstp_port_get_status(rp, &s->port_id, &s->state, &s->role,
2457                          &s->designated_bridge_id, &s->designated_port_id,
2458                          &s->designated_path_cost, &s->tx_count,
2459                          &s->rx_count, &s->error_count, &s->uptime);
2460 }
2461
2462 \f
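     /* Sets 'ofport_''s QoS queue-to-DSCP mapping to the 'n_qdscp' elements
      * in 'qdscp', revalidating flows if the mapping changed. */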
2463 static int
2464 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2465            size_t n_qdscp)
2466 {
2467     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2468     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2469
2470     if (ofport->n_qdscp != n_qdscp
2471         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2472                               n_qdscp * sizeof *qdscp))) {
2473         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2474         free(ofport->qdscp);
2475         ofport->qdscp = n_qdscp
2476             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2477             : NULL;
2478         ofport->n_qdscp = n_qdscp;
2479     }
2480
2481     return 0;
2482 }
2483 \f
2484 /* Bundles. */
2485
2486 /* Expires all MAC learning entries associated with 'bundle' and forces its
2487  * ofproto to revalidate every flow.
2488  *
2489  * Normally MAC learning entries are removed only from the ofproto associated
2490  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
2491  * are removed from every ofproto.  When patch ports and SLB bonds are in use,
2492  * if a VM migration happens and the gratuitous ARPs are somehow lost, this
2493  * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
2494  * with the host from which it migrated. */
2495 static void
2496 bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
2497 {
2498     struct ofproto_dpif *ofproto = bundle->ofproto;
2499     struct mac_learning *ml = ofproto->ml;
2500     struct mac_entry *mac, *next_mac;
2501
2502     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2503     ovs_rwlock_wrlock(&ml->rwlock);
2504     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2505         if (mac->port.p == bundle) {
2506             if (all_ofprotos) {
2507                 struct ofproto_dpif *o;
2508
2509                 HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2510                     if (o != ofproto) {
2511                         struct mac_entry *e;
2512
2513                         ovs_rwlock_wrlock(&o->ml->rwlock);
2514                         e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
2515                         if (e) {
2516                             mac_learning_expire(o->ml, e);
2517                         }
2518                         ovs_rwlock_unlock(&o->ml->rwlock);
2519                     }
2520                 }
2521             }
2522
2523             mac_learning_expire(ml, mac);
2524         }
2525     }
2526     ovs_rwlock_unlock(&ml->rwlock);
2527 }
2528
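     /* Returns the bundle in 'ofproto' with auxiliary data 'aux', or NULL if
      * there is none. */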
2529 static struct ofbundle *
2530 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2531 {
2532     struct ofbundle *bundle;
2533
2534     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2535                              &ofproto->bundles) {
2536         if (bundle->aux == aux) {
2537             return bundle;
2538         }
2539     }
2540     return NULL;
2541 }
2542
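     /* Recomputes whether 'bundle' is floodable, based on its member ports'
      * configuration and STP/RSTP forwarding state. */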
2543 static void
2544 bundle_update(struct ofbundle *bundle)
2545 {
2546     struct ofport_dpif *port;
2547
2548     bundle->floodable = true;
2549     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2550         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2551             || port->is_layer3
2552             || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
2553             || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
2554             bundle->floodable = false;
2555             break;
2556         }
2557     }
2558 }
2559
2560 static void
2561 bundle_del_port(struct ofport_dpif *port)
2562 {
2563     struct ofbundle *bundle = port->bundle;
2564
2565     bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2566
2567     list_remove(&port->bundle_node);
2568     port->bundle = NULL;
2569
2570     if (bundle->lacp) {
2571         lacp_slave_unregister(bundle->lacp, port);
2572     }
2573     if (bundle->bond) {
2574         bond_slave_unregister(bundle->bond, port);
2575     }
2576
2577     bundle_update(bundle);
2578 }
2579
2580 static bool
2581 bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
2582                 struct lacp_slave_settings *lacp)
2583 {
2584     struct ofport_dpif *port;
2585
2586     port = get_ofp_port(bundle->ofproto, ofp_port);
2587     if (!port) {
2588         return false;
2589     }
2590
2591     if (port->bundle != bundle) {
2592         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2593         if (port->bundle) {
2594             bundle_remove(&port->up);
2595         }
2596
2597         port->bundle = bundle;
2598         list_push_back(&bundle->ports, &port->bundle_node);
2599         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2600             || port->is_layer3
2601             || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
2602             || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
2603             bundle->floodable = false;
2604         }
2605     }
2606     if (lacp) {
2607         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2608         lacp_slave_register(bundle->lacp, port, lacp);
2609     }
2610
2611     return true;
2612 }
2613
2614 static void
2615 bundle_destroy(struct ofbundle *bundle)
2616 {
2617     struct ofproto_dpif *ofproto;
2618     struct ofport_dpif *port, *next_port;
2619
2620     if (!bundle) {
2621         return;
2622     }
2623
2624     ofproto = bundle->ofproto;
2625     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
2626
2627     xlate_txn_start();
2628     xlate_bundle_remove(bundle);
2629     xlate_txn_commit();
2630
2631     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2632         bundle_del_port(port);
2633     }
2634
2635     bundle_flush_macs(bundle, true);
2636     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
2637     free(bundle->name);
2638     free(bundle->trunks);
2639     lacp_unref(bundle->lacp);
2640     bond_unref(bundle->bond);
2641     free(bundle);
2642 }
2643
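     /* Creates, reconfigures, or (when 's' is null) destroys the bundle with
      * auxiliary data 'aux' on 'ofproto_', bringing its ports, LACP, VLAN,
      * trunk, and bond configuration into line with 's'. */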
2644 static int
2645 bundle_set(struct ofproto *ofproto_, void *aux,
2646            const struct ofproto_bundle_settings *s)
2647 {
2648     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2649     bool need_flush = false;
2650     struct ofport_dpif *port;
2651     struct ofbundle *bundle;
2652     unsigned long *trunks;
2653     int vlan;
2654     size_t i;
2655     bool ok;
2656
2657     if (!s) {
2658         bundle_destroy(bundle_lookup(ofproto, aux));
2659         return 0;
2660     }
2661
2662     ovs_assert(s->n_slaves == 1 || s->bond != NULL);
2663     ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));
2664
2665     bundle = bundle_lookup(ofproto, aux);
2666     if (!bundle) {
2667         bundle = xmalloc(sizeof *bundle);
2668
2669         bundle->ofproto = ofproto;
2670         hmap_insert(&ofproto->bundles, &bundle->hmap_node,
2671                     hash_pointer(aux, 0));
2672         bundle->aux = aux;
2673         bundle->name = NULL;
2674
2675         list_init(&bundle->ports);
2676         bundle->vlan_mode = PORT_VLAN_TRUNK;
2677         bundle->vlan = -1;
2678         bundle->trunks = NULL;
2679         bundle->use_priority_tags = s->use_priority_tags;
2680         bundle->lacp = NULL;
2681         bundle->bond = NULL;
2682
2683         bundle->floodable = true;
2684         mbridge_register_bundle(ofproto->mbridge, bundle);
2685     }
2686
2687     if (!bundle->name || strcmp(s->name, bundle->name)) {
2688         free(bundle->name);
2689         bundle->name = xstrdup(s->name);
2690     }
2691
2692     /* LACP. */
2693     if (s->lacp) {
2694         ofproto->lacp_enabled = true;
2695         if (!bundle->lacp) {
2696             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2697             bundle->lacp = lacp_create();
2698         }
2699         lacp_configure(bundle->lacp, s->lacp);
2700     } else {
2701         lacp_unref(bundle->lacp);
2702         bundle->lacp = NULL;
2703     }
2704
2705     /* Update set of ports. */
2706     ok = true;
2707     for (i = 0; i < s->n_slaves; i++) {
2708         if (!bundle_add_port(bundle, s->slaves[i],
2709                              s->lacp ? &s->lacp_slaves[i] : NULL)) {
2710             ok = false;
2711         }
2712     }
2713     if (!ok || list_size(&bundle->ports) != s->n_slaves) {
2714         struct ofport_dpif *next_port;
2715
2716         LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2717             for (i = 0; i < s->n_slaves; i++) {
2718                 if (s->slaves[i] == port->up.ofp_port) {
2719                     goto found;
2720                 }
2721             }
2722
2723             bundle_del_port(port);
2724         found: ;
2725         }
2726     }
2727     ovs_assert(list_size(&bundle->ports) <= s->n_slaves);
2728
2729     if (list_is_empty(&bundle->ports)) {
2730         bundle_destroy(bundle);
2731         return EINVAL;
2732     }
2733
2734     /* Set VLAN tagging mode. */
2735     if (s->vlan_mode != bundle->vlan_mode
2736         || s->use_priority_tags != bundle->use_priority_tags) {
2737         bundle->vlan_mode = s->vlan_mode;
2738         bundle->use_priority_tags = s->use_priority_tags;
2739         need_flush = true;
2740     }
2741
2742     /* Set VLAN tag. */
2743     vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
2744             : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
2745             : 0);
2746     if (vlan != bundle->vlan) {
2747         bundle->vlan = vlan;
2748         need_flush = true;
2749     }
2750
2751     /* Get trunked VLANs. */
2752     switch (s->vlan_mode) {
2753     case PORT_VLAN_ACCESS:
2754         trunks = NULL;
2755         break;
2756
2757     case PORT_VLAN_TRUNK:
2758         trunks = CONST_CAST(unsigned long *, s->trunks);
2759         break;
2760
2761     case PORT_VLAN_NATIVE_UNTAGGED:
2762     case PORT_VLAN_NATIVE_TAGGED:
2763         if (vlan != 0 && (!s->trunks
2764                           || !bitmap_is_set(s->trunks, vlan)
2765                           || bitmap_is_set(s->trunks, 0))) {
2766             /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
2767             if (s->trunks) {
2768                 trunks = bitmap_clone(s->trunks, 4096);
2769             } else {
2770                 trunks = bitmap_allocate1(4096);
2771             }
2772             bitmap_set1(trunks, vlan);
2773             bitmap_set0(trunks, 0);
2774         } else {
2775             trunks = CONST_CAST(unsigned long *, s->trunks);
2776         }
2777         break;
2778
2779     default:
2780         OVS_NOT_REACHED();
2781     }
2782     if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
2783         free(bundle->trunks);
2784         if (trunks == s->trunks) {
2785             bundle->trunks = vlan_bitmap_clone(trunks);
2786         } else {
2787             bundle->trunks = trunks;
2788             trunks = NULL;
2789         }
2790         need_flush = true;
2791     }
2792     if (trunks != s->trunks) {
2793         free(trunks);
2794     }
2795
2796     /* Bonding. */
2797     if (!list_is_short(&bundle->ports)) {
2798         bundle->ofproto->has_bonded_bundles = true;
2799         if (bundle->bond) {
2800             if (bond_reconfigure(bundle->bond, s->bond)) {
2801                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
2802             }
2803         } else {
2804             bundle->bond = bond_create(s->bond, ofproto);
2805             ofproto->backer->need_revalidate = REV_RECONFIGURE;
2806         }
2807
2808         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2809             bond_slave_register(bundle->bond, port,
2810                                 port->up.ofp_port, port->up.netdev);
2811         }
2812     } else {
2813         bond_unref(bundle->bond);
2814         bundle->bond = NULL;
2815     }
2816
2817     /* If we changed something that would affect MAC learning, un-learn
2818      * everything on this port and force flow revalidation. */
2819     if (need_flush) {
2820         bundle_flush_macs(bundle, false);
2821     }
2822
2823     return 0;
2824 }
2825
2826 static void
2827 bundle_remove(struct ofport *port_)
2828 {
2829     struct ofport_dpif *port = ofport_dpif_cast(port_);
2830     struct ofbundle *bundle = port->bundle;
2831
2832     if (bundle) {
2833         bundle_del_port(port);
2834         if (list_is_empty(&bundle->ports)) {
2835             bundle_destroy(bundle);
2836         } else if (list_is_short(&bundle->ports)) {
2837             bond_unref(bundle->bond);
2838             bundle->bond = NULL;
2839         }
2840     }
2841 }
2842
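     /* Callback used by the LACP module to transmit the 'pdu_size'-byte
      * LACPDU 'pdu' on 'port_'. */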
2843 static void
2844 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2845 {
2846     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2847     struct ofport_dpif *port = port_;
2848     uint8_t ea[ETH_ADDR_LEN];
2849     int error;
2850
2851     error = netdev_get_etheraddr(port->up.netdev, ea);
2852     if (!error) {
2853         struct ofpbuf packet;
2854         void *packet_pdu;
2855
2856         ofpbuf_init(&packet, 0);
2857         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2858                                  pdu_size);
2859         memcpy(packet_pdu, pdu, pdu_size);
2860
2861         ofproto_dpif_send_packet(port, &packet);
2862         ofpbuf_uninit(&packet);
2863     } else {
2864         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2865                     "%s (%s)", port->bundle->name,
2866                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2867     }
2868 }
2869
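     /* Sends a gratuitous learning packet for each MAC learning table entry
      * that does not belong to 'bundle', so that the network relearns those
      * addresses through the bond's currently active slaves. */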
2870 static void
2871 bundle_send_learning_packets(struct ofbundle *bundle)
2872 {
2873     struct ofproto_dpif *ofproto = bundle->ofproto;
2874     struct ofpbuf *learning_packet;
2875     int error, n_packets, n_errors;
2876     struct mac_entry *e;
2877     struct list packets;
2878
2879     list_init(&packets);
2880     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
2881     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
2882         if (e->port.p != bundle) {
2883             void *port_void;
2884
2885             learning_packet = bond_compose_learning_packet(bundle->bond,
2886                                                            e->mac, e->vlan,
2887                                                            &port_void);
2888             /* Temporarily use 'frame' as a private pointer (see below). */
2889             ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
2890             learning_packet->frame = port_void;
2891             list_push_back(&packets, &learning_packet->list_node);
2892         }
2893     }
2894     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2895
2896     error = n_packets = n_errors = 0;
2897     LIST_FOR_EACH (learning_packet, list_node, &packets) {
2898         int ret;
2899         void *port_void = learning_packet->frame;
2900
2901         /* Restore 'frame'. */
2902         learning_packet->frame = ofpbuf_data(learning_packet);
2903         ret = ofproto_dpif_send_packet(port_void, learning_packet);
2904         if (ret) {
2905             error = ret;
2906             n_errors++;
2907         }
2908         n_packets++;
2909     }
2910     ofpbuf_list_delete(&packets);
2911
2912     if (n_errors) {
2913         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2914         VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
2915                      "packets, last error was: %s",
2916                      bundle->name, n_errors, n_packets, ovs_strerror(error));
2917     } else {
2918         VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2919                  bundle->name, n_packets);
2920     }
2921 }
2922
2923 static void
2924 bundle_run(struct ofbundle *bundle)
2925 {
2926     if (bundle->lacp) {
2927         lacp_run(bundle->lacp, send_pdu_cb);
2928     }
2929     if (bundle->bond) {
2930         struct ofport_dpif *port;
2931
2932         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2933             bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
2934         }
2935
2936         if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
2937             bundle->ofproto->backer->need_revalidate = REV_BOND;
2938         }
2939
2940         if (bond_should_send_learning_packets(bundle->bond)) {
2941             bundle_send_learning_packets(bundle);
2942         }
2943     }
2944 }
2945
2946 static void
2947 bundle_wait(struct ofbundle *bundle)
2948 {
2949     if (bundle->lacp) {
2950         lacp_wait(bundle->lacp);
2951     }
2952     if (bundle->bond) {
2953         bond_wait(bundle->bond);
2954     }
2955 }
2956 \f
2957 /* Mirrors. */
2958
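     /* Creates, reconfigures, or (when 's' is null) destroys the mirror with
      * auxiliary data 'aux' on 'ofproto_'. */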
2959 static int
2960 mirror_set__(struct ofproto *ofproto_, void *aux,
2961              const struct ofproto_mirror_settings *s)
2962 {
2963     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2964     struct ofbundle **srcs, **dsts;
2965     int error;
2966     size_t i;
2967
2968     if (!s) {
2969         mirror_destroy(ofproto->mbridge, aux);
2970         return 0;
2971     }
2972
2973     srcs = xmalloc(s->n_srcs * sizeof *srcs);
2974     dsts = xmalloc(s->n_dsts * sizeof *dsts);
2975
2976     for (i = 0; i < s->n_srcs; i++) {
2977         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
2978     }
2979
2980     for (i = 0; i < s->n_dsts; i++) {
2981         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
2982     }
2983
2984     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
2985                        s->n_dsts, s->src_vlans,
2986                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
2987     free(srcs);
2988     free(dsts);
2989     return error;
2990 }
2991
2992 static int
2993 mirror_get_stats__(struct ofproto *ofproto, void *aux,
2994                    uint64_t *packets, uint64_t *bytes)
2995 {
2996     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
2997                             bytes);
2998 }
2999
3000 static int
3001 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
3002 {
3003     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3004     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
3005     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
3006         mac_learning_flush(ofproto->ml);
3007     }
3008     ovs_rwlock_unlock(&ofproto->ml->rwlock);
3009     return 0;
3010 }
3011
3012 static bool
3013 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
3014 {
3015     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3016     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3017     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
3018 }
3019
3020 static void
3021 forward_bpdu_changed(struct ofproto *ofproto_)
3022 {
3023     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3024     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3025 }
3026
3027 static void
3028 set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
3029                      size_t max_entries)
3030 {
3031     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3032     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
3033     mac_learning_set_idle_time(ofproto->ml, idle_time);
3034     mac_learning_set_max_entries(ofproto->ml, max_entries);
3035     ovs_rwlock_unlock(&ofproto->ml->rwlock);
3036 }
3037
3038 /* Configures multicast snooping on 'ofproto_' using the settings
3039  * defined in 's'. */
3040 static int
3041 set_mcast_snooping(struct ofproto *ofproto_,
3042                    const struct ofproto_mcast_snooping_settings *s)
3043 {
3044     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3045
3046     /* Only revalidate flows if the configuration changed. */
3047     if (!s != !ofproto->ms) {
3048         ofproto->backer->need_revalidate = REV_RECONFIGURE;
3049     }
3050
3051     if (s) {
3052         if (!ofproto->ms) {
3053             ofproto->ms = mcast_snooping_create();
3054         }
3055
3056         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3057         mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
3058         mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
3059         if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
3060             ofproto->backer->need_revalidate = REV_RECONFIGURE;
3061         }
3062         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3063     } else {
3064         mcast_snooping_unref(ofproto->ms);
3065         ofproto->ms = NULL;
3066     }
3067
3068     return 0;
3069 }
3070
3071 /* Configures a multicast snooping port's flood setting on 'ofproto'. */
3072 static int
3073 set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood)
3074 {
3075     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3076     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3077
3078     if (ofproto->ms) {
3079         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3080         mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle,
3081                                       flood);
3082         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3083     }
3084     return 0;
3085 }
3086
3087 \f
3088 /* Ports. */
3089
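     /* Returns the port in 'ofproto' with OpenFlow port number 'ofp_port', or
      * NULL if there is none. */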
3090 static struct ofport_dpif *
3091 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
3092 {
3093     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
3094     return ofport ? ofport_dpif_cast(ofport) : NULL;
3095 }
3096
3097 static void
3098 ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
3099                             struct ofproto_port *ofproto_port,
3100                             struct dpif_port *dpif_port)
3101 {
3102     ofproto_port->name = dpif_port->name;
3103     ofproto_port->type = dpif_port->type;
3104     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
3105 }
3106
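     /* Re-resolves the peer of patch port 'ofport', clearing any stale
      * peering first.  No-op for non-patch ports. */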
3107 static void
3108 ofport_update_peer(struct ofport_dpif *ofport)
3109 {
3110     const struct ofproto_dpif *ofproto;
3111     struct dpif_backer *backer;
3112     char *peer_name;
3113
3114     if (!netdev_vport_is_patch(ofport->up.netdev)) {
3115         return;
3116     }
3117
3118     backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
3119     backer->need_revalidate = REV_RECONFIGURE;
3120
3121     if (ofport->peer) {
3122         ofport->peer->peer = NULL;
3123         ofport->peer = NULL;
3124     }
3125
3126     peer_name = netdev_vport_patch_peer(ofport->up.netdev);
3127     if (!peer_name) {
3128         return;
3129     }
3130
3131     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
3132         struct ofport *peer_ofport;
3133         struct ofport_dpif *peer;
3134         char *peer_peer;
3135
3136         if (ofproto->backer != backer) {
3137             continue;
3138         }
3139
3140         peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
3141         if (!peer_ofport) {
3142             continue;
3143         }
3144
3145         peer = ofport_dpif_cast(peer_ofport);
3146         peer_peer = netdev_vport_patch_peer(peer->up.netdev);
3147         if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
3148                                  peer_peer)) {
3149             ofport->peer = peer;
3150             ofport->peer->peer = ofport;
3151         }
3152         free(peer_peer);
3153
3154         break;
3155     }
3156     free(peer_name);
3157 }
3158
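     /* Refreshes 'ofport''s operational status: combines carrier, CFM, BFD,
      * and LACP health into 'may_enable' and schedules revalidation when it
      * changes. */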
3159 static void
3160 port_run(struct ofport_dpif *ofport)
3161 {
3162     long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
3163     bool carrier_changed = carrier_seq != ofport->carrier_seq;
3164     bool enable = netdev_get_carrier(ofport->up.netdev);
3165     bool cfm_enable = false;
3166     bool bfd_enable = false;
3167
3168     ofport->carrier_seq = carrier_seq;
3169
3170     if (ofport->cfm) {
3171         int cfm_opup = cfm_get_opup(ofport->cfm);
3172
3173         cfm_enable = !cfm_get_fault(ofport->cfm);
3174
3175         if (cfm_opup >= 0) {
3176             cfm_enable = cfm_enable && cfm_opup;
3177         }
3178     }
3179
3180     if (ofport->bfd) {
3181         bfd_enable = bfd_forwarding(ofport->bfd);
3182     }
3183
3184     if (ofport->bfd || ofport->cfm) {
3185         enable = enable && (cfm_enable || bfd_enable);
3186     }
3187
3188     if (ofport->bundle) {
3189         enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
3190         if (carrier_changed) {
3191             lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
3192         }
3193     }
3194
3195     if (ofport->may_enable != enable) {
3196         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3197
3198         ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
3199
3200         if (ofport->rstp_port) {
3201             rstp_port_set_mac_operational(ofport->rstp_port, enable);
3202         }
3203     }
3204
3205     ofport->may_enable = enable;
3206 }
3207
3208 static int
3209 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
3210                    struct ofproto_port *ofproto_port)
3211 {
3212     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3213     struct dpif_port dpif_port;
3214     int error;
3215
3216     if (sset_contains(&ofproto->ghost_ports, devname)) {
3217         const char *type = netdev_get_type_from_name(devname);
3218
3219         /* We may be called before ofproto->up.port_by_name is populated with
3220          * the appropriate ofport.  For this reason, we must get the name and
3221          * type from the netdev layer directly. */
3222         if (type) {
3223             const struct ofport *ofport;
3224
3225             ofport = shash_find_data(&ofproto->up.port_by_name, devname);
3226             ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
3227             ofproto_port->name = xstrdup(devname);
3228             ofproto_port->type = xstrdup(type);
3229             return 0;
3230         }
3231         return ENODEV;
3232     }
3233
3234     if (!sset_contains(&ofproto->ports, devname)) {
3235         return ENODEV;
3236     }
3237     error = dpif_port_query_by_name(ofproto->backer->dpif,
3238                                     devname, &dpif_port);
3239     if (!error) {
3240         ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
3241     }
3242     return error;
3243 }
3244
3245 static int
3246 port_add(struct ofproto *ofproto_, struct netdev *netdev)
3247 {
3248     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3249     const char *devname = netdev_get_name(netdev);
3250     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
3251     const char *dp_port_name;
3252
3253     if (netdev_vport_is_patch(netdev)) {
3254         sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
3255         return 0;
3256     }
3257
3258     dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
3259     if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
3260         odp_port_t port_no = ODPP_NONE;
3261         int error;
3262
3263         error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
3264         if (error) {
3265             return error;
3266         }
3267         if (netdev_get_tunnel_config(netdev)) {
3268             simap_put(&ofproto->backer->tnl_backers,
3269                       dp_port_name, odp_to_u32(port_no));
3270         }
3271     }
3272
3273     if (netdev_get_tunnel_config(netdev)) {
3274         sset_add(&ofproto->ghost_ports, devname);
3275     } else {
3276         sset_add(&ofproto->ports, devname);
3277     }
3278     return 0;
3279 }
3280
3281 static int
3282 port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
3283 {
3284     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3285     struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
3286     int error = 0;
3287
3288     if (!ofport) {
3289         return 0;
3290     }
3291
3292     sset_find_and_delete(&ofproto->ghost_ports,
3293                          netdev_get_name(ofport->up.netdev));
3294     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3295     if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
3296         error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
3297         if (!error) {
3298             /* The caller is going to close ofport->up.netdev.  If this is a
3299              * bonded port, then the bond is using that netdev, so remove it
3300              * from the bond.  The client will need to reconfigure everything
3301              * after deleting ports, so the slave will then be re-added. */
3302             bundle_remove(&ofport->up);
3303         }
3304     }
3305     return error;
3306 }
3307
3308 static int
3309 port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
3310 {
3311     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3312     int error;
3313
3314     error = netdev_get_stats(ofport->up.netdev, stats);
3315
3316     if (!error && ofport_->ofp_port == OFPP_LOCAL) {
3317         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3318
3319         ovs_mutex_lock(&ofproto->stats_mutex);
3320         /* ofproto->stats.tx_packets represents packets that we created
3321          * internally and sent to some port (e.g. packets sent with
3322          * ofproto_dpif_send_packet()).  Account for them as if they had
3323          * come from OFPP_LOCAL and got forwarded. */
3324
3325         if (stats->rx_packets != UINT64_MAX) {
3326             stats->rx_packets += ofproto->stats.tx_packets;
3327         }
3328
3329         if (stats->rx_bytes != UINT64_MAX) {
3330             stats->rx_bytes += ofproto->stats.tx_bytes;
3331         }
3332
3333         /* ofproto->stats.rx_packets represents packets that were received on
3334          * some port and we processed internally and dropped (e.g. STP).
3335          * Account for them as if they had been forwarded to OFPP_LOCAL. */
3336
3337         if (stats->tx_packets != UINT64_MAX) {
3338             stats->tx_packets += ofproto->stats.rx_packets;
3339         }
3340
3341         if (stats->tx_bytes != UINT64_MAX) {
3342             stats->tx_bytes += ofproto->stats.rx_bytes;
3343         }
3344         ovs_mutex_unlock(&ofproto->stats_mutex);
3345     }
3346
3347     return error;
3348 }
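
/* Worked example for the OFPP_LOCAL adjustment above (the numbers are
 * hypothetical): if the bridge has internally generated and sent 10 packets
 * (ofproto->stats.tx_packets == 10) and the LOCAL port's netdev reports
 * rx_packets == 100, then this function reports rx_packets == 110, as if
 * those 10 packets had arrived on OFPP_LOCAL and been forwarded. */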
3349
3350 static int
3351 port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats)
3352 {
3353     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3354     if (ofport->bundle && ofport->bundle->lacp) {
3355         if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) {
3356             return 0;
3357         }
3358     }
3359     return -1;
3360 }
3361
3362 struct port_dump_state {
3363     uint32_t bucket;
3364     uint32_t offset;
3365     bool ghost;
3366
3367     struct ofproto_port port;
3368     bool has_port;
3369 };
3370
3371 static int
3372 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3373 {
3374     *statep = xzalloc(sizeof(struct port_dump_state));
3375     return 0;
3376 }
3377
3378 static int
3379 port_dump_next(const struct ofproto *ofproto_, void *state_,
3380                struct ofproto_port *port)
3381 {
3382     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3383     struct port_dump_state *state = state_;
3384     const struct sset *sset;
3385     struct sset_node *node;
3386
3387     if (state->has_port) {
3388         ofproto_port_destroy(&state->port);
3389         state->has_port = false;
3390     }
3391     sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
3392     while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
3393         int error;
3394
3395         error = port_query_by_name(ofproto_, node->name, &state->port);
3396         if (!error) {
3397             *port = state->port;
3398             state->has_port = true;
3399             return 0;
3400         } else if (error != ENODEV) {
3401             return error;
3402         }
3403     }
3404
3405     if (!state->ghost) {
3406         state->ghost = true;
3407         state->bucket = 0;
3408         state->offset = 0;
3409         return port_dump_next(ofproto_, state_, port);
3410     }
3411
3412     return EOF;
3413 }
3414
3415 static int
3416 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
3417 {
3418     struct port_dump_state *state = state_;
3419
3420     if (state->has_port) {
3421         ofproto_port_destroy(&state->port);
3422     }
3423     free(state);
3424     return 0;
3425 }
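
/* Usage sketch for the dump functions above (illustrative only; these are
 * provider callbacks, normally driven by the generic ofproto layer, and
 * 'ofproto' is hypothetical):
 *
 *     void *state;
 *     struct ofproto_port port;
 *
 *     port_dump_start(ofproto, &state);
 *     while (!port_dump_next(ofproto, state, &port)) {
 *         ...use port.name, port.type, port.ofp_port...
 *     }
 *     port_dump_done(ofproto, state);
 *
 * port_dump_next() returns EOF once both the regular and ghost port sets
 * have been walked; any other nonzero return is an error, which this simple
 * loop does not distinguish. */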
3426
3427 static int
3428 port_poll(const struct ofproto *ofproto_, char **devnamep)
3429 {
3430     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3431
3432     if (ofproto->port_poll_errno) {
3433         int error = ofproto->port_poll_errno;
3434         ofproto->port_poll_errno = 0;
3435         return error;
3436     }
3437
3438     if (sset_is_empty(&ofproto->port_poll_set)) {
3439         return EAGAIN;
3440     }
3441
3442     *devnamep = sset_pop(&ofproto->port_poll_set);
3443     return 0;
3444 }
3445
3446 static void
3447 port_poll_wait(const struct ofproto *ofproto_)
3448 {
3449     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3450     dpif_port_poll_wait(ofproto->backer->dpif);
3451 }
3452
3453 static int
3454 port_is_lacp_current(const struct ofport *ofport_)
3455 {
3456     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3457     return (ofport->bundle && ofport->bundle->lacp
3458             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3459             : -1);
3460 }
3461 \f
3462 /* If 'rule' is an OpenFlow rule that has expired according to OpenFlow rules,
3463  * then delete it entirely. */
3464 static void
3465 rule_expire(struct rule_dpif *rule)
3466     OVS_REQUIRES(ofproto_mutex)
3467 {
3468     uint16_t hard_timeout, idle_timeout;
3469     long long int now = time_msec();
3470     int reason = -1;
3471
3472     hard_timeout = rule->up.hard_timeout;
3473     idle_timeout = rule->up.idle_timeout;
3474
3475     /* Has 'rule' expired? */
3476     if (hard_timeout) {
3477         long long int modified;
3478
3479         ovs_mutex_lock(&rule->up.mutex);
3480         modified = rule->up.modified;
3481         ovs_mutex_unlock(&rule->up.mutex);
3482
3483         if (now > modified + hard_timeout * 1000) {
3484             reason = OFPRR_HARD_TIMEOUT;
3485         }
3486     }
3487
3488     if (reason < 0 && idle_timeout) {
3489         long long int used;
3490
3491         ovs_mutex_lock(&rule->stats_mutex);
3492         used = rule->stats.used;
3493         ovs_mutex_unlock(&rule->stats_mutex);
3494
3495         if (now > used + idle_timeout * 1000) {
3496             reason = OFPRR_IDLE_TIMEOUT;
3497         }
3498     }
3499
3500     if (reason >= 0) {
3501         COVERAGE_INC(ofproto_dpif_expired);
3502         ofproto_rule_expire(&rule->up, reason);
3503     }
3504 }
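
/* Example of the arithmetic above (the values are hypothetical): a rule with
 * an idle_timeout of 30 s whose stats.used is 100000 ms expires once
 * time_msec() exceeds 100000 + 30 * 1000 == 130000 ms.  The timeouts are in
 * seconds while 'now', 'modified', and 'used' are in milliseconds, hence the
 * factor of 1000. */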
3505
3506 /* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
3507  * 'flow' must reflect the data in 'packet'. */
3508 int
3509 ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
3510                              const struct flow *flow,
3511                              struct rule_dpif *rule,
3512                              const struct ofpact *ofpacts, size_t ofpacts_len,
3513                              struct ofpbuf *packet)
3514 {
3515     struct dpif_flow_stats stats;
3516     struct xlate_out xout;
3517     struct xlate_in xin;
3518     ofp_port_t in_port;
3519     struct dpif_execute execute;
3520     int error;
3521
3522     ovs_assert((rule != NULL) != (ofpacts != NULL));
3523
3524     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
3525
3526     if (rule) {
3527         rule_dpif_credit_stats(rule, &stats);
3528     }
3529
3530     xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
3531                   stats.tcp_flags, packet);
3532     xin.ofpacts = ofpacts;
3533     xin.ofpacts_len = ofpacts_len;
3534     xin.resubmit_stats = &stats;
3535     xlate_actions(&xin, &xout);
3536
3537     execute.actions = ofpbuf_data(xout.odp_actions);
3538     execute.actions_len = ofpbuf_size(xout.odp_actions);
3539
3540     execute.packet = packet;
3541     execute.md = pkt_metadata_from_flow(flow);
3542     execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
3543     execute.probe = false;
3544
3545     /* Fix up in_port. */
3546     in_port = flow->in_port.ofp_port;
3547     if (in_port == OFPP_NONE) {
3548         in_port = OFPP_LOCAL;
3549     }
3550     execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
3551
3552     error = dpif_execute(ofproto->backer->dpif, &execute);
3553
3554     xlate_out_uninit(&xout);
3555
3556     return error;
3557 }
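
/* Usage sketch (illustrative only; 'ofproto', 'flow', and 'packet' are
 * hypothetical, and 'flow' must reflect the data in 'packet').  Executing an
 * explicit action list, with no rule:
 *
 *     struct ofpbuf ofpacts;
 *
 *     ofpbuf_init(&ofpacts, 0);
 *     ofpact_put_OUTPUT(&ofpacts)->port = OFPP_NORMAL;
 *     error = ofproto_dpif_execute_actions(ofproto, &flow, NULL,
 *                                          ofpbuf_data(&ofpacts),
 *                                          ofpbuf_size(&ofpacts), packet);
 *     ofpbuf_uninit(&ofpacts);
 *
 * Exactly one of 'rule' and 'ofpacts' may be nonnull, as the assertion in
 * the function enforces. */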
3558
3559 void
3560 rule_dpif_credit_stats(struct rule_dpif *rule,
3561                        const struct dpif_flow_stats *stats)
3562 {
3563     ovs_mutex_lock(&rule->stats_mutex);
3564     rule->stats.n_packets += stats->n_packets;
3565     rule->stats.n_bytes += stats->n_bytes;
3566     rule->stats.used = MAX(rule->stats.used, stats->used);
3567     ovs_mutex_unlock(&rule->stats_mutex);
3568 }
3569
3570 ovs_be64
3571 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
3572     OVS_REQUIRES(rule->up.mutex)
3573 {
3574     return rule->up.flow_cookie;
3575 }
3576
3577 void
3578 rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
3579                      uint16_t hard_timeout)
3580 {
3581     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
3582 }
3583
3584 /* Returns 'rule''s actions.  The returned actions are RCU-protected, and can
3585  * be read until the calling thread quiesces. */
3586 const struct rule_actions *
3587 rule_dpif_get_actions(const struct rule_dpif *rule)
3588 {
3589     return rule_get_actions(&rule->up);
3590 }
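
/* Usage sketch (illustrative; 'rule' is a hypothetical rule and 'ds' a
 * hypothetical dynamic string):
 *
 *     const struct rule_actions *actions = rule_dpif_get_actions(rule);
 *
 *     ofpacts_format(actions->ofpacts, actions->ofpacts_len, &ds);
 *
 * No lock is taken: RCU protects 'actions', so it must not be used after
 * the calling thread quiesces. */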
3591
3592 /* Sets 'rule''s recirculation id. */
3593 static void
3594 rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
3595     OVS_REQUIRES(rule->up.mutex)
3596 {
3597     ovs_assert(!rule->recirc_id);
3598     rule->recirc_id = id;
3599 }
3600
3601 /* Returns 'rule''s recirculation id. */
3602 uint32_t
3603 rule_dpif_get_recirc_id(struct rule_dpif *rule)
3604     OVS_REQUIRES(rule->up.mutex)
3605 {
3606     if (!rule->recirc_id) {
3607         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3608
3609         rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
3610     }
3611     return rule->recirc_id;
3612 }
3613
3614 /* Sets 'rule''s recirculation id. */
3615 void
3616 rule_set_recirc_id(struct rule *rule_, uint32_t id)
3617 {
3618     struct rule_dpif *rule = rule_dpif_cast(rule_);
3619
3620     ovs_mutex_lock(&rule->up.mutex);
3621     rule_dpif_set_recirc_id(rule, id);
3622     ovs_mutex_unlock(&rule->up.mutex);
3623 }
3624
3625 /* Looks up 'flow' in table 0 of 'ofproto''s classifier.
3626  * If 'wc' is non-null, sets the fields that were relevant as part of
3627  * the lookup.  Returns, via 'table_id', the table id where the match or
3628  * miss occurred.  This is zero unless there was a miss and
3629  * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
3630  * where misses occur.  It is TBL_INTERNAL if the flow has a non-zero
3631  * recirculation ID and a match was found in the internal table, or if
3632  * there was no match and one of the special rules (drop_frags_rule,
3633  * miss_rule, or no_packet_in_rule) was returned.
3634  *
3635  * The return value is the found rule, which is valid at least until the next
3636  * RCU quiescent period.  If the rule needs to stay around longer,
3637  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3638  * on it before this returns. */
3639 struct rule_dpif *
3640 rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
3641                  struct flow_wildcards *wc, bool take_ref,
3642                  const struct dpif_flow_stats *stats, uint8_t *table_id)
3643 {
3644     *table_id = 0;
3645
3646     if (ofproto_dpif_get_enable_recirc(ofproto)) {
3647         /* Always match recirc_id exactly, since the datapath supports
3648          * recirculation. */
3649         if (wc) {
3650             wc->masks.recirc_id = UINT32_MAX;
3651         }
3652         if (flow->recirc_id) {
3653             /* Start the lookup from the internal table for
3654              * post-recirculation flows or packets. */
3655             *table_id = TBL_INTERNAL;
3656         }
3657     }
3658
3659     return rule_dpif_lookup_from_table(ofproto, flow, wc, take_ref, stats,
3660                                        table_id, flow->in_port.ofp_port, true,
3661                                        true);
3662 }
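
/* Usage sketch (illustrative; 'ofproto' and 'flow' are hypothetical):
 *
 *     struct flow_wildcards wc;
 *     uint8_t table_id;
 *     struct rule_dpif *rule;
 *
 *     rule = rule_dpif_lookup(ofproto, &flow, &wc, false, NULL, &table_id);
 *
 * With 'take_ref' false, 'rule' may only be used until the calling thread
 * quiesces; pass true instead to take a reference that outlives that. */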
3663
3664 /* The returned rule (if any) is valid at least until the next RCU quiescent
3665  * period.  If the rule needs to stay around longer, a non-zero 'take_ref'
3666  * must be passed in to cause a reference to be taken on it. */
3667 static struct rule_dpif *
3668 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
3669                           const struct flow *flow, struct flow_wildcards *wc,
3670                           bool take_ref)
3671 {
3672     struct classifier *cls = &ofproto->up.tables[table_id].cls;
3673     const struct cls_rule *cls_rule;
3674     struct rule_dpif *rule;
3675
3676     do {
3677         cls_rule = classifier_lookup(cls, flow, wc);
3678
3679         rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3680
3681         /* Try again if the rule was released before we got the reference. */
3682     } while (rule && take_ref && !rule_dpif_try_ref(rule));
3683
3684     return rule;
3685 }
3686
3687 /* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
3688  * Returns the rule that was found, which may be one of the special rules
3689  * according to packet miss handling.  If 'may_packet_in' is false, returning
3690  * the miss_rule (which sends packet-ins to the controller) is avoided.
3691  * Updates 'wc', if nonnull, to reflect the fields that were used during the
3692  * lookup.
3693  *
3694  * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
3695  * if none is found then the table miss configuration for that table is
3696  * honored, which can result in additional lookups in other OpenFlow tables.
3697  * In this case the function updates '*table_id' to reflect the final OpenFlow
3698  * table that was searched.
3699  *
3700  * If 'honor_table_miss' is false, then only one table lookup occurs, in
3701  * '*table_id'.
3702  *
3703  * The rule is returned in '*rule', which is valid at least until the next
3704  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3705  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3706  * on it before this returns.
3707  *
3708  * 'in_port' allows the lookup to take place as if the in port had the value
3709  * 'in_port'.  This is needed for resubmit action support. */
3710 struct rule_dpif *
3711 rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow,
3712                             struct flow_wildcards *wc, bool take_ref,
3713                             const struct dpif_flow_stats *stats,
3714                             uint8_t *table_id, ofp_port_t in_port,
3715                             bool may_packet_in, bool honor_table_miss)
3716 {
3717     ovs_be16 old_tp_src = flow->tp_src, old_tp_dst = flow->tp_dst;
3718     ofp_port_t old_in_port = flow->in_port.ofp_port;
3719     enum ofputil_table_miss miss_config;
3720     struct rule_dpif *rule;
3721     uint8_t next_id;
3722
3723     /* We always unwildcard nw_frag (for IP), so the fragment bits need not
3724      * be unwildcarded here. */
3725     if (flow->nw_frag & FLOW_NW_FRAG_ANY
3726         && ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
3727         if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
3728             /* We must pretend that transport ports are unavailable. */
3729             flow->tp_src = htons(0);
3730             flow->tp_dst = htons(0);
3731         } else {
3732             /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
3733              * Use the drop_frags_rule (which cannot disappear). */
3734             rule = ofproto->drop_frags_rule;
3735             if (take_ref) {
3736                 rule_dpif_ref(rule);
3737             }
3738             if (stats) {
3739                 struct oftable *tbl = &ofproto->up.tables[*table_id];
3740                 unsigned long orig;
3741
3742                 atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig);
3743             }
3744             return rule;
3745         }
3746     }
3747
3748     /* Look up a flow with 'in_port' as the input port.  Then restore the
3749      * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
3750      * have surprising behavior). */
3751     flow->in_port.ofp_port = in_port;
3752
3753     /* Our current implementation depends on n_tables == N_TABLES, and
3754      * TBL_INTERNAL being the last table. */
3755     BUILD_ASSERT_DECL(N_TABLES == TBL_INTERNAL + 1);
3756
3757     miss_config = OFPUTIL_TABLE_MISS_CONTINUE;
3758
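    /* Walk the tables in order.  The bump in the loop header steps over
     * TBL_INTERNAL, so it is searched only when the caller starts the lookup
     * there (as for recirculated flows). */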
3759     for (next_id = *table_id;
3760          next_id < ofproto->up.n_tables;
3761          next_id++, next_id += (next_id == TBL_INTERNAL))
3762     {
3763         *table_id = next_id;
3764         rule = rule_dpif_lookup_in_table(ofproto, next_id, flow, wc, take_ref);
3765         if (stats) {
3766             struct oftable *tbl = &ofproto->up.tables[next_id];
3767             unsigned long orig;
3768
3769             atomic_add_relaxed(rule ? &tbl->n_matched : &tbl->n_missed,
3770                                stats->n_packets, &orig);
3771         }
3772         if (rule) {
3773             goto out;   /* Match. */
3774         }
3775         if (honor_table_miss) {
3776             miss_config = ofproto_table_get_miss_config(&ofproto->up,
3777                                                         *table_id);
3778             if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE) {
3779                 continue;
3780             }
3781         }
3782         break;
3783     }
3784     /* Miss. */
3785     rule = ofproto->no_packet_in_rule;
3786     if (may_packet_in) {
3787         if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE
3788             || miss_config == OFPUTIL_TABLE_MISS_CONTROLLER) {
3789             struct ofport_dpif *port;
3790
3791             port = get_ofp_port(ofproto, old_in_port);
3792             if (!port) {
3793                 VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
3794                              old_in_port);
3795             } else if (!(port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN)) {
3796                 rule = ofproto->miss_rule;
3797             }
3798         } else if (miss_config == OFPUTIL_TABLE_MISS_DEFAULT &&
3799                    connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
3800             rule = ofproto->miss_rule;
3801         }
3802     }
3803     if (take_ref) {
3804         rule_dpif_ref(rule);
3805     }
3806 out:
3807     /* Restore port numbers, as they may have been modified above. */
3808     flow->tp_src = old_tp_src;
3809     flow->tp_dst = old_tp_dst;
3810     /* Restore the old in port. */
3811     flow->in_port.ofp_port = old_in_port;
3812
3813     return rule;
3814 }
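
/* Example of 'honor_table_miss' (illustrative): a lookup starting at
 * '*table_id' == 0 that misses in tables 0 and 1, both configured with
 * OFPUTIL_TABLE_MISS_CONTINUE, and matches in table 2 returns the table 2
 * rule and leaves '*table_id' == 2.  With 'honor_table_miss' false, the same
 * lookup stops after table 0 and returns one of the special rules. */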
3815
3816 static void
3817 complete_operation(struct rule_dpif *rule)
3818     OVS_REQUIRES(ofproto_mutex)
3819 {
3820     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3821
3822     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3823 }
3824
3825 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
3826 {
3827     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
3828 }
3829
3830 static struct rule *
3831 rule_alloc(void)
3832 {
3833     struct rule_dpif *rule = xmalloc(sizeof *rule);
3834     return &rule->up;
3835 }
3836
3837 static void
3838 rule_dealloc(struct rule *rule_)
3839 {
3840     struct rule_dpif *rule = rule_dpif_cast(rule_);
3841     free(rule);
3842 }
3843
3844 static enum ofperr
3845 rule_construct(struct rule *rule_)
3846     OVS_NO_THREAD_SAFETY_ANALYSIS
3847 {
3848     struct rule_dpif *rule = rule_dpif_cast(rule_);
3849     ovs_mutex_init_adaptive(&rule->stats_mutex);
3850     rule->stats.n_packets = 0;
3851     rule->stats.n_bytes = 0;
3852     rule->stats.used = rule->up.modified;
3853     rule->recirc_id = 0;
3854
3855     return 0;
3856 }
3857
3858 static enum ofperr
3859 rule_insert(struct rule *rule_)
3860     OVS_REQUIRES(ofproto_mutex)
3861 {
3862     struct rule_dpif *rule = rule_dpif_cast(rule_);
3863     complete_operation(rule);
3864     return 0;
3865 }
3866
3867 static void
3868 rule_delete(struct rule *rule_)
3869     OVS_REQUIRES(ofproto_mutex)
3870 {
3871     struct rule_dpif *rule = rule_dpif_cast(rule_);
3872     complete_operation(rule);
3873 }
3874
3875 static void
3876 rule_destruct(struct rule *rule_)
3877 {
3878     struct rule_dpif *rule = rule_dpif_cast(rule_);
3879
3880     ovs_mutex_destroy(&rule->stats_mutex);
3881     if (rule->recirc_id) {
3882         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3883
3884         ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
3885     }
3886 }
3887
3888 static void
3889 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
3890                long long int *used)
3891 {
3892     struct rule_dpif *rule = rule_dpif_cast(rule_);
3893
3894     ovs_mutex_lock(&rule->stats_mutex);
3895     *packets = rule->stats.n_packets;
3896     *bytes = rule->stats.n_bytes;
3897     *used = rule->stats.used;
3898     ovs_mutex_unlock(&rule->stats_mutex);
3899 }
3900
3901 static void
3902 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
3903                   struct ofpbuf *packet)
3904 {
3905     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3906
3907     ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
3908 }
3909
3910 static enum ofperr
3911 rule_execute(struct rule *rule, const struct flow *flow,
3912              struct ofpbuf *packet)
3913 {
3914     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
3915     ofpbuf_delete(packet);
3916     return 0;
3917 }
3918
3919 static void
3920 rule_modify_actions(struct rule *rule_, bool reset_counters)
3921     OVS_REQUIRES(ofproto_mutex)
3922 {
3923     struct rule_dpif *rule = rule_dpif_cast(rule_);
3924
3925     if (reset_counters) {
3926         ovs_mutex_lock(&rule->stats_mutex);
3927         rule->stats.n_packets = 0;
3928         rule->stats.n_bytes = 0;
3929         ovs_mutex_unlock(&rule->stats_mutex);
3930     }
3931
3932     complete_operation(rule);
3933 }
3934
3935 static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
3936 {
3937     return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
3938 }
3939
3940 static struct ofgroup *
3941 group_alloc(void)
3942 {
3943     struct group_dpif *group = xzalloc(sizeof *group);
3944     return &group->up;
3945 }
3946
3947 static void
3948 group_dealloc(struct ofgroup *group_)
3949 {
3950     struct group_dpif *group = group_dpif_cast(group_);
3951     free(group);
3952 }
3953
3954 static void
3955 group_construct_stats(struct group_dpif *group)
3956     OVS_REQUIRES(group->stats_mutex)
3957 {
3958     struct ofputil_bucket *bucket;
3959     const struct list *buckets;
3960
3961     group->packet_count = 0;
3962     group->byte_count = 0;
3963
3964     group_dpif_get_buckets(group, &buckets);
3965     LIST_FOR_EACH (bucket, list_node, buckets) {
3966         bucket->stats.packet_count = 0;
3967         bucket->stats.byte_count = 0;
3968     }
3969 }
3970
3971 void
3972 group_dpif_credit_stats(struct group_dpif *group,
3973                         struct ofputil_bucket *bucket,
3974                         const struct dpif_flow_stats *stats)
3975 {
3976     ovs_mutex_lock(&group->stats_mutex);
3977     group->packet_count += stats->n_packets;
3978     group->byte_count += stats->n_bytes;
3979     if (bucket) {
3980         bucket->stats.packet_count += stats->n_packets;
3981         bucket->stats.byte_count += stats->n_bytes;
3982     } else { /* Credit to all buckets */
3983         const struct list *buckets;
3984
3985         group_dpif_get_buckets(group, &buckets);
3986         LIST_FOR_EACH (bucket, list_node, buckets) {
3987             bucket->stats.packet_count += stats->n_packets;
3988             bucket->stats.byte_count += stats->n_bytes;
3989         }
3990     }
3991     ovs_mutex_unlock(&group->stats_mutex);
3992 }
3993
3994 static enum ofperr
3995 group_construct(struct ofgroup *group_)
3996 {
3997     struct group_dpif *group = group_dpif_cast(group_);
3998     const struct ofputil_bucket *bucket;
3999
4000     /* Prevent group chaining because our locking structure makes it hard to
4001      * implement deadlock-free.  (See xlate_group_resource_check().) */
4002     LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
4003         const struct ofpact *a;
4004
4005         OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
4006             if (a->type == OFPACT_GROUP) {
4007                 return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
4008             }
4009         }
4010     }
4011
4012     ovs_mutex_init_adaptive(&group->stats_mutex);
4013     ovs_mutex_lock(&group->stats_mutex);
4014     group_construct_stats(group);
4015     ovs_mutex_unlock(&group->stats_mutex);
4016     return 0;
4017 }
4018
4019 static void
4020 group_destruct(struct ofgroup *group_)
4021 {
4022     struct group_dpif *group = group_dpif_cast(group_);
4023     ovs_mutex_destroy(&group->stats_mutex);
4024 }
4025
4026 static enum ofperr
4027 group_modify(struct ofgroup *group_)
4028 {
4029     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
4030
4031     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
4032
4033     return 0;
4034 }
4035
4036 static enum ofperr
4037 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
4038 {
4039     struct group_dpif *group = group_dpif_cast(group_);
4040     struct ofputil_bucket *bucket;
4041     const struct list *buckets;
4042     struct bucket_counter *bucket_stats;
4043
4044     ovs_mutex_lock(&group->stats_mutex);
4045     ogs->packet_count = group->packet_count;
4046     ogs->byte_count = group->byte_count;
4047
4048     group_dpif_get_buckets(group, &buckets);
4049     bucket_stats = ogs->bucket_stats;
4050     LIST_FOR_EACH (bucket, list_node, buckets) {
4051         bucket_stats->packet_count = bucket->stats.packet_count;
4052         bucket_stats->byte_count = bucket->stats.byte_count;
4053         bucket_stats++;
4054     }
4055     ovs_mutex_unlock(&group->stats_mutex);
4056
4057     return 0;
4058 }
4059
4060 /* If the group exists, this function increments the group's reference count.
4061  *
4062  * Make sure to call group_dpif_unref() after no longer needing to maintain
4063  * a reference to the group. */
4064 bool
4065 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
4066                   struct group_dpif **group)
4067 {
4068     struct ofgroup *ofgroup;
4069     bool found;
4070
4071     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
4072     *group = found ?  group_dpif_cast(ofgroup) : NULL;
4073
4074     return found;
4075 }
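
/* Usage sketch (illustrative; 'ofproto' and 'group_id' are hypothetical):
 *
 *     struct group_dpif *group;
 *
 *     if (group_dpif_lookup(ofproto, group_id, &group)) {
 *         const struct list *buckets;
 *
 *         group_dpif_get_buckets(group, &buckets);
 *         ...iterate 'buckets' with LIST_FOR_EACH...
 *         group_dpif_unref(group);
 *     }
 */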
4076
4077 void
4078 group_dpif_get_buckets(const struct group_dpif *group,
4079                        const struct list **buckets)
4080 {
4081     *buckets = &group->up.buckets;
4082 }
4083
4084 enum ofp11_group_type
4085 group_dpif_get_type(const struct group_dpif *group)
4086 {
4087     return group->up.type;
4088 }
4089 \f
4090 /* Sends 'packet' out 'ofport'.
4091  * May modify 'packet'.
4092  * Returns 0 if successful, otherwise a positive errno value. */
4093 int
4094 ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4095 {
4096     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4097     int error;
4098
4099     error = xlate_send_packet(ofport, packet);
4100
4101     ovs_mutex_lock(&ofproto->stats_mutex);
4102     ofproto->stats.tx_packets++;
4103     ofproto->stats.tx_bytes += ofpbuf_size(packet);
4104     ovs_mutex_unlock(&ofproto->stats_mutex);
4105     return error;
4106 }
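
/* Usage sketch (illustrative; 'ofport' is a hypothetical port and the packet
 * contents are arbitrary):
 *
 *     struct ofpbuf *packet = ...;    built or cloned by the caller
 *     int error = ofproto_dpif_send_packet(ofport, packet);
 *
 *     ofpbuf_delete(packet);          the caller keeps ownership
 */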
4107 \f
4108 /* Returns the version string of the datapath that backs
4109  * this 'ofproto'.
4110  */
4111 static const char *
4112 get_datapath_version(const struct ofproto *ofproto_)
4113 {
4114     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4115
4116     return ofproto->backer->dp_version_string;
4117 }
4118
4119 static bool
4120 set_frag_handling(struct ofproto *ofproto_,
4121                   enum ofp_config_flags frag_handling)
4122 {
4123     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4124     if (frag_handling != OFPC_FRAG_REASM) {
4125         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4126         return true;
4127     } else {
4128         return false;
4129     }
4130 }
4131
4132 static enum ofperr
4133 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
4134            const struct flow *flow,
4135            const struct ofpact *ofpacts, size_t ofpacts_len)
4136 {
4137     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4138
4139     ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
4140                                  ofpacts_len, packet);
4141     return 0;
4142 }
4143 \f
4144 /* NetFlow. */
4145
4146 static int
4147 set_netflow(struct ofproto *ofproto_,
4148             const struct netflow_options *netflow_options)
4149 {
4150     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4151
4152     if (netflow_options) {
4153         if (!ofproto->netflow) {
4154             ofproto->netflow = netflow_create();
4155             ofproto->backer->need_revalidate = REV_RECONFIGURE;
4156         }
4157         return netflow_set_options(ofproto->netflow, netflow_options);
4158     } else if (ofproto->netflow) {
4159         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4160         netflow_unref(ofproto->netflow);
4161         ofproto->netflow = NULL;
4162     }
4163
4164     return 0;
4165 }
4166
4167 static void
4168 get_netflow_ids(const struct ofproto *ofproto_,
4169                 uint8_t *engine_type, uint8_t *engine_id)
4170 {
4171     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4172
4173     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
4174 }
4175 \f
4176 static struct ofproto_dpif *
4177 ofproto_dpif_lookup(const char *name)
4178 {
4179     struct ofproto_dpif *ofproto;
4180
4181     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
4182                              hash_string(name, 0), &all_ofproto_dpifs) {
4183         if (!strcmp(ofproto->up.name, name)) {
4184             return ofproto;
4185         }
4186     }
4187     return NULL;
4188 }
4189
4190 static void
4191 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
4192                           const char *argv[], void *aux OVS_UNUSED)
4193 {
4194     struct ofproto_dpif *ofproto;
4195
4196     if (argc > 1) {
4197         ofproto = ofproto_dpif_lookup(argv[1]);
4198         if (!ofproto) {
4199             unixctl_command_reply_error(conn, "no such bridge");
4200             return;
4201         }
4202         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4203         mac_learning_flush(ofproto->ml);
4204         ovs_rwlock_unlock(&ofproto->ml->rwlock);
4205     } else {
4206         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4207             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4208             mac_learning_flush(ofproto->ml);
4209             ovs_rwlock_unlock(&ofproto->ml->rwlock);
4210         }
4211     }
4212
4213     unixctl_command_reply(conn, "table successfully flushed");
4214 }
4215
4216 static void
4217 ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
4218                                      const char *argv[], void *aux OVS_UNUSED)
4219 {
4220     struct ofproto_dpif *ofproto;
4221
4222     if (argc > 1) {
4223         ofproto = ofproto_dpif_lookup(argv[1]);
4224         if (!ofproto) {
4225             unixctl_command_reply_error(conn, "no such bridge");
4226             return;
4227         }
4228
4229         if (!mcast_snooping_enabled(ofproto->ms)) {
4230             unixctl_command_reply_error(conn, "multicast snooping is disabled");
4231             return;
4232         }
4233         mcast_snooping_mdb_flush(ofproto->ms);
4234     } else {
4235         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4236             if (!mcast_snooping_enabled(ofproto->ms)) {
4237                 continue;
4238             }
4239             mcast_snooping_mdb_flush(ofproto->ms);
4240         }
4241     }
4242
4243     unixctl_command_reply(conn, "table successfully flushed");
4244 }
4245
4246 static struct ofport_dpif *
4247 ofbundle_get_a_port(const struct ofbundle *bundle)
4248 {
4249     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
4250                         bundle_node);
4251 }
4252
4253 static void
4254 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4255                          const char *argv[], void *aux OVS_UNUSED)
4256 {
4257     struct ds ds = DS_EMPTY_INITIALIZER;
4258     const struct ofproto_dpif *ofproto;
4259     const struct mac_entry *e;
4260
4261     ofproto = ofproto_dpif_lookup(argv[1]);
4262     if (!ofproto) {
4263         unixctl_command_reply_error(conn, "no such bridge");
4264         return;
4265     }
4266
4267     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
4268     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
4269     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
4270         struct ofbundle *bundle = e->port.p;
4271         char name[OFP_MAX_PORT_NAME_LEN];
4272
4273         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4274                                name, sizeof name);
4275         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
4276                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
4277                       mac_entry_age(ofproto->ml, e));
4278     }
4279     ovs_rwlock_unlock(&ofproto->ml->rwlock);
4280     unixctl_command_reply(conn, ds_cstr(&ds));
4281     ds_destroy(&ds);
4282 }
4283
4284 static void
4285 ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
4286                                     int argc OVS_UNUSED,
4287                                     const char *argv[],
4288                                     void *aux OVS_UNUSED)
4289 {
4290     struct ds ds = DS_EMPTY_INITIALIZER;
4291     const struct ofproto_dpif *ofproto;
4292     const struct ofbundle *bundle;
4293     const struct mcast_group *grp;
4294     struct mcast_group_bundle *b;
4295     struct mcast_mrouter_bundle *mrouter;
4296
4297     ofproto = ofproto_dpif_lookup(argv[1]);
4298     if (!ofproto) {
4299         unixctl_command_reply_error(conn, "no such bridge");
4300         return;
4301     }
4302
4303     if (!mcast_snooping_enabled(ofproto->ms)) {
4304         unixctl_command_reply_error(conn, "multicast snooping is disabled");
4305         return;
4306     }
4307
4308     ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
4309     ovs_rwlock_rdlock(&ofproto->ms->rwlock);
4310     LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
4311         LIST_FOR_EACH (b, bundle_node, &grp->bundle_lru) {
4312             char name[OFP_MAX_PORT_NAME_LEN];
4313
4314             bundle = b->port;
4315             ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4316                                    name, sizeof name);
4317             ds_put_format(&ds, "%5s  %4d  "IP_FMT"         %3d\n",
4318                           name, grp->vlan, IP_ARGS(grp->ip4),
4319                           mcast_bundle_age(ofproto->ms, b));
4320         }
4321     }
4322
4323     /* Ports connected to multicast routers. */
4324     LIST_FOR_EACH (mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
4325         char name[OFP_MAX_PORT_NAME_LEN];
4326
4327         bundle = mrouter->port;
4328         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4329                                name, sizeof name);
4330         ds_put_format(&ds, "%5s  %4d  querier             %3d\n",
4331                       name, mrouter->vlan,
4332                       mcast_mrouter_age(ofproto->ms, mrouter));
4333     }
4334     ovs_rwlock_unlock(&ofproto->ms->rwlock);
4335     unixctl_command_reply(conn, ds_cstr(&ds));
4336     ds_destroy(&ds);
4337 }
4338
4339 struct trace_ctx {
4340     struct xlate_out xout;
4341     struct xlate_in xin;
4342     const struct flow *key;
4343     struct flow flow;
4344     struct flow_wildcards wc;
4345     struct ds *result;
4346 };
4347
4348 static void
4349 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
4350 {
4351     const struct rule_actions *actions;
4352     ovs_be64 cookie;
4353
4354     ds_put_char_multiple(result, '\t', level);
4355     if (!rule) {
4356         ds_put_cstr(result, "No match\n");
4357         return;
4358     }
4359
4360     ovs_mutex_lock(&rule->up.mutex);
4361     cookie = rule->up.flow_cookie;
4362     ovs_mutex_unlock(&rule->up.mutex);
4363
4364     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
4365                   rule->up.table_id, ntohll(cookie));
4366     cls_rule_format(&rule->up.cr, result);
4367     ds_put_char(result, '\n');
4368
4369     actions = rule_dpif_get_actions(rule);
4370
4371     ds_put_char_multiple(result, '\t', level);
4372     ds_put_cstr(result, "OpenFlow actions=");
4373     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
4374     ds_put_char(result, '\n');
4375 }
4376
4377 static void
4378 trace_format_flow(struct ds *result, int level, const char *title,
4379                   struct trace_ctx *trace)
4380 {
4381     ds_put_char_multiple(result, '\t', level);
4382     ds_put_format(result, "%s: ", title);
4383     /* Do not report unchanged flows for resubmits. */
4384     if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
4385         || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
4386         ds_put_cstr(result, "unchanged");
4387     } else {
4388         flow_format(result, &trace->xin.flow);
4389         trace->flow = trace->xin.flow;
4390     }
4391     ds_put_char(result, '\n');
4392 }
4393
4394 static void
4395 trace_format_regs(struct ds *result, int level, const char *title,
4396                   struct trace_ctx *trace)
4397 {
4398     size_t i;
4399
4400     ds_put_char_multiple(result, '\t', level);
4401     ds_put_format(result, "%s:", title);
4402     for (i = 0; i < FLOW_N_REGS; i++) {
4403         ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
4404     }
4405     ds_put_char(result, '\n');
4406 }
4407
4408 static void
4409 trace_format_odp(struct ds *result, int level, const char *title,
4410                  struct trace_ctx *trace)
4411 {
4412     struct ofpbuf *odp_actions = trace->xout.odp_actions;
4413
4414     ds_put_char_multiple(result, '\t', level);
4415     ds_put_format(result, "%s: ", title);
4416     format_odp_actions(result, ofpbuf_data(odp_actions),
4417                                ofpbuf_size(odp_actions));
4418     ds_put_char(result, '\n');
4419 }
4420
4421 static void
4422 trace_format_megaflow(struct ds *result, int level, const char *title,
4423                       struct trace_ctx *trace)
4424 {
4425     struct match match;
4426
4427     ds_put_char_multiple(result, '\t', level);
4428     ds_put_format(result, "%s: ", title);
4429     flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
4430     match_init(&match, trace->key, &trace->wc);
4431     match_format(&match, result, OFP_DEFAULT_PRIORITY);
4432     ds_put_char(result, '\n');
4433 }
4434
4435 static void trace_report(struct xlate_in *xin, const char *s, int recurse);
4436
4437 static void
4438 trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
4439 {
4440     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4441     struct ds *result = trace->result;
4442
4443     if (!recurse) {
4444         if (rule == xin->ofproto->miss_rule) {
4445             trace_report(xin, "No match, flow generates \"packet in\"s.",
4446                          recurse);
4447         } else if (rule == xin->ofproto->no_packet_in_rule) {
4448             trace_report(xin, "No match, packets dropped because "
4449                          "OFPPC_NO_PACKET_IN is set on in_port.", recurse);
4450         } else if (rule == xin->ofproto->drop_frags_rule) {
4451             trace_report(xin, "Packets dropped because they are IP "
4452                          "fragments and the fragment handling mode is "
4453                          "\"drop\".", recurse);
4454         }
4455     }
4456
4457     ds_put_char(result, '\n');
4458     if (recurse) {
4459         trace_format_flow(result, recurse, "Resubmitted flow", trace);
4460         trace_format_regs(result, recurse, "Resubmitted regs", trace);
4461         trace_format_odp(result,  recurse, "Resubmitted  odp", trace);
4462         trace_format_megaflow(result, recurse, "Resubmitted megaflow", trace);
4463     }
4464     trace_format_rule(result, recurse, rule);
4465 }
4466
4467 static void
4468 trace_report(struct xlate_in *xin, const char *s, int recurse)
4469 {
4470     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4471     struct ds *result = trace->result;
4472
4473     ds_put_char_multiple(result, '\t', recurse);
4474     ds_put_cstr(result, s);
4475     ds_put_char(result, '\n');
4476 }
4477
4478 /* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
4479  * forms are supported:
4480  *
4481  *     - [dpname] odp_flow [-generate | packet]
4482  *     - bridge br_flow [-generate | packet]
4483  *
4484  * On success, initializes '*ofprotop' and 'flow' and returns NULL.  On failure
4485  * returns a nonnull malloced error message. */
4486 static char * WARN_UNUSED_RESULT
4487 parse_flow_and_packet(int argc, const char *argv[],
4488                       struct ofproto_dpif **ofprotop, struct flow *flow,
4489                       struct ofpbuf **packetp)
4490 {
4491     const struct dpif_backer *backer = NULL;
4492     const char *error = NULL;
4493     char *m_err = NULL;
4494     struct simap port_names = SIMAP_INITIALIZER(&port_names);
4495     struct ofpbuf *packet;
4496     struct ofpbuf odp_key;
4497     struct ofpbuf odp_mask;
4498
4499     ofpbuf_init(&odp_key, 0);
4500     ofpbuf_init(&odp_mask, 0);
4501
4502     /* Handle "-generate" or a hex string as the last argument. */
4503     if (!strcmp(argv[argc - 1], "-generate")) {
4504         packet = ofpbuf_new(0);
4505         argc--;
4506     } else {
4507         error = eth_from_hex(argv[argc - 1], &packet);
4508         if (!error) {
4509             argc--;
4510         } else if (argc == 4) {
4511             /* The 3-argument form must end in "-generate" or a hex string. */
4512             goto exit;
4513         }
4514         error = NULL;
4515     }
4516
4517     /* odp_flow can have its in_port specified as a name instead of port no.
4518      * We do not yet know whether a given flow is an odp_flow or a br_flow.
4519      * But to check whether a flow is an odp_flow via odp_flow_from_string(),
4520      * we need to create a simap mapping names to port numbers. */
4521     if (argc == 3) {
4522         const char *dp_type;
4523         if (!strncmp(argv[1], "ovs-", 4)) {
4524             dp_type = argv[1] + 4;
4525         } else {
4526             dp_type = argv[1];
4527         }
4528         backer = shash_find_data(&all_dpif_backers, dp_type);
4529     } else if (argc == 2) {
4530         struct shash_node *node;
4531         if (shash_count(&all_dpif_backers) == 1) {
4532             node = shash_first(&all_dpif_backers);
4533             backer = node->data;
4534         }
4535     } else {
4536         error = "Syntax error";
4537         goto exit;
4538     }
4539     if (backer && backer->dpif) {
4540         struct dpif_port dpif_port;
4541         struct dpif_port_dump port_dump;
4542         DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
4543             simap_put(&port_names, dpif_port.name,
4544                       odp_to_u32(dpif_port.port_no));
4545         }
4546     }
4547
4548     /* Parse the flow and determine whether a datapath or
4549      * bridge is specified.  If odp_flow_from_string()
4550      * returns 0, the flow is an odp_flow.  If
4551      * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
4552     if (!odp_flow_from_string(argv[argc - 1], &port_names,
4553                               &odp_key, &odp_mask)) {
4554         if (!backer) {
4555             error = "Cannot find the datapath";
4556             goto exit;
4557         }
4558
4559         if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
4560                                  flow) == ODP_FIT_ERROR) {
4561             error = "Failed to parse flow key";
4562             goto exit;
4563         }
4564
4565         *ofprotop = xlate_lookup_ofproto(backer, flow,
4566                                          &flow->in_port.ofp_port);
4567         if (*ofprotop == NULL) {
4568             error = "Invalid datapath flow";
4569             goto exit;
4570         }
4571
4572         vsp_adjust_flow(*ofprotop, flow, NULL);
4573
4574     } else {
4575         char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);
4576
4577         if (err) {
4578             m_err = xasprintf("Bad flow syntax: %s", err);
4579             free(err);
4580             goto exit;
4581         } else {
4582             if (argc != 3) {
4583                 error = "Must specify bridge name";
4584                 goto exit;
4585             }
4586
4587             *ofprotop = ofproto_dpif_lookup(argv[1]);
4588             if (!*ofprotop) {
4589                 error = "Unknown bridge name";
4590                 goto exit;
4591             }
4592         }
4593     }
4594
4595     /* Generate a packet, if requested. */
4596     if (packet) {
4597         if (!ofpbuf_size(packet)) {
4598             flow_compose(packet, flow);
4599         } else {
4600             struct pkt_metadata md = pkt_metadata_from_flow(flow);
4601
4602             /* Use the metadata from the flow and the packet argument
4603              * to reconstruct the flow. */
4604             flow_extract(packet, &md, flow);
4605         }
4606     }
4607
4608 exit:
4609     if (error && !m_err) {
4610         m_err = xstrdup(error);
4611     }
4612     if (m_err) {
4613         ofpbuf_delete(packet);
4614         packet = NULL;
4615     }
4616     *packetp = packet;
4617     ofpbuf_uninit(&odp_key);
4618     ofpbuf_uninit(&odp_mask);
4619     simap_destroy(&port_names);
4620     return m_err;
4621 }
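
/* Example argument vectors for the parser above (illustrative; the bridge,
 * datapath, and flow values are hypothetical):
 *
 *     "ovs-system" "in_port(2),eth_type(0x0800)" "-generate"
 *         datapath name, an odp_flow, and a request to generate a payload.
 *
 *     "br0" "in_port=1,dl_type=0x0800" "-generate"
 *         bridge name and a br_flow.  A hex string of packet bytes may be
 *         given in place of "-generate". */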
4622
4623 static void
4624 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
4625                       void *aux OVS_UNUSED)
4626 {
4627     struct ofproto_dpif *ofproto;
4628     struct ofpbuf *packet;
4629     char *error;
4630     struct flow flow;
4631
4632     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4633     if (!error) {
4634         struct ds result;
4635
4636         ds_init(&result);
4637         ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
4638         unixctl_command_reply(conn, ds_cstr(&result));
4639         ds_destroy(&result);
4640         ofpbuf_delete(packet);
4641     } else {
4642         unixctl_command_reply_error(conn, error);
4643         free(error);
4644     }
4645 }
4646
4647 static void
4648 ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
4649                               const char *argv[], void *aux OVS_UNUSED)
4650 {
4651     enum ofputil_protocol usable_protocols;
4652     struct ofproto_dpif *ofproto;
4653     bool enforce_consistency;
4654     struct ofpbuf ofpacts;
4655     struct ofpbuf *packet;
4656     struct ds result;
4657     struct flow flow;
4658     uint16_t in_port;
4659
4660     /* Three kinds of error return values! */
4661     enum ofperr retval;
4662     char *error;
4663
4664     packet = NULL;
4665     ds_init(&result);
4666     ofpbuf_init(&ofpacts, 0);
4667
4668     /* Parse actions. */
4669     error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
4670     if (error) {
4671         unixctl_command_reply_error(conn, error);
4672         free(error);
4673         goto exit;
4674     }
4675
4676     /* OpenFlow 1.1 and later suggest that switches enforce certain forms of
4677      * consistency between the flow and the actions.  With -consistent, we
4678      * enforce consistency even for a flow supported in OpenFlow 1.0. */
4679     if (!strcmp(argv[1], "-consistent")) {
4680         enforce_consistency = true;
4681         argv++;
4682         argc--;
4683     } else {
4684         enforce_consistency = false;
4685     }
4686
4687     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4688     if (error) {
4689         unixctl_command_reply_error(conn, error);
4690         free(error);
4691         goto exit;
4692     }
4693
4694     /* Do the same checks as handle_packet_out() in ofproto.c.
4695      *
4696      * We pass a 'table_id' of 0 to ofpacts_check(), which isn't
4697      * strictly correct because these actions aren't in any table, but it's OK
4698      * because 'table_id' is used only to check goto_table instructions, and
4699      * packet-outs take a list of actions, which cannot include
4700      * instructions.
4701      *
4702      * We skip the "meter" check here because meter is an instruction, not an
4703      * action, and thus cannot appear in ofpacts. */
4704     in_port = ofp_to_u16(flow.in_port.ofp_port);
4705     if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
4706         unixctl_command_reply_error(conn, "invalid in_port");
4707         goto exit;
4708     }
4709     if (enforce_consistency) {
4710         retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
4711                                            &flow, u16_to_ofp(ofproto->up.max_ports),
4712                                            0, 0, usable_protocols);
4713     } else {
4714         retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
4715                                u16_to_ofp(ofproto->up.max_ports), 0, 0,
4716                                &usable_protocols);
4717     }
4718
4719     if (retval) {
4720         ds_clear(&result);
4721         ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
4722         unixctl_command_reply_error(conn, ds_cstr(&result));
4723         goto exit;
4724     }
4725
4726     ofproto_trace(ofproto, &flow, packet,
4727                   ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
4728     unixctl_command_reply(conn, ds_cstr(&result));
4729
4730 exit:
4731     ds_destroy(&result);
4732     ofpbuf_delete(packet);
4733     ofpbuf_uninit(&ofpacts);
4734 }
4735
4736 /* Implements a "trace" through 'ofproto''s flow table, appending a textual
4737  * description of the results to 'ds'.
4738  *
4739  * The trace follows a packet with the specified 'flow' through the flow
4740  * table.  'packet' may be nonnull to trace an actual packet, with consequent
4741  * side effects (if it is nonnull then its flow must be 'flow').
4742  *
4743  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
4744  * trace, otherwise the actions are determined by a flow table lookup. */
4745 static void
4746 ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
4747               const struct ofpbuf *packet,
4748               const struct ofpact ofpacts[], size_t ofpacts_len,
4749               struct ds *ds)
4750 {
4751     struct trace_ctx trace;
4752
4753     ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
4754     ds_put_cstr(ds, "Flow: ");
4755     flow_format(ds, flow);
4756     ds_put_char(ds, '\n');
4757
4758     flow_wildcards_init_catchall(&trace.wc);
4759
4760     trace.result = ds;
4761     trace.key = flow; /* Original flow key, used for megaflow. */
4762     trace.flow = *flow; /* May be modified by actions. */
4763     xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, NULL,
4764                   ntohs(flow->tcp_flags), packet);
4765     trace.xin.ofpacts = ofpacts;
4766     trace.xin.ofpacts_len = ofpacts_len;
4767     trace.xin.resubmit_hook = trace_resubmit;
4768     trace.xin.report_hook = trace_report;
4769
4770     xlate_actions(&trace.xin, &trace.xout);
4771
4772     ds_put_char(ds, '\n');
4773     trace_format_flow(ds, 0, "Final flow", &trace);
4774     trace_format_megaflow(ds, 0, "Megaflow", &trace);
4775
4776     ds_put_cstr(ds, "Datapath actions: ");
4777     format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions),
4778                        ofpbuf_size(trace.xout.odp_actions));
4779
4780     if (trace.xout.slow) {
4781         enum slow_path_reason slow;
4782
4783         ds_put_cstr(ds, "\nThis flow is handled by the userspace "
4784                     "slow path because it:");
4785
4786         slow = trace.xout.slow;
4787         while (slow) {
4788             enum slow_path_reason bit = rightmost_1bit(slow);
4789
4790             ds_put_format(ds, "\n\t- %s.",
4791                           slow_path_reason_to_explanation(bit));
4792
4793             slow &= ~bit;
4794         }
4795     }
4796
4797     xlate_out_uninit(&trace.xout);
4798 }
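
/* For illustration only: given a hypothetical bridge "br0", the text that
 * ofproto_trace() appends to 'ds' resembles (all values made up):
 *
 *     Bridge: br0
 *     Flow: in_port=1,vlan_tci=0x0000,dl_type=0x0000
 *
 *     ...per-table rule and action lines emitted by the resubmit hooks...
 *
 *     Final flow: unchanged
 *     Megaflow: in_port=1,dl_type=0x0000
 *     Datapath actions: 2
 */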
4799
4800 /* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
4801  * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
4802  * to destroy 'ofproto_shash' and free the returned value. */
4803 static const struct shash_node **
4804 get_ofprotos(struct shash *ofproto_shash)
4805 {
4806     struct ofproto_dpif *ofproto;
4807
4808     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4809         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
4810         shash_add_nocopy(ofproto_shash, name, ofproto);
4811     }
4812
4813     return shash_sort(ofproto_shash);
4814 }
4815
4816 static void
4817 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
4818                               const char *argv[] OVS_UNUSED,
4819                               void *aux OVS_UNUSED)
4820 {
4821     struct ds ds = DS_EMPTY_INITIALIZER;
4822     struct shash ofproto_shash;
4823     const struct shash_node **sorted_ofprotos;
4824     int i;
4825
4826     shash_init(&ofproto_shash);
4827     sorted_ofprotos = get_ofprotos(&ofproto_shash);
4828     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4829         const struct shash_node *node = sorted_ofprotos[i];
4830         ds_put_format(&ds, "%s\n", node->name);
4831     }
4832
4833     shash_destroy(&ofproto_shash);
4834     free(sorted_ofprotos);
4835
4836     unixctl_command_reply(conn, ds_cstr(&ds));
4837     ds_destroy(&ds);
4838 }
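
/* Example (hypothetical names): "ovs-appctl dpif/dump-dps" prints one
 * "type@name" line per ofproto, as built by get_ofprotos(), e.g.:
 *
 *     system@br0
 *     system@br1
 */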
4839
4840 static void
4841 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
4842 {
4843     const struct shash_node **ofprotos;
4844     struct dpif_dp_stats dp_stats;
4845     struct shash ofproto_shash;
4846     size_t i;
4847
4848     dpif_get_dp_stats(backer->dpif, &dp_stats);
4849
4850     ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
4851                   dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
4852
4853     shash_init(&ofproto_shash);
4854     ofprotos = get_ofprotos(&ofproto_shash);
4855     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4856         struct ofproto_dpif *ofproto = ofprotos[i]->data;
4857         const struct shash_node **ports;
4858         size_t j;
4859
4860         if (ofproto->backer != backer) {
4861             continue;
4862         }
4863
4864         ds_put_format(ds, "\t%s:\n", ofproto->up.name);
4865
4866         ports = shash_sort(&ofproto->up.port_by_name);
4867         for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
4868             const struct shash_node *node = ports[j];
4869             struct ofport *ofport = node->data;
4870             struct smap config;
4871             odp_port_t odp_port;
4872
4873             ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
4874                           ofport->ofp_port);
4875
4876             odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
4877             if (odp_port != ODPP_NONE) {
4878                 ds_put_format(ds, "%"PRIu32":", odp_port);
4879             } else {
4880                 ds_put_cstr(ds, "none:");
4881             }
4882
4883             ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));
4884
4885             smap_init(&config);
4886             if (!netdev_get_config(ofport->netdev, &config)) {
4887                 const struct smap_node **nodes;
4888                 size_t i;
4889
4890                 nodes = smap_sort(&config);
4891                 for (i = 0; i < smap_count(&config); i++) {
4892                     const struct smap_node *node = nodes[i];
4893                     ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
4894                                   node->key, node->value);
4895                 }
4896                 free(nodes);
4897             }
4898             smap_destroy(&config);
4899
4900             ds_put_char(ds, ')');
4901             ds_put_char(ds, '\n');
4902         }
4903         free(ports);
4904     }
4905     shash_destroy(&ofproto_shash);
4906     free(ofprotos);
4907 }
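
/* For illustration, dpif_show_backer() output for a hypothetical backer
 * hosting a single bridge with two ports resembles:
 *
 *     system@ovs-system: hit:1234 missed:56
 *             br0:
 *                     br0 65534/1: (internal)
 *                     eth1 1/2: (system)
 *
 * where "65534/1" is the OpenFlow port number followed by the datapath
 * port number, and the parenthesized part is the netdev type plus any
 * netdev configuration. */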
4908
4909 static void
4910 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4911                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
4912 {
4913     struct ds ds = DS_EMPTY_INITIALIZER;
4914     const struct shash_node **backers;
4915     int i;
4916
4917     backers = shash_sort(&all_dpif_backers);
4918     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
4919         dpif_show_backer(backers[i]->data, &ds);
4920     }
4921     free(backers);
4922
4923     unixctl_command_reply(conn, ds_cstr(&ds));
4924     ds_destroy(&ds);
4925 }
4926
4927 static void
4928 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
4929                                 int argc OVS_UNUSED, const char *argv[],
4930                                 void *aux OVS_UNUSED)
4931 {
4932     const struct ofproto_dpif *ofproto;
4933
4934     struct ds ds = DS_EMPTY_INITIALIZER;
4935     bool verbosity = false;
4936
4937     struct dpif_port dpif_port;
4938     struct dpif_port_dump port_dump;
4939     struct hmap portno_names;
4940
4941     struct dpif_flow_dump *flow_dump;
4942     struct dpif_flow_dump_thread *flow_dump_thread;
4943     struct dpif_flow f;
4944     int error;
4945
4946     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
4947     if (!ofproto) {
4948         unixctl_command_reply_error(conn, "no such bridge");
4949         return;
4950     }
4951
4952     if (argc > 2 && !strcmp(argv[1], "-m")) {
4953         verbosity = true;
4954     }
4955
4956     hmap_init(&portno_names);
4957     DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
4958         odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
4959     }
4960
4961     ds_init(&ds);
4962     flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
4963     flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
4964     while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
4965         struct flow flow;
4966
4967         if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
4968             || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
4969             continue;
4970         }
4971
4972         odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
4973                         &portno_names, &ds, verbosity);
4974         ds_put_cstr(&ds, ", ");
4975         dpif_flow_stats_format(&f.stats, &ds);
4976         ds_put_cstr(&ds, ", actions:");
4977         format_odp_actions(&ds, f.actions, f.actions_len);
4978         ds_put_char(&ds, '\n');
4979     }
4980     dpif_flow_dump_thread_destroy(flow_dump_thread);
4981     error = dpif_flow_dump_destroy(flow_dump);
4982
4983     if (error) {
4984         ds_clear(&ds);
4985         ds_put_format(&ds, "dpif/dump-flows failed: %s", ovs_strerror(error));
4986         unixctl_command_reply_error(conn, ds_cstr(&ds));
4987     } else {
4988         unixctl_command_reply(conn, ds_cstr(&ds));
4989     }
4990     odp_portno_names_destroy(&portno_names);
4991     hmap_destroy(&portno_names);
4992     ds_destroy(&ds);
4993 }
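
/* For illustration, one (abridged, hypothetical) output line of
 * "ovs-appctl dpif/dump-flows br0":
 *
 *     in_port(2),eth_type(0x0800), packets:10, bytes:980, used:0.473s, actions:1
 *
 * i.e. the formatted flow key (and mask with -m), the flow statistics, and
 * the datapath actions, matching the ds_put_* calls above. */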
4994
4995 static void
4996 ofproto_revalidate_all_backers(void)
4997 {
4998     const struct shash_node **backers;
4999     int i;
5000
5001     backers = shash_sort(&all_dpif_backers);
5002     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
5003         struct dpif_backer *backer = backers[i]->data;
5004         backer->need_revalidate = REV_RECONFIGURE;
5005     }
5006     free(backers);
5007 }
5008
5009 static void
5010 disable_tnl_push_pop(struct unixctl_conn *conn, int argc OVS_UNUSED,
5011                      const char *argv[], void *aux OVS_UNUSED)
5012 {
5013     if (!strcasecmp(argv[1], "off")) {
5014         ofproto_use_tnl_push_pop = false;
5015         unixctl_command_reply(conn, "Tunnel push-pop off");
5016         ofproto_revalidate_all_backers();
5017     } else if (!strcasecmp(argv[1], "on")) {
5018         ofproto_use_tnl_push_pop = true;
5019         unixctl_command_reply(conn, "Tunnel push-pop on");
5020         ofproto_revalidate_all_backers();
5021     } else {
             unixctl_command_reply_error(conn, "expecting \"on\" or \"off\"");
         }
5022 }
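
/* Example (illustrative): "ovs-appctl ofproto/tnl-push-pop off" clears
 * ofproto_use_tnl_push_pop and marks every backer REV_RECONFIGURE, so the
 * revalidators re-translate installed flows under the new setting. */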
5023
5024 static void
5025 ofproto_unixctl_init(void)
5026 {
5027     static bool registered;
5028     if (registered) {
5029         return;
5030     }
5031     registered = true;
5032
5033     unixctl_command_register(
5034         "ofproto/trace",
5035         "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
5036         1, 3, ofproto_unixctl_trace, NULL);
5037     unixctl_command_register(
5038         "ofproto/trace-packet-out",
5039         "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
5040         2, 6, ofproto_unixctl_trace_actions, NULL);
5041     unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
5042                              ofproto_unixctl_fdb_flush, NULL);
5043     unixctl_command_register("fdb/show", "bridge", 1, 1,
5044                              ofproto_unixctl_fdb_show, NULL);
5045     unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
5046                              ofproto_unixctl_mcast_snooping_flush, NULL);
5047     unixctl_command_register("mdb/show", "bridge", 1, 1,
5048                              ofproto_unixctl_mcast_snooping_show, NULL);
5049     unixctl_command_register("dpif/dump-dps", "", 0, 0,
5050                              ofproto_unixctl_dpif_dump_dps, NULL);
5051     unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
5052                              NULL);
5053     unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
5054                              ofproto_unixctl_dpif_dump_flows, NULL);
5055
5056     unixctl_command_register("ofproto/tnl-push-pop", "on|off", 1, 1,
5057                              disable_tnl_push_pop, NULL);
5058 }
5059
5060 /* Returns true if 'table_id' is the table used for internal rules,
5061  * false otherwise. */
5062 bool
5063 table_is_internal(uint8_t table_id)
5064 {
5065     return table_id == TBL_INTERNAL;
5066 }
5067 \f
5068 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10).
5069  *
5070  * This is deprecated.  It is only for compatibility with broken device drivers
5071  * in old versions of Linux that do not properly support VLANs when VLAN
5072  * devices are not used.  When broken device drivers are no longer in
5073  * widespread use, we will delete these interfaces. */
5074
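/* Illustrative only: VLAN splinters are turned on per interface from the
 * database (assuming the documented other-config key), e.g.:
 *
 *     ovs-vsctl set interface eth0 other-config:enable-vlan-splinters=true
 *
 * Bridge reconfiguration then reaches set_realdev() below to record the
 * vlandev-to-realdev mappings. */
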
5075 static int
5076 set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
5077 {
5078     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
5079     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
5080
5081     if (realdev_ofp_port == ofport->realdev_ofp_port
5082         && vid == ofport->vlandev_vid) {
5083         return 0;
5084     }
5085
5086     ofproto->backer->need_revalidate = REV_RECONFIGURE;
5087
5088     if (ofport->realdev_ofp_port) {
5089         vsp_remove(ofport);
5090     }
5091     if (realdev_ofp_port && ofport->bundle) {
5092         /* vlandevs are enslaved to their realdevs, so they are not allowed
5093          * to be part of a bundle themselves. */
5094         bundle_set(ofport_->ofproto, ofport->bundle, NULL);
5095     }
5096
5097     ofport->realdev_ofp_port = realdev_ofp_port;
5098     ofport->vlandev_vid = vid;
5099
5100     if (realdev_ofp_port) {
5101         vsp_add(ofport, realdev_ofp_port, vid);
5102     }
5103
5104     return 0;
5105 }
5106
5107 static uint32_t
5108 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
5109 {
5110     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
5111 }
5112
5113 bool
5114 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
5115     OVS_EXCLUDED(ofproto->vsp_mutex)
5116 {
5117     /* hmap_is_empty is thread safe. */
5118     return !hmap_is_empty(&ofproto->realdev_vid_map);
5119 }
5120
5122 static ofp_port_t
5123 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
5124                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5125     OVS_REQUIRES(ofproto->vsp_mutex)
5126 {
5127     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
5128         int vid = vlan_tci_to_vid(vlan_tci);
5129         const struct vlan_splinter *vsp;
5130
5131         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
5132                                  hash_realdev_vid(realdev_ofp_port, vid),
5133                                  &ofproto->realdev_vid_map) {
5134             if (vsp->realdev_ofp_port == realdev_ofp_port
5135                 && vsp->vid == vid) {
5136                 return vsp->vlandev_ofp_port;
5137             }
5138         }
5139     }
5140     return realdev_ofp_port;
5141 }
5142
5143 /* Returns the OFP port number of the Linux VLAN device that corresponds to
5144  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
5145  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
5146  * 'vlan_tci' 9, it would return the port number of eth0.9.
5147  *
5148  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
5149  * function just returns its 'realdev_ofp_port' argument. */
5150 ofp_port_t
5151 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
5152                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5153     OVS_EXCLUDED(ofproto->vsp_mutex)
5154 {
5155     ofp_port_t ret;
5156
5157     /* hmap_is_empty is thread safe, so see if we can return immediately. */
5158     if (hmap_is_empty(&ofproto->realdev_vid_map)) {
5159         return realdev_ofp_port;
5160     }
5161     ovs_mutex_lock(&ofproto->vsp_mutex);
5162     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
5163     ovs_mutex_unlock(&ofproto->vsp_mutex);
5164     return ret;
5165 }
5166
5167 static struct vlan_splinter *
5168 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
5169 {
5170     struct vlan_splinter *vsp;
5171
5172     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
5173                              hash_ofp_port(vlandev_ofp_port),
5174                              &ofproto->vlandev_map) {
5175         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
5176             return vsp;
5177         }
5178     }
5179
5180     return NULL;
5181 }
5182
5183 /* Returns the OpenFlow port number of the "real" device underlying the Linux
5184  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
5185  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
5186  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
5187  * eth0 and store 9 in '*vid'.
5188  *
5189  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
5190  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
5191  * always does. */
5192 static ofp_port_t
5193 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
5194                        ofp_port_t vlandev_ofp_port, int *vid)
5195     OVS_REQUIRES(ofproto->vsp_mutex)
5196 {
5197     if (!hmap_is_empty(&ofproto->vlandev_map)) {
5198         const struct vlan_splinter *vsp;
5199
5200         vsp = vlandev_find(ofproto, vlandev_ofp_port);
5201         if (vsp) {
5202             if (vid) {
5203                 *vid = vsp->vid;
5204             }
5205             return vsp->realdev_ofp_port;
5206         }
5207     }
5208     return 0;
5209 }
5210
5211 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
5212  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
5213  * 'flow->in_port' to the "real" device backing the VLAN device, sets
5214  * 'flow->vlan_tci' to the VLAN VID, and returns true.  Optionally pushes the
5215  * appropriate VLAN on 'packet' if provided.  Otherwise (which is always the
5216  * case unless VLAN splinters are enabled), returns false without making any
5217  * changes. */
5218 bool
5219 vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
5220                 struct ofpbuf *packet)
5221     OVS_EXCLUDED(ofproto->vsp_mutex)
5222 {
5223     ofp_port_t realdev;
5224     int vid;
5225
5226     /* hmap_is_empty is thread safe. */
5227     if (hmap_is_empty(&ofproto->vlandev_map)) {
5228         return false;
5229     }
5230
5231     ovs_mutex_lock(&ofproto->vsp_mutex);
5232     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
5233     ovs_mutex_unlock(&ofproto->vsp_mutex);
5234     if (!realdev) {
5235         return false;
5236     }
5237
5238     /* Cause the flow to be processed as if it came in on the real device with
5239      * the VLAN device's VLAN ID. */
5240     flow->in_port.ofp_port = realdev;
5241     flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
5242
5243     if (packet) {
5244         /* Make the packet resemble the flow, so that it gets sent to an
5245          * OpenFlow controller properly, so that it looks correct for sFlow,
5246          * and so that flow_extract() will get the correct vlan_tci if it is
5247          * called on 'packet'. */
5248         eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
5249     }
5250
5251     return true;
5252 }
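
/* Worked example (hypothetical port numbers): if eth0.9 is OpenFlow port 5
 * and eth0 is OpenFlow port 1, then for a flow with in_port=5,
 * vsp_adjust_flow() rewrites in_port to 1 and sets vlan_tci to
 * htons(0x1009), since (9 & VLAN_VID_MASK) | VLAN_CFI = 0x0009 | 0x1000. */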
5253
5254 static void
5255 vsp_remove(struct ofport_dpif *port)
5256 {
5257     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5258     struct vlan_splinter *vsp;
5259
5260     ovs_mutex_lock(&ofproto->vsp_mutex);
5261     vsp = vlandev_find(ofproto, port->up.ofp_port);
5262     if (vsp) {
5263         hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
5264         hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
5265         free(vsp);
5266
5267         port->realdev_ofp_port = 0;
5268     } else {
5269         VLOG_ERR("missing vlan device record");
5270     }
5271     ovs_mutex_unlock(&ofproto->vsp_mutex);
5272 }
5273
5274 static void
5275 vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
5276 {
5277     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5278
5279     ovs_mutex_lock(&ofproto->vsp_mutex);
5280     if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
5281         && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
5282             == realdev_ofp_port)) {
5283         struct vlan_splinter *vsp;
5284
5285         vsp = xmalloc(sizeof *vsp);
5286         vsp->realdev_ofp_port = realdev_ofp_port;
5287         vsp->vlandev_ofp_port = port->up.ofp_port;
5288         vsp->vid = vid;
5289
5290         port->realdev_ofp_port = realdev_ofp_port;
5291
5292         hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
5293                     hash_ofp_port(port->up.ofp_port));
5294         hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
5295                     hash_realdev_vid(realdev_ofp_port, vid));
5296     } else {
5297         VLOG_ERR("duplicate vlan device record");
5298     }
5299     ovs_mutex_unlock(&ofproto->vsp_mutex);
5300 }
5301
5302 static odp_port_t
5303 ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
5304 {
5305     const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
5306     return ofport ? ofport->odp_port : ODPP_NONE;
5307 }
5308
5309 struct ofport_dpif *
5310 odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
5311 {
5312     struct ofport_dpif *port;
5313
5314     ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
5315     HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
5316                              &backer->odp_to_ofport_map) {
5317         if (port->odp_port == odp_port) {
5318             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5319             return port;
5320         }
5321     }
5322
5323     ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5324     return NULL;
5325 }
5326
5327 static ofp_port_t
5328 odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
5329 {
5330     struct ofport_dpif *port;
5331
5332     port = odp_port_to_ofport(ofproto->backer, odp_port);
5333     if (port && &ofproto->up == port->up.ofproto) {
5334         return port->up.ofp_port;
5335     } else {
5336         return OFPP_NONE;
5337     }
5338 }
5339
5340 uint32_t
5341 ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
5342 {
5343     struct dpif_backer *backer = ofproto->backer;
5344
5345     return recirc_id_alloc(backer->rid_pool);
5346 }
5347
5348 void
5349 ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
5350 {
5351     struct dpif_backer *backer = ofproto->backer;
5352
5353     recirc_id_free(backer->rid_pool, recirc_id);
5354 }
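
/* Illustrative pairing (a sketch, not code from this tree): recirculation
 * ids come from the backer-wide pool, so every successful allocation must
 * eventually be returned through the same ofproto:
 *
 *     uint32_t id = ofproto_dpif_alloc_recirc_id(ofproto);
 *     ... use 'id' in datapath recirculation actions ...
 *     ofproto_dpif_free_recirc_id(ofproto, id);
 */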
5355
5356 int
5357 ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
5358                                const struct match *match, int priority,
5359                                uint16_t idle_timeout,
5360                                const struct ofpbuf *ofpacts,
5361                                struct rule **rulep)
5362 {
5363     struct ofputil_flow_mod fm;
5364     struct rule_dpif *rule;
5365     int error;
5366
5367     fm.match = *match;
5368     fm.priority = priority;
5369     fm.new_cookie = htonll(0);
5370     fm.cookie = htonll(0);
5371     fm.cookie_mask = htonll(0);
5372     fm.modify_cookie = false;
5373     fm.table_id = TBL_INTERNAL;
5374     fm.command = OFPFC_ADD;
5375     fm.idle_timeout = idle_timeout;
5376     fm.hard_timeout = 0;
5377     fm.importance = 0;
5378     fm.buffer_id = 0;
5379     fm.out_port = 0;
5380     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5381     fm.ofpacts = ofpbuf_data(ofpacts);
5382     fm.ofpacts_len = ofpbuf_size(ofpacts);
5383
5384     error = ofproto_flow_mod(&ofproto->up, &fm);
5385     if (error) {
5386         VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
5387                     ofperr_to_string(error));
5388         *rulep = NULL;
5389         return error;
5390     }
5391
5392     rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
5393                                      &fm.match.wc, false);
5394     if (rule) {
5395         *rulep = &rule->up;
5396     } else {
5397         OVS_NOT_REACHED();
5398     }
5399     return 0;
5400 }
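
/* For illustration only (a sketch, not part of the build): a hypothetical
 * caller might install and later remove a hidden rule in TBL_INTERNAL like
 * this; the match fields, priority, and port numbers are made up.
 *
 *     struct ofpbuf ofpacts;
 *     struct ofpact_output *output;
 *     struct match match;
 *     struct rule *rule;
 *
 *     ofpbuf_init(&ofpacts, 0);
 *     output = ofpact_put_OUTPUT(&ofpacts);
 *     output->port = u16_to_ofp(2);
 *     output->max_len = 0;
 *
 *     match_init_catchall(&match);
 *     match_set_in_port(&match, u16_to_ofp(1));
 *
 *     if (!ofproto_dpif_add_internal_flow(ofproto, &match, 100, 0,
 *                                         &ofpacts, &rule)) {
 *         ... keep 'rule' for later reference, then eventually ...
 *         ofproto_dpif_delete_internal_flow(ofproto, &match, 100);
 *     }
 *     ofpbuf_uninit(&ofpacts);
 */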
5401
5402 int
5403 ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
5404                                   struct match *match, int priority)
5405 {
5406     struct ofputil_flow_mod fm;
5407     int error;
5408
5409     fm.match = *match;
5410     fm.priority = priority;
5411     fm.new_cookie = htonll(0);
5412     fm.cookie = htonll(0);
5413     fm.cookie_mask = htonll(0);
5414     fm.modify_cookie = false;
5415     fm.table_id = TBL_INTERNAL;
5416     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5417     fm.command = OFPFC_DELETE_STRICT;
5418
5419     error = ofproto_flow_mod(&ofproto->up, &fm);
5420     if (error) {
5421         VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
5422                     ofperr_to_string(error));
5423         return error;
5424     }
5425
5426     return 0;
5427 }
5428
5429 const struct ofproto_class ofproto_dpif_class = {
5430     init,
5431     enumerate_types,
5432     enumerate_names,
5433     del,
5434     port_open_type,
5435     type_run,
5436     type_wait,
5437     alloc,
5438     construct,
5439     destruct,
5440     dealloc,
5441     run,
5442     wait,
5443     NULL,                       /* get_memory_usage */
5444     type_get_memory_usage,
5445     flush,
5446     query_tables,
5447     port_alloc,
5448     port_construct,
5449     port_destruct,
5450     port_dealloc,
5451     port_modified,
5452     port_reconfigured,
5453     port_query_by_name,
5454     port_add,
5455     port_del,
5456     port_get_stats,
5457     port_dump_start,
5458     port_dump_next,
5459     port_dump_done,
5460     port_poll,
5461     port_poll_wait,
5462     port_is_lacp_current,
5463     port_get_lacp_stats,
5464     NULL,                       /* rule_choose_table */
5465     rule_alloc,
5466     rule_construct,
5467     rule_insert,
5468     rule_delete,
5469     rule_destruct,
5470     rule_dealloc,
5471     rule_get_stats,
5472     rule_execute,
5473     NULL,                       /* rule_premodify_actions */
5474     rule_modify_actions,
5475     set_frag_handling,
5476     packet_out,
5477     set_netflow,
5478     get_netflow_ids,
5479     set_sflow,
5480     set_ipfix,
5481     set_cfm,
5482     cfm_status_changed,
5483     get_cfm_status,
5484     set_bfd,
5485     bfd_status_changed,
5486     get_bfd_status,
5487     set_stp,
5488     get_stp_status,
5489     set_stp_port,
5490     get_stp_port_status,
5491     get_stp_port_stats,
5492     set_rstp,
5493     get_rstp_status,
5494     set_rstp_port,
5495     get_rstp_port_status,
5496     set_queues,
5497     bundle_set,
5498     bundle_remove,
5499     mirror_set__,
5500     mirror_get_stats__,
5501     set_flood_vlans,
5502     is_mirror_output_bundle,
5503     forward_bpdu_changed,
5504     set_mac_table_config,
5505     set_mcast_snooping,
5506     set_mcast_snooping_port,
5507     set_realdev,
5508     NULL,                       /* meter_get_features */
5509     NULL,                       /* meter_set */
5510     NULL,                       /* meter_get */
5511     NULL,                       /* meter_del */
5512     group_alloc,                /* group_alloc */
5513     group_construct,            /* group_construct */
5514     group_destruct,             /* group_destruct */
5515     group_dealloc,              /* group_dealloc */
5516     group_modify,               /* group_modify */
5517     group_get_stats,            /* group_get_stats */
5518     get_datapath_version,       /* get_datapath_version */
5519 };