ofproto: Do not update stats on fake bond interface.
[cascardo/ovs.git] / ofproto / ofproto-dpif.c
1 /*
2  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <config.h>
18
19 #include "ofproto/ofproto-dpif.h"
20 #include "ofproto/ofproto-provider.h"
21
22 #include <errno.h>
23
24 #include "bfd.h"
25 #include "bond.h"
26 #include "bundle.h"
27 #include "byte-order.h"
28 #include "connectivity.h"
29 #include "connmgr.h"
30 #include "coverage.h"
31 #include "cfm.h"
32 #include "dpif.h"
33 #include "dynamic-string.h"
34 #include "fail-open.h"
35 #include "guarded-list.h"
36 #include "hmapx.h"
37 #include "lacp.h"
38 #include "learn.h"
39 #include "mac-learning.h"
40 #include "mcast-snooping.h"
41 #include "meta-flow.h"
42 #include "multipath.h"
43 #include "netdev-vport.h"
44 #include "netdev.h"
45 #include "netlink.h"
46 #include "nx-match.h"
47 #include "odp-util.h"
48 #include "odp-execute.h"
49 #include "ofp-util.h"
50 #include "ofpbuf.h"
51 #include "ofp-actions.h"
52 #include "ofp-parse.h"
53 #include "ofp-print.h"
54 #include "ofproto-dpif-ipfix.h"
55 #include "ofproto-dpif-mirror.h"
56 #include "ofproto-dpif-monitor.h"
57 #include "ofproto-dpif-rid.h"
58 #include "ofproto-dpif-sflow.h"
59 #include "ofproto-dpif-upcall.h"
60 #include "ofproto-dpif-xlate.h"
61 #include "poll-loop.h"
62 #include "seq.h"
63 #include "simap.h"
64 #include "smap.h"
65 #include "timer.h"
66 #include "tunnel.h"
67 #include "unaligned.h"
68 #include "unixctl.h"
69 #include "vlan-bitmap.h"
70 #include "vlog.h"
71
72 VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
73
74 COVERAGE_DEFINE(ofproto_dpif_expired);
75 COVERAGE_DEFINE(packet_in_overflow);
76
77 struct flow_miss;
78
/* ofproto-dpif's view of an OpenFlow rule: the generic 'struct rule' plus
 * mutex-guarded datapath flow statistics and an optional recirculation id. */
79 struct rule_dpif {
    /* Generic rule.  Must remain the first member: the BUILD_ASSERT_DECL
     * following this struct requires offsetof(struct rule_dpif, up) == 0. */
80     struct rule up;
81
82     /* These statistics:
83      *
84      *   - Do include packets and bytes from datapath flows which have not
85      *   recently been processed by a revalidator. */
86     struct ovs_mutex stats_mutex;
87     struct dpif_flow_stats stats OVS_GUARDED;
88
89     /* If non-zero then the recirculation id that has
90      * been allocated for use with this rule.
91      * The recirculation id and associated internal flow should
92      * be freed when the rule is freed */
93     uint32_t recirc_id;
94 };
95
96 /* RULE_CAST() depends on this. */
97 BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
98
99 static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
100                            long long int *used);
101 static struct rule_dpif *rule_dpif_cast(const struct rule *);
102 static void rule_expire(struct rule_dpif *);
103
/* ofproto-dpif's view of an OpenFlow group: the generic 'struct ofgroup'
 * plus packet and byte counters guarded by 'stats_mutex'. */
104 struct group_dpif {
105     struct ofgroup up;
106
107     /* These statistics:
108      *
109      *   - Do include packets and bytes from datapath flows which have not
110      *   recently been processed by a revalidator. */
111     struct ovs_mutex stats_mutex;
112     uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
113     uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
114 };
115
/* A bundle of one or more ports belonging to one ofproto_dpif, together with
 * its VLAN configuration and optional LACP/bonding state. */
116 struct ofbundle {
117     struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
118     struct ofproto_dpif *ofproto; /* Owning ofproto. */
119     void *aux;                  /* Key supplied by ofproto's client. */
120     char *name;                 /* Identifier for log messages. */
121
122     /* Configuration. */
123     struct list ports;          /* Contains "struct ofport_dpif"s, linked
                                 * through their 'bundle_node' member. */
124     enum port_vlan_mode vlan_mode; /* VLAN mode */
125     int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
126     unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
127                                  * NULL if all VLANs are trunked. */
128     struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
129     struct bond *bond;          /* Nonnull iff more than one port. */
130     bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */
131
132     /* Status. */
133     bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
134 };
135
136 static void bundle_remove(struct ofport *);
137 static void bundle_update(struct ofbundle *);
138 static void bundle_destroy(struct ofbundle *);
139 static void bundle_del_port(struct ofport_dpif *);
140 static void bundle_run(struct ofbundle *);
141 static void bundle_wait(struct ofbundle *);
142
143 static void stp_run(struct ofproto_dpif *ofproto);
144 static void stp_wait(struct ofproto_dpif *ofproto);
145 static int set_stp_port(struct ofport *,
146                         const struct ofproto_port_stp_settings *);
147
148 static void rstp_run(struct ofproto_dpif *ofproto);
149 static void set_rstp_port(struct ofport *,
150                          const struct ofproto_port_rstp_settings *);
151
/* ofproto-dpif's view of a port: the generic 'struct ofport' ('up') plus the
 * datapath port number, bundle membership, monitoring (CFM/BFD), spanning
 * tree state, queue-to-DSCP mapping and VLAN splinter data.
 *
 * Note that 'up' is not the first member, so conversions from a
 * 'struct ofport' must go through ofport_dpif_cast(). */
152 struct ofport_dpif {
153     struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
154     struct ofport up;
155
156     odp_port_t odp_port;
157     struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
158     struct list bundle_node;    /* In struct ofbundle's "ports" list. */
159     struct cfm *cfm;            /* Connectivity Fault Management, if any. */
160     struct bfd *bfd;            /* BFD, if any. */
161     bool may_enable;            /* May be enabled in bonds. */
162     bool is_tunnel;             /* This port is a tunnel. */
163     bool is_layer3;             /* This is a layer 3 port. */
164     long long int carrier_seq;  /* Carrier status changes. */
165     struct ofport_dpif *peer;   /* Peer if patch port. */
166
167     /* Spanning tree. */
168     struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
169     enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
170     long long int stp_state_entered;
171
172     /* Rapid Spanning Tree. */
173     struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
174     enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */
175
176     /* Queue to DSCP mapping. */
177     struct ofproto_port_queue *qdscp;
178     size_t n_qdscp;
179
180     /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
181      *
182      * This is deprecated.  It is only for compatibility with broken device
183      * drivers in old versions of Linux that do not properly support VLANs when
184      * VLAN devices are not used.  When broken device drivers are no longer in
185      * widespread use, we will delete these interfaces. */
186     ofp_port_t realdev_ofp_port;
187     int vlandev_vid;
188 };
189
190 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
191  *
192  * This is deprecated.  It is only for compatibility with broken device drivers
193  * in old versions of Linux that do not properly support VLANs when VLAN
194  * devices are not used.  When broken device drivers are no longer in
195  * widespread use, we will delete these interfaces. */
196 struct vlan_splinter {
    /* NOTE(review): presumably these two nodes link the splinter into
     * ofproto_dpif's 'realdev_vid_map' and 'vlandev_map' respectively --
     * confirm against the vsp_add()/vsp_remove() implementations. */
197     struct hmap_node realdev_vid_node;
198     struct hmap_node vlandev_node;
199     ofp_port_t realdev_ofp_port;
200     ofp_port_t vlandev_ofp_port;
201     int vid;
202 };
203
204 static void vsp_remove(struct ofport_dpif *);
205 static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);
206
207 static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
208                                        ofp_port_t);
209
210 static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
211                                        odp_port_t);
212
213 static struct ofport_dpif *
214 ofport_dpif_cast(const struct ofport *ofport)
215 {
216     return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
217 }
218
219 static void port_run(struct ofport_dpif *);
220 static int set_bfd(struct ofport *, const struct smap *);
221 static int set_cfm(struct ofport *, const struct cfm_settings *);
222 static void ofport_update_peer(struct ofport_dpif *);
223
224 /* Reasons that we might need to revalidate every datapath flow, and
225  * corresponding coverage counters.
226  *
227  * A value of 0 means that there is no need to revalidate.
228  *
229  * It would be nice to have some cleaner way to integrate with coverage
230  * counters, but with only a few reasons I guess this is good enough for
231  * now. */
232 enum revalidate_reason {
233     REV_RECONFIGURE = 1,       /* Switch configuration changed. */
234     REV_STP,                   /* Spanning tree protocol port status change. */
235     REV_RSTP,                  /* RSTP port status change. */
236     REV_BOND,                  /* Bonding changed. */
237     REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
238     REV_FLOW_TABLE,            /* Flow table changed. */
239     REV_MAC_LEARNING,          /* Mac learning changed. */
240     REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
241 };
/* One coverage counter per revalidate_reason.  type_run() increments the
 * counter matching the backer's 'need_revalidate' value before clearing it,
 * so a new reason needs both a counter here and a case in that switch. */
242 COVERAGE_DEFINE(rev_reconfigure);
243 COVERAGE_DEFINE(rev_stp);
244 COVERAGE_DEFINE(rev_rstp);
245 COVERAGE_DEFINE(rev_bond);
246 COVERAGE_DEFINE(rev_port_toggled);
247 COVERAGE_DEFINE(rev_flow_table);
248 COVERAGE_DEFINE(rev_mac_learning);
249 COVERAGE_DEFINE(rev_mcast_snooping);
250
251 /* All datapaths of a given type share a single dpif backer instance. */
252 struct dpif_backer {
253     char *type;
    /* Number of users of this backer.  open_dpif_backer() increments it
     * when an existing backer is reused; close_dpif_backer() decrements it
     * and destroys the backer when it reaches zero. */
254     int refcount;
255     struct dpif *dpif;
256     struct udpif *udpif;
257
258     struct ovs_rwlock odp_to_ofport_lock;
259     struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport_dpif"s. */
260
261     struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */
262
263     enum revalidate_reason need_revalidate; /* Revalidate all flows. */
264
265     bool recv_set_enable; /* Enables or disables receiving packets. */
266
267     /* Recirculation. */
268     struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
269     bool enable_recirc;   /* True if the datapath supports recirculation */
270
271     /* True if the datapath supports variable-length
272      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
273      * False if the datapath supports only 8-byte (or shorter) userdata. */
274     bool variable_length_userdata;
275
276     /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET
277      * actions. */
278     bool masked_set_action;
279
280     /* Maximum number of MPLS label stack entries that the datapath supports
281      * in a match */
282     size_t max_mpls_depth;
283 };
284
285 /* All existing dpif_backer instances, indexed by backer type. */
286 static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
287
/* A bridge implemented on top of a dpif datapath: the generic
 * 'struct ofproto' ('up') plus dpif-specific state.  All ofproto_dpifs of the
 * same datapath type share a single 'backer'. */
288 struct ofproto_dpif {
289     struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
290     struct ofproto up;
291     struct dpif_backer *backer;
292
293     uint64_t dump_seq; /* Last read of udpif_dump_seq(). */
294
295     /* Special OpenFlow rules. */
296     struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
297     struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
298     struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */
299
300     /* Bridging. */
301     struct netflow *netflow;
302     struct dpif_sflow *sflow;
303     struct dpif_ipfix *ipfix;
304     struct hmap bundles;        /* Contains "struct ofbundle"s. */
305     struct mac_learning *ml;
306     struct mcast_snooping *ms;
307     bool has_bonded_bundles;
308     bool lacp_enabled;
309     struct mbridge *mbridge;
310
311     struct ovs_mutex stats_mutex;
312     struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
313                                             * consumed in userspace. */
314
315     /* Spanning tree. */
316     struct stp *stp;
317     long long int stp_last_tick;
318
319     /* Rapid Spanning Tree. */
320     struct rstp *rstp;
321     long long int rstp_last_tick;
322
323     /* VLAN splinters. */
324     struct ovs_mutex vsp_mutex;
325     struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
326     struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */
327
328     /* Ports. */
329     struct sset ports;             /* Set of standard port names. */
330     struct sset ghost_ports;       /* Ports with no datapath port. */
331     struct sset port_poll_set;     /* Queued names for port_poll() reply. */
332     int port_poll_errno;           /* Last errno for port_poll() reply. */
333     uint64_t change_seq;           /* Connectivity status changes. */
334
335     /* Work queues. */
336     struct guarded_list pins;      /* Contains "struct ofproto_packet_in"s. */
337     struct seq *pins_seq;          /* For notifying 'pins' reception. */
338     uint64_t pins_seqno;
339 };
340
341 /* All existing ofproto_dpif instances, indexed by ->up.name. */
342 static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
343
344 static void ofproto_dpif_unixctl_init(void);
345
346 static inline struct ofproto_dpif *
347 ofproto_dpif_cast(const struct ofproto *ofproto)
348 {
349     ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
350     return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
351 }
352
353 size_t
354 ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
355 {
356     return ofproto->backer->max_mpls_depth;
357 }
358
359 bool
360 ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
361 {
362     return ofproto->backer->enable_recirc;
363 }
364
365 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
366                                         ofp_port_t ofp_port);
367 static void ofproto_trace(struct ofproto_dpif *, struct flow *,
368                           const struct ofpbuf *packet,
369                           const struct ofpact[], size_t ofpacts_len,
370                           struct ds *);
371
372 /* Global variables. */
373 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
374
375 /* Initial port-to-bridge mappings (copies of the hints given to init()). */
376 static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
377
378 /* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
379  * it.  This simply forwards 'fm' to ofproto_flow_mod() on the generic
 * ofproto embedded in 'ofproto'; any result of that call is not reported
 * to the caller. */
380 void
381 ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
382                       struct ofputil_flow_mod *fm)
383 {
384     ofproto_flow_mod(&ofproto->up, fm);
385 }
386
387 /* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
388  * Takes ownership of 'pin' and pin->packet. */
389 void
390 ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
391                             struct ofproto_packet_in *pin)
392 {
393     if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
394         COVERAGE_INC(packet_in_overflow);
395         free(CONST_CAST(void *, pin->up.packet));
396         free(pin);
397     }
398
399     /* Wakes up main thread for packet-in I/O. */
400     seq_change(ofproto->pins_seq);
401 }
402
403 /* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the
404  * packet rather than to send the packet to the controller.
405  *
406  * This function returns true to indicate that a packet_in message
407  * for a "table-miss" should be sent to at least one controller.
408  * False otherwise.  The answer is whatever
 * connmgr_wants_packet_in_on_miss() reports for 'ofproto''s connection
 * manager. */
409 bool
410 ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
411 {
412     return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
413 }
414 \f
415 /* Factory functions. */
416
417 static void
418 init(const struct shash *iface_hints)
419 {
420     struct shash_node *node;
421
422     /* Make a local copy, since we don't own 'iface_hints' elements. */
423     SHASH_FOR_EACH(node, iface_hints) {
424         const struct iface_hint *orig_hint = node->data;
425         struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
426
427         new_hint->br_name = xstrdup(orig_hint->br_name);
428         new_hint->br_type = xstrdup(orig_hint->br_type);
429         new_hint->ofp_port = orig_hint->ofp_port;
430
431         shash_add(&init_ofp_ports, node->name, new_hint);
432     }
433 }
434
/* Fills 'types' by delegating directly to dp_enumerate_types(). */
435 static void
436 enumerate_types(struct sset *types)
437 {
438     dp_enumerate_types(types);
439 }
440
441 static int
442 enumerate_names(const char *type, struct sset *names)
443 {
444     struct ofproto_dpif *ofproto;
445
446     sset_clear(names);
447     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
448         if (strcmp(type, ofproto->up.type)) {
449             continue;
450         }
451         sset_add(names, ofproto->up.name);
452     }
453
454     return 0;
455 }
456
/* Opens the datapath called 'name' of the given 'type', deletes it and
 * closes the handle again.
 *
 * Returns the error from dpif_open() if the datapath cannot be opened,
 * otherwise the result of dpif_delete(). */
static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error = dpif_open(name, type, &dpif);

    if (error) {
        return error;
    }

    error = dpif_delete(dpif);
    dpif_close(dpif);

    return error;
}
470 \f
/* Returns whatever dpif_port_open_type() reports for a port of 'port_type'
 * on a datapath of 'datapath_type'; this is a direct pass-through. */
471 static const char *
472 port_open_type(const char *datapath_type, const char *port_type)
473 {
474     return dpif_port_open_type(datapath_type, port_type);
475 }
476
477 /* Type functions. */
478
479 static void process_dpif_port_changes(struct dpif_backer *);
480 static void process_dpif_all_ports_changed(struct dpif_backer *);
481 static void process_dpif_port_change(struct dpif_backer *,
482                                      const char *devname);
483 static void process_dpif_port_error(struct dpif_backer *, int error);
484
485 static struct ofproto_dpif *
486 lookup_ofproto_dpif_by_port_name(const char *name)
487 {
488     struct ofproto_dpif *ofproto;
489
490     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
491         if (sset_contains(&ofproto->ports, name)) {
492             return ofproto;
493         }
494     }
495
496     return NULL;
497 }
498
/* Periodic processing for the datapath backer of the given 'type'.
 *
 * Does nothing and returns 0 if no backer of that type exists.  Otherwise it
 * runs the dpif and udpif, enables packet reception once flow-restore-wait
 * is no longer in effect, and -- when the backer's 'need_revalidate' is
 * set -- garbage collects tunnel backing ports, pushes the current
 * ofproto/bundle/port configuration to the xlate module and asks udpif to
 * revalidate.  Finally it handles pending datapath port change
 * notifications.
 *
 * Returns 0 on success, or the error from dpif_recv_set() if enabling packet
 * reception fails (in which case the remaining steps are skipped). */
499 static int
500 type_run(const char *type)
501 {
502     struct dpif_backer *backer;
503
504     backer = shash_find_data(&all_dpif_backers, type);
505     if (!backer) {
506         /* This is not necessarily a problem, since backers are only
507          * created on demand. */
508         return 0;
509     }
510
511     dpif_run(backer->dpif);
512     udpif_run(backer->udpif);
513
514     /* If vswitchd started with other_config:flow_restore_wait set as "true",
515      * and the configuration has now changed to "false", enable receiving
516      * packets from the datapath. */
517     if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
518         int error;
519
        /* NOTE(review): the flag stays true even if dpif_recv_set() fails
         * below, so the enable is not retried on later calls -- confirm
         * this is intended. */
520         backer->recv_set_enable = true;
521
522         error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
523         if (error) {
524             VLOG_ERR("Failed to enable receiving packets in dpif.");
525             return error;
526         }
527         dpif_flow_flush(backer->dpif);
528         backer->need_revalidate = REV_RECONFIGURE;
529     }
530
531     if (backer->recv_set_enable) {
532         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
533     }
534
535     if (backer->need_revalidate) {
536         struct ofproto_dpif *ofproto;
537         struct simap_node *node;
538         struct simap tmp_backers;
539
540         /* Handle tunnel garbage collection. */
        /* After the swap 'backer->tnl_backers' is empty and 'tmp_backers'
         * holds the old entries.  Entries still used by a tunnel port are
         * moved back in the loop below; whatever is left in 'tmp_backers'
         * afterwards is deleted from the datapath. */
541         simap_init(&tmp_backers);
542         simap_swap(&backer->tnl_backers, &tmp_backers);
543
544         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
545             struct ofport_dpif *iter;
546
547             if (backer != ofproto->backer) {
548                 continue;
549             }
550
551             HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
552                 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
553                 const char *dp_port;
554
555                 if (!iter->is_tunnel) {
556                     continue;
557                 }
558
559                 dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
560                                                      namebuf, sizeof namebuf);
561                 node = simap_find(&tmp_backers, dp_port);
562                 if (node) {
                    /* Still in use: carry the old entry over. */
563                     simap_put(&backer->tnl_backers, dp_port, node->data);
564                     simap_delete(&tmp_backers, node);
565                     node = simap_find(&backer->tnl_backers, dp_port);
566                 } else {
567                     node = simap_find(&backer->tnl_backers, dp_port);
568                     if (!node) {
569                         odp_port_t odp_port = ODPP_NONE;
570
                        /* No backing datapath port is known yet; try to
                         * add one.  On failure 'node' stays NULL. */
571                         if (!dpif_port_add(backer->dpif, iter->up.netdev,
572                                            &odp_port)) {
573                             simap_put(&backer->tnl_backers, dp_port,
574                                       odp_to_u32(odp_port));
575                             node = simap_find(&backer->tnl_backers, dp_port);
576                         }
577                     }
578                 }
579
580                 iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
581                 if (tnl_port_reconfigure(iter, iter->up.netdev,
582                                          iter->odp_port)) {
583                     backer->need_revalidate = REV_RECONFIGURE;
584                 }
585             }
586         }
587
588         SIMAP_FOR_EACH (node, &tmp_backers) {
589             dpif_port_del(backer->dpif, u32_to_odp(node->data));
590         }
591         simap_destroy(&tmp_backers);
592
        /* Account for the reason behind this revalidation, then clear it. */
593         switch (backer->need_revalidate) {
594         case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
595         case REV_STP:            COVERAGE_INC(rev_stp);            break;
596         case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
597         case REV_BOND:           COVERAGE_INC(rev_bond);           break;
598         case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
599         case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
600         case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
601         case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
602         }
603         backer->need_revalidate = 0;
604
        /* Push the configuration of every ofproto on this backer to the
         * xlate module, one transaction per ofproto. */
605         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
606             struct ofport_dpif *ofport;
607             struct ofbundle *bundle;
608
609             if (ofproto->backer != backer) {
610                 continue;
611             }
612
613             xlate_txn_start();
614             xlate_ofproto_set(ofproto, ofproto->up.name,
615                               ofproto->backer->dpif, ofproto->miss_rule,
616                               ofproto->no_packet_in_rule, ofproto->ml,
617                               ofproto->stp, ofproto->rstp, ofproto->ms,
618                               ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
619                               ofproto->netflow, ofproto->up.frag_handling,
620                               ofproto->up.forward_bpdu,
621                               connmgr_has_in_band(ofproto->up.connmgr),
622                               ofproto->backer->enable_recirc,
623                               ofproto->backer->variable_length_userdata,
624                               ofproto->backer->max_mpls_depth,
625                               ofproto->backer->masked_set_action);
626
627             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
628                 xlate_bundle_set(ofproto, bundle, bundle->name,
629                                  bundle->vlan_mode, bundle->vlan,
630                                  bundle->trunks, bundle->use_priority_tags,
631                                  bundle->bond, bundle->lacp,
632                                  bundle->floodable);
633             }
634
635             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
                /* -1 stands for "no STP port" in the value handed to
                 * xlate_ofport_set(). */
636                 int stp_port = ofport->stp_port
637                     ? stp_port_no(ofport->stp_port)
638                     : -1;
639                 xlate_ofport_set(ofproto, ofport->bundle, ofport,
640                                  ofport->up.ofp_port, ofport->odp_port,
641                                  ofport->up.netdev, ofport->cfm,
642                                  ofport->bfd, ofport->peer, stp_port,
643                                  ofport->rstp_port, ofport->qdscp,
644                                  ofport->n_qdscp, ofport->up.pp.config,
645                                  ofport->up.pp.state, ofport->is_tunnel,
646                                  ofport->may_enable);
647             }
648             xlate_txn_commit();
649         }
650
651         udpif_revalidate(backer->udpif);
652     }
653
654     process_dpif_port_changes(backer);
655
656     return 0;
657 }
658
659 /* Check for and handle port changes in 'backer''s dpif. */
660 static void
661 process_dpif_port_changes(struct dpif_backer *backer)
662 {
663     for (;;) {
664         char *devname;
665         int error;
666
667         error = dpif_port_poll(backer->dpif, &devname);
668         switch (error) {
669         case EAGAIN:
670             return;
671
672         case ENOBUFS:
673             process_dpif_all_ports_changed(backer);
674             break;
675
676         case 0:
677             process_dpif_port_change(backer, devname);
678             free(devname);
679             break;
680
681         default:
682             process_dpif_port_error(backer, error);
683             break;
684         }
685     }
686 }
687
688 static void
689 process_dpif_all_ports_changed(struct dpif_backer *backer)
690 {
691     struct ofproto_dpif *ofproto;
692     struct dpif_port dpif_port;
693     struct dpif_port_dump dump;
694     struct sset devnames;
695     const char *devname;
696
697     sset_init(&devnames);
698     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
699         if (ofproto->backer == backer) {
700             struct ofport *ofport;
701
702             HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
703                 sset_add(&devnames, netdev_get_name(ofport->netdev));
704             }
705         }
706     }
707     DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
708         sset_add(&devnames, dpif_port.name);
709     }
710
711     SSET_FOR_EACH (devname, &devnames) {
712         process_dpif_port_change(backer, devname);
713     }
714     sset_destroy(&devnames);
715 }
716
/* Handles a change notification for the datapath port named 'devname' on
 * 'backer'.
 *
 * Notifications for the datapath's own base device and for ports that back
 * tunnels are ignored.  Otherwise:
 *
 *   - If the port no longer exists in the datapath and belongs to a known
 *     ofproto, its name is queued in that ofproto's 'port_poll_set'.
 *
 *   - If the port exists but no ofproto claims it, it is deleted from the
 *     datapath.
 *
 *   - If the port exists, is claimed, and its datapath port number changed
 *     to one not yet in use, the backer's odp-to-ofport map is updated and a
 *     revalidation is requested. */
717 static void
718 process_dpif_port_change(struct dpif_backer *backer, const char *devname)
719 {
720     struct ofproto_dpif *ofproto;
721     struct dpif_port port;
722
723     /* Don't report on the datapath's device. */
724     if (!strcmp(devname, dpif_base_name(backer->dpif))) {
725         return;
726     }
727
    /* Don't report on ports that back tunnels either.  Note that this
     * checks the tunnel backers of every ofproto's backer, not only those
     * of 'backer'. */
728     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
729                    &all_ofproto_dpifs) {
730         if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
731             return;
732         }
733     }
734
735     ofproto = lookup_ofproto_dpif_by_port_name(devname);
736     if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
737         /* The port was removed.  If we know the datapath,
738          * report it through poll_set().  If we don't, it may be
739          * notifying us of a removal we initiated, so ignore it.
740          * If there's a pending ENOBUFS, let it stand, since
741          * everything will be reevaluated. */
742         if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
743             sset_add(&ofproto->port_poll_set, devname);
744             ofproto->port_poll_errno = 0;
745         }
746     } else if (!ofproto) {
747         /* The port was added, but we don't know with which
748          * ofproto we should associate it.  Delete it. */
749         dpif_port_del(backer->dpif, port.port_no);
750     } else {
751         struct ofport_dpif *ofport;
752
753         ofport = ofport_dpif_cast(shash_find_data(
754                                       &ofproto->up.port_by_name, devname));
755         if (ofport
756             && ofport->odp_port != port.port_no
757             && !odp_port_to_ofport(backer, port.port_no))
758         {
759             /* 'ofport''s datapath port number has changed from
760              * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
761              * structures to match. */
            /* The node is rehashed under the write lock so that the map
             * never holds it under a stale port number. */
762             ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
763             hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
764             ofport->odp_port = port.port_no;
765             hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
766                         hash_odp_port(port.port_no));
767             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
768             backer->need_revalidate = REV_RECONFIGURE;
769         }
770     }
    /* NOTE(review): 'port' is destroyed on every path, including when
     * dpif_port_query_by_name() failed -- presumably that function leaves
     * 'port' safe to destroy on error; confirm. */
771     dpif_port_destroy(&port);
772 }
773
774 /* Propagate 'error' to all ofprotos based on 'backer'. */
775 static void
776 process_dpif_port_error(struct dpif_backer *backer, int error)
777 {
778     struct ofproto_dpif *ofproto;
779
780     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
781         if (ofproto->backer == backer) {
782             sset_clear(&ofproto->port_poll_set);
783             ofproto->port_poll_errno = error;
784         }
785     }
786 }
787
788 static void
789 type_wait(const char *type)
790 {
791     struct dpif_backer *backer;
792
793     backer = shash_find_data(&all_dpif_backers, type);
794     if (!backer) {
795         /* This is not necessarily a problem, since backers are only
796          * created on demand. */
797         return;
798     }
799
800     dpif_wait(backer->dpif);
801 }
802 \f
803 /* Basic life-cycle. */
804
805 static int add_internal_flows(struct ofproto_dpif *);
806
807 static struct ofproto *
808 alloc(void)
809 {
810     struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
811     return &ofproto->up;
812 }
813
/* Frees the 'struct ofproto_dpif' that contains 'ofproto_'. */
static void
dealloc(struct ofproto *ofproto_)
{
    free(ofproto_dpif_cast(ofproto_));
}
820
821 static void
822 close_dpif_backer(struct dpif_backer *backer)
823 {
824     ovs_assert(backer->refcount > 0);
825
826     if (--backer->refcount) {
827         return;
828     }
829
830     udpif_destroy(backer->udpif);
831
832     simap_destroy(&backer->tnl_backers);
833     ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
834     hmap_destroy(&backer->odp_to_ofport_map);
835     shash_find_and_delete(&all_dpif_backers, backer->type);
836     recirc_id_pool_destroy(backer->rid_pool);
837     free(backer->type);
838     dpif_close(backer->dpif);
839     free(backer);
840 }
841
842 /* Datapath port slated for removal from datapath. */
843 struct odp_garbage {
    /* NOTE(review): presumably links this entry into the 'garbage_list'
     * that open_dpif_backer() declares -- confirm against that function. */
844     struct list list_node;
845     odp_port_t odp_port;
846 };
847
848 static bool check_variable_length_userdata(struct dpif_backer *backer);
849 static size_t check_max_mpls_depth(struct dpif_backer *backer);
850 static bool check_recirc(struct dpif_backer *backer);
851 static bool check_masked_set_action(struct dpif_backer *backer);
852
853 static int
854 open_dpif_backer(const char *type, struct dpif_backer **backerp)
855 {
856     struct dpif_backer *backer;
857     struct dpif_port_dump port_dump;
858     struct dpif_port port;
859     struct shash_node *node;
860     struct list garbage_list;
861     struct odp_garbage *garbage, *next;
862
863     struct sset names;
864     char *backer_name;
865     const char *name;
866     int error;
867
868     backer = shash_find_data(&all_dpif_backers, type);
869     if (backer) {
870         backer->refcount++;
871         *backerp = backer;
872         return 0;
873     }
874
875     backer_name = xasprintf("ovs-%s", type);
876
877     /* Remove any existing datapaths, since we assume we're the only
878      * userspace controlling the datapath. */
879     sset_init(&names);
880     dp_enumerate_names(type, &names);
881     SSET_FOR_EACH(name, &names) {
882         struct dpif *old_dpif;
883
884         /* Don't remove our backer if it exists. */
885         if (!strcmp(name, backer_name)) {
886             continue;
887         }
888
889         if (dpif_open(name, type, &old_dpif)) {
890             VLOG_WARN("couldn't open old datapath %s to remove it", name);
891         } else {
892             dpif_delete(old_dpif);
893             dpif_close(old_dpif);
894         }
895     }
896     sset_destroy(&names);
897
898     backer = xmalloc(sizeof *backer);
899
900     error = dpif_create_and_open(backer_name, type, &backer->dpif);
901     free(backer_name);
902     if (error) {
903         VLOG_ERR("failed to open datapath of type %s: %s", type,
904                  ovs_strerror(error));
905         free(backer);
906         return error;
907     }
908     backer->udpif = udpif_create(backer, backer->dpif);
909
910     backer->type = xstrdup(type);
911     backer->refcount = 1;
912     hmap_init(&backer->odp_to_ofport_map);
913     ovs_rwlock_init(&backer->odp_to_ofport_lock);
914     backer->need_revalidate = 0;
915     simap_init(&backer->tnl_backers);
916     backer->recv_set_enable = !ofproto_get_flow_restore_wait();
917     *backerp = backer;
918
919     if (backer->recv_set_enable) {
920         dpif_flow_flush(backer->dpif);
921     }
922
923     /* Loop through the ports already on the datapath and remove any
924      * that we don't need anymore. */
925     list_init(&garbage_list);
926     dpif_port_dump_start(&port_dump, backer->dpif);
927     while (dpif_port_dump_next(&port_dump, &port)) {
928         node = shash_find(&init_ofp_ports, port.name);
929         if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
930             garbage = xmalloc(sizeof *garbage);
931             garbage->odp_port = port.port_no;
932             list_push_front(&garbage_list, &garbage->list_node);
933         }
934     }
935     dpif_port_dump_done(&port_dump);
936
937     LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
938         dpif_port_del(backer->dpif, garbage->odp_port);
939         list_remove(&garbage->list_node);
940         free(garbage);
941     }
942
943     shash_add(&all_dpif_backers, type, backer);
944
945     backer->enable_recirc = check_recirc(backer);
946     backer->max_mpls_depth = check_max_mpls_depth(backer);
947     backer->masked_set_action = check_masked_set_action(backer);
948     backer->rid_pool = recirc_id_pool_create();
949
950     error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
951     if (error) {
952         VLOG_ERR("failed to listen on datapath of type %s: %s",
953                  type, ovs_strerror(error));
954         close_dpif_backer(backer);
955         return error;
956     }
957
958     if (backer->recv_set_enable) {
959         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
960     }
961
962     /* This check fails if performed before udpif threads have been set,
963      * as the kernel module checks that the 'pid' in userspace action
964      * is non-zero. */
965     backer->variable_length_userdata = check_variable_length_userdata(backer);
966
967     return error;
968 }
969
970 /* Tests whether 'backer''s datapath supports recirculation.  Only newer
971  * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
972  * features on older datapaths that don't support this feature.
973  *
974  * Returns false if 'backer' definitely does not support recirculation, true if
975  * it seems to support recirculation or if at least the error we get is
976  * ambiguous. */
977 static bool
978 check_recirc(struct dpif_backer *backer)
979 {
980     struct flow flow;
981     struct odputil_keybuf keybuf;
982     struct ofpbuf key;
983     int error;
984     bool enable_recirc = false;
985
986     memset(&flow, 0, sizeof flow);
987     flow.recirc_id = 1;
988     flow.dp_hash = 1;
989
990     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
991     odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
992
993     error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE,
994                           ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
995                           0, NULL);
996     if (error && error != EEXIST) {
997         if (error != EINVAL) {
998             VLOG_WARN("%s: Reciculation flow probe failed (%s)",
999                       dpif_name(backer->dpif), ovs_strerror(error));
1000         }
1001         goto done;
1002     }
1003
1004     error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
1005                           NULL);
1006     if (error) {
1007         VLOG_WARN("%s: failed to delete recirculation feature probe flow",
1008                   dpif_name(backer->dpif));
1009     }
1010
1011     enable_recirc = true;
1012
1013 done:
1014     if (enable_recirc) {
1015         VLOG_INFO("%s: Datapath supports recirculation",
1016                   dpif_name(backer->dpif));
1017     } else {
1018         VLOG_INFO("%s: Datapath does not support recirculation",
1019                   dpif_name(backer->dpif));
1020     }
1021
1022     return enable_recirc;
1023 }
1024
1025 /* Tests whether 'backer''s datapath supports variable-length
1026  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
1027  * to disable some features on older datapaths that don't support this
1028  * feature.
1029  *
1030  * Returns false if 'backer' definitely does not support variable-length
1031  * userdata, true if it seems to support them or if at least the error we get
1032  * is ambiguous. */
1033 static bool
1034 check_variable_length_userdata(struct dpif_backer *backer)
1035 {
1036     struct eth_header *eth;
1037     struct ofpbuf actions;
1038     struct dpif_execute execute;
1039     struct ofpbuf packet;
1040     size_t start;
1041     int error;
1042
1043     /* Compose a userspace action that will cause an ERANGE error on older
1044      * datapaths that don't support variable-length userdata.
1045      *
1046      * We really test for using userdata longer than 8 bytes, but older
1047      * datapaths accepted these, silently truncating the userdata to 8 bytes.
1048      * The same older datapaths rejected userdata shorter than 8 bytes, so we
1049      * test for that instead as a proxy for longer userdata support. */
1050     ofpbuf_init(&actions, 64);
1051     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
1052     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
1053                    dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
1054     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
1055     nl_msg_end_nested(&actions, start);
1056
1057     /* Compose a dummy ethernet packet. */
1058     ofpbuf_init(&packet, ETH_HEADER_LEN);
1059     eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
1060     eth->eth_type = htons(0x1234);
1061
1062     /* Execute the actions.  On older datapaths this fails with ERANGE, on
1063      * newer datapaths it succeeds. */
1064     execute.actions = ofpbuf_data(&actions);
1065     execute.actions_len = ofpbuf_size(&actions);
1066     execute.packet = &packet;
1067     execute.md = PKT_METADATA_INITIALIZER(0);
1068     execute.needs_help = false;
1069
1070     error = dpif_execute(backer->dpif, &execute);
1071
1072     ofpbuf_uninit(&packet);
1073     ofpbuf_uninit(&actions);
1074
1075     switch (error) {
1076     case 0:
1077         return true;
1078
1079     case ERANGE:
1080         /* Variable-length userdata is not supported. */
1081         VLOG_WARN("%s: datapath does not support variable-length userdata "
1082                   "feature (needs Linux 3.10+ or kernel module from OVS "
1083                   "1..11+).  The NXAST_SAMPLE action will be ignored.",
1084                   dpif_name(backer->dpif));
1085         return false;
1086
1087     default:
1088         /* Something odd happened.  We're not sure whether variable-length
1089          * userdata is supported.  Default to "yes". */
1090         VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
1091                   dpif_name(backer->dpif), ovs_strerror(error));
1092         return true;
1093     }
1094 }
1095
1096 /* Tests the MPLS label stack depth supported by 'backer''s datapath.
1097  *
1098  * Returns the number of elements in a struct flow's mpls_lse field
1099  * if the datapath supports at least that many entries in an
1100  * MPLS label stack.
1101  * Otherwise returns the number of MPLS push actions supported by
1102  * the datapath. */
1103 static size_t
1104 check_max_mpls_depth(struct dpif_backer *backer)
1105 {
1106     struct flow flow;
1107     int n;
1108
1109     for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
1110         struct odputil_keybuf keybuf;
1111         struct ofpbuf key;
1112         int error;
1113
1114         memset(&flow, 0, sizeof flow);
1115         flow.dl_type = htons(ETH_TYPE_MPLS);
1116         flow_set_mpls_bos(&flow, n, 1);
1117
1118         ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
1119         odp_flow_key_from_flow(&key, &flow, NULL, 0, false);
1120
1121         error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE,
1122                               ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0,
1123                               NULL, 0, NULL);
1124         if (error && error != EEXIST) {
1125             if (error != EINVAL) {
1126                 VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
1127                           dpif_name(backer->dpif), ovs_strerror(error));
1128             }
1129             break;
1130         }
1131
1132         error = dpif_flow_del(backer->dpif, ofpbuf_data(&key),
1133                               ofpbuf_size(&key), NULL);
1134         if (error) {
1135             VLOG_WARN("%s: failed to delete MPLS feature probe flow",
1136                       dpif_name(backer->dpif));
1137         }
1138     }
1139
1140     VLOG_INFO("%s: MPLS label stack length probed as %d",
1141               dpif_name(backer->dpif), n);
1142     return n;
1143 }
1144
1145 /* Tests whether 'backer''s datapath supports masked data in
1146  * OVS_ACTION_ATTR_SET actions.  We need to disable some features on older
1147  * datapaths that don't support this feature. */
1148 static bool
1149 check_masked_set_action(struct dpif_backer *backer)
1150 {
1151     struct eth_header *eth;
1152     struct ofpbuf actions;
1153     struct dpif_execute execute;
1154     struct ofpbuf packet;
1155     int error;
1156     struct ovs_key_ethernet key, mask;
1157
1158     /* Compose a set action that will cause an EINVAL error on older
1159      * datapaths that don't support masked set actions.
1160      * Avoid using a full mask, as it could be translated to a non-masked
1161      * set action instead. */
1162     ofpbuf_init(&actions, 64);
1163     memset(&key, 0x53, sizeof key);
1164     memset(&mask, 0x7f, sizeof mask);
1165     commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
1166                              sizeof key);
1167
1168     /* Compose a dummy ethernet packet. */
1169     ofpbuf_init(&packet, ETH_HEADER_LEN);
1170     eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
1171     eth->eth_type = htons(0x1234);
1172
1173     /* Execute the actions.  On older datapaths this fails with EINVAL, on
1174      * newer datapaths it succeeds. */
1175     execute.actions = ofpbuf_data(&actions);
1176     execute.actions_len = ofpbuf_size(&actions);
1177     execute.packet = &packet;
1178     execute.md = PKT_METADATA_INITIALIZER(0);
1179     execute.needs_help = false;
1180
1181     error = dpif_execute(backer->dpif, &execute);
1182
1183     ofpbuf_uninit(&packet);
1184     ofpbuf_uninit(&actions);
1185
1186     if (error) {
1187         /* Masked set action is not supported. */
1188         VLOG_INFO("%s: datapath does not support masked set action feature.",
1189                   dpif_name(backer->dpif));
1190     }
1191     return !error;
1192 }
1193
1194 static int
1195 construct(struct ofproto *ofproto_)
1196 {
1197     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1198     struct shash_node *node, *next;
1199     int error;
1200
1201     error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
1202     if (error) {
1203         return error;
1204     }
1205
1206     ofproto->netflow = NULL;
1207     ofproto->sflow = NULL;
1208     ofproto->ipfix = NULL;
1209     ofproto->stp = NULL;
1210     ofproto->rstp = NULL;
1211     ofproto->dump_seq = 0;
1212     hmap_init(&ofproto->bundles);
1213     ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
1214     ofproto->ms = NULL;
1215     ofproto->mbridge = mbridge_create();
1216     ofproto->has_bonded_bundles = false;
1217     ofproto->lacp_enabled = false;
1218     ovs_mutex_init_adaptive(&ofproto->stats_mutex);
1219     ovs_mutex_init(&ofproto->vsp_mutex);
1220
1221     guarded_list_init(&ofproto->pins);
1222
1223     ofproto_dpif_unixctl_init();
1224
1225     hmap_init(&ofproto->vlandev_map);
1226     hmap_init(&ofproto->realdev_vid_map);
1227
1228     sset_init(&ofproto->ports);
1229     sset_init(&ofproto->ghost_ports);
1230     sset_init(&ofproto->port_poll_set);
1231     ofproto->port_poll_errno = 0;
1232     ofproto->change_seq = 0;
1233     ofproto->pins_seq = seq_create();
1234     ofproto->pins_seqno = seq_read(ofproto->pins_seq);
1235
1236
1237     SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
1238         struct iface_hint *iface_hint = node->data;
1239
1240         if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
1241             /* Check if the datapath already has this port. */
1242             if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
1243                 sset_add(&ofproto->ports, node->name);
1244             }
1245
1246             free(iface_hint->br_name);
1247             free(iface_hint->br_type);
1248             free(iface_hint);
1249             shash_delete(&init_ofp_ports, node);
1250         }
1251     }
1252
1253     hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
1254                 hash_string(ofproto->up.name, 0));
1255     memset(&ofproto->stats, 0, sizeof ofproto->stats);
1256
1257     ofproto_init_tables(ofproto_, N_TABLES);
1258     error = add_internal_flows(ofproto);
1259
1260     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
1261
1262     return error;
1263 }
1264
1265 static int
1266 add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
1267                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
1268 {
1269     struct match match;
1270     int error;
1271     struct rule *rule;
1272
1273     match_init_catchall(&match);
1274     match_set_reg(&match, 0, id);
1275
1276     error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
1277                                            &rule);
1278     *rulep = error ? NULL : rule_dpif_cast(rule);
1279
1280     return error;
1281 }
1282
1283 static int
1284 add_internal_flows(struct ofproto_dpif *ofproto)
1285 {
1286     struct ofpact_controller *controller;
1287     uint64_t ofpacts_stub[128 / 8];
1288     struct ofpbuf ofpacts;
1289     struct rule *unused_rulep OVS_UNUSED;
1290     struct ofpact_resubmit *resubmit;
1291     struct match match;
1292     int error;
1293     int id;
1294
1295     ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
1296     id = 1;
1297
1298     controller = ofpact_put_CONTROLLER(&ofpacts);
1299     controller->max_len = UINT16_MAX;
1300     controller->controller_id = 0;
1301     controller->reason = OFPR_NO_MATCH;
1302     ofpact_pad(&ofpacts);
1303
1304     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1305                                    &ofproto->miss_rule);
1306     if (error) {
1307         return error;
1308     }
1309
1310     ofpbuf_clear(&ofpacts);
1311     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1312                                    &ofproto->no_packet_in_rule);
1313     if (error) {
1314         return error;
1315     }
1316
1317     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1318                                    &ofproto->drop_frags_rule);
1319     if (error) {
1320         return error;
1321     }
1322
1323     /* Continue non-recirculation rule lookups from table 0.
1324      *
1325      * (priority=2), recirc=0, actions=resubmit(, 0)
1326      */
1327     resubmit = ofpact_put_RESUBMIT(&ofpacts);
1328     resubmit->in_port = OFPP_IN_PORT;
1329     resubmit->table_id = 0;
1330
1331     match_init_catchall(&match);
1332     match_set_recirc_id(&match, 0);
1333
1334     error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
1335                                            &unused_rulep);
1336     if (error) {
1337         return error;
1338     }
1339
1340     /* Drop any run away recirc rule lookups. Recirc_id has to be
1341      * non-zero when reaching this rule.
1342      *
1343      * (priority=1), *, actions=drop
1344      */
1345     ofpbuf_clear(&ofpacts);
1346     match_init_catchall(&match);
1347     error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
1348                                            &unused_rulep);
1349
1350     return error;
1351 }
1352
/* Tears down the ofproto-dpif specific state of 'ofproto_': detaches it from
 * flow translation, deletes all of its rules, discards queued packet-ins,
 * releases every per-bridge object, and finally drops this ofproto's
 * reference to its datapath backer. */
static void
destruct(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    struct ofproto_packet_in *pin, *next_pin;
    struct rule_dpif *rule;
    struct oftable *table;
    struct list pins;

    /* Request revalidation and remove the ofproto from the xlate
     * configuration inside a single xlate transaction. */
    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_remove_ofproto(ofproto);
    xlate_txn_commit();

    /* Ensure that the upcall processing threads have no remaining references
     * to the ofproto or anything in it. */
    udpif_synchronize(ofproto->backer->udpif);

    hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);

    /* Delete every rule in every table. */
    OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
        CLS_FOR_EACH_SAFE (rule, up.cr, &table->cls) {
            ofproto_rule_delete(&ofproto->up, &rule->up);
        }
    }

    /* Free packet-ins that were queued but never sent to the controller. */
    guarded_list_pop_all(&ofproto->pins, &pins);
    LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
        list_remove(&pin->list_node);
        free(CONST_CAST(void *, pin->up.packet));
        free(pin);
    }
    guarded_list_destroy(&ofproto->pins);

    mbridge_unref(ofproto->mbridge);

    netflow_unref(ofproto->netflow);
    dpif_sflow_unref(ofproto->sflow);
    dpif_ipfix_unref(ofproto->ipfix);
    hmap_destroy(&ofproto->bundles);
    mac_learning_unref(ofproto->ml);
    mcast_snooping_unref(ofproto->ms);

    hmap_destroy(&ofproto->vlandev_map);
    hmap_destroy(&ofproto->realdev_vid_map);

    sset_destroy(&ofproto->ports);
    sset_destroy(&ofproto->ghost_ports);
    sset_destroy(&ofproto->port_poll_set);

    ovs_mutex_destroy(&ofproto->stats_mutex);
    ovs_mutex_destroy(&ofproto->vsp_mutex);

    seq_destroy(ofproto->pins_seq);

    /* Last: the statements above still dereference 'ofproto->backer', and
     * close_dpif_backer() frees the backer when this was its final
     * reference. */
    close_dpif_backer(ofproto->backer);
}
1410
/* Performs the periodic work of 'ofproto_': sends queued packet-ins to the
 * controller, runs NetFlow/sFlow/IPFIX, ports, bundles, STP/RSTP, MAC
 * learning and multicast snooping, and, whenever the udpif dump sequence
 * number has changed, expires timed-out OpenFlow flows and rebalances bonds.
 *
 * Always returns 0. */
static int
run(struct ofproto *ofproto_)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    uint64_t new_seq, new_dump_seq;

    /* When the mbridge reports that revalidation is needed, also throw away
     * everything learned so far (MAC table and multicast snooping state). */
    if (mbridge_need_revalidate(ofproto->mbridge)) {
        ofproto->backer->need_revalidate = REV_RECONFIGURE;
        ovs_rwlock_wrlock(&ofproto->ml->rwlock);
        mac_learning_flush(ofproto->ml);
        ovs_rwlock_unlock(&ofproto->ml->rwlock);
        mcast_snooping_mdb_flush(ofproto->ms);
    }

    /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
     * flow restore.  Even though nothing is processed during flow restore,
     * all queued 'pins' will be handled immediately when flow restore
     * completes. */
    ofproto->pins_seqno = seq_read(ofproto->pins_seq);

    /* Do not perform any periodic activity required by 'ofproto' while
     * waiting for flow restore to complete. */
    if (!ofproto_get_flow_restore_wait()) {
        struct ofproto_packet_in *pin, *next_pin;
        struct list pins;

        /* Drain the queue in one step, then send and free each packet-in. */
        guarded_list_pop_all(&ofproto->pins, &pins);
        LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
            connmgr_send_packet_in(ofproto->up.connmgr, pin);
            list_remove(&pin->list_node);
            free(CONST_CAST(void *, pin->up.packet));
            free(pin);
        }
    }

    if (ofproto->netflow) {
        netflow_run(ofproto->netflow);
    }
    if (ofproto->sflow) {
        dpif_sflow_run(ofproto->sflow);
    }
    if (ofproto->ipfix) {
        dpif_ipfix_run(ofproto->ipfix);
    }

    /* Ports only need to be run when the global connectivity sequence number
     * has changed since the last time we looked. */
    new_seq = seq_read(connectivity_seq_get());
    if (ofproto->change_seq != new_seq) {
        struct ofport_dpif *ofport;

        HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
            port_run(ofport);
        }

        ofproto->change_seq = new_seq;
    }
    if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
        struct ofbundle *bundle;

        HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
            bundle_run(bundle);
        }
    }

    stp_run(ofproto);
    rstp_run(ofproto);
    /* mac_learning_run() is called with the MAC table's rwlock held for
     * writing. */
    ovs_rwlock_wrlock(&ofproto->ml->rwlock);
    if (mac_learning_run(ofproto->ml)) {
        ofproto->backer->need_revalidate = REV_MAC_LEARNING;
    }
    ovs_rwlock_unlock(&ofproto->ml->rwlock);

    if (mcast_snooping_run(ofproto->ms)) {
        ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
    }

    new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
    if (ofproto->dump_seq != new_dump_seq) {
        struct rule *rule, *next_rule;

        /* We know stats are relatively fresh, so now is a good time to do some
         * periodic work. */
        ofproto->dump_seq = new_dump_seq;

        /* Expire OpenFlow flows whose idle_timeout or hard_timeout
         * has passed. */
        ovs_mutex_lock(&ofproto_mutex);
        LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
                            &ofproto->up.expirable) {
            rule_expire(rule_dpif_cast(rule));
        }
        ovs_mutex_unlock(&ofproto_mutex);

        /* All outstanding data in existing flows has been accounted, so it's a
         * good time to do bond rebalancing. */
        if (ofproto->has_bonded_bundles) {
            struct ofbundle *bundle;

            HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
                if (bundle->bond) {
                    bond_rebalance(bundle->bond);
                }
            }
        }
    }

    return 0;
}
1518
1519 static void
1520 wait(struct ofproto *ofproto_)
1521 {
1522     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1523
1524     if (ofproto_get_flow_restore_wait()) {
1525         return;
1526     }
1527
1528     if (ofproto->sflow) {
1529         dpif_sflow_wait(ofproto->sflow);
1530     }
1531     if (ofproto->ipfix) {
1532         dpif_ipfix_wait(ofproto->ipfix);
1533     }
1534     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1535         struct ofbundle *bundle;
1536
1537         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1538             bundle_wait(bundle);
1539         }
1540     }
1541     if (ofproto->netflow) {
1542         netflow_wait(ofproto->netflow);
1543     }
1544     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
1545     mac_learning_wait(ofproto->ml);
1546     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1547     mcast_snooping_wait(ofproto->ms);
1548     stp_wait(ofproto);
1549     if (ofproto->backer->need_revalidate) {
1550         /* Shouldn't happen, but if it does just go around again. */
1551         VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
1552         poll_immediate_wake();
1553     }
1554
1555     seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
1556     seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
1557 }
1558
1559 static void
1560 type_get_memory_usage(const char *type, struct simap *usage)
1561 {
1562     struct dpif_backer *backer;
1563
1564     backer = shash_find_data(&all_dpif_backers, type);
1565     if (backer) {
1566         udpif_get_memory_usage(backer->udpif, usage);
1567     }
1568 }
1569
1570 static void
1571 flush(struct ofproto *ofproto_)
1572 {
1573     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1574     struct dpif_backer *backer = ofproto->backer;
1575
1576     if (backer) {
1577         udpif_flush(backer->udpif);
1578     }
1579 }
1580
1581 static void
1582 query_tables(struct ofproto *ofproto,
1583              struct ofputil_table_features *features,
1584              struct ofputil_table_stats *stats)
1585 {
1586     strcpy(features->name, "classifier");
1587
1588     if (stats) {
1589         int i;
1590
1591         for (i = 0; i < ofproto->n_tables; i++) {
1592             unsigned long missed, matched;
1593
1594             atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
1595             atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
1596
1597             stats[i].matched_count = matched;
1598             stats[i].lookup_count = matched + missed;
1599         }
1600     }
1601 }
1602
1603 static struct ofport *
1604 port_alloc(void)
1605 {
1606     struct ofport_dpif *port = xmalloc(sizeof *port);
1607     return &port->up;
1608 }
1609
/* Frees the 'struct ofport_dpif' that contains 'port_'.  Only the structure
 * itself is released here. */
static void
port_dealloc(struct ofport *port_)
{
    free(ofport_dpif_cast(port_));
}
1616
1617 static int
1618 port_construct(struct ofport *port_)
1619 {
1620     struct ofport_dpif *port = ofport_dpif_cast(port_);
1621     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1622     const struct netdev *netdev = port->up.netdev;
1623     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1624     struct dpif_port dpif_port;
1625     int error;
1626
1627     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1628     port->bundle = NULL;
1629     port->cfm = NULL;
1630     port->bfd = NULL;
1631     port->may_enable = false;
1632     port->stp_port = NULL;
1633     port->stp_state = STP_DISABLED;
1634     port->rstp_port = NULL;
1635     port->rstp_state = RSTP_DISABLED;
1636     port->is_tunnel = false;
1637     port->peer = NULL;
1638     port->qdscp = NULL;
1639     port->n_qdscp = 0;
1640     port->realdev_ofp_port = 0;
1641     port->vlandev_vid = 0;
1642     port->carrier_seq = netdev_get_carrier_resets(netdev);
1643     port->is_layer3 = netdev_vport_is_layer3(netdev);
1644
1645     if (netdev_vport_is_patch(netdev)) {
1646         /* By bailing out here, we don't submit the port to the sFlow module
1647          * to be considered for counter polling export.  This is correct
1648          * because the patch port represents an interface that sFlow considers
1649          * to be "internal" to the switch as a whole, and therefore not an
1650          * candidate for counter polling. */
1651         port->odp_port = ODPP_NONE;
1652         ofport_update_peer(port);
1653         return 0;
1654     }
1655
1656     error = dpif_port_query_by_name(ofproto->backer->dpif,
1657                                     netdev_vport_get_dpif_port(netdev, namebuf,
1658                                                                sizeof namebuf),
1659                                     &dpif_port);
1660     if (error) {
1661         return error;
1662     }
1663
1664     port->odp_port = dpif_port.port_no;
1665
1666     if (netdev_get_tunnel_config(netdev)) {
1667         tnl_port_add(port, port->up.netdev, port->odp_port);
1668         port->is_tunnel = true;
1669         if (ofproto->ipfix) {
1670            dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
1671         }
1672     } else {
1673         /* Sanity-check that a mapping doesn't already exist.  This
1674          * shouldn't happen for non-tunnel ports. */
1675         if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
1676             VLOG_ERR("port %s already has an OpenFlow port number",
1677                      dpif_port.name);
1678             dpif_port_destroy(&dpif_port);
1679             return EBUSY;
1680         }
1681
1682         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1683         hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
1684                     hash_odp_port(port->odp_port));
1685         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1686     }
1687     dpif_port_destroy(&dpif_port);
1688
1689     if (ofproto->sflow) {
1690         dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
1691     }
1692
1693     return 0;
1694 }
1695
/* Undoes port_construct() for 'port_': removes the port from flow
 * translation, deletes its datapath port if it still exists (non-tunnel
 * ports only), unlinks it from its peer, the odp_to_ofport map, the tunnel
 * module, its bundle and the monitoring protocols, and frees its queue-to-DSCP
 * table. */
static void
port_destruct(struct ofport *port_)
{
    struct ofport_dpif *port = ofport_dpif_cast(port_);
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
    const char *devname = netdev_get_name(port->up.netdev);
    char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
    const char *dp_port_name;

    /* Request revalidation and remove the port from the xlate configuration
     * inside a single xlate transaction. */
    ofproto->backer->need_revalidate = REV_RECONFIGURE;
    xlate_txn_start();
    xlate_ofport_remove(port);
    xlate_txn_commit();

    dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
                                              sizeof namebuf);
    if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
        /* The underlying device is still there, so delete it.  This
         * happens when the ofproto is being destroyed, since the caller
         * assumes that removal of attached ports will happen as part of
         * destruction. */
        if (!port->is_tunnel) {
            dpif_port_del(ofproto->backer->dpif, port->odp_port);
        }
    }

    /* Break the link in both directions if this port has a peer. */
    if (port->peer) {
        port->peer->peer = NULL;
        port->peer = NULL;
    }

    /* Mirrors port_construct(): only ports that have a datapath port and are
     * not tunnels were inserted into the odp_to_ofport map. */
    if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
        ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
        hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
        ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
    }

    if (port->is_tunnel && ofproto->ipfix) {
       dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
    }

    tnl_port_del(port);
    sset_find_and_delete(&ofproto->ports, devname);
    sset_find_and_delete(&ofproto->ghost_ports, devname);
    bundle_remove(port_);
    /* Passing NULL settings to the set_*() calls below requests that the
     * feature be unconfigured on this port. */
    set_cfm(port_, NULL);
    set_bfd(port_, NULL);
    if (port->stp_port) {
        stp_port_disable(port->stp_port);
    }
    set_rstp_port(port_, NULL);
    if (ofproto->sflow) {
        dpif_sflow_del_port(ofproto->sflow, port->odp_port);
    }

    free(port->qdscp);
}
1753
1754 static void
1755 port_modified(struct ofport *port_)
1756 {
1757     struct ofport_dpif *port = ofport_dpif_cast(port_);
1758
1759     if (port->bundle && port->bundle->bond) {
1760         bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
1761     }
1762
1763     if (port->cfm) {
1764         cfm_set_netdev(port->cfm, port->up.netdev);
1765     }
1766
1767     if (port->bfd) {
1768         bfd_set_netdev(port->bfd, port->up.netdev);
1769     }
1770
1771     ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
1772                                      port->up.pp.hw_addr);
1773
1774     if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
1775                                                 port->odp_port)) {
1776         ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
1777             REV_RECONFIGURE;
1778     }
1779
1780     ofport_update_peer(port);
1781 }
1782
1783 static void
1784 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1785 {
1786     struct ofport_dpif *port = ofport_dpif_cast(port_);
1787     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1788     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1789
1790     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1791                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1792                    OFPUTIL_PC_NO_PACKET_IN)) {
1793         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1794
1795         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1796             bundle_update(port->bundle);
1797         }
1798     }
1799 }
1800
1801 static int
1802 set_sflow(struct ofproto *ofproto_,
1803           const struct ofproto_sflow_options *sflow_options)
1804 {
1805     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1806     struct dpif_sflow *ds = ofproto->sflow;
1807
1808     if (sflow_options) {
1809         if (!ds) {
1810             struct ofport_dpif *ofport;
1811
1812             ds = ofproto->sflow = dpif_sflow_create();
1813             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1814                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1815             }
1816             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1817         }
1818         dpif_sflow_set_options(ds, sflow_options);
1819     } else {
1820         if (ds) {
1821             dpif_sflow_unref(ds);
1822             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1823             ofproto->sflow = NULL;
1824         }
1825     }
1826     return 0;
1827 }
1828
1829 static int
1830 set_ipfix(
1831     struct ofproto *ofproto_,
1832     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1833     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1834     size_t n_flow_exporters_options)
1835 {
1836     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1837     struct dpif_ipfix *di = ofproto->ipfix;
1838     bool has_options = bridge_exporter_options || flow_exporters_options;
1839     bool new_di = false;
1840
1841     if (has_options && !di) {
1842         di = ofproto->ipfix = dpif_ipfix_create();
1843         new_di = true;
1844     }
1845
1846     if (di) {
1847         /* Call set_options in any case to cleanly flush the flow
1848          * caches in the last exporters that are to be destroyed. */
1849         dpif_ipfix_set_options(
1850             di, bridge_exporter_options, flow_exporters_options,
1851             n_flow_exporters_options);
1852
1853         /* Add tunnel ports only when a new ipfix created */
1854         if (new_di == true) {
1855             struct ofport_dpif *ofport;
1856             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1857                 if (ofport->is_tunnel == true) {
1858                     dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
1859                 }
1860             }
1861         }
1862
1863         if (!has_options) {
1864             dpif_ipfix_unref(di);
1865             ofproto->ipfix = NULL;
1866         }
1867     }
1868
1869     return 0;
1870 }
1871
1872 static int
1873 set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
1874 {
1875     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1876     int error = 0;
1877
1878     if (s) {
1879         if (!ofport->cfm) {
1880             struct ofproto_dpif *ofproto;
1881
1882             ofproto = ofproto_dpif_cast(ofport->up.ofproto);
1883             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1884             ofport->cfm = cfm_create(ofport->up.netdev);
1885         }
1886
1887         if (cfm_configure(ofport->cfm, s)) {
1888             error = 0;
1889             goto out;
1890         }
1891
1892         error = EINVAL;
1893     }
1894     cfm_unref(ofport->cfm);
1895     ofport->cfm = NULL;
1896 out:
1897     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1898                                      ofport->up.pp.hw_addr);
1899     return error;
1900 }
1901
1902 static bool
1903 cfm_status_changed(struct ofport *ofport_)
1904 {
1905     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1906
1907     return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
1908 }
1909
1910 static int
1911 get_cfm_status(const struct ofport *ofport_,
1912                struct cfm_status *status)
1913 {
1914     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1915     int ret = 0;
1916
1917     if (ofport->cfm) {
1918         cfm_get_status(ofport->cfm, status);
1919     } else {
1920         ret = ENOENT;
1921     }
1922
1923     return ret;
1924 }
1925
1926 static int
1927 set_bfd(struct ofport *ofport_, const struct smap *cfg)
1928 {
1929     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
1930     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1931     struct bfd *old;
1932
1933     old = ofport->bfd;
1934     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
1935                                 cfg, ofport->up.netdev);
1936     if (ofport->bfd != old) {
1937         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1938     }
1939     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1940                                      ofport->up.pp.hw_addr);
1941     return 0;
1942 }
1943
1944 static bool
1945 bfd_status_changed(struct ofport *ofport_)
1946 {
1947     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1948
1949     return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
1950 }
1951
1952 static int
1953 get_bfd_status(struct ofport *ofport_, struct smap *smap)
1954 {
1955     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1956     int ret = 0;
1957
1958     if (ofport->bfd) {
1959         bfd_get_status(ofport->bfd, smap);
1960     } else {
1961         ret = ENOENT;
1962     }
1963
1964     return ret;
1965 }
1966 \f
1967 /* Spanning Tree. */
1968
1969 /* Called while rstp_mutex is held. */
1970 static void
1971 rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_)
1972 {
1973     struct ofproto_dpif *ofproto = ofproto_;
1974     struct ofport_dpif *ofport = ofport_;
1975     struct eth_header *eth = ofpbuf_l2(pkt);
1976
1977     netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
1978     if (eth_addr_is_zero(eth->eth_src)) {
1979         VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
1980                      "does not have a configured source MAC address.",
1981                      ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
1982     } else {
1983         ofproto_dpif_send_packet(ofport, pkt);
1984     }
1985     ofpbuf_delete(pkt);
1986 }
1987
1988 static void
1989 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
1990 {
1991     struct ofproto_dpif *ofproto = ofproto_;
1992     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
1993     struct ofport_dpif *ofport;
1994
1995     ofport = stp_port_get_aux(sp);
1996     if (!ofport) {
1997         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
1998                      ofproto->up.name, port_num);
1999     } else {
2000         struct eth_header *eth = ofpbuf_l2(pkt);
2001
2002         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2003         if (eth_addr_is_zero(eth->eth_src)) {
2004             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
2005                          "with unknown MAC", ofproto->up.name, port_num);
2006         } else {
2007             ofproto_dpif_send_packet(ofport, pkt);
2008         }
2009     }
2010     ofpbuf_delete(pkt);
2011 }
2012
2013 /* Configure RSTP on 'ofproto_' using the settings defined in 's'. */
2014 static void
2015 set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
2016 {
2017     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2018
2019     /* Only revalidate flows if the configuration changed. */
2020     if (!s != !ofproto->rstp) {
2021         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2022     }
2023
2024     if (s) {
2025         if (!ofproto->rstp) {
2026             ofproto->rstp = rstp_create(ofproto_->name, s->address,
2027                                         rstp_send_bpdu_cb, ofproto);
2028             ofproto->rstp_last_tick = time_msec();
2029         }
2030         rstp_set_bridge_address(ofproto->rstp, s->address);
2031         rstp_set_bridge_priority(ofproto->rstp, s->priority);
2032         rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
2033         rstp_set_bridge_force_protocol_version(ofproto->rstp,
2034                                                s->force_protocol_version);
2035         rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
2036         rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
2037         rstp_set_bridge_transmit_hold_count(ofproto->rstp,
2038                                             s->transmit_hold_count);
2039     } else {
2040         struct ofport *ofport;
2041         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2042             set_rstp_port(ofport, NULL);
2043         }
2044         rstp_unref(ofproto->rstp);
2045         ofproto->rstp = NULL;
2046     }
2047 }
2048
2049 static void
2050 get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
2051 {
2052     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2053
2054     if (ofproto->rstp) {
2055         s->enabled = true;
2056         s->root_id = rstp_get_root_id(ofproto->rstp);
2057         s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
2058         s->designated_id = rstp_get_designated_id(ofproto->rstp);
2059         s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
2060         s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
2061         s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
2062     } else {
2063         s->enabled = false;
2064     }
2065 }
2066
2067 static void
2068 update_rstp_port_state(struct ofport_dpif *ofport)
2069 {
2070     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2071     enum rstp_state state;
2072
2073     /* Figure out new state. */
2074     state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
2075         : RSTP_DISABLED;
2076
2077     /* Update state. */
2078     if (ofport->rstp_state != state) {
2079         enum ofputil_port_state of_state;
2080         bool fwd_change;
2081
2082         VLOG_DBG("port %s: RSTP state changed from %s to %s",
2083                  netdev_get_name(ofport->up.netdev),
2084                  rstp_state_name(ofport->rstp_state),
2085                  rstp_state_name(state));
2086         if (rstp_learn_in_state(ofport->rstp_state)
2087                 != rstp_learn_in_state(state)) {
2088             /* xxx Learning action flows should also be flushed. */
2089             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2090             mac_learning_flush(ofproto->ml);
2091             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2092         }
2093         fwd_change = rstp_forward_in_state(ofport->rstp_state)
2094             != rstp_forward_in_state(state);
2095
2096         ofproto->backer->need_revalidate = REV_RSTP;
2097         ofport->rstp_state = state;
2098
2099         if (fwd_change && ofport->bundle) {
2100             bundle_update(ofport->bundle);
2101         }
2102
2103         /* Update the RSTP state bits in the OpenFlow port description. */
2104         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2105         of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
2106                 : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2107                 : state == RSTP_DISCARDING ?  OFPUTIL_PS_STP_LISTEN
2108                 : 0);
2109         ofproto_port_set_state(&ofport->up, of_state);
2110     }
2111 }
2112
2113 static void
2114 rstp_run(struct ofproto_dpif *ofproto)
2115 {
2116     if (ofproto->rstp) {
2117         long long int now = time_msec();
2118         long long int elapsed = now - ofproto->rstp_last_tick;
2119         struct rstp_port *rp;
2120         struct ofport_dpif *ofport;
2121
2122         /* Every second, decrease the values of the timers. */
2123         if (elapsed >= 1000) {
2124             rstp_tick_timers(ofproto->rstp);
2125             ofproto->rstp_last_tick = now;
2126         }
2127         rp = NULL;
2128         while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
2129             update_rstp_port_state(ofport);
2130         }
2131         /* FIXME: This check should be done on-event (i.e., when setting
2132          * p->fdb_flush) and not periodically.
2133          */
2134         if (rstp_check_and_reset_fdb_flush(ofproto->rstp)) {
2135             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2136             /* FIXME: RSTP should be able to flush the entries pertaining to a
2137              * single port, not the whole table.
2138              */
2139             mac_learning_flush(ofproto->ml);
2140             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2141         }
2142     }
2143 }
2144
2145 /* Configures STP on 'ofproto_' using the settings defined in 's'. */
2146 static int
2147 set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
2148 {
2149     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2150
2151     /* Only revalidate flows if the configuration changed. */
2152     if (!s != !ofproto->stp) {
2153         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2154     }
2155
2156     if (s) {
2157         if (!ofproto->stp) {
2158             ofproto->stp = stp_create(ofproto_->name, s->system_id,
2159                                       send_bpdu_cb, ofproto);
2160             ofproto->stp_last_tick = time_msec();
2161         }
2162
2163         stp_set_bridge_id(ofproto->stp, s->system_id);
2164         stp_set_bridge_priority(ofproto->stp, s->priority);
2165         stp_set_hello_time(ofproto->stp, s->hello_time);
2166         stp_set_max_age(ofproto->stp, s->max_age);
2167         stp_set_forward_delay(ofproto->stp, s->fwd_delay);
2168     }  else {
2169         struct ofport *ofport;
2170
2171         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2172             set_stp_port(ofport, NULL);
2173         }
2174
2175         stp_unref(ofproto->stp);
2176         ofproto->stp = NULL;
2177     }
2178
2179     return 0;
2180 }
2181
2182 static int
2183 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2184 {
2185     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2186
2187     if (ofproto->stp) {
2188         s->enabled = true;
2189         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2190         s->designated_root = stp_get_designated_root(ofproto->stp);
2191         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2192     } else {
2193         s->enabled = false;
2194     }
2195
2196     return 0;
2197 }
2198
2199 static void
2200 update_stp_port_state(struct ofport_dpif *ofport)
2201 {
2202     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2203     enum stp_state state;
2204
2205     /* Figure out new state. */
2206     state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
2207                              : STP_DISABLED;
2208
2209     /* Update state. */
2210     if (ofport->stp_state != state) {
2211         enum ofputil_port_state of_state;
2212         bool fwd_change;
2213
2214         VLOG_DBG("port %s: STP state changed from %s to %s",
2215                  netdev_get_name(ofport->up.netdev),
2216                  stp_state_name(ofport->stp_state),
2217                  stp_state_name(state));
2218         if (stp_learn_in_state(ofport->stp_state)
2219                 != stp_learn_in_state(state)) {
2220             /* xxx Learning action flows should also be flushed. */
2221             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2222             mac_learning_flush(ofproto->ml);
2223             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2224             mcast_snooping_mdb_flush(ofproto->ms);
2225         }
2226         fwd_change = stp_forward_in_state(ofport->stp_state)
2227                         != stp_forward_in_state(state);
2228
2229         ofproto->backer->need_revalidate = REV_STP;
2230         ofport->stp_state = state;
2231         ofport->stp_state_entered = time_msec();
2232
2233         if (fwd_change && ofport->bundle) {
2234             bundle_update(ofport->bundle);
2235         }
2236
2237         /* Update the STP state bits in the OpenFlow port description. */
2238         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2239         of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
2240                      : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
2241                      : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2242                      : state == STP_BLOCKING ?  OFPUTIL_PS_STP_BLOCK
2243                      : 0);
2244         ofproto_port_set_state(&ofport->up, of_state);
2245     }
2246 }
2247
2248 /* Configures STP on 'ofport_' using the settings defined in 's'.  The
2249  * caller is responsible for assigning STP port numbers and ensuring
2250  * there are no duplicates. */
2251 static int
2252 set_stp_port(struct ofport *ofport_,
2253              const struct ofproto_port_stp_settings *s)
2254 {
2255     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2256     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2257     struct stp_port *sp = ofport->stp_port;
2258
2259     if (!s || !s->enable) {
2260         if (sp) {
2261             ofport->stp_port = NULL;
2262             stp_port_disable(sp);
2263             update_stp_port_state(ofport);
2264         }
2265         return 0;
2266     } else if (sp && stp_port_no(sp) != s->port_num
2267                && ofport == stp_port_get_aux(sp)) {
2268         /* The port-id changed, so disable the old one if it's not
2269          * already in use by another port. */
2270         stp_port_disable(sp);
2271     }
2272
2273     sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
2274
2275     /* Set name before enabling the port so that debugging messages can print
2276      * the name. */
2277     stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
2278     stp_port_enable(sp);
2279
2280     stp_port_set_aux(sp, ofport);
2281     stp_port_set_priority(sp, s->priority);
2282     stp_port_set_path_cost(sp, s->path_cost);
2283
2284     update_stp_port_state(ofport);
2285
2286     return 0;
2287 }
2288
2289 static int
2290 get_stp_port_status(struct ofport *ofport_,
2291                     struct ofproto_port_stp_status *s)
2292 {
2293     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2294     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2295     struct stp_port *sp = ofport->stp_port;
2296
2297     if (!ofproto->stp || !sp) {
2298         s->enabled = false;
2299         return 0;
2300     }
2301
2302     s->enabled = true;
2303     s->port_id = stp_port_get_id(sp);
2304     s->state = stp_port_get_state(sp);
2305     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2306     s->role = stp_port_get_role(sp);
2307
2308     return 0;
2309 }
2310
2311 static int
2312 get_stp_port_stats(struct ofport *ofport_,
2313                    struct ofproto_port_stp_stats *s)
2314 {
2315     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2316     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2317     struct stp_port *sp = ofport->stp_port;
2318
2319     if (!ofproto->stp || !sp) {
2320         s->enabled = false;
2321         return 0;
2322     }
2323
2324     s->enabled = true;
2325     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2326
2327     return 0;
2328 }
2329
2330 static void
2331 stp_run(struct ofproto_dpif *ofproto)
2332 {
2333     if (ofproto->stp) {
2334         long long int now = time_msec();
2335         long long int elapsed = now - ofproto->stp_last_tick;
2336         struct stp_port *sp;
2337
2338         if (elapsed > 0) {
2339             stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
2340             ofproto->stp_last_tick = now;
2341         }
2342         while (stp_get_changed_port(ofproto->stp, &sp)) {
2343             struct ofport_dpif *ofport = stp_port_get_aux(sp);
2344
2345             if (ofport) {
2346                 update_stp_port_state(ofport);
2347             }
2348         }
2349
2350         if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
2351             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2352             mac_learning_flush(ofproto->ml);
2353             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2354             mcast_snooping_mdb_flush(ofproto->ms);
2355         }
2356     }
2357 }
2358
2359 static void
2360 stp_wait(struct ofproto_dpif *ofproto)
2361 {
2362     if (ofproto->stp) {
2363         poll_timer_wait(1000);
2364     }
2365 }
2366
2367 /* Configures RSTP on 'ofport_' using the settings defined in 's'.  The
2368  * caller is responsible for assigning RSTP port numbers and ensuring
2369  * there are no duplicates. */
2370 static void
2371 set_rstp_port(struct ofport *ofport_,
2372               const struct ofproto_port_rstp_settings *s)
2373 {
2374     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2375     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2376     struct rstp_port *rp = ofport->rstp_port;
2377
2378     if (!s || !s->enable) {
2379         if (rp) {
2380             rstp_port_unref(rp);
2381             ofport->rstp_port = NULL;
2382             update_rstp_port_state(ofport);
2383         }
2384         return;
2385     }
2386
2387     /* Check if need to add a new port. */
2388     if (!rp) {
2389         rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
2390     }
2391
2392     rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
2393                   s->admin_edge_port, s->auto_edge, s->mcheck, ofport);
2394     update_rstp_port_state(ofport);
2395 }
2396
2397 static void
2398 get_rstp_port_status(struct ofport *ofport_,
2399         struct ofproto_port_rstp_status *s)
2400 {
2401     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2402     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2403     struct rstp_port *rp = ofport->rstp_port;
2404
2405     if (!ofproto->rstp || !rp) {
2406         s->enabled = false;
2407         return;
2408     }
2409
2410     s->enabled = true;
2411     rstp_port_get_status(rp, &s->port_id, &s->state, &s->role, &s->tx_count,
2412                          &s->rx_count, &s->error_count, &s->uptime);
2413 }
2414
2415 \f
2416 static int
2417 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2418            size_t n_qdscp)
2419 {
2420     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2421     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2422
2423     if (ofport->n_qdscp != n_qdscp
2424         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2425                               n_qdscp * sizeof *qdscp))) {
2426         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2427         free(ofport->qdscp);
2428         ofport->qdscp = n_qdscp
2429             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2430             : NULL;
2431         ofport->n_qdscp = n_qdscp;
2432     }
2433
2434     return 0;
2435 }
2436 \f
2437 /* Bundles. */
2438
2439 /* Expires all MAC learning entries associated with 'bundle' and forces its
2440  * ofproto to revalidate every flow.
2441  *
2442  * Normally MAC learning entries are removed only from the ofproto associated
2443  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
2444  * are removed from every ofproto.  When patch ports and SLB bonds are in use
2445  * and a VM migration happens and the gratuitous ARPs are somehow lost, this
2446  * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
2447  * with the host from which it migrated. */
2448 static void
2449 bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
2450 {
2451     struct ofproto_dpif *ofproto = bundle->ofproto;
2452     struct mac_learning *ml = ofproto->ml;
2453     struct mac_entry *mac, *next_mac;
2454
2455     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2456     ovs_rwlock_wrlock(&ml->rwlock);
2457     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2458         if (mac->port.p == bundle) {
2459             if (all_ofprotos) {
2460                 struct ofproto_dpif *o;
2461
2462                 HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2463                     if (o != ofproto) {
2464                         struct mac_entry *e;
2465
2466                         ovs_rwlock_wrlock(&o->ml->rwlock);
2467                         e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
2468                         if (e) {
2469                             mac_learning_expire(o->ml, e);
2470                         }
2471                         ovs_rwlock_unlock(&o->ml->rwlock);
2472                     }
2473                 }
2474             }
2475
2476             mac_learning_expire(ml, mac);
2477         }
2478     }
2479     ovs_rwlock_unlock(&ml->rwlock);
2480 }
2481
2482 static struct ofbundle *
2483 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2484 {
2485     struct ofbundle *bundle;
2486
2487     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2488                              &ofproto->bundles) {
2489         if (bundle->aux == aux) {
2490             return bundle;
2491         }
2492     }
2493     return NULL;
2494 }
2495
2496 static void
2497 bundle_update(struct ofbundle *bundle)
2498 {
2499     struct ofport_dpif *port;
2500
2501     bundle->floodable = true;
2502     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2503         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2504             || port->is_layer3
2505             || !stp_forward_in_state(port->stp_state)) {
2506             bundle->floodable = false;
2507             break;
2508         }
2509     }
2510 }
2511
2512 static void
2513 bundle_del_port(struct ofport_dpif *port)
2514 {
2515     struct ofbundle *bundle = port->bundle;
2516
2517     bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2518
2519     list_remove(&port->bundle_node);
2520     port->bundle = NULL;
2521
2522     if (bundle->lacp) {
2523         lacp_slave_unregister(bundle->lacp, port);
2524     }
2525     if (bundle->bond) {
2526         bond_slave_unregister(bundle->bond, port);
2527     }
2528
2529     bundle_update(bundle);
2530 }
2531
2532 static bool
2533 bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
2534                 struct lacp_slave_settings *lacp)
2535 {
2536     struct ofport_dpif *port;
2537
2538     port = get_ofp_port(bundle->ofproto, ofp_port);
2539     if (!port) {
2540         return false;
2541     }
2542
2543     if (port->bundle != bundle) {
2544         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2545         if (port->bundle) {
2546             bundle_remove(&port->up);
2547         }
2548
2549         port->bundle = bundle;
2550         list_push_back(&bundle->ports, &port->bundle_node);
2551         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2552             || port->is_layer3
2553             || !stp_forward_in_state(port->stp_state)) {
2554             bundle->floodable = false;
2555         }
2556     }
2557     if (lacp) {
2558         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2559         lacp_slave_register(bundle->lacp, port, lacp);
2560     }
2561
2562     return true;
2563 }
2564
2565 static void
2566 bundle_destroy(struct ofbundle *bundle)
2567 {
2568     struct ofproto_dpif *ofproto;
2569     struct ofport_dpif *port, *next_port;
2570
2571     if (!bundle) {
2572         return;
2573     }
2574
2575     ofproto = bundle->ofproto;
2576     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
2577
2578     xlate_txn_start();
2579     xlate_bundle_remove(bundle);
2580     xlate_txn_commit();
2581
2582     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2583         bundle_del_port(port);
2584     }
2585
2586     bundle_flush_macs(bundle, true);
2587     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
2588     free(bundle->name);
2589     free(bundle->trunks);
2590     lacp_unref(bundle->lacp);
2591     bond_unref(bundle->bond);
2592     free(bundle);
2593 }
2594
/* Creates, reconfigures, or destroys the bundle in 'ofproto_' that is
 * registered under the key 'aux'.
 *
 * If 's' is NULL, whatever bundle_lookup() finds for 'aux' is handed to
 * bundle_destroy() and 0 is returned.
 *
 * Otherwise the bundle is looked up (and created with default settings if it
 * does not exist yet) and then brought in line with 's', in this order: name,
 * LACP, set of member ports, VLAN mode, VLAN tag, trunked VLANs, bonding.
 *
 * Returns 0 on success.  Returns EINVAL, after destroying the bundle, if the
 * bundle ends up without any ports. */
static int
bundle_set(struct ofproto *ofproto_, void *aux,
           const struct ofproto_bundle_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    /* Set whenever a VLAN-related setting changes; triggers
     * bundle_flush_macs() at the end. */
    bool need_flush = false;
    struct ofport_dpif *port;
    struct ofbundle *bundle;
    unsigned long *trunks;
    int vlan;
    size_t i;
    bool ok;

    if (!s) {
        bundle_destroy(bundle_lookup(ofproto, aux));
        return 0;
    }

    /* A bundle with more than one slave must come with bond settings, and
     * LACP settings must come together with per-slave LACP settings. */
    ovs_assert(s->n_slaves == 1 || s->bond != NULL);
    ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));

    bundle = bundle_lookup(ofproto, aux);
    if (!bundle) {
        /* First time we see 'aux': create an empty bundle with defaults and
         * register it in the ofproto's bundle map and with the mbridge. */
        bundle = xmalloc(sizeof *bundle);

        bundle->ofproto = ofproto;
        hmap_insert(&ofproto->bundles, &bundle->hmap_node,
                    hash_pointer(aux, 0));
        bundle->aux = aux;
        bundle->name = NULL;

        list_init(&bundle->ports);
        bundle->vlan_mode = PORT_VLAN_TRUNK;
        bundle->vlan = -1;
        bundle->trunks = NULL;
        bundle->use_priority_tags = s->use_priority_tags;
        bundle->lacp = NULL;
        bundle->bond = NULL;

        bundle->floodable = true;
        mbridge_register_bundle(ofproto->mbridge, bundle);
    }

    /* Replace the stored name only if it is missing or differs. */
    if (!bundle->name || strcmp(s->name, bundle->name)) {
        free(bundle->name);
        bundle->name = xstrdup(s->name);
    }

    /* LACP. */
    if (s->lacp) {
        ofproto->lacp_enabled = true;
        if (!bundle->lacp) {
            /* Newly enabling LACP on this bundle requires revalidation. */
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
            bundle->lacp = lacp_create();
        }
        lacp_configure(bundle->lacp, s->lacp);
    } else {
        lacp_unref(bundle->lacp);
        bundle->lacp = NULL;
    }

    /* Update set of ports. */
    ok = true;
    for (i = 0; i < s->n_slaves; i++) {
        if (!bundle_add_port(bundle, s->slaves[i],
                             s->lacp ? &s->lacp_slaves[i] : NULL)) {
            ok = false;
        }
    }
    /* If an add failed, or the bundle holds a different number of ports than
     * requested, drop every port that is not listed in 's->slaves'. */
    if (!ok || list_size(&bundle->ports) != s->n_slaves) {
        struct ofport_dpif *next_port;

        LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
            for (i = 0; i < s->n_slaves; i++) {
                if (s->slaves[i] == port->up.ofp_port) {
                    goto found;
                }
            }

            bundle_del_port(port);
        found: ;
        }
    }
    ovs_assert(list_size(&bundle->ports) <= s->n_slaves);

    /* A bundle without ports is not kept around. */
    if (list_is_empty(&bundle->ports)) {
        bundle_destroy(bundle);
        return EINVAL;
    }

    /* Set VLAN tagging mode */
    if (s->vlan_mode != bundle->vlan_mode
        || s->use_priority_tags != bundle->use_priority_tags) {
        bundle->vlan_mode = s->vlan_mode;
        bundle->use_priority_tags = s->use_priority_tags;
        need_flush = true;
    }

    /* Set VLAN tag. */
    /* Trunk mode stores -1; otherwise an in-range 's->vlan' (0...4095) is
     * used as is and anything else becomes 0. */
    vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
            : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
            : 0);
    if (vlan != bundle->vlan) {
        bundle->vlan = vlan;
        need_flush = true;
    }

    /* Get trunked VLANs. */
    /* After this switch, 'trunks' is either NULL, an alias of 's->trunks'
     * (not owned by us), or a freshly allocated bitmap (owned by us). */
    switch (s->vlan_mode) {
    case PORT_VLAN_ACCESS:
        trunks = NULL;
        break;

    case PORT_VLAN_TRUNK:
        trunks = CONST_CAST(unsigned long *, s->trunks);
        break;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        if (vlan != 0 && (!s->trunks
                          || !bitmap_is_set(s->trunks, vlan)
                          || bitmap_is_set(s->trunks, 0))) {
            /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
            if (s->trunks) {
                trunks = bitmap_clone(s->trunks, 4096);
            } else {
                trunks = bitmap_allocate1(4096);
            }
            bitmap_set1(trunks, vlan);
            bitmap_set0(trunks, 0);
        } else {
            trunks = CONST_CAST(unsigned long *, s->trunks);
        }
        break;

    default:
        OVS_NOT_REACHED();
    }
    if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
        free(bundle->trunks);
        if (trunks == s->trunks) {
            /* Caller's bitmap: store a private copy. */
            bundle->trunks = vlan_bitmap_clone(trunks);
        } else {
            /* Our own bitmap: hand ownership to the bundle and clear the
             * local pointer so that it is not freed below. */
            bundle->trunks = trunks;
            trunks = NULL;
        }
        need_flush = true;
    }
    /* Release a private bitmap that was not handed to the bundle. */
    if (trunks != s->trunks) {
        free(trunks);
    }

    /* Bonding. */
    /* NOTE(review): list_is_short() presumably means "at most one element",
     * so this branch is taken for multi-port bundles -- confirm in list.h. */
    if (!list_is_short(&bundle->ports)) {
        bundle->ofproto->has_bonded_bundles = true;
        if (bundle->bond) {
            if (bond_reconfigure(bundle->bond, s->bond)) {
                ofproto->backer->need_revalidate = REV_RECONFIGURE;
            }
        } else {
            bundle->bond = bond_create(s->bond, ofproto);
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
        }

        /* Register every current member port with the bond. */
        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_register(bundle->bond, port,
                                port->up.ofp_port, port->up.netdev);
        }
    } else {
        bond_unref(bundle->bond);
        bundle->bond = NULL;
    }

    /* If we changed something that would affect MAC learning, un-learn
     * everything on this port and force flow revalidation. */
    if (need_flush) {
        bundle_flush_macs(bundle, false);
    }

    return 0;
}
2776
2777 static void
2778 bundle_remove(struct ofport *port_)
2779 {
2780     struct ofport_dpif *port = ofport_dpif_cast(port_);
2781     struct ofbundle *bundle = port->bundle;
2782
2783     if (bundle) {
2784         bundle_del_port(port);
2785         if (list_is_empty(&bundle->ports)) {
2786             bundle_destroy(bundle);
2787         } else if (list_is_short(&bundle->ports)) {
2788             bond_unref(bundle->bond);
2789             bundle->bond = NULL;
2790         }
2791     }
2792 }
2793
2794 static void
2795 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2796 {
2797     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2798     struct ofport_dpif *port = port_;
2799     uint8_t ea[ETH_ADDR_LEN];
2800     int error;
2801
2802     error = netdev_get_etheraddr(port->up.netdev, ea);
2803     if (!error) {
2804         struct ofpbuf packet;
2805         void *packet_pdu;
2806
2807         ofpbuf_init(&packet, 0);
2808         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2809                                  pdu_size);
2810         memcpy(packet_pdu, pdu, pdu_size);
2811
2812         ofproto_dpif_send_packet(port, &packet);
2813         ofpbuf_uninit(&packet);
2814     } else {
2815         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2816                     "%s (%s)", port->bundle->name,
2817                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2818     }
2819 }
2820
2821 static void
2822 bundle_send_learning_packets(struct ofbundle *bundle)
2823 {
2824     struct ofproto_dpif *ofproto = bundle->ofproto;
2825     struct ofpbuf *learning_packet;
2826     int error, n_packets, n_errors;
2827     struct mac_entry *e;
2828     struct list packets;
2829
2830     list_init(&packets);
2831     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
2832     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
2833         if (e->port.p != bundle) {
2834             void *port_void;
2835
2836             learning_packet = bond_compose_learning_packet(bundle->bond,
2837                                                            e->mac, e->vlan,
2838                                                            &port_void);
2839             /* Temporarily use 'frame' as a private pointer (see below). */
2840             ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
2841             learning_packet->frame = port_void;
2842             list_push_back(&packets, &learning_packet->list_node);
2843         }
2844     }
2845     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2846
2847     error = n_packets = n_errors = 0;
2848     LIST_FOR_EACH (learning_packet, list_node, &packets) {
2849         int ret;
2850         void *port_void = learning_packet->frame;
2851
2852         /* Restore 'frame'. */
2853         learning_packet->frame = ofpbuf_data(learning_packet);
2854         ret = ofproto_dpif_send_packet(port_void, learning_packet);
2855         if (ret) {
2856             error = ret;
2857             n_errors++;
2858         }
2859         n_packets++;
2860     }
2861     ofpbuf_list_delete(&packets);
2862
2863     if (n_errors) {
2864         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2865         VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
2866                      "packets, last error was: %s",
2867                      bundle->name, n_errors, n_packets, ovs_strerror(error));
2868     } else {
2869         VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2870                  bundle->name, n_packets);
2871     }
2872 }
2873
2874 static void
2875 bundle_run(struct ofbundle *bundle)
2876 {
2877     if (bundle->lacp) {
2878         lacp_run(bundle->lacp, send_pdu_cb);
2879     }
2880     if (bundle->bond) {
2881         struct ofport_dpif *port;
2882
2883         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2884             bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
2885         }
2886
2887         if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
2888             bundle->ofproto->backer->need_revalidate = REV_BOND;
2889         }
2890
2891         if (bond_should_send_learning_packets(bundle->bond)) {
2892             bundle_send_learning_packets(bundle);
2893         }
2894     }
2895 }
2896
2897 static void
2898 bundle_wait(struct ofbundle *bundle)
2899 {
2900     if (bundle->lacp) {
2901         lacp_wait(bundle->lacp);
2902     }
2903     if (bundle->bond) {
2904         bond_wait(bundle->bond);
2905     }
2906 }
2907 \f
2908 /* Mirrors. */
2909
2910 static int
2911 mirror_set__(struct ofproto *ofproto_, void *aux,
2912              const struct ofproto_mirror_settings *s)
2913 {
2914     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2915     struct ofbundle **srcs, **dsts;
2916     int error;
2917     size_t i;
2918
2919     if (!s) {
2920         mirror_destroy(ofproto->mbridge, aux);
2921         return 0;
2922     }
2923
2924     srcs = xmalloc(s->n_srcs * sizeof *srcs);
2925     dsts = xmalloc(s->n_dsts * sizeof *dsts);
2926
2927     for (i = 0; i < s->n_srcs; i++) {
2928         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
2929     }
2930
2931     for (i = 0; i < s->n_dsts; i++) {
2932         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
2933     }
2934
2935     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
2936                        s->n_dsts, s->src_vlans,
2937                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
2938     free(srcs);
2939     free(dsts);
2940     return error;
2941 }
2942
2943 static int
2944 mirror_get_stats__(struct ofproto *ofproto, void *aux,
2945                    uint64_t *packets, uint64_t *bytes)
2946 {
2947     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
2948                             bytes);
2949 }
2950
2951 static int
2952 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
2953 {
2954     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2955     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2956     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
2957         mac_learning_flush(ofproto->ml);
2958     }
2959     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2960     return 0;
2961 }
2962
2963 static bool
2964 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
2965 {
2966     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2967     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
2968     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
2969 }
2970
2971 static void
2972 forward_bpdu_changed(struct ofproto *ofproto_)
2973 {
2974     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2975     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2976 }
2977
2978 static void
2979 set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
2980                      size_t max_entries)
2981 {
2982     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2983     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2984     mac_learning_set_idle_time(ofproto->ml, idle_time);
2985     mac_learning_set_max_entries(ofproto->ml, max_entries);
2986     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2987 }
2988
2989 /* Configures multicast snooping on 'ofport' using the settings
2990  * defined in 's'. */
2991 static int
2992 set_mcast_snooping(struct ofproto *ofproto_,
2993                    const struct ofproto_mcast_snooping_settings *s)
2994 {
2995     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2996
2997     /* Only revalidate flows if the configuration changed. */
2998     if (!s != !ofproto->ms) {
2999         ofproto->backer->need_revalidate = REV_RECONFIGURE;
3000     }
3001
3002     if (s) {
3003         if (!ofproto->ms) {
3004             ofproto->ms = mcast_snooping_create();
3005         }
3006
3007         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3008         mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
3009         mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
3010         if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
3011             ofproto->backer->need_revalidate = REV_RECONFIGURE;
3012         }
3013         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3014     } else {
3015         mcast_snooping_unref(ofproto->ms);
3016         ofproto->ms = NULL;
3017     }
3018
3019     return 0;
3020 }
3021
3022 /* Configures multicast snooping port's flood setting on 'ofproto'. */
3023 static int
3024 set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood)
3025 {
3026     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3027     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3028
3029     if (ofproto->ms) {
3030         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3031         mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle,
3032                                       flood);
3033         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3034     }
3035     return 0;
3036 }
3037
3038 \f
3039 /* Ports. */
3040
3041 static struct ofport_dpif *
3042 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
3043 {
3044     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
3045     return ofport ? ofport_dpif_cast(ofport) : NULL;
3046 }
3047
3048 static void
3049 ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
3050                             struct ofproto_port *ofproto_port,
3051                             struct dpif_port *dpif_port)
3052 {
3053     ofproto_port->name = dpif_port->name;
3054     ofproto_port->type = dpif_port->type;
3055     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
3056 }
3057
3058 static void
3059 ofport_update_peer(struct ofport_dpif *ofport)
3060 {
3061     const struct ofproto_dpif *ofproto;
3062     struct dpif_backer *backer;
3063     char *peer_name;
3064
3065     if (!netdev_vport_is_patch(ofport->up.netdev)) {
3066         return;
3067     }
3068
3069     backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
3070     backer->need_revalidate = REV_RECONFIGURE;
3071
3072     if (ofport->peer) {
3073         ofport->peer->peer = NULL;
3074         ofport->peer = NULL;
3075     }
3076
3077     peer_name = netdev_vport_patch_peer(ofport->up.netdev);
3078     if (!peer_name) {
3079         return;
3080     }
3081
3082     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
3083         struct ofport *peer_ofport;
3084         struct ofport_dpif *peer;
3085         char *peer_peer;
3086
3087         if (ofproto->backer != backer) {
3088             continue;
3089         }
3090
3091         peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
3092         if (!peer_ofport) {
3093             continue;
3094         }
3095
3096         peer = ofport_dpif_cast(peer_ofport);
3097         peer_peer = netdev_vport_patch_peer(peer->up.netdev);
3098         if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
3099                                  peer_peer)) {
3100             ofport->peer = peer;
3101             ofport->peer->peer = ofport;
3102         }
3103         free(peer_peer);
3104
3105         break;
3106     }
3107     free(peer_name);
3108 }
3109
3110 static void
3111 port_run(struct ofport_dpif *ofport)
3112 {
3113     long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
3114     bool carrier_changed = carrier_seq != ofport->carrier_seq;
3115     bool enable = netdev_get_carrier(ofport->up.netdev);
3116     bool cfm_enable = false;
3117     bool bfd_enable = false;
3118
3119     ofport->carrier_seq = carrier_seq;
3120
3121     if (ofport->cfm) {
3122         int cfm_opup = cfm_get_opup(ofport->cfm);
3123
3124         cfm_enable = !cfm_get_fault(ofport->cfm);
3125
3126         if (cfm_opup >= 0) {
3127             cfm_enable = cfm_enable && cfm_opup;
3128         }
3129     }
3130
3131     if (ofport->bfd) {
3132         bfd_enable = bfd_forwarding(ofport->bfd);
3133     }
3134
3135     if (ofport->bfd || ofport->cfm) {
3136         enable = enable && (cfm_enable || bfd_enable);
3137     }
3138
3139     if (ofport->bundle) {
3140         enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
3141         if (carrier_changed) {
3142             lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
3143         }
3144     }
3145
3146     if (ofport->may_enable != enable) {
3147         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3148
3149         ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
3150
3151         if (ofport->rstp_port) {
3152             rstp_port_set_mac_operational(ofport->rstp_port, enable);
3153         }
3154     }
3155
3156     ofport->may_enable = enable;
3157 }
3158
3159 static int
3160 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
3161                    struct ofproto_port *ofproto_port)
3162 {
3163     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3164     struct dpif_port dpif_port;
3165     int error;
3166
3167     if (sset_contains(&ofproto->ghost_ports, devname)) {
3168         const char *type = netdev_get_type_from_name(devname);
3169
3170         /* We may be called before ofproto->up.port_by_name is populated with
3171          * the appropriate ofport.  For this reason, we must get the name and
3172          * type from the netdev layer directly. */
3173         if (type) {
3174             const struct ofport *ofport;
3175
3176             ofport = shash_find_data(&ofproto->up.port_by_name, devname);
3177             ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
3178             ofproto_port->name = xstrdup(devname);
3179             ofproto_port->type = xstrdup(type);
3180             return 0;
3181         }
3182         return ENODEV;
3183     }
3184
3185     if (!sset_contains(&ofproto->ports, devname)) {
3186         return ENODEV;
3187     }
3188     error = dpif_port_query_by_name(ofproto->backer->dpif,
3189                                     devname, &dpif_port);
3190     if (!error) {
3191         ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
3192     }
3193     return error;
3194 }
3195
3196 static int
3197 port_add(struct ofproto *ofproto_, struct netdev *netdev)
3198 {
3199     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3200     const char *devname = netdev_get_name(netdev);
3201     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
3202     const char *dp_port_name;
3203
3204     if (netdev_vport_is_patch(netdev)) {
3205         sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
3206         return 0;
3207     }
3208
3209     dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
3210     if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
3211         odp_port_t port_no = ODPP_NONE;
3212         int error;
3213
3214         error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
3215         if (error) {
3216             return error;
3217         }
3218         if (netdev_get_tunnel_config(netdev)) {
3219             simap_put(&ofproto->backer->tnl_backers,
3220                       dp_port_name, odp_to_u32(port_no));
3221         }
3222     }
3223
3224     if (netdev_get_tunnel_config(netdev)) {
3225         sset_add(&ofproto->ghost_ports, devname);
3226     } else {
3227         sset_add(&ofproto->ports, devname);
3228     }
3229     return 0;
3230 }
3231
3232 static int
3233 port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
3234 {
3235     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3236     struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
3237     int error = 0;
3238
3239     if (!ofport) {
3240         return 0;
3241     }
3242
3243     sset_find_and_delete(&ofproto->ghost_ports,
3244                          netdev_get_name(ofport->up.netdev));
3245     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3246     if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
3247         error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
3248         if (!error) {
3249             /* The caller is going to close ofport->up.netdev.  If this is a
3250              * bonded port, then the bond is using that netdev, so remove it
3251              * from the bond.  The client will need to reconfigure everything
3252              * after deleting ports, so then the slave will get re-added. */
3253             bundle_remove(&ofport->up);
3254         }
3255     }
3256     return error;
3257 }
3258
3259 static int
3260 port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
3261 {
3262     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3263     int error;
3264
3265     error = netdev_get_stats(ofport->up.netdev, stats);
3266
3267     if (!error && ofport_->ofp_port == OFPP_LOCAL) {
3268         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3269
3270         ovs_mutex_lock(&ofproto->stats_mutex);
3271         /* ofproto->stats.tx_packets represents packets that we created
3272          * internally and sent to some port (e.g. packets sent with
3273          * ofproto_dpif_send_packet()).  Account for them as if they had
3274          * come from OFPP_LOCAL and got forwarded. */
3275
3276         if (stats->rx_packets != UINT64_MAX) {
3277             stats->rx_packets += ofproto->stats.tx_packets;
3278         }
3279
3280         if (stats->rx_bytes != UINT64_MAX) {
3281             stats->rx_bytes += ofproto->stats.tx_bytes;
3282         }
3283
3284         /* ofproto->stats.rx_packets represents packets that were received on
3285          * some port and we processed internally and dropped (e.g. STP).
3286          * Account for them as if they had been forwarded to OFPP_LOCAL. */
3287
3288         if (stats->tx_packets != UINT64_MAX) {
3289             stats->tx_packets += ofproto->stats.rx_packets;
3290         }
3291
3292         if (stats->tx_bytes != UINT64_MAX) {
3293             stats->tx_bytes += ofproto->stats.rx_bytes;
3294         }
3295         ovs_mutex_unlock(&ofproto->stats_mutex);
3296     }
3297
3298     return error;
3299 }
3300
/* Iteration state for port_dump_start(), port_dump_next() and
 * port_dump_done(). */
struct port_dump_state {
    /* Position within the set currently being walked, as maintained by
     * sset_at_position(). */
    uint32_t bucket;
    uint32_t offset;
    /* False while walking the ofproto's 'ports' set, true once the dump has
     * moved on to 'ghost_ports'. */
    bool ghost;

    /* The port most recently returned by port_dump_next(). */
    struct ofproto_port port;
    /* True if 'port' holds data that still has to be released with
     * ofproto_port_destroy(). */
    bool has_port;
};
3309
3310 static int
3311 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3312 {
3313     *statep = xzalloc(sizeof(struct port_dump_state));
3314     return 0;
3315 }
3316
3317 static int
3318 port_dump_next(const struct ofproto *ofproto_, void *state_,
3319                struct ofproto_port *port)
3320 {
3321     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3322     struct port_dump_state *state = state_;
3323     const struct sset *sset;
3324     struct sset_node *node;
3325
3326     if (state->has_port) {
3327         ofproto_port_destroy(&state->port);
3328         state->has_port = false;
3329     }
3330     sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
3331     while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
3332         int error;
3333
3334         error = port_query_by_name(ofproto_, node->name, &state->port);
3335         if (!error) {
3336             *port = state->port;
3337             state->has_port = true;
3338             return 0;
3339         } else if (error != ENODEV) {
3340             return error;
3341         }
3342     }
3343
3344     if (!state->ghost) {
3345         state->ghost = true;
3346         state->bucket = 0;
3347         state->offset = 0;
3348         return port_dump_next(ofproto_, state_, port);
3349     }
3350
3351     return EOF;
3352 }
3353
3354 static int
3355 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
3356 {
3357     struct port_dump_state *state = state_;
3358
3359     if (state->has_port) {
3360         ofproto_port_destroy(&state->port);
3361     }
3362     free(state);
3363     return 0;
3364 }
3365
3366 static int
3367 port_poll(const struct ofproto *ofproto_, char **devnamep)
3368 {
3369     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3370
3371     if (ofproto->port_poll_errno) {
3372         int error = ofproto->port_poll_errno;
3373         ofproto->port_poll_errno = 0;
3374         return error;
3375     }
3376
3377     if (sset_is_empty(&ofproto->port_poll_set)) {
3378         return EAGAIN;
3379     }
3380
3381     *devnamep = sset_pop(&ofproto->port_poll_set);
3382     return 0;
3383 }
3384
3385 static void
3386 port_poll_wait(const struct ofproto *ofproto_)
3387 {
3388     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3389     dpif_port_poll_wait(ofproto->backer->dpif);
3390 }
3391
3392 static int
3393 port_is_lacp_current(const struct ofport *ofport_)
3394 {
3395     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3396     return (ofport->bundle && ofport->bundle->lacp
3397             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3398             : -1);
3399 }
3400 \f
3401 /* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
3402  * then delete it entirely. */
3403 static void
3404 rule_expire(struct rule_dpif *rule)
3405     OVS_REQUIRES(ofproto_mutex)
3406 {
3407     uint16_t hard_timeout, idle_timeout;
3408     long long int now = time_msec();
3409     int reason = -1;
3410
3411     hard_timeout = rule->up.hard_timeout;
3412     idle_timeout = rule->up.idle_timeout;
3413
3414     /* Has 'rule' expired? */
3415     if (hard_timeout) {
3416         long long int modified;
3417
3418         ovs_mutex_lock(&rule->up.mutex);
3419         modified = rule->up.modified;
3420         ovs_mutex_unlock(&rule->up.mutex);
3421
3422         if (now > modified + hard_timeout * 1000) {
3423             reason = OFPRR_HARD_TIMEOUT;
3424         }
3425     }
3426
3427     if (reason < 0 && idle_timeout) {
3428         long long int used;
3429
3430         ovs_mutex_lock(&rule->stats_mutex);
3431         used = rule->stats.used;
3432         ovs_mutex_unlock(&rule->stats_mutex);
3433
3434         if (now > used + idle_timeout * 1000) {
3435             reason = OFPRR_IDLE_TIMEOUT;
3436         }
3437     }
3438
3439     if (reason >= 0) {
3440         COVERAGE_INC(ofproto_dpif_expired);
3441         ofproto_rule_expire(&rule->up, reason);
3442     }
3443 }
3444
3445 /* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
3446  * 'flow' must reflect the data in 'packet'. */
3447 int
3448 ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
3449                              const struct flow *flow,
3450                              struct rule_dpif *rule,
3451                              const struct ofpact *ofpacts, size_t ofpacts_len,
3452                              struct ofpbuf *packet)
3453 {
3454     struct dpif_flow_stats stats;
3455     struct xlate_out xout;
3456     struct xlate_in xin;
3457     ofp_port_t in_port;
3458     struct dpif_execute execute;
3459     int error;
3460
3461     ovs_assert((rule != NULL) != (ofpacts != NULL));
3462
3463     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
3464
3465     if (rule) {
3466         rule_dpif_credit_stats(rule, &stats);
3467     }
3468
3469     xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
3470                   stats.tcp_flags, packet);
3471     xin.ofpacts = ofpacts;
3472     xin.ofpacts_len = ofpacts_len;
3473     xin.resubmit_stats = &stats;
3474     xlate_actions(&xin, &xout);
3475
3476     execute.actions = ofpbuf_data(xout.odp_actions);
3477     execute.actions_len = ofpbuf_size(xout.odp_actions);
3478     execute.packet = packet;
3479     execute.md = pkt_metadata_from_flow(flow);
3480     execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
3481
3482     /* Fix up in_port. */
3483     in_port = flow->in_port.ofp_port;
3484     if (in_port == OFPP_NONE) {
3485         in_port = OFPP_LOCAL;
3486     }
3487     execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
3488
3489     error = dpif_execute(ofproto->backer->dpif, &execute);
3490
3491     xlate_out_uninit(&xout);
3492
3493     return error;
3494 }
3495
3496 void
3497 rule_dpif_credit_stats(struct rule_dpif *rule,
3498                        const struct dpif_flow_stats *stats)
3499 {
3500     ovs_mutex_lock(&rule->stats_mutex);
3501     rule->stats.n_packets += stats->n_packets;
3502     rule->stats.n_bytes += stats->n_bytes;
3503     rule->stats.used = MAX(rule->stats.used, stats->used);
3504     ovs_mutex_unlock(&rule->stats_mutex);
3505 }
3506
3507 ovs_be64
3508 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
3509     OVS_REQUIRES(rule->up.mutex)
3510 {
3511     return rule->up.flow_cookie;
3512 }
3513
3514 void
3515 rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
3516                      uint16_t hard_timeout)
3517 {
3518     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
3519 }
3520
3521 /* Returns 'rule''s actions.  The caller owns a reference on the returned
3522  * actions and must eventually release it (with rule_actions_unref()) to avoid
3523  * a memory leak. */
3524 const struct rule_actions *
3525 rule_dpif_get_actions(const struct rule_dpif *rule)
3526 {
3527     return rule_get_actions(&rule->up);
3528 }
3529
/* Sets 'rule''s recirculation id to 'id'.
 *
 * The rule must not have a recirculation id yet (asserted below).  The
 * caller must hold the rule's mutex. */
static void
rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
    OVS_REQUIRES(rule->up.mutex)
{
    /* An id may only be assigned once. */
    ovs_assert(!rule->recirc_id);
    rule->recirc_id = id;
}
3538
3539 /* Returns 'rule''s recirculation id. */
3540 uint32_t
3541 rule_dpif_get_recirc_id(struct rule_dpif *rule)
3542     OVS_REQUIRES(rule->up.mutex)
3543 {
3544     if (!rule->recirc_id) {
3545         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3546
3547         rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
3548     }
3549     return rule->recirc_id;
3550 }
3551
3552 /* Sets 'rule''s recirculation id. */
3553 void
3554 rule_set_recirc_id(struct rule *rule_, uint32_t id)
3555 {
3556     struct rule_dpif *rule = rule_dpif_cast(rule_);
3557
3558     ovs_mutex_lock(&rule->up.mutex);
3559     rule_dpif_set_recirc_id(rule, id);
3560     ovs_mutex_unlock(&rule->up.mutex);
3561 }
3562
3563 /* Lookup 'flow' in table 0 of 'ofproto''s classifier.
3564  * If 'wc' is non-null, sets the fields that were relevant as part of
3565  * the lookup. Returns the table_id where a match or miss occurred.
3566  *
3567  * The return value will be zero unless there was a miss and
3568  * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
3569  * where misses occur.
3570  *
3571  * The rule is returned in '*rule', which is valid at least until the next
3572  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3573  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3574  * on it before this returns. */
3575 uint8_t
3576 rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
3577                  struct flow_wildcards *wc, struct rule_dpif **rule,
3578                  bool take_ref, const struct dpif_flow_stats *stats)
3579 {
3580     enum rule_dpif_lookup_verdict verdict;
3581     enum ofputil_port_config config = 0;
3582     uint8_t table_id;
3583
3584     if (ofproto_dpif_get_enable_recirc(ofproto)) {
3585         /* Always exactly match recirc_id since datapath supports
3586          * recirculation.  */
3587         if (wc) {
3588             wc->masks.recirc_id = UINT32_MAX;
3589         }
3590
3591         /* Start looking up from internal table for post recirculation flows
3592          * or packets. We can also simply send all, including normal flows
3593          * or packets to the internal table. They will not match any post
3594          * recirculation rules except the 'catch all' rule that resubmit
3595          * them to table 0.
3596          *
3597          * As an optimization, we send normal flows and packets to table 0
3598          * directly, saving one table lookup.  */
3599         table_id = flow->recirc_id ? TBL_INTERNAL : 0;
3600     } else {
3601         table_id = 0;
3602     }
3603
3604     verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true,
3605                                           &table_id, rule, take_ref, stats);
3606
3607     switch (verdict) {
3608     case RULE_DPIF_LOOKUP_VERDICT_MATCH:
3609         return table_id;
3610     case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: {
3611         struct ofport_dpif *port;
3612
3613         port = get_ofp_port(ofproto, flow->in_port.ofp_port);
3614         if (!port) {
3615             VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
3616                          flow->in_port.ofp_port);
3617         }
3618         config = port ? port->up.pp.config : 0;
3619         break;
3620     }
3621     case RULE_DPIF_LOOKUP_VERDICT_DROP:
3622         config = OFPUTIL_PC_NO_PACKET_IN;
3623         break;
3624     case RULE_DPIF_LOOKUP_VERDICT_DEFAULT:
3625         if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
3626             config = OFPUTIL_PC_NO_PACKET_IN;
3627         }
3628         break;
3629     default:
3630         OVS_NOT_REACHED();
3631     }
3632
3633     choose_miss_rule(config, ofproto->miss_rule,
3634                      ofproto->no_packet_in_rule, rule, take_ref);
3635     return table_id;
3636 }
3637
3638 /* The returned rule is valid at least until the next RCU quiescent period.
3639  * If the '*rule' needs to stay around longer, a non-zero 'take_ref' must be
3640  * passed in to cause a reference to be taken on it before this returns. */
3641 static struct rule_dpif *
3642 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
3643                           const struct flow *flow, struct flow_wildcards *wc,
3644                           bool take_ref)
3645 {
3646     struct classifier *cls = &ofproto->up.tables[table_id].cls;
3647     const struct cls_rule *cls_rule;
3648     struct rule_dpif *rule;
3649     struct flow ofpc_normal_flow;
3650
3651     if (ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
3652         /* We always unwildcard dl_type and nw_frag (for IP), so they
3653          * need not be unwildcarded here. */
3654
3655         if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
3656             if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
3657                 /* We must pretend that transport ports are unavailable. */
3658                 ofpc_normal_flow = *flow;
3659                 ofpc_normal_flow.tp_src = htons(0);
3660                 ofpc_normal_flow.tp_dst = htons(0);
3661                 flow = &ofpc_normal_flow;
3662             } else {
3663                 /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
3664                  * Use the drop_frags_rule (which cannot disappear). */
3665                 cls_rule = &ofproto->drop_frags_rule->up.cr;
3666                 rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3667                 if (take_ref) {
3668                     rule_dpif_ref(rule);
3669                 }
3670                 return rule;
3671             }
3672         }
3673     }
3674
3675     do {
3676         cls_rule = classifier_lookup(cls, flow, wc);
3677
3678         rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3679
3680         /* Try again if the rule was released before we get the reference. */
3681     } while (rule && take_ref && !rule_dpif_try_ref(rule));
3682
3683     return rule;
3684 }
3685
/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
 * Stores the rule that was found in '*rule', or NULL if none was found.
 * Updates 'wc', if nonnull, to reflect the fields that were used during the
 * lookup.
 *
 * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
 * if none is found then the table miss configuration for that table is
 * honored, which can result in additional lookups in other OpenFlow tables.
 * In this case the function updates '*table_id' to reflect the final OpenFlow
 * table that was searched.
 *
 * If 'honor_table_miss' is false, then only one table lookup occurs, in
 * '*table_id'.
 *
 * If 'stats' is nonnull, then for every table searched 'stats->n_packets' is
 * added to that table's 'n_matched' counter on a hit or to its 'n_missed'
 * counter on a miss.
 *
 * Returns:
 *
 *    - RULE_DPIF_LOOKUP_VERDICT_MATCH if a rule (in '*rule') was found.
 *
 *    - RULE_DPIF_LOOKUP_VERDICT_CONTROLLER if no rule was found and either:
 *      + 'honor_table_miss' is false
 *      + a table miss configuration specified that the packet should be
 *        sent to the controller in this case.
 *      + no table remained to be searched.
 *
 *    - RULE_DPIF_LOOKUP_VERDICT_DROP if no rule was found, 'honor_table_miss'
 *      is true and a table miss configuration specified that the packet
 *      should be dropped in this case.
 *
 *    - RULE_DPIF_LOOKUP_VERDICT_DEFAULT if no rule was found,
 *      'honor_table_miss' is true and a table miss configuration has
 *      not been specified in this case.
 *
 * The rule is returned in '*rule', which is valid at least until the next
 * RCU quiescent period.  If the '*rule' needs to stay around longer,
 * a non-zero 'take_ref' must be passed in to cause a reference to be taken
 * on it before this returns. */
enum rule_dpif_lookup_verdict
rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
                            const struct flow *flow,
                            struct flow_wildcards *wc,
                            bool honor_table_miss,
                            uint8_t *table_id, struct rule_dpif **rule,
                            bool take_ref, const struct dpif_flow_stats *stats)
{
    uint8_t next_id;

    /* Advance one table at a time.  The extra increment skips TBL_INTERNAL
     * whenever stepping forward would land on it, so that table is only
     * searched when it is the starting table. */
    for (next_id = *table_id;
         next_id < ofproto->up.n_tables;
         next_id++, next_id += (next_id == TBL_INTERNAL))
    {
        *table_id = next_id;
        *rule = rule_dpif_lookup_in_table(ofproto, *table_id, flow, wc,
                                          take_ref);
        if (stats) {
            struct oftable *tbl = &ofproto->up.tables[next_id];
            unsigned long orig;

            /* Credit the packets to this table's hit or miss counter.  The
             * previous counter value returned in 'orig' is not used. */
            atomic_add_relaxed(*rule ? &tbl->n_matched : &tbl->n_missed,
                               stats->n_packets, &orig);
        }
        if (*rule) {
            return RULE_DPIF_LOOKUP_VERDICT_MATCH;
        } else if (!honor_table_miss) {
            return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
        } else {
            switch (ofproto_table_get_miss_config(&ofproto->up, *table_id)) {
            case OFPUTIL_TABLE_MISS_CONTINUE:
                /* Leave the switch and try the next table. */
                break;

            case OFPUTIL_TABLE_MISS_CONTROLLER:
                return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;

            case OFPUTIL_TABLE_MISS_DROP:
                return RULE_DPIF_LOOKUP_VERDICT_DROP;

            case OFPUTIL_TABLE_MISS_DEFAULT:
                return RULE_DPIF_LOOKUP_VERDICT_DEFAULT;
            }
        }
    }

    /* Ran out of tables without finding a match. */
    return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER;
}
3768
3769 /* Given a port configuration (specified as zero if there's no port), chooses
3770  * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
3771  * flow table miss.
3772  *
3773  * The rule is returned in '*rule', which is valid at least until the next
3774  * RCU quiescent period.  If the '*rule' needs to stay around longer,
3775  * a reference must be taken on it (rule_dpif_ref()).
3776  */
3777 void
3778 choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
3779                  struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule,
3780                  bool take_ref)
3781 {
3782     *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
3783     if (take_ref) {
3784         rule_dpif_ref(*rule);
3785     }
3786 }
3787
3788 static void
3789 complete_operation(struct rule_dpif *rule)
3790     OVS_REQUIRES(ofproto_mutex)
3791 {
3792     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3793
3794     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3795 }
3796
3797 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
3798 {
3799     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
3800 }
3801
3802 static struct rule *
3803 rule_alloc(void)
3804 {
3805     struct rule_dpif *rule = xmalloc(sizeof *rule);
3806     return &rule->up;
3807 }
3808
/* Frees the rule_dpif that contains 'rule_'. */
static void
rule_dealloc(struct rule *rule_)
{
    free(rule_dpif_cast(rule_));
}
3815
3816 static enum ofperr
3817 rule_construct(struct rule *rule_)
3818     OVS_NO_THREAD_SAFETY_ANALYSIS
3819 {
3820     struct rule_dpif *rule = rule_dpif_cast(rule_);
3821     ovs_mutex_init_adaptive(&rule->stats_mutex);
3822     rule->stats.n_packets = 0;
3823     rule->stats.n_bytes = 0;
3824     rule->stats.used = rule->up.modified;
3825     rule->recirc_id = 0;
3826
3827     return 0;
3828 }
3829
3830 static enum ofperr
3831 rule_insert(struct rule *rule_)
3832     OVS_REQUIRES(ofproto_mutex)
3833 {
3834     struct rule_dpif *rule = rule_dpif_cast(rule_);
3835     complete_operation(rule);
3836     return 0;
3837 }
3838
3839 static void
3840 rule_delete(struct rule *rule_)
3841     OVS_REQUIRES(ofproto_mutex)
3842 {
3843     struct rule_dpif *rule = rule_dpif_cast(rule_);
3844     complete_operation(rule);
3845 }
3846
3847 static void
3848 rule_destruct(struct rule *rule_)
3849 {
3850     struct rule_dpif *rule = rule_dpif_cast(rule_);
3851
3852     ovs_mutex_destroy(&rule->stats_mutex);
3853     if (rule->recirc_id) {
3854         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3855
3856         ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
3857     }
3858 }
3859
3860 static void
3861 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
3862                long long int *used)
3863 {
3864     struct rule_dpif *rule = rule_dpif_cast(rule_);
3865
3866     ovs_mutex_lock(&rule->stats_mutex);
3867     *packets = rule->stats.n_packets;
3868     *bytes = rule->stats.n_bytes;
3869     *used = rule->stats.used;
3870     ovs_mutex_unlock(&rule->stats_mutex);
3871 }
3872
3873 static void
3874 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
3875                   struct ofpbuf *packet)
3876 {
3877     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3878
3879     ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
3880 }
3881
3882 static enum ofperr
3883 rule_execute(struct rule *rule, const struct flow *flow,
3884              struct ofpbuf *packet)
3885 {
3886     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
3887     ofpbuf_delete(packet);
3888     return 0;
3889 }
3890
3891 static void
3892 rule_modify_actions(struct rule *rule_, bool reset_counters)
3893     OVS_REQUIRES(ofproto_mutex)
3894 {
3895     struct rule_dpif *rule = rule_dpif_cast(rule_);
3896
3897     if (reset_counters) {
3898         ovs_mutex_lock(&rule->stats_mutex);
3899         rule->stats.n_packets = 0;
3900         rule->stats.n_bytes = 0;
3901         ovs_mutex_unlock(&rule->stats_mutex);
3902     }
3903
3904     complete_operation(rule);
3905 }
3906
3907 static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
3908 {
3909     return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
3910 }
3911
3912 static struct ofgroup *
3913 group_alloc(void)
3914 {
3915     struct group_dpif *group = xzalloc(sizeof *group);
3916     return &group->up;
3917 }
3918
/* Frees the group_dpif that contains 'group_'. */
static void
group_dealloc(struct ofgroup *group_)
{
    free(group_dpif_cast(group_));
}
3925
3926 static void
3927 group_construct_stats(struct group_dpif *group)
3928     OVS_REQUIRES(group->stats_mutex)
3929 {
3930     struct ofputil_bucket *bucket;
3931     const struct list *buckets;
3932
3933     group->packet_count = 0;
3934     group->byte_count = 0;
3935
3936     group_dpif_get_buckets(group, &buckets);
3937     LIST_FOR_EACH (bucket, list_node, buckets) {
3938         bucket->stats.packet_count = 0;
3939         bucket->stats.byte_count = 0;
3940     }
3941 }
3942
3943 void
3944 group_dpif_credit_stats(struct group_dpif *group,
3945                         struct ofputil_bucket *bucket,
3946                         const struct dpif_flow_stats *stats)
3947 {
3948     ovs_mutex_lock(&group->stats_mutex);
3949     group->packet_count += stats->n_packets;
3950     group->byte_count += stats->n_bytes;
3951     if (bucket) {
3952         bucket->stats.packet_count += stats->n_packets;
3953         bucket->stats.byte_count += stats->n_bytes;
3954     } else { /* Credit to all buckets */
3955         const struct list *buckets;
3956
3957         group_dpif_get_buckets(group, &buckets);
3958         LIST_FOR_EACH (bucket, list_node, buckets) {
3959             bucket->stats.packet_count += stats->n_packets;
3960             bucket->stats.byte_count += stats->n_bytes;
3961         }
3962     }
3963     ovs_mutex_unlock(&group->stats_mutex);
3964 }
3965
3966 static enum ofperr
3967 group_construct(struct ofgroup *group_)
3968 {
3969     struct group_dpif *group = group_dpif_cast(group_);
3970     const struct ofputil_bucket *bucket;
3971
3972     /* Prevent group chaining because our locking structure makes it hard to
3973      * implement deadlock-free.  (See xlate_group_resource_check().) */
3974     LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
3975         const struct ofpact *a;
3976
3977         OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
3978             if (a->type == OFPACT_GROUP) {
3979                 return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
3980             }
3981         }
3982     }
3983
3984     ovs_mutex_init_adaptive(&group->stats_mutex);
3985     ovs_mutex_lock(&group->stats_mutex);
3986     group_construct_stats(group);
3987     ovs_mutex_unlock(&group->stats_mutex);
3988     return 0;
3989 }
3990
3991 static void
3992 group_destruct(struct ofgroup *group_)
3993 {
3994     struct group_dpif *group = group_dpif_cast(group_);
3995     ovs_mutex_destroy(&group->stats_mutex);
3996 }
3997
3998 static enum ofperr
3999 group_modify(struct ofgroup *group_)
4000 {
4001     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
4002
4003     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
4004
4005     return 0;
4006 }
4007
4008 static enum ofperr
4009 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
4010 {
4011     struct group_dpif *group = group_dpif_cast(group_);
4012     struct ofputil_bucket *bucket;
4013     const struct list *buckets;
4014     struct bucket_counter *bucket_stats;
4015
4016     ovs_mutex_lock(&group->stats_mutex);
4017     ogs->packet_count = group->packet_count;
4018     ogs->byte_count = group->byte_count;
4019
4020     group_dpif_get_buckets(group, &buckets);
4021     bucket_stats = ogs->bucket_stats;
4022     LIST_FOR_EACH (bucket, list_node, buckets) {
4023         bucket_stats->packet_count = bucket->stats.packet_count;
4024         bucket_stats->byte_count = bucket->stats.byte_count;
4025         bucket_stats++;
4026     }
4027     ovs_mutex_unlock(&group->stats_mutex);
4028
4029     return 0;
4030 }
4031
4032 /* If the group exists, this function increments the groups's reference count.
4033  *
4034  * Make sure to call group_dpif_unref() after no longer needing to maintain
4035  * a reference to the group. */
4036 bool
4037 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
4038                   struct group_dpif **group)
4039 {
4040     struct ofgroup *ofgroup;
4041     bool found;
4042
4043     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
4044     *group = found ?  group_dpif_cast(ofgroup) : NULL;
4045
4046     return found;
4047 }
4048
4049 void
4050 group_dpif_get_buckets(const struct group_dpif *group,
4051                        const struct list **buckets)
4052 {
4053     *buckets = &group->up.buckets;
4054 }
4055
4056 enum ofp11_group_type
4057 group_dpif_get_type(const struct group_dpif *group)
4058 {
4059     return group->up.type;
4060 }
4061 \f
4062 /* Sends 'packet' out 'ofport'.
4063  * May modify 'packet'.
4064  * Returns 0 if successful, otherwise a positive errno value. */
4065 int
4066 ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4067 {
4068     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4069     int error;
4070
4071     error = xlate_send_packet(ofport, packet);
4072
4073     ovs_mutex_lock(&ofproto->stats_mutex);
4074     ofproto->stats.tx_packets++;
4075     ofproto->stats.tx_bytes += ofpbuf_size(packet);
4076     ovs_mutex_unlock(&ofproto->stats_mutex);
4077     return error;
4078 }
4079 \f
4080 static bool
4081 set_frag_handling(struct ofproto *ofproto_,
4082                   enum ofp_config_flags frag_handling)
4083 {
4084     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4085     if (frag_handling != OFPC_FRAG_REASM) {
4086         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4087         return true;
4088     } else {
4089         return false;
4090     }
4091 }
4092
4093 static enum ofperr
4094 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
4095            const struct flow *flow,
4096            const struct ofpact *ofpacts, size_t ofpacts_len)
4097 {
4098     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4099
4100     ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
4101                                  ofpacts_len, packet);
4102     return 0;
4103 }
4104 \f
4105 /* NetFlow. */
4106
4107 static int
4108 set_netflow(struct ofproto *ofproto_,
4109             const struct netflow_options *netflow_options)
4110 {
4111     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4112
4113     if (netflow_options) {
4114         if (!ofproto->netflow) {
4115             ofproto->netflow = netflow_create();
4116             ofproto->backer->need_revalidate = REV_RECONFIGURE;
4117         }
4118         return netflow_set_options(ofproto->netflow, netflow_options);
4119     } else if (ofproto->netflow) {
4120         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4121         netflow_unref(ofproto->netflow);
4122         ofproto->netflow = NULL;
4123     }
4124
4125     return 0;
4126 }
4127
4128 static void
4129 get_netflow_ids(const struct ofproto *ofproto_,
4130                 uint8_t *engine_type, uint8_t *engine_id)
4131 {
4132     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4133
4134     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
4135 }
4136 \f
4137 static struct ofproto_dpif *
4138 ofproto_dpif_lookup(const char *name)
4139 {
4140     struct ofproto_dpif *ofproto;
4141
4142     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
4143                              hash_string(name, 0), &all_ofproto_dpifs) {
4144         if (!strcmp(ofproto->up.name, name)) {
4145             return ofproto;
4146         }
4147     }
4148     return NULL;
4149 }
4150
4151 static void
4152 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
4153                           const char *argv[], void *aux OVS_UNUSED)
4154 {
4155     struct ofproto_dpif *ofproto;
4156
4157     if (argc > 1) {
4158         ofproto = ofproto_dpif_lookup(argv[1]);
4159         if (!ofproto) {
4160             unixctl_command_reply_error(conn, "no such bridge");
4161             return;
4162         }
4163         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4164         mac_learning_flush(ofproto->ml);
4165         ovs_rwlock_unlock(&ofproto->ml->rwlock);
4166     } else {
4167         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4168             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4169             mac_learning_flush(ofproto->ml);
4170             ovs_rwlock_unlock(&ofproto->ml->rwlock);
4171         }
4172     }
4173
4174     unixctl_command_reply(conn, "table successfully flushed");
4175 }
4176
4177 static void
4178 ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
4179                                      const char *argv[], void *aux OVS_UNUSED)
4180 {
4181     struct ofproto_dpif *ofproto;
4182
4183     if (argc > 1) {
4184         ofproto = ofproto_dpif_lookup(argv[1]);
4185         if (!ofproto) {
4186             unixctl_command_reply_error(conn, "no such bridge");
4187             return;
4188         }
4189
4190         if (!mcast_snooping_enabled(ofproto->ms)) {
4191             unixctl_command_reply_error(conn, "multicast snooping is disabled");
4192             return;
4193         }
4194         mcast_snooping_mdb_flush(ofproto->ms);
4195     } else {
4196         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4197             if (!mcast_snooping_enabled(ofproto->ms)) {
4198                 continue;
4199             }
4200             mcast_snooping_mdb_flush(ofproto->ms);
4201         }
4202     }
4203
4204     unixctl_command_reply(conn, "table successfully flushed");
4205 }
4206
4207 static struct ofport_dpif *
4208 ofbundle_get_a_port(const struct ofbundle *bundle)
4209 {
4210     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
4211                         bundle_node);
4212 }
4213
4214 static void
4215 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4216                          const char *argv[], void *aux OVS_UNUSED)
4217 {
4218     struct ds ds = DS_EMPTY_INITIALIZER;
4219     const struct ofproto_dpif *ofproto;
4220     const struct mac_entry *e;
4221
4222     ofproto = ofproto_dpif_lookup(argv[1]);
4223     if (!ofproto) {
4224         unixctl_command_reply_error(conn, "no such bridge");
4225         return;
4226     }
4227
4228     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
4229     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
4230     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
4231         struct ofbundle *bundle = e->port.p;
4232         char name[OFP_MAX_PORT_NAME_LEN];
4233
4234         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4235                                name, sizeof name);
4236         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
4237                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
4238                       mac_entry_age(ofproto->ml, e));
4239     }
4240     ovs_rwlock_unlock(&ofproto->ml->rwlock);
4241     unixctl_command_reply(conn, ds_cstr(&ds));
4242     ds_destroy(&ds);
4243 }
4244
4245 static void
4246 ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
4247                                     int argc OVS_UNUSED,
4248                                     const char *argv[],
4249                                     void *aux OVS_UNUSED)
4250 {
4251     struct ds ds = DS_EMPTY_INITIALIZER;
4252     const struct ofproto_dpif *ofproto;
4253     const struct ofbundle *bundle;
4254     const struct mcast_group *grp;
4255     struct mcast_group_bundle *b;
4256     struct mcast_mrouter_bundle *mrouter;
4257
4258     ofproto = ofproto_dpif_lookup(argv[1]);
4259     if (!ofproto) {
4260         unixctl_command_reply_error(conn, "no such bridge");
4261         return;
4262     }
4263
4264     if (!mcast_snooping_enabled(ofproto->ms)) {
4265         unixctl_command_reply_error(conn, "multicast snooping is disabled");
4266         return;
4267     }
4268
4269     ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
4270     ovs_rwlock_rdlock(&ofproto->ms->rwlock);
4271     LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
4272         LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
4273             char name[OFP_MAX_PORT_NAME_LEN];
4274
4275             bundle = b->port;
4276             ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4277                                    name, sizeof name);
4278             ds_put_format(&ds, "%5s  %4d  "IP_FMT"         %3d\n",
4279                           name, grp->vlan, IP_ARGS(grp->ip4),
4280                           mcast_bundle_age(ofproto->ms, b));
4281         }
4282     }
4283
4284     /* ports connected to multicast routers */
4285     LIST_FOR_EACH(mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
4286         char name[OFP_MAX_PORT_NAME_LEN];
4287
4288         bundle = mrouter->port;
4289         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4290                                name, sizeof name);
4291             ds_put_format(&ds, "%5s  %4d  querier             %3d\n",
4292                       name, mrouter->vlan,
4293                       mcast_mrouter_age(ofproto->ms, mrouter));
4294     }
4295     ovs_rwlock_unlock(&ofproto->ms->rwlock);
4296     unixctl_command_reply(conn, ds_cstr(&ds));
4297     ds_destroy(&ds);
4298 }
4299
/* State shared by the trace_format_*() helpers and the trace_resubmit() and
 * trace_report() callbacks while a trace is being rendered. */
struct trace_ctx {
    struct xlate_out xout;      /* Translation output; its 'odp_actions' and
                                 * 'wc' are read when formatting. */
    struct xlate_in xin;        /* Translation input.  The callbacks recover
                                 * the enclosing trace_ctx from a pointer to
                                 * this member with CONTAINER_OF. */
    const struct flow *key;     /* Flow that 'xin.flow' is compared against at
                                 * level 0; also used to build the megaflow
                                 * match. */
    struct flow flow;           /* Copy of the flow most recently printed by
                                 * trace_format_flow(). */
    struct flow_wildcards wc;   /* Wildcards combined with 'xout.wc' each time
                                 * a megaflow is formatted. */
    struct ds *result;          /* String that trace output is appended to. */
};
4308
4309 static void
4310 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
4311 {
4312     const struct rule_actions *actions;
4313     ovs_be64 cookie;
4314
4315     ds_put_char_multiple(result, '\t', level);
4316     if (!rule) {
4317         ds_put_cstr(result, "No match\n");
4318         return;
4319     }
4320
4321     ovs_mutex_lock(&rule->up.mutex);
4322     cookie = rule->up.flow_cookie;
4323     ovs_mutex_unlock(&rule->up.mutex);
4324
4325     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
4326                   rule ? rule->up.table_id : 0, ntohll(cookie));
4327     cls_rule_format(&rule->up.cr, result);
4328     ds_put_char(result, '\n');
4329
4330     actions = rule_dpif_get_actions(rule);
4331
4332     ds_put_char_multiple(result, '\t', level);
4333     ds_put_cstr(result, "OpenFlow actions=");
4334     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
4335     ds_put_char(result, '\n');
4336 }
4337
4338 static void
4339 trace_format_flow(struct ds *result, int level, const char *title,
4340                   struct trace_ctx *trace)
4341 {
4342     ds_put_char_multiple(result, '\t', level);
4343     ds_put_format(result, "%s: ", title);
4344     /* Do not report unchanged flows for resubmits. */
4345     if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
4346         || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
4347         ds_put_cstr(result, "unchanged");
4348     } else {
4349         flow_format(result, &trace->xin.flow);
4350         trace->flow = trace->xin.flow;
4351     }
4352     ds_put_char(result, '\n');
4353 }
4354
4355 static void
4356 trace_format_regs(struct ds *result, int level, const char *title,
4357                   struct trace_ctx *trace)
4358 {
4359     size_t i;
4360
4361     ds_put_char_multiple(result, '\t', level);
4362     ds_put_format(result, "%s:", title);
4363     for (i = 0; i < FLOW_N_REGS; i++) {
4364         ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
4365     }
4366     ds_put_char(result, '\n');
4367 }
4368
4369 static void
4370 trace_format_odp(struct ds *result, int level, const char *title,
4371                  struct trace_ctx *trace)
4372 {
4373     struct ofpbuf *odp_actions = trace->xout.odp_actions;
4374
4375     ds_put_char_multiple(result, '\t', level);
4376     ds_put_format(result, "%s: ", title);
4377     format_odp_actions(result, ofpbuf_data(odp_actions),
4378                                ofpbuf_size(odp_actions));
4379     ds_put_char(result, '\n');
4380 }
4381
4382 static void
4383 trace_format_megaflow(struct ds *result, int level, const char *title,
4384                       struct trace_ctx *trace)
4385 {
4386     struct match match;
4387
4388     ds_put_char_multiple(result, '\t', level);
4389     ds_put_format(result, "%s: ", title);
4390     flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
4391     match_init(&match, trace->key, &trace->wc);
4392     match_format(&match, result, OFP_DEFAULT_PRIORITY);
4393     ds_put_char(result, '\n');
4394 }
4395
4396 static void
4397 trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
4398 {
4399     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4400     struct ds *result = trace->result;
4401
4402     ds_put_char(result, '\n');
4403     trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
4404     trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
4405     trace_format_odp(result,  recurse + 1, "Resubmitted  odp", trace);
4406     trace_format_megaflow(result, recurse + 1, "Resubmitted megaflow", trace);
4407     trace_format_rule(result, recurse + 1, rule);
4408 }
4409
4410 static void
4411 trace_report(struct xlate_in *xin, const char *s, int recurse)
4412 {
4413     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4414     struct ds *result = trace->result;
4415
4416     ds_put_char_multiple(result, '\t', recurse);
4417     ds_put_cstr(result, s);
4418     ds_put_char(result, '\n');
4419 }
4420
4421 /* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
4422  * forms are supported:
4423  *
4424  *     - [dpname] odp_flow [-generate | packet]
4425  *     - bridge br_flow [-generate | packet]
4426  *
4427  * On success, initializes '*ofprotop' and 'flow' and returns NULL.  On failure
4428  * returns a nonnull malloced error message. */
4429 static char * WARN_UNUSED_RESULT
4430 parse_flow_and_packet(int argc, const char *argv[],
4431                       struct ofproto_dpif **ofprotop, struct flow *flow,
4432                       struct ofpbuf **packetp)
4433 {
4434     const struct dpif_backer *backer = NULL;
4435     const char *error = NULL;
4436     char *m_err = NULL;
4437     struct simap port_names = SIMAP_INITIALIZER(&port_names);
4438     struct ofpbuf *packet;
4439     struct ofpbuf odp_key;
4440     struct ofpbuf odp_mask;
4441
4442     ofpbuf_init(&odp_key, 0);
4443     ofpbuf_init(&odp_mask, 0);
4444
4445     /* Handle "-generate" or a hex string as the last argument. */
4446     if (!strcmp(argv[argc - 1], "-generate")) {
4447         packet = ofpbuf_new(0);
4448         argc--;
4449     } else {
4450         error = eth_from_hex(argv[argc - 1], &packet);
4451         if (!error) {
4452             argc--;
4453         } else if (argc == 4) {
4454             /* The 3-argument form must end in "-generate' or a hex string. */
4455             goto exit;
4456         }
4457         error = NULL;
4458     }
4459
4460     /* odp_flow can have its in_port specified as a name instead of port no.
4461      * We do not yet know whether a given flow is a odp_flow or a br_flow.
4462      * But, to know whether a flow is odp_flow through odp_flow_from_string(),
4463      * we need to create a simap of name to port no. */
4464     if (argc == 3) {
4465         const char *dp_type;
4466         if (!strncmp(argv[1], "ovs-", 4)) {
4467             dp_type = argv[1] + 4;
4468         } else {
4469             dp_type = argv[1];
4470         }
4471         backer = shash_find_data(&all_dpif_backers, dp_type);
4472     } else if (argc == 2) {
4473         struct shash_node *node;
4474         if (shash_count(&all_dpif_backers) == 1) {
4475             node = shash_first(&all_dpif_backers);
4476             backer = node->data;
4477         }
4478     } else {
4479         error = "Syntax error";
4480         goto exit;
4481     }
4482     if (backer && backer->dpif) {
4483         struct dpif_port dpif_port;
4484         struct dpif_port_dump port_dump;
4485         DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
4486             simap_put(&port_names, dpif_port.name,
4487                       odp_to_u32(dpif_port.port_no));
4488         }
4489     }
4490
4491     /* Parse the flow and determine whether a datapath or
4492      * bridge is specified. If function odp_flow_key_from_string()
4493      * returns 0, the flow is a odp_flow. If function
4494      * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
4495     if (!odp_flow_from_string(argv[argc - 1], &port_names,
4496                               &odp_key, &odp_mask)) {
4497         if (!backer) {
4498             error = "Cannot find the datapath";
4499             goto exit;
4500         }
4501
4502         if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
4503                                  flow) == ODP_FIT_ERROR) {
4504             error = "Failed to parse flow key";
4505             goto exit;
4506         }
4507
4508         *ofprotop = xlate_lookup_ofproto(backer, flow,
4509                                          &flow->in_port.ofp_port);
4510         if (*ofprotop == NULL) {
4511             error = "Invalid datapath flow";
4512             goto exit;
4513         }
4514
4515         vsp_adjust_flow(*ofprotop, flow, NULL);
4516
4517     } else {
4518         char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);
4519
4520         if (err) {
4521             m_err = xasprintf("Bad flow syntax: %s", err);
4522             free(err);
4523             goto exit;
4524         } else {
4525             if (argc != 3) {
4526                 error = "Must specify bridge name";
4527                 goto exit;
4528             }
4529
4530             *ofprotop = ofproto_dpif_lookup(argv[1]);
4531             if (!*ofprotop) {
4532                 error = "Unknown bridge name";
4533                 goto exit;
4534             }
4535         }
4536     }
4537
4538     /* Generate a packet, if requested. */
4539     if (packet) {
4540         if (!ofpbuf_size(packet)) {
4541             flow_compose(packet, flow);
4542         } else {
4543             struct pkt_metadata md = pkt_metadata_from_flow(flow);
4544
4545             /* Use the metadata from the flow and the packet argument
4546              * to reconstruct the flow. */
4547             flow_extract(packet, &md, flow);
4548         }
4549     }
4550
4551 exit:
4552     if (error && !m_err) {
4553         m_err = xstrdup(error);
4554     }
4555     if (m_err) {
4556         ofpbuf_delete(packet);
4557         packet = NULL;
4558     }
4559     *packetp = packet;
4560     ofpbuf_uninit(&odp_key);
4561     ofpbuf_uninit(&odp_mask);
4562     simap_destroy(&port_names);
4563     return m_err;
4564 }
4565
4566 static void
4567 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
4568                       void *aux OVS_UNUSED)
4569 {
4570     struct ofproto_dpif *ofproto;
4571     struct ofpbuf *packet;
4572     char *error;
4573     struct flow flow;
4574
4575     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4576     if (!error) {
4577         struct ds result;
4578
4579         ds_init(&result);
4580         ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
4581         unixctl_command_reply(conn, ds_cstr(&result));
4582         ds_destroy(&result);
4583         ofpbuf_delete(packet);
4584     } else {
4585         unixctl_command_reply_error(conn, error);
4586         free(error);
4587     }
4588 }
4589
4590 static void
4591 ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
4592                               const char *argv[], void *aux OVS_UNUSED)
4593 {
4594     enum ofputil_protocol usable_protocols;
4595     struct ofproto_dpif *ofproto;
4596     bool enforce_consistency;
4597     struct ofpbuf ofpacts;
4598     struct ofpbuf *packet;
4599     struct ds result;
4600     struct flow flow;
4601     uint16_t in_port;
4602
4603     /* Three kinds of error return values! */
4604     enum ofperr retval;
4605     char *error;
4606
4607     packet = NULL;
4608     ds_init(&result);
4609     ofpbuf_init(&ofpacts, 0);
4610
4611     /* Parse actions. */
4612     error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
4613     if (error) {
4614         unixctl_command_reply_error(conn, error);
4615         free(error);
4616         goto exit;
4617     }
4618
4619     /* OpenFlow 1.1 and later suggest that the switch enforces certain forms of
4620      * consistency between the flow and the actions.  With -consistent, we
4621      * enforce consistency even for a flow supported in OpenFlow 1.0. */
4622     if (!strcmp(argv[1], "-consistent")) {
4623         enforce_consistency = true;
4624         argv++;
4625         argc--;
4626     } else {
4627         enforce_consistency = false;
4628     }
4629
4630     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4631     if (error) {
4632         unixctl_command_reply_error(conn, error);
4633         free(error);
4634         goto exit;
4635     }
4636
4637     /* Do the same checks as handle_packet_out() in ofproto.c.
4638      *
4639      * We pass a 'table_id' of 0 to ofproto_check_ofpacts(), which isn't
4640      * strictly correct because these actions aren't in any table, but it's OK
4641      * because it 'table_id' is used only to check goto_table instructions, but
4642      * packet-outs take a list of actions and therefore it can't include
4643      * instructions.
4644      *
4645      * We skip the "meter" check here because meter is an instruction, not an
4646      * action, and thus cannot appear in ofpacts. */
4647     in_port = ofp_to_u16(flow.in_port.ofp_port);
4648     if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
4649         unixctl_command_reply_error(conn, "invalid in_port");
4650         goto exit;
4651     }
4652     if (enforce_consistency) {
4653         retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
4654                                            &flow, u16_to_ofp(ofproto->up.max_ports),
4655                                            0, 0, usable_protocols);
4656     } else {
4657         retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
4658                                u16_to_ofp(ofproto->up.max_ports), 0, 0,
4659                                &usable_protocols);
4660     }
4661
4662     if (retval) {
4663         ds_clear(&result);
4664         ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
4665         unixctl_command_reply_error(conn, ds_cstr(&result));
4666         goto exit;
4667     }
4668
4669     ofproto_trace(ofproto, &flow, packet,
4670                   ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
4671     unixctl_command_reply(conn, ds_cstr(&result));
4672
4673 exit:
4674     ds_destroy(&result);
4675     ofpbuf_delete(packet);
4676     ofpbuf_uninit(&ofpacts);
4677 }
4678
4679 /* Implements a "trace" through 'ofproto''s flow table, appending a textual
4680  * description of the results to 'ds'.
4681  *
4682  * The trace follows a packet with the specified 'flow' through the flow
4683  * table.  'packet' may be nonnull to trace an actual packet, with consequent
4684  * side effects (if it is nonnull then its flow must be 'flow').
4685  *
4686  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
4687  * trace, otherwise the actions are determined by a flow table lookup. */
4688 static void
4689 ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
4690               const struct ofpbuf *packet,
4691               const struct ofpact ofpacts[], size_t ofpacts_len,
4692               struct ds *ds)
4693 {
4694     struct rule_dpif *rule;
4695     struct trace_ctx trace;
4696
4697     ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
4698     ds_put_cstr(ds, "Flow: ");
4699     flow_format(ds, flow);
4700     ds_put_char(ds, '\n');
4701
4702     flow_wildcards_init_catchall(&trace.wc);
4703     if (ofpacts) {
4704         rule = NULL;
4705     } else {
4706         rule_dpif_lookup(ofproto, flow, &trace.wc, &rule, false, NULL);
4707
4708         trace_format_rule(ds, 0, rule);
4709         if (rule == ofproto->miss_rule) {
4710             ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
4711         } else if (rule == ofproto->no_packet_in_rule) {
4712             ds_put_cstr(ds, "\nNo match, packets dropped because "
4713                         "OFPPC_NO_PACKET_IN is set on in_port.\n");
4714         } else if (rule == ofproto->drop_frags_rule) {
4715             ds_put_cstr(ds, "\nPackets dropped because they are IP fragments "
4716                         "and the fragment handling mode is \"drop\".\n");
4717         }
4718     }
4719
4720     if (rule || ofpacts) {
4721         trace.result = ds;
4722         trace.key = flow; /* Original flow key, used for megaflow. */
4723         trace.flow = *flow; /* May be modified by actions. */
4724         xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, rule,
4725                       ntohs(flow->tcp_flags), packet);
4726         if (ofpacts) {
4727             trace.xin.ofpacts = ofpacts;
4728             trace.xin.ofpacts_len = ofpacts_len;
4729         }
4730         trace.xin.resubmit_hook = trace_resubmit;
4731         trace.xin.report_hook = trace_report;
4732
4733         xlate_actions(&trace.xin, &trace.xout);
4734
4735         ds_put_char(ds, '\n');
4736         trace_format_flow(ds, 0, "Final flow", &trace);
4737         trace_format_megaflow(ds, 0, "Megaflow", &trace);
4738
4739         ds_put_cstr(ds, "Datapath actions: ");
4740         format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions),
4741                            ofpbuf_size(trace.xout.odp_actions));
4742
4743         if (trace.xout.slow) {
4744             enum slow_path_reason slow;
4745
4746             ds_put_cstr(ds, "\nThis flow is handled by the userspace "
4747                         "slow path because it:");
4748
4749             slow = trace.xout.slow;
4750             while (slow) {
4751                 enum slow_path_reason bit = rightmost_1bit(slow);
4752
4753                 ds_put_format(ds, "\n\t- %s.",
4754                               slow_path_reason_to_explanation(bit));
4755
4756                 slow &= ~bit;
4757             }
4758         }
4759
4760         xlate_out_uninit(&trace.xout);
4761     }
4762 }
4763
4764 /* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
4765  * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
4766  * to destroy 'ofproto_shash' and free the returned value. */
4767 static const struct shash_node **
4768 get_ofprotos(struct shash *ofproto_shash)
4769 {
4770     const struct ofproto_dpif *ofproto;
4771
4772     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4773         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
4774         shash_add_nocopy(ofproto_shash, name, ofproto);
4775     }
4776
4777     return shash_sort(ofproto_shash);
4778 }
4779
4780 static void
4781 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
4782                               const char *argv[] OVS_UNUSED,
4783                               void *aux OVS_UNUSED)
4784 {
4785     struct ds ds = DS_EMPTY_INITIALIZER;
4786     struct shash ofproto_shash;
4787     const struct shash_node **sorted_ofprotos;
4788     int i;
4789
4790     shash_init(&ofproto_shash);
4791     sorted_ofprotos = get_ofprotos(&ofproto_shash);
4792     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4793         const struct shash_node *node = sorted_ofprotos[i];
4794         ds_put_format(&ds, "%s\n", node->name);
4795     }
4796
4797     shash_destroy(&ofproto_shash);
4798     free(sorted_ofprotos);
4799
4800     unixctl_command_reply(conn, ds_cstr(&ds));
4801     ds_destroy(&ds);
4802 }
4803
4804 static void
4805 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
4806 {
4807     const struct shash_node **ofprotos;
4808     struct dpif_dp_stats dp_stats;
4809     struct shash ofproto_shash;
4810     size_t i;
4811
4812     dpif_get_dp_stats(backer->dpif, &dp_stats);
4813
4814     ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
4815                   dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
4816
4817     shash_init(&ofproto_shash);
4818     ofprotos = get_ofprotos(&ofproto_shash);
4819     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4820         struct ofproto_dpif *ofproto = ofprotos[i]->data;
4821         const struct shash_node **ports;
4822         size_t j;
4823
4824         if (ofproto->backer != backer) {
4825             continue;
4826         }
4827
4828         ds_put_format(ds, "\t%s:\n", ofproto->up.name);
4829
4830         ports = shash_sort(&ofproto->up.port_by_name);
4831         for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
4832             const struct shash_node *node = ports[j];
4833             struct ofport *ofport = node->data;
4834             struct smap config;
4835             odp_port_t odp_port;
4836
4837             ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
4838                           ofport->ofp_port);
4839
4840             odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
4841             if (odp_port != ODPP_NONE) {
4842                 ds_put_format(ds, "%"PRIu32":", odp_port);
4843             } else {
4844                 ds_put_cstr(ds, "none:");
4845             }
4846
4847             ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));
4848
4849             smap_init(&config);
4850             if (!netdev_get_config(ofport->netdev, &config)) {
4851                 const struct smap_node **nodes;
4852                 size_t i;
4853
4854                 nodes = smap_sort(&config);
4855                 for (i = 0; i < smap_count(&config); i++) {
4856                     const struct smap_node *node = nodes[i];
4857                     ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
4858                                   node->key, node->value);
4859                 }
4860                 free(nodes);
4861             }
4862             smap_destroy(&config);
4863
4864             ds_put_char(ds, ')');
4865             ds_put_char(ds, '\n');
4866         }
4867         free(ports);
4868     }
4869     shash_destroy(&ofproto_shash);
4870     free(ofprotos);
4871 }
4872
4873 static void
4874 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4875                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
4876 {
4877     struct ds ds = DS_EMPTY_INITIALIZER;
4878     const struct shash_node **backers;
4879     int i;
4880
4881     backers = shash_sort(&all_dpif_backers);
4882     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
4883         dpif_show_backer(backers[i]->data, &ds);
4884     }
4885     free(backers);
4886
4887     unixctl_command_reply(conn, ds_cstr(&ds));
4888     ds_destroy(&ds);
4889 }
4890
4891 static void
4892 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
4893                                 int argc OVS_UNUSED, const char *argv[],
4894                                 void *aux OVS_UNUSED)
4895 {
4896     const struct ofproto_dpif *ofproto;
4897
4898     struct ds ds = DS_EMPTY_INITIALIZER;
4899     bool verbosity = false;
4900
4901     struct dpif_port dpif_port;
4902     struct dpif_port_dump port_dump;
4903     struct hmap portno_names;
4904
4905     struct dpif_flow_dump *flow_dump;
4906     struct dpif_flow_dump_thread *flow_dump_thread;
4907     struct dpif_flow f;
4908     int error;
4909
4910     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
4911     if (!ofproto) {
4912         unixctl_command_reply_error(conn, "no such bridge");
4913         return;
4914     }
4915
4916     if (argc > 2 && !strcmp(argv[1], "-m")) {
4917         verbosity = true;
4918     }
4919
4920     hmap_init(&portno_names);
4921     DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
4922         odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
4923     }
4924
4925     ds_init(&ds);
4926     flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
4927     flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
4928     while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
4929         struct flow flow;
4930
4931         if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
4932             || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
4933             continue;
4934         }
4935
4936         odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
4937                         &portno_names, &ds, verbosity);
4938         ds_put_cstr(&ds, ", ");
4939         dpif_flow_stats_format(&f.stats, &ds);
4940         ds_put_cstr(&ds, ", actions:");
4941         format_odp_actions(&ds, f.actions, f.actions_len);
4942         ds_put_char(&ds, '\n');
4943     }
4944     dpif_flow_dump_thread_destroy(flow_dump_thread);
4945     error = dpif_flow_dump_destroy(flow_dump);
4946
4947     if (error) {
4948         ds_clear(&ds);
4949         ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno));
4950         unixctl_command_reply_error(conn, ds_cstr(&ds));
4951     } else {
4952         unixctl_command_reply(conn, ds_cstr(&ds));
4953     }
4954     odp_portno_names_destroy(&portno_names);
4955     hmap_destroy(&portno_names);
4956     ds_destroy(&ds);
4957 }
4958
4959 static void
4960 ofproto_dpif_unixctl_init(void)
4961 {
4962     static bool registered;
4963     if (registered) {
4964         return;
4965     }
4966     registered = true;
4967
4968     unixctl_command_register(
4969         "ofproto/trace",
4970         "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
4971         1, 3, ofproto_unixctl_trace, NULL);
4972     unixctl_command_register(
4973         "ofproto/trace-packet-out",
4974         "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
4975         2, 6, ofproto_unixctl_trace_actions, NULL);
4976     unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
4977                              ofproto_unixctl_fdb_flush, NULL);
4978     unixctl_command_register("fdb/show", "bridge", 1, 1,
4979                              ofproto_unixctl_fdb_show, NULL);
4980     unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
4981                              ofproto_unixctl_mcast_snooping_flush, NULL);
4982     unixctl_command_register("mdb/show", "bridge", 1, 1,
4983                              ofproto_unixctl_mcast_snooping_show, NULL);
4984     unixctl_command_register("dpif/dump-dps", "", 0, 0,
4985                              ofproto_unixctl_dpif_dump_dps, NULL);
4986     unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
4987                              NULL);
4988     unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
4989                              ofproto_unixctl_dpif_dump_flows, NULL);
4990 }
4991
4992 /* Returns true if 'table' is the table used for internal rules,
4993  * false otherwise. */
4994 bool
4995 table_is_internal(uint8_t table_id)
4996 {
4997     return table_id == TBL_INTERNAL;
4998 }
4999 \f
5000 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
5001  *
5002  * This is deprecated.  It is only for compatibility with broken device drivers
5003  * in old versions of Linux that do not properly support VLANs when VLAN
5004  * devices are not used.  When broken device drivers are no longer in
5005  * widespread use, we will delete these interfaces. */
5006
5007 static int
5008 set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
5009 {
5010     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
5011     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
5012
5013     if (realdev_ofp_port == ofport->realdev_ofp_port
5014         && vid == ofport->vlandev_vid) {
5015         return 0;
5016     }
5017
5018     ofproto->backer->need_revalidate = REV_RECONFIGURE;
5019
5020     if (ofport->realdev_ofp_port) {
5021         vsp_remove(ofport);
5022     }
5023     if (realdev_ofp_port && ofport->bundle) {
5024         /* vlandevs are enslaved to their realdevs, so they are not allowed to
5025          * themselves be part of a bundle. */
5026         bundle_set(ofport_->ofproto, ofport->bundle, NULL);
5027     }
5028
5029     ofport->realdev_ofp_port = realdev_ofp_port;
5030     ofport->vlandev_vid = vid;
5031
5032     if (realdev_ofp_port) {
5033         vsp_add(ofport, realdev_ofp_port, vid);
5034     }
5035
5036     return 0;
5037 }
5038
5039 static uint32_t
5040 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
5041 {
5042     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
5043 }
5044
5045 bool
5046 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
5047     OVS_EXCLUDED(ofproto->vsp_mutex)
5048 {
5049     /* hmap_is_empty is thread safe. */
5050     return !hmap_is_empty(&ofproto->realdev_vid_map);
5051 }
5052
5053
5054 static ofp_port_t
5055 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
5056                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5057     OVS_REQUIRES(ofproto->vsp_mutex)
5058 {
5059     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
5060         int vid = vlan_tci_to_vid(vlan_tci);
5061         const struct vlan_splinter *vsp;
5062
5063         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
5064                                  hash_realdev_vid(realdev_ofp_port, vid),
5065                                  &ofproto->realdev_vid_map) {
5066             if (vsp->realdev_ofp_port == realdev_ofp_port
5067                 && vsp->vid == vid) {
5068                 return vsp->vlandev_ofp_port;
5069             }
5070         }
5071     }
5072     return realdev_ofp_port;
5073 }
5074
5075 /* Returns the OFP port number of the Linux VLAN device that corresponds to
5076  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
5077  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
5078  * 'vlan_tci' 9, it would return the port number of eth0.9.
5079  *
5080  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
5081  * function just returns its 'realdev_ofp_port' argument. */
5082 ofp_port_t
5083 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
5084                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5085     OVS_EXCLUDED(ofproto->vsp_mutex)
5086 {
5087     ofp_port_t ret;
5088
5089     /* hmap_is_empty is thread safe, see if we can return immediately. */
5090     if (hmap_is_empty(&ofproto->realdev_vid_map)) {
5091         return realdev_ofp_port;
5092     }
5093     ovs_mutex_lock(&ofproto->vsp_mutex);
5094     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
5095     ovs_mutex_unlock(&ofproto->vsp_mutex);
5096     return ret;
5097 }
5098
5099 static struct vlan_splinter *
5100 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
5101 {
5102     struct vlan_splinter *vsp;
5103
5104     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
5105                              hash_ofp_port(vlandev_ofp_port),
5106                              &ofproto->vlandev_map) {
5107         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
5108             return vsp;
5109         }
5110     }
5111
5112     return NULL;
5113 }
5114
5115 /* Returns the OpenFlow port number of the "real" device underlying the Linux
5116  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
5117  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
5118  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
5119  * eth0 and store 9 in '*vid'.
5120  *
5121  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
5122  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
5123  * always does.*/
5124 static ofp_port_t
5125 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
5126                        ofp_port_t vlandev_ofp_port, int *vid)
5127     OVS_REQUIRES(ofproto->vsp_mutex)
5128 {
5129     if (!hmap_is_empty(&ofproto->vlandev_map)) {
5130         const struct vlan_splinter *vsp;
5131
5132         vsp = vlandev_find(ofproto, vlandev_ofp_port);
5133         if (vsp) {
5134             if (vid) {
5135                 *vid = vsp->vid;
5136             }
5137             return vsp->realdev_ofp_port;
5138         }
5139     }
5140     return 0;
5141 }
5142
5143 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
5144  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
5145  * 'flow->in_port' to the "real" device backing the VLAN device, sets
5146  * 'flow->vlan_tci' to the VLAN VID, and returns true.  Optionally pushes the
5147  * appropriate VLAN on 'packet' if provided.  Otherwise (which is always the
5148  * case unless VLAN splinters are enabled), returns false without making any
5149  * changes. */
5150 bool
5151 vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
5152                 struct ofpbuf *packet)
5153     OVS_EXCLUDED(ofproto->vsp_mutex)
5154 {
5155     ofp_port_t realdev;
5156     int vid;
5157
5158     /* hmap_is_empty is thread safe. */
5159     if (hmap_is_empty(&ofproto->vlandev_map)) {
5160         return false;
5161     }
5162
5163     ovs_mutex_lock(&ofproto->vsp_mutex);
5164     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
5165     ovs_mutex_unlock(&ofproto->vsp_mutex);
5166     if (!realdev) {
5167         return false;
5168     }
5169
5170     /* Cause the flow to be processed as if it came in on the real device with
5171      * the VLAN device's VLAN ID. */
5172     flow->in_port.ofp_port = realdev;
5173     flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
5174
5175     if (packet) {
5176         /* Make the packet resemble the flow, so that it gets sent to an
5177          * OpenFlow controller properly, so that it looks correct for sFlow,
5178          * and so that flow_extract() will get the correct vlan_tci if it is
5179          * called on 'packet'. */
5180         eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
5181     }
5182
5183     return true;
5184 }
5185
5186 static void
5187 vsp_remove(struct ofport_dpif *port)
5188 {
5189     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5190     struct vlan_splinter *vsp;
5191
5192     ovs_mutex_lock(&ofproto->vsp_mutex);
5193     vsp = vlandev_find(ofproto, port->up.ofp_port);
5194     if (vsp) {
5195         hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
5196         hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
5197         free(vsp);
5198
5199         port->realdev_ofp_port = 0;
5200     } else {
5201         VLOG_ERR("missing vlan device record");
5202     }
5203     ovs_mutex_unlock(&ofproto->vsp_mutex);
5204 }
5205
5206 static void
5207 vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
5208 {
5209     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5210
5211     ovs_mutex_lock(&ofproto->vsp_mutex);
5212     if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
5213         && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
5214             == realdev_ofp_port)) {
5215         struct vlan_splinter *vsp;
5216
5217         vsp = xmalloc(sizeof *vsp);
5218         vsp->realdev_ofp_port = realdev_ofp_port;
5219         vsp->vlandev_ofp_port = port->up.ofp_port;
5220         vsp->vid = vid;
5221
5222         port->realdev_ofp_port = realdev_ofp_port;
5223
5224         hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
5225                     hash_ofp_port(port->up.ofp_port));
5226         hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
5227                     hash_realdev_vid(realdev_ofp_port, vid));
5228     } else {
5229         VLOG_ERR("duplicate vlan device record");
5230     }
5231     ovs_mutex_unlock(&ofproto->vsp_mutex);
5232 }
5233
5234 static odp_port_t
5235 ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
5236 {
5237     const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
5238     return ofport ? ofport->odp_port : ODPP_NONE;
5239 }
5240
5241 struct ofport_dpif *
5242 odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
5243 {
5244     struct ofport_dpif *port;
5245
5246     ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
5247     HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
5248                              &backer->odp_to_ofport_map) {
5249         if (port->odp_port == odp_port) {
5250             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5251             return port;
5252         }
5253     }
5254
5255     ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5256     return NULL;
5257 }
5258
5259 static ofp_port_t
5260 odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
5261 {
5262     struct ofport_dpif *port;
5263
5264     port = odp_port_to_ofport(ofproto->backer, odp_port);
5265     if (port && &ofproto->up == port->up.ofproto) {
5266         return port->up.ofp_port;
5267     } else {
5268         return OFPP_NONE;
5269     }
5270 }
5271
5272 uint32_t
5273 ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
5274 {
5275     struct dpif_backer *backer = ofproto->backer;
5276
5277     return  recirc_id_alloc(backer->rid_pool);
5278 }
5279
5280 void
5281 ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
5282 {
5283     struct dpif_backer *backer = ofproto->backer;
5284
5285     recirc_id_free(backer->rid_pool, recirc_id);
5286 }
5287
5288 int
5289 ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
5290                                const struct match *match, int priority,
5291                                uint16_t idle_timeout,
5292                                const struct ofpbuf *ofpacts,
5293                                struct rule **rulep)
5294 {
5295     struct ofputil_flow_mod fm;
5296     struct rule_dpif *rule;
5297     int error;
5298
5299     fm.match = *match;
5300     fm.priority = priority;
5301     fm.new_cookie = htonll(0);
5302     fm.cookie = htonll(0);
5303     fm.cookie_mask = htonll(0);
5304     fm.modify_cookie = false;
5305     fm.table_id = TBL_INTERNAL;
5306     fm.command = OFPFC_ADD;
5307     fm.idle_timeout = idle_timeout;
5308     fm.hard_timeout = 0;
5309     fm.buffer_id = 0;
5310     fm.out_port = 0;
5311     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5312     fm.ofpacts = ofpbuf_data(ofpacts);
5313     fm.ofpacts_len = ofpbuf_size(ofpacts);
5314
5315     error = ofproto_flow_mod(&ofproto->up, &fm);
5316     if (error) {
5317         VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
5318                     ofperr_to_string(error));
5319         *rulep = NULL;
5320         return error;
5321     }
5322
5323     rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
5324                                      &fm.match.wc, false);
5325     if (rule) {
5326         *rulep = &rule->up;
5327     } else {
5328         OVS_NOT_REACHED();
5329     }
5330     return 0;
5331 }
5332
5333 int
5334 ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
5335                                   struct match *match, int priority)
5336 {
5337     struct ofputil_flow_mod fm;
5338     int error;
5339
5340     fm.match = *match;
5341     fm.priority = priority;
5342     fm.new_cookie = htonll(0);
5343     fm.cookie = htonll(0);
5344     fm.cookie_mask = htonll(0);
5345     fm.modify_cookie = false;
5346     fm.table_id = TBL_INTERNAL;
5347     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5348     fm.command = OFPFC_DELETE_STRICT;
5349
5350     error = ofproto_flow_mod(&ofproto->up, &fm);
5351     if (error) {
5352         VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
5353                     ofperr_to_string(error));
5354         return error;
5355     }
5356
5357     return 0;
5358 }
5359
5360 const struct ofproto_class ofproto_dpif_class = {
5361     init,
5362     enumerate_types,
5363     enumerate_names,
5364     del,
5365     port_open_type,
5366     type_run,
5367     type_wait,
5368     alloc,
5369     construct,
5370     destruct,
5371     dealloc,
5372     run,
5373     wait,
5374     NULL,                       /* get_memory_usage. */
5375     type_get_memory_usage,
5376     flush,
5377     query_tables,
5378     port_alloc,
5379     port_construct,
5380     port_destruct,
5381     port_dealloc,
5382     port_modified,
5383     port_reconfigured,
5384     port_query_by_name,
5385     port_add,
5386     port_del,
5387     port_get_stats,
5388     port_dump_start,
5389     port_dump_next,
5390     port_dump_done,
5391     port_poll,
5392     port_poll_wait,
5393     port_is_lacp_current,
5394     NULL,                       /* rule_choose_table */
5395     rule_alloc,
5396     rule_construct,
5397     rule_insert,
5398     rule_delete,
5399     rule_destruct,
5400     rule_dealloc,
5401     rule_get_stats,
5402     rule_execute,
5403     NULL,                       /* rule_premodify_actions */
5404     rule_modify_actions,
5405     set_frag_handling,
5406     packet_out,
5407     set_netflow,
5408     get_netflow_ids,
5409     set_sflow,
5410     set_ipfix,
5411     set_cfm,
5412     cfm_status_changed,
5413     get_cfm_status,
5414     set_bfd,
5415     bfd_status_changed,
5416     get_bfd_status,
5417     set_stp,
5418     get_stp_status,
5419     set_stp_port,
5420     get_stp_port_status,
5421     get_stp_port_stats,
5422     set_rstp,
5423     get_rstp_status,
5424     set_rstp_port,
5425     get_rstp_port_status,
5426     set_queues,
5427     bundle_set,
5428     bundle_remove,
5429     mirror_set__,
5430     mirror_get_stats__,
5431     set_flood_vlans,
5432     is_mirror_output_bundle,
5433     forward_bpdu_changed,
5434     set_mac_table_config,
5435     set_mcast_snooping,
5436     set_mcast_snooping_port,
5437     set_realdev,
5438     NULL,                       /* meter_get_features */
5439     NULL,                       /* meter_set */
5440     NULL,                       /* meter_get */
5441     NULL,                       /* meter_del */
5442     group_alloc,                /* group_alloc */
5443     group_construct,            /* group_construct */
5444     group_destruct,             /* group_destruct */
5445     group_dealloc,              /* group_dealloc */
5446     group_modify,               /* group_modify */
5447     group_get_stats,            /* group_get_stats */
5448 };