08612548eeaf765cb8ab2379055710aac833a6cf
[cascardo/ovs.git] / ofproto / ofproto-dpif.c
1 /*
2  * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <config.h>
18
19 #include "ofproto/ofproto-dpif.h"
20 #include "ofproto/ofproto-provider.h"
21
22 #include <errno.h>
23
24 #include "bfd.h"
25 #include "bond.h"
26 #include "bundle.h"
27 #include "byte-order.h"
28 #include "connectivity.h"
29 #include "connmgr.h"
30 #include "coverage.h"
31 #include "cfm.h"
32 #include "dpif.h"
33 #include "dynamic-string.h"
34 #include "fail-open.h"
35 #include "guarded-list.h"
36 #include "hmapx.h"
37 #include "lacp.h"
38 #include "learn.h"
39 #include "mac-learning.h"
40 #include "mcast-snooping.h"
41 #include "meta-flow.h"
42 #include "multipath.h"
43 #include "netdev-vport.h"
44 #include "netdev.h"
45 #include "netlink.h"
46 #include "nx-match.h"
47 #include "odp-util.h"
48 #include "odp-execute.h"
49 #include "ofp-util.h"
50 #include "ofpbuf.h"
51 #include "ofp-actions.h"
52 #include "ofp-parse.h"
53 #include "ofp-print.h"
54 #include "ofproto-dpif-ipfix.h"
55 #include "ofproto-dpif-mirror.h"
56 #include "ofproto-dpif-monitor.h"
57 #include "ofproto-dpif-rid.h"
58 #include "ofproto-dpif-sflow.h"
59 #include "ofproto-dpif-upcall.h"
60 #include "ofproto-dpif-xlate.h"
61 #include "ovs-router.h"
62 #include "poll-loop.h"
63 #include "seq.h"
64 #include "simap.h"
65 #include "smap.h"
66 #include "timer.h"
67 #include "tunnel.h"
68 #include "unaligned.h"
69 #include "unixctl.h"
70 #include "vlan-bitmap.h"
71 #include "vlog.h"
72
73 VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
74
75 COVERAGE_DEFINE(ofproto_dpif_expired);
76 COVERAGE_DEFINE(packet_in_overflow);
77
78 struct flow_miss;
79
80 struct rule_dpif {
81     struct rule up;
82
83     /* These statistics:
84      *
85      *   - Do include packets and bytes from datapath flows which have not
86      *   recently been processed by a revalidator. */
87     struct ovs_mutex stats_mutex;
88     struct dpif_flow_stats stats OVS_GUARDED;
89
90     /* If non-zero then the recirculation id that has
91      * been allocated for use with this rule.
92      * The recirculation id and associated internal flow should
93      * be freed when the rule is freed */
94     uint32_t recirc_id;
95 };
96
97 /* RULE_CAST() depends on this. */
98 BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
99
100 static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
101                            long long int *used);
102 static struct rule_dpif *rule_dpif_cast(const struct rule *);
103 static void rule_expire(struct rule_dpif *);
104
105 struct group_dpif {
106     struct ofgroup up;
107
108     /* These statistics:
109      *
110      *   - Do include packets and bytes from datapath flows which have not
111      *   recently been processed by a revalidator. */
112     struct ovs_mutex stats_mutex;
113     uint64_t packet_count OVS_GUARDED;  /* Number of packets received. */
114     uint64_t byte_count OVS_GUARDED;    /* Number of bytes received. */
115 };
116
117 struct ofbundle {
118     struct hmap_node hmap_node; /* In struct ofproto's "bundles" hmap. */
119     struct ofproto_dpif *ofproto; /* Owning ofproto. */
120     void *aux;                  /* Key supplied by ofproto's client. */
121     char *name;                 /* Identifier for log messages. */
122
123     /* Configuration. */
124     struct list ports;          /* Contains "struct ofport"s. */
125     enum port_vlan_mode vlan_mode; /* VLAN mode */
126     int vlan;                   /* -1=trunk port, else a 12-bit VLAN ID. */
127     unsigned long *trunks;      /* Bitmap of trunked VLANs, if 'vlan' == -1.
128                                  * NULL if all VLANs are trunked. */
129     struct lacp *lacp;          /* LACP if LACP is enabled, otherwise NULL. */
130     struct bond *bond;          /* Nonnull iff more than one port. */
131     bool use_priority_tags;     /* Use 802.1p tag for frames in VLAN 0? */
132
133     /* Status. */
134     bool floodable;          /* True if no port has OFPUTIL_PC_NO_FLOOD set. */
135 };
136
137 static void bundle_remove(struct ofport *);
138 static void bundle_update(struct ofbundle *);
139 static void bundle_destroy(struct ofbundle *);
140 static void bundle_del_port(struct ofport_dpif *);
141 static void bundle_run(struct ofbundle *);
142 static void bundle_wait(struct ofbundle *);
143
144 static void stp_run(struct ofproto_dpif *ofproto);
145 static void stp_wait(struct ofproto_dpif *ofproto);
146 static int set_stp_port(struct ofport *,
147                         const struct ofproto_port_stp_settings *);
148
149 static void rstp_run(struct ofproto_dpif *ofproto);
150 static void set_rstp_port(struct ofport *,
151                          const struct ofproto_port_rstp_settings *);
152
153 struct ofport_dpif {
154     struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
155     struct ofport up;
156
157     odp_port_t odp_port;
158     struct ofbundle *bundle;    /* Bundle that contains this port, if any. */
159     struct list bundle_node;    /* In struct ofbundle's "ports" list. */
160     struct cfm *cfm;            /* Connectivity Fault Management, if any. */
161     struct bfd *bfd;            /* BFD, if any. */
162     bool may_enable;            /* May be enabled in bonds. */
163     bool is_tunnel;             /* This port is a tunnel. */
164     bool is_layer3;             /* This is a layer 3 port. */
165     long long int carrier_seq;  /* Carrier status changes. */
166     struct ofport_dpif *peer;   /* Peer if patch port. */
167
168     /* Spanning tree. */
169     struct stp_port *stp_port;  /* Spanning Tree Protocol, if any. */
170     enum stp_state stp_state;   /* Always STP_DISABLED if STP not in use. */
171     long long int stp_state_entered;
172
173     /* Rapid Spanning Tree. */
174     struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
175     enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */
176
177     /* Queue to DSCP mapping. */
178     struct ofproto_port_queue *qdscp;
179     size_t n_qdscp;
180
181     /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
182      *
183      * This is deprecated.  It is only for compatibility with broken device
184      * drivers in old versions of Linux that do not properly support VLANs when
185      * VLAN devices are not used.  When broken device drivers are no longer in
186      * widespread use, we will delete these interfaces. */
187     ofp_port_t realdev_ofp_port;
188     int vlandev_vid;
189 };
190
191 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
192  *
193  * This is deprecated.  It is only for compatibility with broken device drivers
194  * in old versions of Linux that do not properly support VLANs when VLAN
195  * devices are not used.  When broken device drivers are no longer in
196  * widespread use, we will delete these interfaces. */
197 struct vlan_splinter {
198     struct hmap_node realdev_vid_node;
199     struct hmap_node vlandev_node;
200     ofp_port_t realdev_ofp_port;
201     ofp_port_t vlandev_ofp_port;
202     int vid;
203 };
204
205 static void vsp_remove(struct ofport_dpif *);
206 static void vsp_add(struct ofport_dpif *, ofp_port_t realdev_ofp_port, int vid);
207
208 static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *,
209                                        ofp_port_t);
210
211 static ofp_port_t odp_port_to_ofp_port(const struct ofproto_dpif *,
212                                        odp_port_t);
213
214 static struct ofport_dpif *
215 ofport_dpif_cast(const struct ofport *ofport)
216 {
217     return ofport ? CONTAINER_OF(ofport, struct ofport_dpif, up) : NULL;
218 }
219
220 static void port_run(struct ofport_dpif *);
221 static int set_bfd(struct ofport *, const struct smap *);
222 static int set_cfm(struct ofport *, const struct cfm_settings *);
223 static void ofport_update_peer(struct ofport_dpif *);
224
/* Why every datapath flow might need to be revalidated.  Each reason has a
 * matching coverage counter, defined just below the enum.
 *
 * Zero is deliberately not an enumerator: it means that no revalidation is
 * needed.
 *
 * A cleaner way to integrate with coverage counters would be nice, but with
 * only a handful of reasons this is good enough for now. */
enum revalidate_reason {
    REV_RECONFIGURE = 1,       /* Switch configuration changed. */
    REV_STP,                   /* Spanning tree protocol port status change. */
    REV_RSTP,                  /* RSTP port status change. */
    REV_BOND,                  /* Bonding changed. */
    REV_PORT_TOGGLED,          /* Port enabled or disabled by CFM, LACP, ...*/
    REV_FLOW_TABLE,            /* Flow table changed. */
    REV_MAC_LEARNING,          /* Mac learning changed. */
    REV_MCAST_SNOOPING,        /* Multicast snooping changed. */
};

/* One coverage counter per revalidate_reason, in the same order. */
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
COVERAGE_DEFINE(rev_rstp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
COVERAGE_DEFINE(rev_mcast_snooping);
251
252 /* All datapaths of a given type share a single dpif backer instance. */
253 struct dpif_backer {
254     char *type;
255     int refcount;
256     struct dpif *dpif;
257     struct udpif *udpif;
258
259     struct ovs_rwlock odp_to_ofport_lock;
260     struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */
261
262     struct simap tnl_backers;      /* Set of dpif ports backing tunnels. */
263
264     enum revalidate_reason need_revalidate; /* Revalidate all flows. */
265
266     bool recv_set_enable; /* Enables or disables receiving packets. */
267
268     /* Recirculation. */
269     struct recirc_id_pool *rid_pool;       /* Recirculation ID pool. */
270     bool enable_recirc;   /* True if the datapath supports recirculation */
271
272     /* True if the datapath supports variable-length
273      * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.
274      * False if the datapath supports only 8-byte (or shorter) userdata. */
275     bool variable_length_userdata;
276
277     /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET
278      * actions. */
279     bool masked_set_action;
280
281     /* Maximum number of MPLS label stack entries that the datapath supports
282      * in a match */
283     size_t max_mpls_depth;
284
285     /* Version string of the datapath stored in OVSDB. */
286     char *dp_version_string;
287 };
288
289 /* All existing dpif_backer instances, indexed by ofproto->up.type. */
290 static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
291
292 struct ofproto_dpif {
293     struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
294     struct ofproto up;
295     struct dpif_backer *backer;
296
297     uint64_t dump_seq; /* Last read of udpif_dump_seq(). */
298
299     /* Special OpenFlow rules. */
300     struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
301     struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
302     struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */
303
304     /* Bridging. */
305     struct netflow *netflow;
306     struct dpif_sflow *sflow;
307     struct dpif_ipfix *ipfix;
308     struct hmap bundles;        /* Contains "struct ofbundle"s. */
309     struct mac_learning *ml;
310     struct mcast_snooping *ms;
311     bool has_bonded_bundles;
312     bool lacp_enabled;
313     struct mbridge *mbridge;
314
315     struct ovs_mutex stats_mutex;
316     struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
317                                             * consumed in userspace. */
318
319     /* Spanning tree. */
320     struct stp *stp;
321     long long int stp_last_tick;
322
323     /* Rapid Spanning Tree. */
324     struct rstp *rstp;
325     long long int rstp_last_tick;
326
327     /* VLAN splinters. */
328     struct ovs_mutex vsp_mutex;
329     struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
330     struct hmap vlandev_map OVS_GUARDED;     /* vlandev -> (realdev,vid). */
331
332     /* Ports. */
333     struct sset ports;             /* Set of standard port names. */
334     struct sset ghost_ports;       /* Ports with no datapath port. */
335     struct sset port_poll_set;     /* Queued names for port_poll() reply. */
336     int port_poll_errno;           /* Last errno for port_poll() reply. */
337     uint64_t change_seq;           /* Connectivity status changes. */
338
339     /* Work queues. */
340     struct guarded_list pins;      /* Contains "struct ofputil_packet_in"s. */
341     struct seq *pins_seq;          /* For notifying 'pins' reception. */
342     uint64_t pins_seqno;
343 };
344
345 /* All existing ofproto_dpif instances, indexed by ->up.name. */
346 static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
347
348 static void ofproto_dpif_unixctl_init(void);
349
350 static inline struct ofproto_dpif *
351 ofproto_dpif_cast(const struct ofproto *ofproto)
352 {
353     ovs_assert(ofproto->ofproto_class == &ofproto_dpif_class);
354     return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
355 }
356
357 size_t
358 ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
359 {
360     return ofproto->backer->max_mpls_depth;
361 }
362
363 bool
364 ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
365 {
366     return ofproto->backer->enable_recirc;
367 }
368
369 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
370                                         ofp_port_t ofp_port);
371 static void ofproto_trace(struct ofproto_dpif *, struct flow *,
372                           const struct ofpbuf *packet,
373                           const struct ofpact[], size_t ofpacts_len,
374                           struct ds *);
375
376 /* Global variables. */
377 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
378
379 /* Initial mappings of ports to bridges. */
380 static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports);
381
382 /* Executes 'fm'.  The caller retains ownership of 'fm' and everything in
383  * it. */
384 void
385 ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
386                       struct ofputil_flow_mod *fm)
387 {
388     ofproto_flow_mod(&ofproto->up, fm);
389 }
390
391 /* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
392  * Takes ownership of 'pin' and pin->packet. */
393 void
394 ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
395                             struct ofproto_packet_in *pin)
396 {
397     if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) {
398         COVERAGE_INC(packet_in_overflow);
399         free(CONST_CAST(void *, pin->up.packet));
400         free(pin);
401     }
402
403     /* Wakes up main thread for packet-in I/O. */
404     seq_change(ofproto->pins_seq);
405 }
406
407 /* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the
408  * packet rather than to send the packet to the controller.
409  *
410  * This function returns true to indicate that a packet_in message
411  * for a "table-miss" should be sent to at least one controller.
412  * False otherwise. */
413 bool
414 ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
415 {
416     return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
417 }
418 \f
419 /* Factory functions. */
420
421 static void
422 init(const struct shash *iface_hints)
423 {
424     struct shash_node *node;
425
426     /* Make a local copy, since we don't own 'iface_hints' elements. */
427     SHASH_FOR_EACH(node, iface_hints) {
428         const struct iface_hint *orig_hint = node->data;
429         struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
430
431         new_hint->br_name = xstrdup(orig_hint->br_name);
432         new_hint->br_type = xstrdup(orig_hint->br_type);
433         new_hint->ofp_port = orig_hint->ofp_port;
434
435         shash_add(&init_ofp_ports, node->name, new_hint);
436     }
437 }
438
/* Fills 'types' by delegating to dp_enumerate_types(). */
static void
enumerate_types(struct sset *types)
{
    dp_enumerate_types(types);
}
444
445 static int
446 enumerate_names(const char *type, struct sset *names)
447 {
448     struct ofproto_dpif *ofproto;
449
450     sset_clear(names);
451     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
452         if (strcmp(type, ofproto->up.type)) {
453             continue;
454         }
455         sset_add(names, ofproto->up.name);
456     }
457
458     return 0;
459 }
460
/* Opens the datapath called 'name' of the given 'type', deletes it, and
 * closes it again.  Returns 0 on success; otherwise returns the error from
 * opening the datapath or, if opening succeeded, from deleting it. */
static int
del(const char *type, const char *name)
{
    struct dpif *dpif;
    int error;

    error = dpif_open(name, type, &dpif);
    if (error) {
        return error;
    }

    error = dpif_delete(dpif);
    dpif_close(dpif);

    return error;
}
474 \f
/* Returns whatever dpif_port_open_type() reports for 'port_type' on a
 * datapath of type 'datapath_type'. */
static const char *
port_open_type(const char *datapath_type, const char *port_type)
{
    const char *open_type = dpif_port_open_type(datapath_type, port_type);

    return open_type;
}
480
481 /* Type functions. */
482
483 static void process_dpif_port_changes(struct dpif_backer *);
484 static void process_dpif_all_ports_changed(struct dpif_backer *);
485 static void process_dpif_port_change(struct dpif_backer *,
486                                      const char *devname);
487 static void process_dpif_port_error(struct dpif_backer *, int error);
488
489 static struct ofproto_dpif *
490 lookup_ofproto_dpif_by_port_name(const char *name)
491 {
492     struct ofproto_dpif *ofproto;
493
494     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
495         if (sset_contains(&ofproto->ports, name)) {
496             return ofproto;
497         }
498     }
499
500     return NULL;
501 }
502
503 static int
504 type_run(const char *type)
505 {
506     struct dpif_backer *backer;
507
508     backer = shash_find_data(&all_dpif_backers, type);
509     if (!backer) {
510         /* This is not necessarily a problem, since backers are only
511          * created on demand. */
512         return 0;
513     }
514
515     dpif_run(backer->dpif);
516     udpif_run(backer->udpif);
517
518     /* If vswitchd started with other_config:flow_restore_wait set as "true",
519      * and the configuration has now changed to "false", enable receiving
520      * packets from the datapath. */
521     if (!backer->recv_set_enable && !ofproto_get_flow_restore_wait()) {
522         int error;
523
524         backer->recv_set_enable = true;
525
526         error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
527         if (error) {
528             VLOG_ERR("Failed to enable receiving packets in dpif.");
529             return error;
530         }
531         dpif_flow_flush(backer->dpif);
532         backer->need_revalidate = REV_RECONFIGURE;
533     }
534
535     if (backer->recv_set_enable) {
536         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
537     }
538
539     dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);
540
541     if (backer->need_revalidate) {
542         struct ofproto_dpif *ofproto;
543         struct simap_node *node;
544         struct simap tmp_backers;
545
546         /* Handle tunnel garbage collection. */
547         simap_init(&tmp_backers);
548         simap_swap(&backer->tnl_backers, &tmp_backers);
549
550         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
551             struct ofport_dpif *iter;
552
553             if (backer != ofproto->backer) {
554                 continue;
555             }
556
557             HMAP_FOR_EACH (iter, up.hmap_node, &ofproto->up.ports) {
558                 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
559                 const char *dp_port;
560
561                 if (!iter->is_tunnel) {
562                     continue;
563                 }
564
565                 dp_port = netdev_vport_get_dpif_port(iter->up.netdev,
566                                                      namebuf, sizeof namebuf);
567                 node = simap_find(&tmp_backers, dp_port);
568                 if (node) {
569                     simap_put(&backer->tnl_backers, dp_port, node->data);
570                     simap_delete(&tmp_backers, node);
571                     node = simap_find(&backer->tnl_backers, dp_port);
572                 } else {
573                     node = simap_find(&backer->tnl_backers, dp_port);
574                     if (!node) {
575                         odp_port_t odp_port = ODPP_NONE;
576
577                         if (!dpif_port_add(backer->dpif, iter->up.netdev,
578                                            &odp_port)) {
579                             simap_put(&backer->tnl_backers, dp_port,
580                                       odp_to_u32(odp_port));
581                             node = simap_find(&backer->tnl_backers, dp_port);
582                         }
583                     }
584                 }
585
586                 iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
587                 if (tnl_port_reconfigure(iter, iter->up.netdev,
588                                          iter->odp_port)) {
589                     backer->need_revalidate = REV_RECONFIGURE;
590                 }
591             }
592         }
593
594         SIMAP_FOR_EACH (node, &tmp_backers) {
595             dpif_port_del(backer->dpif, u32_to_odp(node->data));
596         }
597         simap_destroy(&tmp_backers);
598
599         switch (backer->need_revalidate) {
600         case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
601         case REV_STP:            COVERAGE_INC(rev_stp);            break;
602         case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
603         case REV_BOND:           COVERAGE_INC(rev_bond);           break;
604         case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
605         case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
606         case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
607         case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
608         }
609         backer->need_revalidate = 0;
610
611         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
612             struct ofport_dpif *ofport;
613             struct ofbundle *bundle;
614
615             if (ofproto->backer != backer) {
616                 continue;
617             }
618
619             xlate_txn_start();
620             xlate_ofproto_set(ofproto, ofproto->up.name,
621                               ofproto->backer->dpif, ofproto->ml,
622                               ofproto->stp, ofproto->rstp, ofproto->ms,
623                               ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
624                               ofproto->netflow,
625                               ofproto->up.forward_bpdu,
626                               connmgr_has_in_band(ofproto->up.connmgr),
627                               ofproto->backer->enable_recirc,
628                               ofproto->backer->variable_length_userdata,
629                               ofproto->backer->max_mpls_depth,
630                               ofproto->backer->masked_set_action);
631
632             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
633                 xlate_bundle_set(ofproto, bundle, bundle->name,
634                                  bundle->vlan_mode, bundle->vlan,
635                                  bundle->trunks, bundle->use_priority_tags,
636                                  bundle->bond, bundle->lacp,
637                                  bundle->floodable);
638             }
639
640             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
641                 int stp_port = ofport->stp_port
642                     ? stp_port_no(ofport->stp_port)
643                     : -1;
644                 xlate_ofport_set(ofproto, ofport->bundle, ofport,
645                                  ofport->up.ofp_port, ofport->odp_port,
646                                  ofport->up.netdev, ofport->cfm,
647                                  ofport->bfd, ofport->peer, stp_port,
648                                  ofport->rstp_port, ofport->qdscp,
649                                  ofport->n_qdscp, ofport->up.pp.config,
650                                  ofport->up.pp.state, ofport->is_tunnel,
651                                  ofport->may_enable);
652             }
653             xlate_txn_commit();
654         }
655
656         udpif_revalidate(backer->udpif);
657     }
658
659     process_dpif_port_changes(backer);
660
661     return 0;
662 }
663
664 /* Check for and handle port changes in 'backer''s dpif. */
665 static void
666 process_dpif_port_changes(struct dpif_backer *backer)
667 {
668     for (;;) {
669         char *devname;
670         int error;
671
672         error = dpif_port_poll(backer->dpif, &devname);
673         switch (error) {
674         case EAGAIN:
675             return;
676
677         case ENOBUFS:
678             process_dpif_all_ports_changed(backer);
679             break;
680
681         case 0:
682             process_dpif_port_change(backer, devname);
683             free(devname);
684             break;
685
686         default:
687             process_dpif_port_error(backer, error);
688             break;
689         }
690     }
691 }
692
693 static void
694 process_dpif_all_ports_changed(struct dpif_backer *backer)
695 {
696     struct ofproto_dpif *ofproto;
697     struct dpif_port dpif_port;
698     struct dpif_port_dump dump;
699     struct sset devnames;
700     const char *devname;
701
702     sset_init(&devnames);
703     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
704         if (ofproto->backer == backer) {
705             struct ofport *ofport;
706
707             HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
708                 sset_add(&devnames, netdev_get_name(ofport->netdev));
709             }
710         }
711     }
712     DPIF_PORT_FOR_EACH (&dpif_port, &dump, backer->dpif) {
713         sset_add(&devnames, dpif_port.name);
714     }
715
716     SSET_FOR_EACH (devname, &devnames) {
717         process_dpif_port_change(backer, devname);
718     }
719     sset_destroy(&devnames);
720 }
721
722 static void
723 process_dpif_port_change(struct dpif_backer *backer, const char *devname)
724 {
725     struct ofproto_dpif *ofproto;
726     struct dpif_port port;
727
728     /* Don't report on the datapath's device. */
729     if (!strcmp(devname, dpif_base_name(backer->dpif))) {
730         return;
731     }
732
733     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node,
734                    &all_ofproto_dpifs) {
735         if (simap_contains(&ofproto->backer->tnl_backers, devname)) {
736             return;
737         }
738     }
739
740     ofproto = lookup_ofproto_dpif_by_port_name(devname);
741     if (dpif_port_query_by_name(backer->dpif, devname, &port)) {
742         /* The port was removed.  If we know the datapath,
743          * report it through poll_set().  If we don't, it may be
744          * notifying us of a removal we initiated, so ignore it.
745          * If there's a pending ENOBUFS, let it stand, since
746          * everything will be reevaluated. */
747         if (ofproto && ofproto->port_poll_errno != ENOBUFS) {
748             sset_add(&ofproto->port_poll_set, devname);
749             ofproto->port_poll_errno = 0;
750         }
751     } else if (!ofproto) {
752         /* The port was added, but we don't know with which
753          * ofproto we should associate it.  Delete it. */
754         dpif_port_del(backer->dpif, port.port_no);
755     } else {
756         struct ofport_dpif *ofport;
757
758         ofport = ofport_dpif_cast(shash_find_data(
759                                       &ofproto->up.port_by_name, devname));
760         if (ofport
761             && ofport->odp_port != port.port_no
762             && !odp_port_to_ofport(backer, port.port_no))
763         {
764             /* 'ofport''s datapath port number has changed from
765              * 'ofport->odp_port' to 'port.port_no'.  Update our internal data
766              * structures to match. */
767             ovs_rwlock_wrlock(&backer->odp_to_ofport_lock);
768             hmap_remove(&backer->odp_to_ofport_map, &ofport->odp_port_node);
769             ofport->odp_port = port.port_no;
770             hmap_insert(&backer->odp_to_ofport_map, &ofport->odp_port_node,
771                         hash_odp_port(port.port_no));
772             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
773             backer->need_revalidate = REV_RECONFIGURE;
774         }
775     }
776     dpif_port_destroy(&port);
777 }
778
779 /* Propagate 'error' to all ofprotos based on 'backer'. */
780 static void
781 process_dpif_port_error(struct dpif_backer *backer, int error)
782 {
783     struct ofproto_dpif *ofproto;
784
785     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
786         if (ofproto->backer == backer) {
787             sset_clear(&ofproto->port_poll_set);
788             ofproto->port_poll_errno = error;
789         }
790     }
791 }
792
793 static void
794 type_wait(const char *type)
795 {
796     struct dpif_backer *backer;
797
798     backer = shash_find_data(&all_dpif_backers, type);
799     if (!backer) {
800         /* This is not necessarily a problem, since backers are only
801          * created on demand. */
802         return;
803     }
804
805     dpif_wait(backer->dpif);
806 }
807 \f
808 /* Basic life-cycle. */
809
810 static int add_internal_flows(struct ofproto_dpif *);
811
812 static struct ofproto *
813 alloc(void)
814 {
815     struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
816     return &ofproto->up;
817 }
818
/* Frees the ofproto_dpif that embeds 'ofproto_'. */
static void
dealloc(struct ofproto *ofproto_)
{
    free(ofproto_dpif_cast(ofproto_));
}
825
826 static void
827 close_dpif_backer(struct dpif_backer *backer)
828 {
829     ovs_assert(backer->refcount > 0);
830
831     if (--backer->refcount) {
832         return;
833     }
834
835     udpif_destroy(backer->udpif);
836
837     simap_destroy(&backer->tnl_backers);
838     ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
839     hmap_destroy(&backer->odp_to_ofport_map);
840     shash_find_and_delete(&all_dpif_backers, backer->type);
841     recirc_id_pool_destroy(backer->rid_pool);
842     free(backer->type);
843     free(backer->dp_version_string);
844     dpif_close(backer->dpif);
845     free(backer);
846 }
847
848 /* Datapath port slated for removal from datapath. */
849 struct odp_garbage {
850     struct list list_node;
851     odp_port_t odp_port;
852 };
853
854 static bool check_variable_length_userdata(struct dpif_backer *backer);
855 static size_t check_max_mpls_depth(struct dpif_backer *backer);
856 static bool check_recirc(struct dpif_backer *backer);
857 static bool check_masked_set_action(struct dpif_backer *backer);
858
859 static int
860 open_dpif_backer(const char *type, struct dpif_backer **backerp)
861 {
862     struct dpif_backer *backer;
863     struct dpif_port_dump port_dump;
864     struct dpif_port port;
865     struct shash_node *node;
866     struct list garbage_list;
867     struct odp_garbage *garbage, *next;
868
869     struct sset names;
870     char *backer_name;
871     const char *name;
872     int error;
873
874     backer = shash_find_data(&all_dpif_backers, type);
875     if (backer) {
876         backer->refcount++;
877         *backerp = backer;
878         return 0;
879     }
880
881     backer_name = xasprintf("ovs-%s", type);
882
883     /* Remove any existing datapaths, since we assume we're the only
884      * userspace controlling the datapath. */
885     sset_init(&names);
886     dp_enumerate_names(type, &names);
887     SSET_FOR_EACH(name, &names) {
888         struct dpif *old_dpif;
889
890         /* Don't remove our backer if it exists. */
891         if (!strcmp(name, backer_name)) {
892             continue;
893         }
894
895         if (dpif_open(name, type, &old_dpif)) {
896             VLOG_WARN("couldn't open old datapath %s to remove it", name);
897         } else {
898             dpif_delete(old_dpif);
899             dpif_close(old_dpif);
900         }
901     }
902     sset_destroy(&names);
903
904     backer = xmalloc(sizeof *backer);
905
906     error = dpif_create_and_open(backer_name, type, &backer->dpif);
907     free(backer_name);
908     if (error) {
909         VLOG_ERR("failed to open datapath of type %s: %s", type,
910                  ovs_strerror(error));
911         free(backer);
912         return error;
913     }
914     backer->udpif = udpif_create(backer, backer->dpif);
915
916     backer->type = xstrdup(type);
917     backer->refcount = 1;
918     hmap_init(&backer->odp_to_ofport_map);
919     ovs_rwlock_init(&backer->odp_to_ofport_lock);
920     backer->need_revalidate = 0;
921     simap_init(&backer->tnl_backers);
922     backer->recv_set_enable = !ofproto_get_flow_restore_wait();
923     *backerp = backer;
924
925     if (backer->recv_set_enable) {
926         dpif_flow_flush(backer->dpif);
927     }
928
929     /* Loop through the ports already on the datapath and remove any
930      * that we don't need anymore. */
931     list_init(&garbage_list);
932     dpif_port_dump_start(&port_dump, backer->dpif);
933     while (dpif_port_dump_next(&port_dump, &port)) {
934         node = shash_find(&init_ofp_ports, port.name);
935         if (!node && strcmp(port.name, dpif_base_name(backer->dpif))) {
936             garbage = xmalloc(sizeof *garbage);
937             garbage->odp_port = port.port_no;
938             list_push_front(&garbage_list, &garbage->list_node);
939         }
940     }
941     dpif_port_dump_done(&port_dump);
942
943     LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
944         dpif_port_del(backer->dpif, garbage->odp_port);
945         list_remove(&garbage->list_node);
946         free(garbage);
947     }
948
949     shash_add(&all_dpif_backers, type, backer);
950
951     backer->enable_recirc = check_recirc(backer);
952     backer->max_mpls_depth = check_max_mpls_depth(backer);
953     backer->masked_set_action = check_masked_set_action(backer);
954     backer->rid_pool = recirc_id_pool_create();
955
956     error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
957     if (error) {
958         VLOG_ERR("failed to listen on datapath of type %s: %s",
959                  type, ovs_strerror(error));
960         close_dpif_backer(backer);
961         return error;
962     }
963
964     if (backer->recv_set_enable) {
965         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
966     }
967
968     /* This check fails if performed before udpif threads have been set,
969      * as the kernel module checks that the 'pid' in userspace action
970      * is non-zero. */
971     backer->variable_length_userdata = check_variable_length_userdata(backer);
972     backer->dp_version_string = dpif_get_dp_version(backer->dpif);
973
974     return error;
975 }
976
977 /* Tests whether 'backer''s datapath supports recirculation.  Only newer
978  * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys.  We need to disable some
979  * features on older datapaths that don't support this feature.
980  *
981  * Returns false if 'backer' definitely does not support recirculation, true if
982  * it seems to support recirculation or if at least the error we get is
983  * ambiguous. */
984 static bool
985 check_recirc(struct dpif_backer *backer)
986 {
987     struct flow flow;
988     struct odputil_keybuf keybuf;
989     struct ofpbuf key;
990     int error;
991     bool enable_recirc = false;
992
993     memset(&flow, 0, sizeof flow);
994     flow.recirc_id = 1;
995     flow.dp_hash = 1;
996
997     ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
998     odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
999
1000     error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
1001                           ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
1002                           0, NULL);
1003     if (error && error != EEXIST) {
1004         if (error != EINVAL) {
1005             VLOG_WARN("%s: Reciculation flow probe failed (%s)",
1006                       dpif_name(backer->dpif), ovs_strerror(error));
1007         }
1008         goto done;
1009     }
1010
1011     error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
1012                           NULL);
1013     if (error) {
1014         VLOG_WARN("%s: failed to delete recirculation feature probe flow",
1015                   dpif_name(backer->dpif));
1016     }
1017
1018     enable_recirc = true;
1019
1020 done:
1021     if (enable_recirc) {
1022         VLOG_INFO("%s: Datapath supports recirculation",
1023                   dpif_name(backer->dpif));
1024     } else {
1025         VLOG_INFO("%s: Datapath does not support recirculation",
1026                   dpif_name(backer->dpif));
1027     }
1028
1029     return enable_recirc;
1030 }
1031
1032 /* Tests whether 'backer''s datapath supports variable-length
1033  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
1034  * to disable some features on older datapaths that don't support this
1035  * feature.
1036  *
1037  * Returns false if 'backer' definitely does not support variable-length
1038  * userdata, true if it seems to support them or if at least the error we get
1039  * is ambiguous. */
1040 static bool
1041 check_variable_length_userdata(struct dpif_backer *backer)
1042 {
1043     struct eth_header *eth;
1044     struct ofpbuf actions;
1045     struct dpif_execute execute;
1046     struct ofpbuf packet;
1047     size_t start;
1048     int error;
1049
1050     /* Compose a userspace action that will cause an ERANGE error on older
1051      * datapaths that don't support variable-length userdata.
1052      *
1053      * We really test for using userdata longer than 8 bytes, but older
1054      * datapaths accepted these, silently truncating the userdata to 8 bytes.
1055      * The same older datapaths rejected userdata shorter than 8 bytes, so we
1056      * test for that instead as a proxy for longer userdata support. */
1057     ofpbuf_init(&actions, 64);
1058     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
1059     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
1060                    dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
1061     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
1062     nl_msg_end_nested(&actions, start);
1063
1064     /* Compose a dummy ethernet packet. */
1065     ofpbuf_init(&packet, ETH_HEADER_LEN);
1066     eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
1067     eth->eth_type = htons(0x1234);
1068
1069     /* Execute the actions.  On older datapaths this fails with ERANGE, on
1070      * newer datapaths it succeeds. */
1071     execute.actions = ofpbuf_data(&actions);
1072     execute.actions_len = ofpbuf_size(&actions);
1073     execute.packet = &packet;
1074     execute.md = PKT_METADATA_INITIALIZER(0);
1075     execute.needs_help = false;
1076     execute.probe = true;
1077
1078     error = dpif_execute(backer->dpif, &execute);
1079
1080     ofpbuf_uninit(&packet);
1081     ofpbuf_uninit(&actions);
1082
1083     switch (error) {
1084     case 0:
1085         return true;
1086
1087     case ERANGE:
1088         /* Variable-length userdata is not supported. */
1089         VLOG_WARN("%s: datapath does not support variable-length userdata "
1090                   "feature (needs Linux 3.10+ or kernel module from OVS "
1091                   "1..11+).  The NXAST_SAMPLE action will be ignored.",
1092                   dpif_name(backer->dpif));
1093         return false;
1094
1095     default:
1096         /* Something odd happened.  We're not sure whether variable-length
1097          * userdata is supported.  Default to "yes". */
1098         VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
1099                   dpif_name(backer->dpif), ovs_strerror(error));
1100         return true;
1101     }
1102 }
1103
1104 /* Tests the MPLS label stack depth supported by 'backer''s datapath.
1105  *
1106  * Returns the number of elements in a struct flow's mpls_lse field
1107  * if the datapath supports at least that many entries in an
1108  * MPLS label stack.
1109  * Otherwise returns the number of MPLS push actions supported by
1110  * the datapath. */
1111 static size_t
1112 check_max_mpls_depth(struct dpif_backer *backer)
1113 {
1114     struct flow flow;
1115     int n;
1116
1117     for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) {
1118         struct odputil_keybuf keybuf;
1119         struct ofpbuf key;
1120         int error;
1121
1122         memset(&flow, 0, sizeof flow);
1123         flow.dl_type = htons(ETH_TYPE_MPLS);
1124         flow_set_mpls_bos(&flow, n, 1);
1125
1126         ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
1127         odp_flow_key_from_flow(&key, &flow, NULL, 0, false);
1128
1129         error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_PROBE,
1130                               ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0,
1131                               NULL, 0, NULL);
1132         if (error && error != EEXIST) {
1133             if (error != EINVAL) {
1134                 VLOG_WARN("%s: MPLS stack length feature probe failed (%s)",
1135                           dpif_name(backer->dpif), ovs_strerror(error));
1136             }
1137             break;
1138         }
1139
1140         error = dpif_flow_del(backer->dpif, ofpbuf_data(&key),
1141                               ofpbuf_size(&key), NULL);
1142         if (error) {
1143             VLOG_WARN("%s: failed to delete MPLS feature probe flow",
1144                       dpif_name(backer->dpif));
1145         }
1146     }
1147
1148     VLOG_INFO("%s: MPLS label stack length probed as %d",
1149               dpif_name(backer->dpif), n);
1150     return n;
1151 }
1152
1153 /* Tests whether 'backer''s datapath supports masked data in
1154  * OVS_ACTION_ATTR_SET actions.  We need to disable some features on older
1155  * datapaths that don't support this feature. */
1156 static bool
1157 check_masked_set_action(struct dpif_backer *backer)
1158 {
1159     struct eth_header *eth;
1160     struct ofpbuf actions;
1161     struct dpif_execute execute;
1162     struct ofpbuf packet;
1163     int error;
1164     struct ovs_key_ethernet key, mask;
1165
1166     /* Compose a set action that will cause an EINVAL error on older
1167      * datapaths that don't support masked set actions.
1168      * Avoid using a full mask, as it could be translated to a non-masked
1169      * set action instead. */
1170     ofpbuf_init(&actions, 64);
1171     memset(&key, 0x53, sizeof key);
1172     memset(&mask, 0x7f, sizeof mask);
1173     commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
1174                              sizeof key);
1175
1176     /* Compose a dummy ethernet packet. */
1177     ofpbuf_init(&packet, ETH_HEADER_LEN);
1178     eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN);
1179     eth->eth_type = htons(0x1234);
1180
1181     /* Execute the actions.  On older datapaths this fails with EINVAL, on
1182      * newer datapaths it succeeds. */
1183     execute.actions = ofpbuf_data(&actions);
1184     execute.actions_len = ofpbuf_size(&actions);
1185     execute.packet = &packet;
1186     execute.md = PKT_METADATA_INITIALIZER(0);
1187     execute.needs_help = false;
1188     execute.probe = true;
1189
1190     error = dpif_execute(backer->dpif, &execute);
1191
1192     ofpbuf_uninit(&packet);
1193     ofpbuf_uninit(&actions);
1194
1195     if (error) {
1196         /* Masked set action is not supported. */
1197         VLOG_INFO("%s: datapath does not support masked set action feature.",
1198                   dpif_name(backer->dpif));
1199     }
1200     return !error;
1201 }
1202
1203 static int
1204 construct(struct ofproto *ofproto_)
1205 {
1206     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1207     struct shash_node *node, *next;
1208     int error;
1209
1210     error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
1211     if (error) {
1212         return error;
1213     }
1214
1215     ofproto->netflow = NULL;
1216     ofproto->sflow = NULL;
1217     ofproto->ipfix = NULL;
1218     ofproto->stp = NULL;
1219     ofproto->rstp = NULL;
1220     ofproto->dump_seq = 0;
1221     hmap_init(&ofproto->bundles);
1222     ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
1223     ofproto->ms = NULL;
1224     ofproto->mbridge = mbridge_create();
1225     ofproto->has_bonded_bundles = false;
1226     ofproto->lacp_enabled = false;
1227     ovs_mutex_init_adaptive(&ofproto->stats_mutex);
1228     ovs_mutex_init(&ofproto->vsp_mutex);
1229
1230     guarded_list_init(&ofproto->pins);
1231
1232     ofproto_dpif_unixctl_init();
1233     ovs_router_unixctl_register();
1234
1235     hmap_init(&ofproto->vlandev_map);
1236     hmap_init(&ofproto->realdev_vid_map);
1237
1238     sset_init(&ofproto->ports);
1239     sset_init(&ofproto->ghost_ports);
1240     sset_init(&ofproto->port_poll_set);
1241     ofproto->port_poll_errno = 0;
1242     ofproto->change_seq = 0;
1243     ofproto->pins_seq = seq_create();
1244     ofproto->pins_seqno = seq_read(ofproto->pins_seq);
1245
1246
1247     SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
1248         struct iface_hint *iface_hint = node->data;
1249
1250         if (!strcmp(iface_hint->br_name, ofproto->up.name)) {
1251             /* Check if the datapath already has this port. */
1252             if (dpif_port_exists(ofproto->backer->dpif, node->name)) {
1253                 sset_add(&ofproto->ports, node->name);
1254             }
1255
1256             free(iface_hint->br_name);
1257             free(iface_hint->br_type);
1258             free(iface_hint);
1259             shash_delete(&init_ofp_ports, node);
1260         }
1261     }
1262
1263     hmap_insert(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node,
1264                 hash_string(ofproto->up.name, 0));
1265     memset(&ofproto->stats, 0, sizeof ofproto->stats);
1266
1267     ofproto_init_tables(ofproto_, N_TABLES);
1268     error = add_internal_flows(ofproto);
1269
1270     ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
1271
1272     return error;
1273 }
1274
1275 static int
1276 add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
1277                   const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
1278 {
1279     struct match match;
1280     int error;
1281     struct rule *rule;
1282
1283     match_init_catchall(&match);
1284     match_set_reg(&match, 0, id);
1285
1286     error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
1287                                            &rule);
1288     *rulep = error ? NULL : rule_dpif_cast(rule);
1289
1290     return error;
1291 }
1292
1293 static int
1294 add_internal_flows(struct ofproto_dpif *ofproto)
1295 {
1296     struct ofpact_controller *controller;
1297     uint64_t ofpacts_stub[128 / 8];
1298     struct ofpbuf ofpacts;
1299     struct rule *unused_rulep OVS_UNUSED;
1300     struct ofpact_resubmit *resubmit;
1301     struct match match;
1302     int error;
1303     int id;
1304
1305     ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
1306     id = 1;
1307
1308     controller = ofpact_put_CONTROLLER(&ofpacts);
1309     controller->max_len = UINT16_MAX;
1310     controller->controller_id = 0;
1311     controller->reason = OFPR_NO_MATCH;
1312     ofpact_pad(&ofpacts);
1313
1314     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1315                                    &ofproto->miss_rule);
1316     if (error) {
1317         return error;
1318     }
1319
1320     ofpbuf_clear(&ofpacts);
1321     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1322                                    &ofproto->no_packet_in_rule);
1323     if (error) {
1324         return error;
1325     }
1326
1327     error = add_internal_miss_flow(ofproto, id++, &ofpacts,
1328                                    &ofproto->drop_frags_rule);
1329     if (error) {
1330         return error;
1331     }
1332
1333     /* Continue non-recirculation rule lookups from table 0.
1334      *
1335      * (priority=2), recirc=0, actions=resubmit(, 0)
1336      */
1337     resubmit = ofpact_put_RESUBMIT(&ofpacts);
1338     resubmit->in_port = OFPP_IN_PORT;
1339     resubmit->table_id = 0;
1340
1341     match_init_catchall(&match);
1342     match_set_recirc_id(&match, 0);
1343
1344     error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
1345                                            &unused_rulep);
1346     if (error) {
1347         return error;
1348     }
1349
1350     /* Drop any run away recirc rule lookups. Recirc_id has to be
1351      * non-zero when reaching this rule.
1352      *
1353      * (priority=1), *, actions=drop
1354      */
1355     ofpbuf_clear(&ofpacts);
1356     match_init_catchall(&match);
1357     error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
1358                                            &unused_rulep);
1359
1360     return error;
1361 }
1362
1363 static void
1364 destruct(struct ofproto *ofproto_)
1365 {
1366     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1367     struct ofproto_packet_in *pin, *next_pin;
1368     struct rule_dpif *rule;
1369     struct oftable *table;
1370     struct list pins;
1371
1372     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1373     xlate_txn_start();
1374     xlate_remove_ofproto(ofproto);
1375     xlate_txn_commit();
1376
1377     /* Ensure that the upcall processing threads have no remaining references
1378      * to the ofproto or anything in it. */
1379     udpif_synchronize(ofproto->backer->udpif);
1380
1381     hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
1382
1383     OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
1384         CLS_FOR_EACH_SAFE (rule, up.cr, &table->cls) {
1385             ofproto_rule_delete(&ofproto->up, &rule->up);
1386         }
1387     }
1388
1389     guarded_list_pop_all(&ofproto->pins, &pins);
1390     LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
1391         list_remove(&pin->list_node);
1392         free(CONST_CAST(void *, pin->up.packet));
1393         free(pin);
1394     }
1395     guarded_list_destroy(&ofproto->pins);
1396
1397     mbridge_unref(ofproto->mbridge);
1398
1399     netflow_unref(ofproto->netflow);
1400     dpif_sflow_unref(ofproto->sflow);
1401     dpif_ipfix_unref(ofproto->ipfix);
1402     hmap_destroy(&ofproto->bundles);
1403     mac_learning_unref(ofproto->ml);
1404     mcast_snooping_unref(ofproto->ms);
1405
1406     hmap_destroy(&ofproto->vlandev_map);
1407     hmap_destroy(&ofproto->realdev_vid_map);
1408
1409     sset_destroy(&ofproto->ports);
1410     sset_destroy(&ofproto->ghost_ports);
1411     sset_destroy(&ofproto->port_poll_set);
1412
1413     ovs_mutex_destroy(&ofproto->stats_mutex);
1414     ovs_mutex_destroy(&ofproto->vsp_mutex);
1415
1416     seq_destroy(ofproto->pins_seq);
1417
1418     close_dpif_backer(ofproto->backer);
1419 }
1420
1421 static int
1422 run(struct ofproto *ofproto_)
1423 {
1424     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1425     uint64_t new_seq, new_dump_seq;
1426
1427     if (mbridge_need_revalidate(ofproto->mbridge)) {
1428         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1429         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
1430         mac_learning_flush(ofproto->ml);
1431         ovs_rwlock_unlock(&ofproto->ml->rwlock);
1432         mcast_snooping_mdb_flush(ofproto->ms);
1433     }
1434
1435     /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
1436      * flow restore.  Even though nothing is processed during flow restore,
1437      * all queued 'pins' will be handled immediately when flow restore
1438      * completes. */
1439     ofproto->pins_seqno = seq_read(ofproto->pins_seq);
1440
1441     /* Do not perform any periodic activity required by 'ofproto' while
1442      * waiting for flow restore to complete. */
1443     if (!ofproto_get_flow_restore_wait()) {
1444         struct ofproto_packet_in *pin, *next_pin;
1445         struct list pins;
1446
1447         guarded_list_pop_all(&ofproto->pins, &pins);
1448         LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
1449             connmgr_send_packet_in(ofproto->up.connmgr, pin);
1450             list_remove(&pin->list_node);
1451             free(CONST_CAST(void *, pin->up.packet));
1452             free(pin);
1453         }
1454     }
1455
1456     if (ofproto->netflow) {
1457         netflow_run(ofproto->netflow);
1458     }
1459     if (ofproto->sflow) {
1460         dpif_sflow_run(ofproto->sflow);
1461     }
1462     if (ofproto->ipfix) {
1463         dpif_ipfix_run(ofproto->ipfix);
1464     }
1465
1466     new_seq = seq_read(connectivity_seq_get());
1467     if (ofproto->change_seq != new_seq) {
1468         struct ofport_dpif *ofport;
1469
1470         HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1471             port_run(ofport);
1472         }
1473
1474         ofproto->change_seq = new_seq;
1475     }
1476     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1477         struct ofbundle *bundle;
1478
1479         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1480             bundle_run(bundle);
1481         }
1482     }
1483
1484     stp_run(ofproto);
1485     rstp_run(ofproto);
1486     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
1487     if (mac_learning_run(ofproto->ml)) {
1488         ofproto->backer->need_revalidate = REV_MAC_LEARNING;
1489     }
1490     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1491
1492     if (mcast_snooping_run(ofproto->ms)) {
1493         ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
1494     }
1495
1496     new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
1497     if (ofproto->dump_seq != new_dump_seq) {
1498         struct rule *rule, *next_rule;
1499
1500         /* We know stats are relatively fresh, so now is a good time to do some
1501          * periodic work. */
1502         ofproto->dump_seq = new_dump_seq;
1503
1504         /* Expire OpenFlow flows whose idle_timeout or hard_timeout
1505          * has passed. */
1506         ovs_mutex_lock(&ofproto_mutex);
1507         LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
1508                             &ofproto->up.expirable) {
1509             rule_expire(rule_dpif_cast(rule));
1510         }
1511         ovs_mutex_unlock(&ofproto_mutex);
1512
1513         /* All outstanding data in existing flows has been accounted, so it's a
1514          * good time to do bond rebalancing. */
1515         if (ofproto->has_bonded_bundles) {
1516             struct ofbundle *bundle;
1517
1518             HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1519                 if (bundle->bond) {
1520                     bond_rebalance(bundle->bond);
1521                 }
1522             }
1523         }
1524     }
1525
1526     return 0;
1527 }
1528
1529 static void
1530 wait(struct ofproto *ofproto_)
1531 {
1532     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1533
1534     if (ofproto_get_flow_restore_wait()) {
1535         return;
1536     }
1537
1538     if (ofproto->sflow) {
1539         dpif_sflow_wait(ofproto->sflow);
1540     }
1541     if (ofproto->ipfix) {
1542         dpif_ipfix_wait(ofproto->ipfix);
1543     }
1544     if (ofproto->lacp_enabled || ofproto->has_bonded_bundles) {
1545         struct ofbundle *bundle;
1546
1547         HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
1548             bundle_wait(bundle);
1549         }
1550     }
1551     if (ofproto->netflow) {
1552         netflow_wait(ofproto->netflow);
1553     }
1554     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
1555     mac_learning_wait(ofproto->ml);
1556     ovs_rwlock_unlock(&ofproto->ml->rwlock);
1557     mcast_snooping_wait(ofproto->ms);
1558     stp_wait(ofproto);
1559     if (ofproto->backer->need_revalidate) {
1560         /* Shouldn't happen, but if it does just go around again. */
1561         VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
1562         poll_immediate_wake();
1563     }
1564
1565     seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
1566     seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
1567 }
1568
1569 static void
1570 type_get_memory_usage(const char *type, struct simap *usage)
1571 {
1572     struct dpif_backer *backer;
1573
1574     backer = shash_find_data(&all_dpif_backers, type);
1575     if (backer) {
1576         udpif_get_memory_usage(backer->udpif, usage);
1577     }
1578 }
1579
1580 static void
1581 flush(struct ofproto *ofproto_)
1582 {
1583     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1584     struct dpif_backer *backer = ofproto->backer;
1585
1586     if (backer) {
1587         udpif_flush(backer->udpif);
1588     }
1589 }
1590
1591 static void
1592 query_tables(struct ofproto *ofproto,
1593              struct ofputil_table_features *features,
1594              struct ofputil_table_stats *stats)
1595 {
1596     strcpy(features->name, "classifier");
1597
1598     if (stats) {
1599         int i;
1600
1601         for (i = 0; i < ofproto->n_tables; i++) {
1602             unsigned long missed, matched;
1603
1604             atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
1605             atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
1606
1607             stats[i].matched_count = matched;
1608             stats[i].lookup_count = matched + missed;
1609         }
1610     }
1611 }
1612
1613 static struct ofport *
1614 port_alloc(void)
1615 {
1616     struct ofport_dpif *port = xmalloc(sizeof *port);
1617     return &port->up;
1618 }
1619
/* Frees the 'struct ofport_dpif' that contains 'port_'.  Counterpart of
 * port_alloc(). */
static void
port_dealloc(struct ofport *port_)
{
    free(ofport_dpif_cast(port_));
}
1626
1627 static int
1628 port_construct(struct ofport *port_)
1629 {
1630     struct ofport_dpif *port = ofport_dpif_cast(port_);
1631     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1632     const struct netdev *netdev = port->up.netdev;
1633     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1634     struct dpif_port dpif_port;
1635     int error;
1636
1637     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1638     port->bundle = NULL;
1639     port->cfm = NULL;
1640     port->bfd = NULL;
1641     port->may_enable = false;
1642     port->stp_port = NULL;
1643     port->stp_state = STP_DISABLED;
1644     port->rstp_port = NULL;
1645     port->rstp_state = RSTP_DISABLED;
1646     port->is_tunnel = false;
1647     port->peer = NULL;
1648     port->qdscp = NULL;
1649     port->n_qdscp = 0;
1650     port->realdev_ofp_port = 0;
1651     port->vlandev_vid = 0;
1652     port->carrier_seq = netdev_get_carrier_resets(netdev);
1653     port->is_layer3 = netdev_vport_is_layer3(netdev);
1654
1655     if (netdev_vport_is_patch(netdev)) {
1656         /* By bailing out here, we don't submit the port to the sFlow module
1657          * to be considered for counter polling export.  This is correct
1658          * because the patch port represents an interface that sFlow considers
1659          * to be "internal" to the switch as a whole, and therefore not an
1660          * candidate for counter polling. */
1661         port->odp_port = ODPP_NONE;
1662         ofport_update_peer(port);
1663         return 0;
1664     }
1665
1666     error = dpif_port_query_by_name(ofproto->backer->dpif,
1667                                     netdev_vport_get_dpif_port(netdev, namebuf,
1668                                                                sizeof namebuf),
1669                                     &dpif_port);
1670     if (error) {
1671         return error;
1672     }
1673
1674     port->odp_port = dpif_port.port_no;
1675
1676     if (netdev_get_tunnel_config(netdev)) {
1677         tnl_port_add(port, port->up.netdev, port->odp_port);
1678         port->is_tunnel = true;
1679         if (ofproto->ipfix) {
1680            dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
1681         }
1682     } else {
1683         /* Sanity-check that a mapping doesn't already exist.  This
1684          * shouldn't happen for non-tunnel ports. */
1685         if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
1686             VLOG_ERR("port %s already has an OpenFlow port number",
1687                      dpif_port.name);
1688             dpif_port_destroy(&dpif_port);
1689             return EBUSY;
1690         }
1691
1692         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1693         hmap_insert(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node,
1694                     hash_odp_port(port->odp_port));
1695         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1696     }
1697     dpif_port_destroy(&dpif_port);
1698
1699     if (ofproto->sflow) {
1700         dpif_sflow_add_port(ofproto->sflow, port_, port->odp_port);
1701     }
1702
1703     return 0;
1704 }
1705
1706 static void
1707 port_destruct(struct ofport *port_)
1708 {
1709     struct ofport_dpif *port = ofport_dpif_cast(port_);
1710     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1711     const char *devname = netdev_get_name(port->up.netdev);
1712     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
1713     const char *dp_port_name;
1714
1715     ofproto->backer->need_revalidate = REV_RECONFIGURE;
1716     xlate_txn_start();
1717     xlate_ofport_remove(port);
1718     xlate_txn_commit();
1719
1720     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
1721                                               sizeof namebuf);
1722     if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
1723         /* The underlying device is still there, so delete it.  This
1724          * happens when the ofproto is being destroyed, since the caller
1725          * assumes that removal of attached ports will happen as part of
1726          * destruction. */
1727         if (!port->is_tunnel) {
1728             dpif_port_del(ofproto->backer->dpif, port->odp_port);
1729         }
1730     }
1731
1732     if (port->peer) {
1733         port->peer->peer = NULL;
1734         port->peer = NULL;
1735     }
1736
1737     if (port->odp_port != ODPP_NONE && !port->is_tunnel) {
1738         ovs_rwlock_wrlock(&ofproto->backer->odp_to_ofport_lock);
1739         hmap_remove(&ofproto->backer->odp_to_ofport_map, &port->odp_port_node);
1740         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
1741     }
1742
1743     if (port->is_tunnel && ofproto->ipfix) {
1744        dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
1745     }
1746
1747     tnl_port_del(port);
1748     sset_find_and_delete(&ofproto->ports, devname);
1749     sset_find_and_delete(&ofproto->ghost_ports, devname);
1750     bundle_remove(port_);
1751     set_cfm(port_, NULL);
1752     set_bfd(port_, NULL);
1753     if (port->stp_port) {
1754         stp_port_disable(port->stp_port);
1755     }
1756     set_rstp_port(port_, NULL);
1757     if (ofproto->sflow) {
1758         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
1759     }
1760
1761     free(port->qdscp);
1762 }
1763
1764 static void
1765 port_modified(struct ofport *port_)
1766 {
1767     struct ofport_dpif *port = ofport_dpif_cast(port_);
1768
1769     if (port->bundle && port->bundle->bond) {
1770         bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
1771     }
1772
1773     if (port->cfm) {
1774         cfm_set_netdev(port->cfm, port->up.netdev);
1775     }
1776
1777     if (port->bfd) {
1778         bfd_set_netdev(port->bfd, port->up.netdev);
1779     }
1780
1781     ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
1782                                      port->up.pp.hw_addr);
1783
1784     if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
1785                                                 port->odp_port)) {
1786         ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
1787             REV_RECONFIGURE;
1788     }
1789
1790     ofport_update_peer(port);
1791 }
1792
1793 static void
1794 port_reconfigured(struct ofport *port_, enum ofputil_port_config old_config)
1795 {
1796     struct ofport_dpif *port = ofport_dpif_cast(port_);
1797     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
1798     enum ofputil_port_config changed = old_config ^ port->up.pp.config;
1799
1800     if (changed & (OFPUTIL_PC_NO_RECV | OFPUTIL_PC_NO_RECV_STP |
1801                    OFPUTIL_PC_NO_FWD | OFPUTIL_PC_NO_FLOOD |
1802                    OFPUTIL_PC_NO_PACKET_IN)) {
1803         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1804
1805         if (changed & OFPUTIL_PC_NO_FLOOD && port->bundle) {
1806             bundle_update(port->bundle);
1807         }
1808     }
1809 }
1810
1811 static int
1812 set_sflow(struct ofproto *ofproto_,
1813           const struct ofproto_sflow_options *sflow_options)
1814 {
1815     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1816     struct dpif_sflow *ds = ofproto->sflow;
1817
1818     if (sflow_options) {
1819         if (!ds) {
1820             struct ofport_dpif *ofport;
1821
1822             ds = ofproto->sflow = dpif_sflow_create();
1823             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1824                 dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
1825             }
1826             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1827         }
1828         dpif_sflow_set_options(ds, sflow_options);
1829     } else {
1830         if (ds) {
1831             dpif_sflow_unref(ds);
1832             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1833             ofproto->sflow = NULL;
1834         }
1835     }
1836     return 0;
1837 }
1838
1839 static int
1840 set_ipfix(
1841     struct ofproto *ofproto_,
1842     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
1843     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
1844     size_t n_flow_exporters_options)
1845 {
1846     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
1847     struct dpif_ipfix *di = ofproto->ipfix;
1848     bool has_options = bridge_exporter_options || flow_exporters_options;
1849     bool new_di = false;
1850
1851     if (has_options && !di) {
1852         di = ofproto->ipfix = dpif_ipfix_create();
1853         new_di = true;
1854     }
1855
1856     if (di) {
1857         /* Call set_options in any case to cleanly flush the flow
1858          * caches in the last exporters that are to be destroyed. */
1859         dpif_ipfix_set_options(
1860             di, bridge_exporter_options, flow_exporters_options,
1861             n_flow_exporters_options);
1862
1863         /* Add tunnel ports only when a new ipfix created */
1864         if (new_di == true) {
1865             struct ofport_dpif *ofport;
1866             HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
1867                 if (ofport->is_tunnel == true) {
1868                     dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
1869                 }
1870             }
1871         }
1872
1873         if (!has_options) {
1874             dpif_ipfix_unref(di);
1875             ofproto->ipfix = NULL;
1876         }
1877     }
1878
1879     return 0;
1880 }
1881
1882 static int
1883 set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
1884 {
1885     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1886     int error = 0;
1887
1888     if (s) {
1889         if (!ofport->cfm) {
1890             struct ofproto_dpif *ofproto;
1891
1892             ofproto = ofproto_dpif_cast(ofport->up.ofproto);
1893             ofproto->backer->need_revalidate = REV_RECONFIGURE;
1894             ofport->cfm = cfm_create(ofport->up.netdev);
1895         }
1896
1897         if (cfm_configure(ofport->cfm, s)) {
1898             error = 0;
1899             goto out;
1900         }
1901
1902         error = EINVAL;
1903     }
1904     cfm_unref(ofport->cfm);
1905     ofport->cfm = NULL;
1906 out:
1907     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1908                                      ofport->up.pp.hw_addr);
1909     return error;
1910 }
1911
1912 static bool
1913 cfm_status_changed(struct ofport *ofport_)
1914 {
1915     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1916
1917     return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
1918 }
1919
1920 static int
1921 get_cfm_status(const struct ofport *ofport_,
1922                struct cfm_status *status)
1923 {
1924     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1925     int ret = 0;
1926
1927     if (ofport->cfm) {
1928         cfm_get_status(ofport->cfm, status);
1929     } else {
1930         ret = ENOENT;
1931     }
1932
1933     return ret;
1934 }
1935
1936 static int
1937 set_bfd(struct ofport *ofport_, const struct smap *cfg)
1938 {
1939     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
1940     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1941     struct bfd *old;
1942
1943     old = ofport->bfd;
1944     ofport->bfd = bfd_configure(old, netdev_get_name(ofport->up.netdev),
1945                                 cfg, ofport->up.netdev);
1946     if (ofport->bfd != old) {
1947         ofproto->backer->need_revalidate = REV_RECONFIGURE;
1948     }
1949     ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
1950                                      ofport->up.pp.hw_addr);
1951     return 0;
1952 }
1953
1954 static bool
1955 bfd_status_changed(struct ofport *ofport_)
1956 {
1957     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1958
1959     return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
1960 }
1961
1962 static int
1963 get_bfd_status(struct ofport *ofport_, struct smap *smap)
1964 {
1965     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
1966     int ret = 0;
1967
1968     if (ofport->bfd) {
1969         bfd_get_status(ofport->bfd, smap);
1970     } else {
1971         ret = ENOENT;
1972     }
1973
1974     return ret;
1975 }
1976 \f
1977 /* Spanning Tree. */
1978
1979 /* Called while rstp_mutex is held. */
1980 static void
1981 rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_)
1982 {
1983     struct ofproto_dpif *ofproto = ofproto_;
1984     struct ofport_dpif *ofport = ofport_;
1985     struct eth_header *eth = ofpbuf_l2(pkt);
1986
1987     netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
1988     if (eth_addr_is_zero(eth->eth_src)) {
1989         VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
1990                      "does not have a configured source MAC address.",
1991                      ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
1992     } else {
1993         ofproto_dpif_send_packet(ofport, pkt);
1994     }
1995     ofpbuf_delete(pkt);
1996 }
1997
1998 static void
1999 send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
2000 {
2001     struct ofproto_dpif *ofproto = ofproto_;
2002     struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
2003     struct ofport_dpif *ofport;
2004
2005     ofport = stp_port_get_aux(sp);
2006     if (!ofport) {
2007         VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
2008                      ofproto->up.name, port_num);
2009     } else {
2010         struct eth_header *eth = ofpbuf_l2(pkt);
2011
2012         netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
2013         if (eth_addr_is_zero(eth->eth_src)) {
2014             VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
2015                          "with unknown MAC", ofproto->up.name, port_num);
2016         } else {
2017             ofproto_dpif_send_packet(ofport, pkt);
2018         }
2019     }
2020     ofpbuf_delete(pkt);
2021 }
2022
2023 /* Configure RSTP on 'ofproto_' using the settings defined in 's'. */
2024 static void
2025 set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
2026 {
2027     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2028
2029     /* Only revalidate flows if the configuration changed. */
2030     if (!s != !ofproto->rstp) {
2031         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2032     }
2033
2034     if (s) {
2035         if (!ofproto->rstp) {
2036             ofproto->rstp = rstp_create(ofproto_->name, s->address,
2037                                         rstp_send_bpdu_cb, ofproto);
2038             ofproto->rstp_last_tick = time_msec();
2039         }
2040         rstp_set_bridge_address(ofproto->rstp, s->address);
2041         rstp_set_bridge_priority(ofproto->rstp, s->priority);
2042         rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
2043         rstp_set_bridge_force_protocol_version(ofproto->rstp,
2044                                                s->force_protocol_version);
2045         rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
2046         rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
2047         rstp_set_bridge_transmit_hold_count(ofproto->rstp,
2048                                             s->transmit_hold_count);
2049     } else {
2050         struct ofport *ofport;
2051         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2052             set_rstp_port(ofport, NULL);
2053         }
2054         rstp_unref(ofproto->rstp);
2055         ofproto->rstp = NULL;
2056     }
2057 }
2058
2059 static void
2060 get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
2061 {
2062     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2063
2064     if (ofproto->rstp) {
2065         s->enabled = true;
2066         s->root_id = rstp_get_root_id(ofproto->rstp);
2067         s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
2068         s->designated_id = rstp_get_designated_id(ofproto->rstp);
2069         s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
2070         s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
2071         s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
2072     } else {
2073         s->enabled = false;
2074     }
2075 }
2076
2077 static void
2078 update_rstp_port_state(struct ofport_dpif *ofport)
2079 {
2080     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2081     enum rstp_state state;
2082
2083     /* Figure out new state. */
2084     state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
2085         : RSTP_DISABLED;
2086
2087     /* Update state. */
2088     if (ofport->rstp_state != state) {
2089         enum ofputil_port_state of_state;
2090         bool fwd_change;
2091
2092         VLOG_DBG("port %s: RSTP state changed from %s to %s",
2093                  netdev_get_name(ofport->up.netdev),
2094                  rstp_state_name(ofport->rstp_state),
2095                  rstp_state_name(state));
2096         if (rstp_learn_in_state(ofport->rstp_state)
2097                 != rstp_learn_in_state(state)) {
2098             /* xxx Learning action flows should also be flushed. */
2099             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2100             mac_learning_flush(ofproto->ml);
2101             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2102         }
2103         fwd_change = rstp_forward_in_state(ofport->rstp_state)
2104             != rstp_forward_in_state(state);
2105
2106         ofproto->backer->need_revalidate = REV_RSTP;
2107         ofport->rstp_state = state;
2108
2109         if (fwd_change && ofport->bundle) {
2110             bundle_update(ofport->bundle);
2111         }
2112
2113         /* Update the RSTP state bits in the OpenFlow port description. */
2114         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2115         of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
2116                 : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2117                 : state == RSTP_DISCARDING ?  OFPUTIL_PS_STP_LISTEN
2118                 : 0);
2119         ofproto_port_set_state(&ofport->up, of_state);
2120     }
2121 }
2122
2123 static void
2124 rstp_run(struct ofproto_dpif *ofproto)
2125 {
2126     if (ofproto->rstp) {
2127         long long int now = time_msec();
2128         long long int elapsed = now - ofproto->rstp_last_tick;
2129         struct rstp_port *rp;
2130         struct ofport_dpif *ofport;
2131
2132         /* Every second, decrease the values of the timers. */
2133         if (elapsed >= 1000) {
2134             rstp_tick_timers(ofproto->rstp);
2135             ofproto->rstp_last_tick = now;
2136         }
2137         rp = NULL;
2138         while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
2139             update_rstp_port_state(ofport);
2140         }
2141         /* FIXME: This check should be done on-event (i.e., when setting
2142          * p->fdb_flush) and not periodically.
2143          */
2144         if (rstp_check_and_reset_fdb_flush(ofproto->rstp)) {
2145             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2146             /* FIXME: RSTP should be able to flush the entries pertaining to a
2147              * single port, not the whole table.
2148              */
2149             mac_learning_flush(ofproto->ml);
2150             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2151         }
2152     }
2153 }
2154
2155 /* Configures STP on 'ofproto_' using the settings defined in 's'. */
2156 static int
2157 set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s)
2158 {
2159     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2160
2161     /* Only revalidate flows if the configuration changed. */
2162     if (!s != !ofproto->stp) {
2163         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2164     }
2165
2166     if (s) {
2167         if (!ofproto->stp) {
2168             ofproto->stp = stp_create(ofproto_->name, s->system_id,
2169                                       send_bpdu_cb, ofproto);
2170             ofproto->stp_last_tick = time_msec();
2171         }
2172
2173         stp_set_bridge_id(ofproto->stp, s->system_id);
2174         stp_set_bridge_priority(ofproto->stp, s->priority);
2175         stp_set_hello_time(ofproto->stp, s->hello_time);
2176         stp_set_max_age(ofproto->stp, s->max_age);
2177         stp_set_forward_delay(ofproto->stp, s->fwd_delay);
2178     }  else {
2179         struct ofport *ofport;
2180
2181         HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
2182             set_stp_port(ofport, NULL);
2183         }
2184
2185         stp_unref(ofproto->stp);
2186         ofproto->stp = NULL;
2187     }
2188
2189     return 0;
2190 }
2191
2192 static int
2193 get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s)
2194 {
2195     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2196
2197     if (ofproto->stp) {
2198         s->enabled = true;
2199         s->bridge_id = stp_get_bridge_id(ofproto->stp);
2200         s->designated_root = stp_get_designated_root(ofproto->stp);
2201         s->root_path_cost = stp_get_root_path_cost(ofproto->stp);
2202     } else {
2203         s->enabled = false;
2204     }
2205
2206     return 0;
2207 }
2208
2209 static void
2210 update_stp_port_state(struct ofport_dpif *ofport)
2211 {
2212     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2213     enum stp_state state;
2214
2215     /* Figure out new state. */
2216     state = ofport->stp_port ? stp_port_get_state(ofport->stp_port)
2217                              : STP_DISABLED;
2218
2219     /* Update state. */
2220     if (ofport->stp_state != state) {
2221         enum ofputil_port_state of_state;
2222         bool fwd_change;
2223
2224         VLOG_DBG("port %s: STP state changed from %s to %s",
2225                  netdev_get_name(ofport->up.netdev),
2226                  stp_state_name(ofport->stp_state),
2227                  stp_state_name(state));
2228         if (stp_learn_in_state(ofport->stp_state)
2229                 != stp_learn_in_state(state)) {
2230             /* xxx Learning action flows should also be flushed. */
2231             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2232             mac_learning_flush(ofproto->ml);
2233             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2234             mcast_snooping_mdb_flush(ofproto->ms);
2235         }
2236         fwd_change = stp_forward_in_state(ofport->stp_state)
2237                         != stp_forward_in_state(state);
2238
2239         ofproto->backer->need_revalidate = REV_STP;
2240         ofport->stp_state = state;
2241         ofport->stp_state_entered = time_msec();
2242
2243         if (fwd_change && ofport->bundle) {
2244             bundle_update(ofport->bundle);
2245         }
2246
2247         /* Update the STP state bits in the OpenFlow port description. */
2248         of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
2249         of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN
2250                      : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN
2251                      : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
2252                      : state == STP_BLOCKING ?  OFPUTIL_PS_STP_BLOCK
2253                      : 0);
2254         ofproto_port_set_state(&ofport->up, of_state);
2255     }
2256 }
2257
2258 /* Configures STP on 'ofport_' using the settings defined in 's'.  The
2259  * caller is responsible for assigning STP port numbers and ensuring
2260  * there are no duplicates. */
2261 static int
2262 set_stp_port(struct ofport *ofport_,
2263              const struct ofproto_port_stp_settings *s)
2264 {
2265     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2266     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2267     struct stp_port *sp = ofport->stp_port;
2268
2269     if (!s || !s->enable) {
2270         if (sp) {
2271             ofport->stp_port = NULL;
2272             stp_port_disable(sp);
2273             update_stp_port_state(ofport);
2274         }
2275         return 0;
2276     } else if (sp && stp_port_no(sp) != s->port_num
2277                && ofport == stp_port_get_aux(sp)) {
2278         /* The port-id changed, so disable the old one if it's not
2279          * already in use by another port. */
2280         stp_port_disable(sp);
2281     }
2282
2283     sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
2284
2285     /* Set name before enabling the port so that debugging messages can print
2286      * the name. */
2287     stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
2288     stp_port_enable(sp);
2289
2290     stp_port_set_aux(sp, ofport);
2291     stp_port_set_priority(sp, s->priority);
2292     stp_port_set_path_cost(sp, s->path_cost);
2293
2294     update_stp_port_state(ofport);
2295
2296     return 0;
2297 }
2298
2299 static int
2300 get_stp_port_status(struct ofport *ofport_,
2301                     struct ofproto_port_stp_status *s)
2302 {
2303     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2304     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2305     struct stp_port *sp = ofport->stp_port;
2306
2307     if (!ofproto->stp || !sp) {
2308         s->enabled = false;
2309         return 0;
2310     }
2311
2312     s->enabled = true;
2313     s->port_id = stp_port_get_id(sp);
2314     s->state = stp_port_get_state(sp);
2315     s->sec_in_state = (time_msec() - ofport->stp_state_entered) / 1000;
2316     s->role = stp_port_get_role(sp);
2317
2318     return 0;
2319 }
2320
2321 static int
2322 get_stp_port_stats(struct ofport *ofport_,
2323                    struct ofproto_port_stp_stats *s)
2324 {
2325     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2326     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2327     struct stp_port *sp = ofport->stp_port;
2328
2329     if (!ofproto->stp || !sp) {
2330         s->enabled = false;
2331         return 0;
2332     }
2333
2334     s->enabled = true;
2335     stp_port_get_counts(sp, &s->tx_count, &s->rx_count, &s->error_count);
2336
2337     return 0;
2338 }
2339
2340 static void
2341 stp_run(struct ofproto_dpif *ofproto)
2342 {
2343     if (ofproto->stp) {
2344         long long int now = time_msec();
2345         long long int elapsed = now - ofproto->stp_last_tick;
2346         struct stp_port *sp;
2347
2348         if (elapsed > 0) {
2349             stp_tick(ofproto->stp, MIN(INT_MAX, elapsed));
2350             ofproto->stp_last_tick = now;
2351         }
2352         while (stp_get_changed_port(ofproto->stp, &sp)) {
2353             struct ofport_dpif *ofport = stp_port_get_aux(sp);
2354
2355             if (ofport) {
2356                 update_stp_port_state(ofport);
2357             }
2358         }
2359
2360         if (stp_check_and_reset_fdb_flush(ofproto->stp)) {
2361             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2362             mac_learning_flush(ofproto->ml);
2363             ovs_rwlock_unlock(&ofproto->ml->rwlock);
2364             mcast_snooping_mdb_flush(ofproto->ms);
2365         }
2366     }
2367 }
2368
2369 static void
2370 stp_wait(struct ofproto_dpif *ofproto)
2371 {
2372     if (ofproto->stp) {
2373         poll_timer_wait(1000);
2374     }
2375 }
2376
2377 /* Configures RSTP on 'ofport_' using the settings defined in 's'.  The
2378  * caller is responsible for assigning RSTP port numbers and ensuring
2379  * there are no duplicates. */
2380 static void
2381 set_rstp_port(struct ofport *ofport_,
2382               const struct ofproto_port_rstp_settings *s)
2383 {
2384     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2385     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2386     struct rstp_port *rp = ofport->rstp_port;
2387
2388     if (!s || !s->enable) {
2389         if (rp) {
2390             rstp_port_unref(rp);
2391             ofport->rstp_port = NULL;
2392             update_rstp_port_state(ofport);
2393         }
2394         return;
2395     }
2396
2397     /* Check if need to add a new port. */
2398     if (!rp) {
2399         rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
2400     }
2401
2402     rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
2403                   s->admin_edge_port, s->auto_edge, s->mcheck, ofport);
2404     update_rstp_port_state(ofport);
2405     /* Synchronize operational status. */
2406     rstp_port_set_mac_operational(rp, ofport->may_enable);
2407 }
2408
2409 static void
2410 get_rstp_port_status(struct ofport *ofport_,
2411         struct ofproto_port_rstp_status *s)
2412 {
2413     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2414     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2415     struct rstp_port *rp = ofport->rstp_port;
2416
2417     if (!ofproto->rstp || !rp) {
2418         s->enabled = false;
2419         return;
2420     }
2421
2422     s->enabled = true;
2423     rstp_port_get_status(rp, &s->port_id, &s->state, &s->role, &s->tx_count,
2424                          &s->rx_count, &s->error_count, &s->uptime);
2425 }
2426
2427 \f
2428 static int
2429 set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
2430            size_t n_qdscp)
2431 {
2432     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
2433     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
2434
2435     if (ofport->n_qdscp != n_qdscp
2436         || (n_qdscp && memcmp(ofport->qdscp, qdscp,
2437                               n_qdscp * sizeof *qdscp))) {
2438         ofproto->backer->need_revalidate = REV_RECONFIGURE;
2439         free(ofport->qdscp);
2440         ofport->qdscp = n_qdscp
2441             ? xmemdup(qdscp, n_qdscp * sizeof *qdscp)
2442             : NULL;
2443         ofport->n_qdscp = n_qdscp;
2444     }
2445
2446     return 0;
2447 }
2448 \f
2449 /* Bundles. */
2450
2451 /* Expires all MAC learning entries associated with 'bundle' and forces its
2452  * ofproto to revalidate every flow.
2453  *
2454  * Normally MAC learning entries are removed only from the ofproto associated
2455  * with 'bundle', but if 'all_ofprotos' is true, then the MAC learning entries
2456  * are removed from every ofproto.  When patch ports and SLB bonds are in use
2457  * and a VM migration happens and the gratuitous ARPs are somehow lost, this
2458  * avoids a MAC_ENTRY_IDLE_TIME delay before the migrated VM can communicate
2459  * with the host from which it migrated. */
2460 static void
2461 bundle_flush_macs(struct ofbundle *bundle, bool all_ofprotos)
2462 {
2463     struct ofproto_dpif *ofproto = bundle->ofproto;
2464     struct mac_learning *ml = ofproto->ml;
2465     struct mac_entry *mac, *next_mac;
2466
2467     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2468     ovs_rwlock_wrlock(&ml->rwlock);
2469     LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
2470         if (mac->port.p == bundle) {
2471             if (all_ofprotos) {
2472                 struct ofproto_dpif *o;
2473
2474                 HMAP_FOR_EACH (o, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
2475                     if (o != ofproto) {
2476                         struct mac_entry *e;
2477
2478                         ovs_rwlock_wrlock(&o->ml->rwlock);
2479                         e = mac_learning_lookup(o->ml, mac->mac, mac->vlan);
2480                         if (e) {
2481                             mac_learning_expire(o->ml, e);
2482                         }
2483                         ovs_rwlock_unlock(&o->ml->rwlock);
2484                     }
2485                 }
2486             }
2487
2488             mac_learning_expire(ml, mac);
2489         }
2490     }
2491     ovs_rwlock_unlock(&ml->rwlock);
2492 }
2493
2494 static struct ofbundle *
2495 bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
2496 {
2497     struct ofbundle *bundle;
2498
2499     HMAP_FOR_EACH_IN_BUCKET (bundle, hmap_node, hash_pointer(aux, 0),
2500                              &ofproto->bundles) {
2501         if (bundle->aux == aux) {
2502             return bundle;
2503         }
2504     }
2505     return NULL;
2506 }
2507
2508 static void
2509 bundle_update(struct ofbundle *bundle)
2510 {
2511     struct ofport_dpif *port;
2512
2513     bundle->floodable = true;
2514     LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2515         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2516             || port->is_layer3
2517             || !stp_forward_in_state(port->stp_state)) {
2518             bundle->floodable = false;
2519             break;
2520         }
2521     }
2522 }
2523
2524 static void
2525 bundle_del_port(struct ofport_dpif *port)
2526 {
2527     struct ofbundle *bundle = port->bundle;
2528
2529     bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2530
2531     list_remove(&port->bundle_node);
2532     port->bundle = NULL;
2533
2534     if (bundle->lacp) {
2535         lacp_slave_unregister(bundle->lacp, port);
2536     }
2537     if (bundle->bond) {
2538         bond_slave_unregister(bundle->bond, port);
2539     }
2540
2541     bundle_update(bundle);
2542 }
2543
2544 static bool
2545 bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
2546                 struct lacp_slave_settings *lacp)
2547 {
2548     struct ofport_dpif *port;
2549
2550     port = get_ofp_port(bundle->ofproto, ofp_port);
2551     if (!port) {
2552         return false;
2553     }
2554
2555     if (port->bundle != bundle) {
2556         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2557         if (port->bundle) {
2558             bundle_remove(&port->up);
2559         }
2560
2561         port->bundle = bundle;
2562         list_push_back(&bundle->ports, &port->bundle_node);
2563         if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
2564             || port->is_layer3
2565             || !stp_forward_in_state(port->stp_state)) {
2566             bundle->floodable = false;
2567         }
2568     }
2569     if (lacp) {
2570         bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
2571         lacp_slave_register(bundle->lacp, port, lacp);
2572     }
2573
2574     return true;
2575 }
2576
2577 static void
2578 bundle_destroy(struct ofbundle *bundle)
2579 {
2580     struct ofproto_dpif *ofproto;
2581     struct ofport_dpif *port, *next_port;
2582
2583     if (!bundle) {
2584         return;
2585     }
2586
2587     ofproto = bundle->ofproto;
2588     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
2589
2590     xlate_txn_start();
2591     xlate_bundle_remove(bundle);
2592     xlate_txn_commit();
2593
2594     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
2595         bundle_del_port(port);
2596     }
2597
2598     bundle_flush_macs(bundle, true);
2599     hmap_remove(&ofproto->bundles, &bundle->hmap_node);
2600     free(bundle->name);
2601     free(bundle->trunks);
2602     lacp_unref(bundle->lacp);
2603     bond_unref(bundle->bond);
2604     free(bundle);
2605 }
2606
/* Creates, reconfigures, or destroys the bundle that 'aux' identifies within
 * 'ofproto_'.
 *
 * If 's' is NULL, destroys the bundle (if there is one) and returns 0.
 * Otherwise creates the bundle if it does not exist yet and applies the
 * settings in 's' to it: name, LACP, set of ports, VLAN mode, VLAN tag,
 * trunked VLANs, and bonding.
 *
 * Returns 0 if successful.  Returns EINVAL, after destroying the bundle, if
 * the bundle ends up without any ports. */
static int
bundle_set(struct ofproto *ofproto_, void *aux,
           const struct ofproto_bundle_settings *s)
{
    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
    bool need_flush = false;    /* Set when learned MACs must be flushed. */
    struct ofport_dpif *port;
    struct ofbundle *bundle;
    unsigned long *trunks;
    int vlan;
    size_t i;
    bool ok;

    /* A null 's' is a request to delete the bundle. */
    if (!s) {
        bundle_destroy(bundle_lookup(ofproto, aux));
        return 0;
    }

    /* More than one slave requires bond settings, and the LACP settings and
     * the per-slave LACP settings must be supplied (or omitted) together. */
    ovs_assert(s->n_slaves == 1 || s->bond != NULL);
    ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));

    bundle = bundle_lookup(ofproto, aux);
    if (!bundle) {
        /* First time 'aux' is seen: create an empty bundle with default
         * settings.  The code below fills in the real configuration. */
        bundle = xmalloc(sizeof *bundle);

        bundle->ofproto = ofproto;
        hmap_insert(&ofproto->bundles, &bundle->hmap_node,
                    hash_pointer(aux, 0));
        bundle->aux = aux;
        bundle->name = NULL;

        list_init(&bundle->ports);
        bundle->vlan_mode = PORT_VLAN_TRUNK;
        bundle->vlan = -1;
        bundle->trunks = NULL;
        bundle->use_priority_tags = s->use_priority_tags;
        bundle->lacp = NULL;
        bundle->bond = NULL;

        bundle->floodable = true;
        mbridge_register_bundle(ofproto->mbridge, bundle);
    }

    /* Name: replace only if it is unset or differs from 's->name'. */
    if (!bundle->name || strcmp(s->name, bundle->name)) {
        free(bundle->name);
        bundle->name = xstrdup(s->name);
    }

    /* LACP. */
    if (s->lacp) {
        ofproto->lacp_enabled = true;
        if (!bundle->lacp) {
            /* LACP is being turned on for this bundle. */
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
            bundle->lacp = lacp_create();
        }
        lacp_configure(bundle->lacp, s->lacp);
    } else {
        lacp_unref(bundle->lacp);
        bundle->lacp = NULL;
    }

    /* Update set of ports. */
    ok = true;
    for (i = 0; i < s->n_slaves; i++) {
        if (!bundle_add_port(bundle, s->slaves[i],
                             s->lacp ? &s->lacp_slaves[i] : NULL)) {
            ok = false;
        }
    }
    if (!ok || list_size(&bundle->ports) != s->n_slaves) {
        struct ofport_dpif *next_port;

        /* Either some port could not be added or the bundle holds ports
         * that 's' does not list.  Drop every port whose OpenFlow port
         * number is not in 's->slaves'. */
        LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
            for (i = 0; i < s->n_slaves; i++) {
                if (s->slaves[i] == port->up.ofp_port) {
                    goto found;
                }
            }

            bundle_del_port(port);
        found: ;
        }
    }
    ovs_assert(list_size(&bundle->ports) <= s->n_slaves);

    /* A bundle without ports cannot exist. */
    if (list_is_empty(&bundle->ports)) {
        bundle_destroy(bundle);
        return EINVAL;
    }

    /* Set VLAN tagging mode */
    if (s->vlan_mode != bundle->vlan_mode
        || s->use_priority_tags != bundle->use_priority_tags) {
        bundle->vlan_mode = s->vlan_mode;
        bundle->use_priority_tags = s->use_priority_tags;
        need_flush = true;
    }

    /* Set VLAN tag.  The tag is -1 in trunk mode; otherwise it is 's->vlan'
     * when that lies in the range 0...4095 and 0 when it does not. */
    vlan = (s->vlan_mode == PORT_VLAN_TRUNK ? -1
            : s->vlan >= 0 && s->vlan <= 4095 ? s->vlan
            : 0);
    if (vlan != bundle->vlan) {
        bundle->vlan = vlan;
        need_flush = true;
    }

    /* Get trunked VLANs.  After this switch 'trunks' is NULL, aliases
     * 's->trunks', or points to a bitmap allocated here. */
    switch (s->vlan_mode) {
    case PORT_VLAN_ACCESS:
        trunks = NULL;
        break;

    case PORT_VLAN_TRUNK:
        trunks = CONST_CAST(unsigned long *, s->trunks);
        break;

    case PORT_VLAN_NATIVE_UNTAGGED:
    case PORT_VLAN_NATIVE_TAGGED:
        if (vlan != 0 && (!s->trunks
                          || !bitmap_is_set(s->trunks, vlan)
                          || bitmap_is_set(s->trunks, 0))) {
            /* Force trunking the native VLAN and prohibit trunking VLAN 0. */
            if (s->trunks) {
                trunks = bitmap_clone(s->trunks, 4096);
            } else {
                trunks = bitmap_allocate1(4096);
            }
            bitmap_set1(trunks, vlan);
            bitmap_set0(trunks, 0);
        } else {
            trunks = CONST_CAST(unsigned long *, s->trunks);
        }
        break;

    default:
        OVS_NOT_REACHED();
    }
    if (!vlan_bitmap_equal(trunks, bundle->trunks)) {
        free(bundle->trunks);
        if (trunks == s->trunks) {
            /* 's->trunks' belongs to the caller, so store a copy. */
            bundle->trunks = vlan_bitmap_clone(trunks);
        } else {
            /* Hand the bitmap allocated above over to the bundle.  Clearing
             * 'trunks' turns the free() below into a no-op. */
            bundle->trunks = trunks;
            trunks = NULL;
        }
        need_flush = true;
    }
    /* Release a locally allocated bitmap that the bundle did not take. */
    if (trunks != s->trunks) {
        free(trunks);
    }

    /* Bonding.  NOTE(review): list_is_short() presumably reports a list of
     * fewer than two elements; confirm against its definition. */
    if (!list_is_short(&bundle->ports)) {
        bundle->ofproto->has_bonded_bundles = true;
        if (bundle->bond) {
            if (bond_reconfigure(bundle->bond, s->bond)) {
                ofproto->backer->need_revalidate = REV_RECONFIGURE;
            }
        } else {
            bundle->bond = bond_create(s->bond, ofproto);
            ofproto->backer->need_revalidate = REV_RECONFIGURE;
        }

        /* Register every port in the bundle with the bond. */
        LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
            bond_slave_register(bundle->bond, port,
                                port->up.ofp_port, port->up.netdev);
        }
    } else {
        bond_unref(bundle->bond);
        bundle->bond = NULL;
    }

    /* If we changed something that would affect MAC learning, un-learn
     * everything on this port and force flow revalidation. */
    if (need_flush) {
        bundle_flush_macs(bundle, false);
    }

    return 0;
}
2788
2789 static void
2790 bundle_remove(struct ofport *port_)
2791 {
2792     struct ofport_dpif *port = ofport_dpif_cast(port_);
2793     struct ofbundle *bundle = port->bundle;
2794
2795     if (bundle) {
2796         bundle_del_port(port);
2797         if (list_is_empty(&bundle->ports)) {
2798             bundle_destroy(bundle);
2799         } else if (list_is_short(&bundle->ports)) {
2800             bond_unref(bundle->bond);
2801             bundle->bond = NULL;
2802         }
2803     }
2804 }
2805
2806 static void
2807 send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
2808 {
2809     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
2810     struct ofport_dpif *port = port_;
2811     uint8_t ea[ETH_ADDR_LEN];
2812     int error;
2813
2814     error = netdev_get_etheraddr(port->up.netdev, ea);
2815     if (!error) {
2816         struct ofpbuf packet;
2817         void *packet_pdu;
2818
2819         ofpbuf_init(&packet, 0);
2820         packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
2821                                  pdu_size);
2822         memcpy(packet_pdu, pdu, pdu_size);
2823
2824         ofproto_dpif_send_packet(port, &packet);
2825         ofpbuf_uninit(&packet);
2826     } else {
2827         VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
2828                     "%s (%s)", port->bundle->name,
2829                     netdev_get_name(port->up.netdev), ovs_strerror(error));
2830     }
2831 }
2832
2833 static void
2834 bundle_send_learning_packets(struct ofbundle *bundle)
2835 {
2836     struct ofproto_dpif *ofproto = bundle->ofproto;
2837     struct ofpbuf *learning_packet;
2838     int error, n_packets, n_errors;
2839     struct mac_entry *e;
2840     struct list packets;
2841
2842     list_init(&packets);
2843     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
2844     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
2845         if (e->port.p != bundle) {
2846             void *port_void;
2847
2848             learning_packet = bond_compose_learning_packet(bundle->bond,
2849                                                            e->mac, e->vlan,
2850                                                            &port_void);
2851             /* Temporarily use 'frame' as a private pointer (see below). */
2852             ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
2853             learning_packet->frame = port_void;
2854             list_push_back(&packets, &learning_packet->list_node);
2855         }
2856     }
2857     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2858
2859     error = n_packets = n_errors = 0;
2860     LIST_FOR_EACH (learning_packet, list_node, &packets) {
2861         int ret;
2862         void *port_void = learning_packet->frame;
2863
2864         /* Restore 'frame'. */
2865         learning_packet->frame = ofpbuf_data(learning_packet);
2866         ret = ofproto_dpif_send_packet(port_void, learning_packet);
2867         if (ret) {
2868             error = ret;
2869             n_errors++;
2870         }
2871         n_packets++;
2872     }
2873     ofpbuf_list_delete(&packets);
2874
2875     if (n_errors) {
2876         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2877         VLOG_WARN_RL(&rl, "bond %s: %d errors sending %d gratuitous learning "
2878                      "packets, last error was: %s",
2879                      bundle->name, n_errors, n_packets, ovs_strerror(error));
2880     } else {
2881         VLOG_DBG("bond %s: sent %d gratuitous learning packets",
2882                  bundle->name, n_packets);
2883     }
2884 }
2885
2886 static void
2887 bundle_run(struct ofbundle *bundle)
2888 {
2889     if (bundle->lacp) {
2890         lacp_run(bundle->lacp, send_pdu_cb);
2891     }
2892     if (bundle->bond) {
2893         struct ofport_dpif *port;
2894
2895         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
2896             bond_slave_set_may_enable(bundle->bond, port, port->may_enable);
2897         }
2898
2899         if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
2900             bundle->ofproto->backer->need_revalidate = REV_BOND;
2901         }
2902
2903         if (bond_should_send_learning_packets(bundle->bond)) {
2904             bundle_send_learning_packets(bundle);
2905         }
2906     }
2907 }
2908
2909 static void
2910 bundle_wait(struct ofbundle *bundle)
2911 {
2912     if (bundle->lacp) {
2913         lacp_wait(bundle->lacp);
2914     }
2915     if (bundle->bond) {
2916         bond_wait(bundle->bond);
2917     }
2918 }
2919 \f
2920 /* Mirrors. */
2921
2922 static int
2923 mirror_set__(struct ofproto *ofproto_, void *aux,
2924              const struct ofproto_mirror_settings *s)
2925 {
2926     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2927     struct ofbundle **srcs, **dsts;
2928     int error;
2929     size_t i;
2930
2931     if (!s) {
2932         mirror_destroy(ofproto->mbridge, aux);
2933         return 0;
2934     }
2935
2936     srcs = xmalloc(s->n_srcs * sizeof *srcs);
2937     dsts = xmalloc(s->n_dsts * sizeof *dsts);
2938
2939     for (i = 0; i < s->n_srcs; i++) {
2940         srcs[i] = bundle_lookup(ofproto, s->srcs[i]);
2941     }
2942
2943     for (i = 0; i < s->n_dsts; i++) {
2944         dsts[i] = bundle_lookup(ofproto, s->dsts[i]);
2945     }
2946
2947     error = mirror_set(ofproto->mbridge, aux, s->name, srcs, s->n_srcs, dsts,
2948                        s->n_dsts, s->src_vlans,
2949                        bundle_lookup(ofproto, s->out_bundle), s->out_vlan);
2950     free(srcs);
2951     free(dsts);
2952     return error;
2953 }
2954
2955 static int
2956 mirror_get_stats__(struct ofproto *ofproto, void *aux,
2957                    uint64_t *packets, uint64_t *bytes)
2958 {
2959     return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
2960                             bytes);
2961 }
2962
2963 static int
2964 set_flood_vlans(struct ofproto *ofproto_, unsigned long *flood_vlans)
2965 {
2966     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2967     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2968     if (mac_learning_set_flood_vlans(ofproto->ml, flood_vlans)) {
2969         mac_learning_flush(ofproto->ml);
2970     }
2971     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2972     return 0;
2973 }
2974
2975 static bool
2976 is_mirror_output_bundle(const struct ofproto *ofproto_, void *aux)
2977 {
2978     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2979     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
2980     return bundle && mirror_bundle_out(ofproto->mbridge, bundle) != 0;
2981 }
2982
2983 static void
2984 forward_bpdu_changed(struct ofproto *ofproto_)
2985 {
2986     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2987     ofproto->backer->need_revalidate = REV_RECONFIGURE;
2988 }
2989
2990 static void
2991 set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
2992                      size_t max_entries)
2993 {
2994     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
2995     ovs_rwlock_wrlock(&ofproto->ml->rwlock);
2996     mac_learning_set_idle_time(ofproto->ml, idle_time);
2997     mac_learning_set_max_entries(ofproto->ml, max_entries);
2998     ovs_rwlock_unlock(&ofproto->ml->rwlock);
2999 }
3000
3001 /* Configures multicast snooping on 'ofport' using the settings
3002  * defined in 's'. */
3003 static int
3004 set_mcast_snooping(struct ofproto *ofproto_,
3005                    const struct ofproto_mcast_snooping_settings *s)
3006 {
3007     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3008
3009     /* Only revalidate flows if the configuration changed. */
3010     if (!s != !ofproto->ms) {
3011         ofproto->backer->need_revalidate = REV_RECONFIGURE;
3012     }
3013
3014     if (s) {
3015         if (!ofproto->ms) {
3016             ofproto->ms = mcast_snooping_create();
3017         }
3018
3019         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3020         mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
3021         mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
3022         if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
3023             ofproto->backer->need_revalidate = REV_RECONFIGURE;
3024         }
3025         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3026     } else {
3027         mcast_snooping_unref(ofproto->ms);
3028         ofproto->ms = NULL;
3029     }
3030
3031     return 0;
3032 }
3033
3034 /* Configures multicast snooping port's flood setting on 'ofproto'. */
3035 static int
3036 set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood)
3037 {
3038     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3039     struct ofbundle *bundle = bundle_lookup(ofproto, aux);
3040
3041     if (ofproto->ms) {
3042         ovs_rwlock_wrlock(&ofproto->ms->rwlock);
3043         mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle,
3044                                       flood);
3045         ovs_rwlock_unlock(&ofproto->ms->rwlock);
3046     }
3047     return 0;
3048 }
3049
3050 \f
3051 /* Ports. */
3052
3053 static struct ofport_dpif *
3054 get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
3055 {
3056     struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
3057     return ofport ? ofport_dpif_cast(ofport) : NULL;
3058 }
3059
3060 static void
3061 ofproto_port_from_dpif_port(struct ofproto_dpif *ofproto,
3062                             struct ofproto_port *ofproto_port,
3063                             struct dpif_port *dpif_port)
3064 {
3065     ofproto_port->name = dpif_port->name;
3066     ofproto_port->type = dpif_port->type;
3067     ofproto_port->ofp_port = odp_port_to_ofp_port(ofproto, dpif_port->port_no);
3068 }
3069
3070 static void
3071 ofport_update_peer(struct ofport_dpif *ofport)
3072 {
3073     const struct ofproto_dpif *ofproto;
3074     struct dpif_backer *backer;
3075     char *peer_name;
3076
3077     if (!netdev_vport_is_patch(ofport->up.netdev)) {
3078         return;
3079     }
3080
3081     backer = ofproto_dpif_cast(ofport->up.ofproto)->backer;
3082     backer->need_revalidate = REV_RECONFIGURE;
3083
3084     if (ofport->peer) {
3085         ofport->peer->peer = NULL;
3086         ofport->peer = NULL;
3087     }
3088
3089     peer_name = netdev_vport_patch_peer(ofport->up.netdev);
3090     if (!peer_name) {
3091         return;
3092     }
3093
3094     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
3095         struct ofport *peer_ofport;
3096         struct ofport_dpif *peer;
3097         char *peer_peer;
3098
3099         if (ofproto->backer != backer) {
3100             continue;
3101         }
3102
3103         peer_ofport = shash_find_data(&ofproto->up.port_by_name, peer_name);
3104         if (!peer_ofport) {
3105             continue;
3106         }
3107
3108         peer = ofport_dpif_cast(peer_ofport);
3109         peer_peer = netdev_vport_patch_peer(peer->up.netdev);
3110         if (peer_peer && !strcmp(netdev_get_name(ofport->up.netdev),
3111                                  peer_peer)) {
3112             ofport->peer = peer;
3113             ofport->peer->peer = ofport;
3114         }
3115         free(peer_peer);
3116
3117         break;
3118     }
3119     free(peer_name);
3120 }
3121
3122 static void
3123 port_run(struct ofport_dpif *ofport)
3124 {
3125     long long int carrier_seq = netdev_get_carrier_resets(ofport->up.netdev);
3126     bool carrier_changed = carrier_seq != ofport->carrier_seq;
3127     bool enable = netdev_get_carrier(ofport->up.netdev);
3128     bool cfm_enable = false;
3129     bool bfd_enable = false;
3130
3131     ofport->carrier_seq = carrier_seq;
3132
3133     if (ofport->cfm) {
3134         int cfm_opup = cfm_get_opup(ofport->cfm);
3135
3136         cfm_enable = !cfm_get_fault(ofport->cfm);
3137
3138         if (cfm_opup >= 0) {
3139             cfm_enable = cfm_enable && cfm_opup;
3140         }
3141     }
3142
3143     if (ofport->bfd) {
3144         bfd_enable = bfd_forwarding(ofport->bfd);
3145     }
3146
3147     if (ofport->bfd || ofport->cfm) {
3148         enable = enable && (cfm_enable || bfd_enable);
3149     }
3150
3151     if (ofport->bundle) {
3152         enable = enable && lacp_slave_may_enable(ofport->bundle->lacp, ofport);
3153         if (carrier_changed) {
3154             lacp_slave_carrier_changed(ofport->bundle->lacp, ofport);
3155         }
3156     }
3157
3158     if (ofport->may_enable != enable) {
3159         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3160
3161         ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
3162
3163         if (ofport->rstp_port) {
3164             rstp_port_set_mac_operational(ofport->rstp_port, enable);
3165         }
3166     }
3167
3168     ofport->may_enable = enable;
3169 }
3170
3171 static int
3172 port_query_by_name(const struct ofproto *ofproto_, const char *devname,
3173                    struct ofproto_port *ofproto_port)
3174 {
3175     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3176     struct dpif_port dpif_port;
3177     int error;
3178
3179     if (sset_contains(&ofproto->ghost_ports, devname)) {
3180         const char *type = netdev_get_type_from_name(devname);
3181
3182         /* We may be called before ofproto->up.port_by_name is populated with
3183          * the appropriate ofport.  For this reason, we must get the name and
3184          * type from the netdev layer directly. */
3185         if (type) {
3186             const struct ofport *ofport;
3187
3188             ofport = shash_find_data(&ofproto->up.port_by_name, devname);
3189             ofproto_port->ofp_port = ofport ? ofport->ofp_port : OFPP_NONE;
3190             ofproto_port->name = xstrdup(devname);
3191             ofproto_port->type = xstrdup(type);
3192             return 0;
3193         }
3194         return ENODEV;
3195     }
3196
3197     if (!sset_contains(&ofproto->ports, devname)) {
3198         return ENODEV;
3199     }
3200     error = dpif_port_query_by_name(ofproto->backer->dpif,
3201                                     devname, &dpif_port);
3202     if (!error) {
3203         ofproto_port_from_dpif_port(ofproto, ofproto_port, &dpif_port);
3204     }
3205     return error;
3206 }
3207
3208 static int
3209 port_add(struct ofproto *ofproto_, struct netdev *netdev)
3210 {
3211     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3212     const char *devname = netdev_get_name(netdev);
3213     char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
3214     const char *dp_port_name;
3215
3216     if (netdev_vport_is_patch(netdev)) {
3217         sset_add(&ofproto->ghost_ports, netdev_get_name(netdev));
3218         return 0;
3219     }
3220
3221     dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
3222     if (!dpif_port_exists(ofproto->backer->dpif, dp_port_name)) {
3223         odp_port_t port_no = ODPP_NONE;
3224         int error;
3225
3226         error = dpif_port_add(ofproto->backer->dpif, netdev, &port_no);
3227         if (error) {
3228             return error;
3229         }
3230         if (netdev_get_tunnel_config(netdev)) {
3231             simap_put(&ofproto->backer->tnl_backers,
3232                       dp_port_name, odp_to_u32(port_no));
3233         }
3234     }
3235
3236     if (netdev_get_tunnel_config(netdev)) {
3237         sset_add(&ofproto->ghost_ports, devname);
3238     } else {
3239         sset_add(&ofproto->ports, devname);
3240     }
3241     return 0;
3242 }
3243
3244 static int
3245 port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
3246 {
3247     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3248     struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
3249     int error = 0;
3250
3251     if (!ofport) {
3252         return 0;
3253     }
3254
3255     sset_find_and_delete(&ofproto->ghost_ports,
3256                          netdev_get_name(ofport->up.netdev));
3257     ofproto->backer->need_revalidate = REV_RECONFIGURE;
3258     if (!ofport->is_tunnel && !netdev_vport_is_patch(ofport->up.netdev)) {
3259         error = dpif_port_del(ofproto->backer->dpif, ofport->odp_port);
3260         if (!error) {
3261             /* The caller is going to close ofport->up.netdev.  If this is a
3262              * bonded port, then the bond is using that netdev, so remove it
3263              * from the bond.  The client will need to reconfigure everything
3264              * after deleting ports, so then the slave will get re-added. */
3265             bundle_remove(&ofport->up);
3266         }
3267     }
3268     return error;
3269 }
3270
3271 static int
3272 port_get_stats(const struct ofport *ofport_, struct netdev_stats *stats)
3273 {
3274     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3275     int error;
3276
3277     error = netdev_get_stats(ofport->up.netdev, stats);
3278
3279     if (!error && ofport_->ofp_port == OFPP_LOCAL) {
3280         struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
3281
3282         ovs_mutex_lock(&ofproto->stats_mutex);
3283         /* ofproto->stats.tx_packets represents packets that we created
3284          * internally and sent to some port (e.g. packets sent with
3285          * ofproto_dpif_send_packet()).  Account for them as if they had
3286          * come from OFPP_LOCAL and got forwarded. */
3287
3288         if (stats->rx_packets != UINT64_MAX) {
3289             stats->rx_packets += ofproto->stats.tx_packets;
3290         }
3291
3292         if (stats->rx_bytes != UINT64_MAX) {
3293             stats->rx_bytes += ofproto->stats.tx_bytes;
3294         }
3295
3296         /* ofproto->stats.rx_packets represents packets that were received on
3297          * some port and we processed internally and dropped (e.g. STP).
3298          * Account for them as if they had been forwarded to OFPP_LOCAL. */
3299
3300         if (stats->tx_packets != UINT64_MAX) {
3301             stats->tx_packets += ofproto->stats.rx_packets;
3302         }
3303
3304         if (stats->tx_bytes != UINT64_MAX) {
3305             stats->tx_bytes += ofproto->stats.rx_bytes;
3306         }
3307         ovs_mutex_unlock(&ofproto->stats_mutex);
3308     }
3309
3310     return error;
3311 }
3312
3313 static int
3314 port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats)
3315 {
3316     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3317     if (ofport->bundle && ofport->bundle->lacp) {
3318         if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) {
3319             return 0;
3320         }
3321     }
3322     return -1;
3323 }
3324
/* Iteration state for port_dump_start(), port_dump_next() and
 * port_dump_done(). */
struct port_dump_state {
    uint32_t bucket;            /* Position cursor for sset_at_position(). */
    uint32_t offset;            /* Position cursor for sset_at_position(). */
    bool ghost;                 /* Dumping 'ghost_ports' rather than 'ports'? */

    struct ofproto_port port;   /* Port most recently returned to caller. */
    bool has_port;              /* Does 'port' still need to be destroyed? */
};
3333
3334 static int
3335 port_dump_start(const struct ofproto *ofproto_ OVS_UNUSED, void **statep)
3336 {
3337     *statep = xzalloc(sizeof(struct port_dump_state));
3338     return 0;
3339 }
3340
3341 static int
3342 port_dump_next(const struct ofproto *ofproto_, void *state_,
3343                struct ofproto_port *port)
3344 {
3345     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3346     struct port_dump_state *state = state_;
3347     const struct sset *sset;
3348     struct sset_node *node;
3349
3350     if (state->has_port) {
3351         ofproto_port_destroy(&state->port);
3352         state->has_port = false;
3353     }
3354     sset = state->ghost ? &ofproto->ghost_ports : &ofproto->ports;
3355     while ((node = sset_at_position(sset, &state->bucket, &state->offset))) {
3356         int error;
3357
3358         error = port_query_by_name(ofproto_, node->name, &state->port);
3359         if (!error) {
3360             *port = state->port;
3361             state->has_port = true;
3362             return 0;
3363         } else if (error != ENODEV) {
3364             return error;
3365         }
3366     }
3367
3368     if (!state->ghost) {
3369         state->ghost = true;
3370         state->bucket = 0;
3371         state->offset = 0;
3372         return port_dump_next(ofproto_, state_, port);
3373     }
3374
3375     return EOF;
3376 }
3377
3378 static int
3379 port_dump_done(const struct ofproto *ofproto_ OVS_UNUSED, void *state_)
3380 {
3381     struct port_dump_state *state = state_;
3382
3383     if (state->has_port) {
3384         ofproto_port_destroy(&state->port);
3385     }
3386     free(state);
3387     return 0;
3388 }
3389
3390 static int
3391 port_poll(const struct ofproto *ofproto_, char **devnamep)
3392 {
3393     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3394
3395     if (ofproto->port_poll_errno) {
3396         int error = ofproto->port_poll_errno;
3397         ofproto->port_poll_errno = 0;
3398         return error;
3399     }
3400
3401     if (sset_is_empty(&ofproto->port_poll_set)) {
3402         return EAGAIN;
3403     }
3404
3405     *devnamep = sset_pop(&ofproto->port_poll_set);
3406     return 0;
3407 }
3408
3409 static void
3410 port_poll_wait(const struct ofproto *ofproto_)
3411 {
3412     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
3413     dpif_port_poll_wait(ofproto->backer->dpif);
3414 }
3415
3416 static int
3417 port_is_lacp_current(const struct ofport *ofport_)
3418 {
3419     const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
3420     return (ofport->bundle && ofport->bundle->lacp
3421             ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
3422             : -1);
3423 }
3424 \f
3425 /* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
3426  * then delete it entirely. */
3427 static void
3428 rule_expire(struct rule_dpif *rule)
3429     OVS_REQUIRES(ofproto_mutex)
3430 {
3431     uint16_t hard_timeout, idle_timeout;
3432     long long int now = time_msec();
3433     int reason = -1;
3434
3435     hard_timeout = rule->up.hard_timeout;
3436     idle_timeout = rule->up.idle_timeout;
3437
3438     /* Has 'rule' expired? */
3439     if (hard_timeout) {
3440         long long int modified;
3441
3442         ovs_mutex_lock(&rule->up.mutex);
3443         modified = rule->up.modified;
3444         ovs_mutex_unlock(&rule->up.mutex);
3445
3446         if (now > modified + hard_timeout * 1000) {
3447             reason = OFPRR_HARD_TIMEOUT;
3448         }
3449     }
3450
3451     if (reason < 0 && idle_timeout) {
3452         long long int used;
3453
3454         ovs_mutex_lock(&rule->stats_mutex);
3455         used = rule->stats.used;
3456         ovs_mutex_unlock(&rule->stats_mutex);
3457
3458         if (now > used + idle_timeout * 1000) {
3459             reason = OFPRR_IDLE_TIMEOUT;
3460         }
3461     }
3462
3463     if (reason >= 0) {
3464         COVERAGE_INC(ofproto_dpif_expired);
3465         ofproto_rule_expire(&rule->up, reason);
3466     }
3467 }
3468
3469 /* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
3470  * 'flow' must reflect the data in 'packet'. */
3471 int
3472 ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
3473                              const struct flow *flow,
3474                              struct rule_dpif *rule,
3475                              const struct ofpact *ofpacts, size_t ofpacts_len,
3476                              struct ofpbuf *packet)
3477 {
3478     struct dpif_flow_stats stats;
3479     struct xlate_out xout;
3480     struct xlate_in xin;
3481     ofp_port_t in_port;
3482     struct dpif_execute execute;
3483     int error;
3484
3485     ovs_assert((rule != NULL) != (ofpacts != NULL));
3486
3487     dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
3488
3489     if (rule) {
3490         rule_dpif_credit_stats(rule, &stats);
3491     }
3492
3493     xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
3494                   stats.tcp_flags, packet);
3495     xin.ofpacts = ofpacts;
3496     xin.ofpacts_len = ofpacts_len;
3497     xin.resubmit_stats = &stats;
3498     xlate_actions(&xin, &xout);
3499
3500     execute.actions = ofpbuf_data(xout.odp_actions);
3501     execute.actions_len = ofpbuf_size(xout.odp_actions);
3502     execute.packet = packet;
3503     execute.md = pkt_metadata_from_flow(flow);
3504     execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
3505     execute.probe = false;
3506
3507     /* Fix up in_port. */
3508     in_port = flow->in_port.ofp_port;
3509     if (in_port == OFPP_NONE) {
3510         in_port = OFPP_LOCAL;
3511     }
3512     execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
3513
3514     error = dpif_execute(ofproto->backer->dpif, &execute);
3515
3516     xlate_out_uninit(&xout);
3517
3518     return error;
3519 }
3520
3521 void
3522 rule_dpif_credit_stats(struct rule_dpif *rule,
3523                        const struct dpif_flow_stats *stats)
3524 {
3525     ovs_mutex_lock(&rule->stats_mutex);
3526     rule->stats.n_packets += stats->n_packets;
3527     rule->stats.n_bytes += stats->n_bytes;
3528     rule->stats.used = MAX(rule->stats.used, stats->used);
3529     ovs_mutex_unlock(&rule->stats_mutex);
3530 }
3531
3532 ovs_be64
3533 rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
3534     OVS_REQUIRES(rule->up.mutex)
3535 {
3536     return rule->up.flow_cookie;
3537 }
3538
3539 void
3540 rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout,
3541                      uint16_t hard_timeout)
3542 {
3543     ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
3544 }
3545
3546 /* Returns 'rule''s actions.  The returned actions are RCU-protected, and can
3547  * be read until the calling thread quiesces. */
3548 const struct rule_actions *
3549 rule_dpif_get_actions(const struct rule_dpif *rule)
3550 {
3551     return rule_get_actions(&rule->up);
3552 }
3553
3554 /* Sets 'rule''s recirculation id. */
3555 static void
3556 rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
3557     OVS_REQUIRES(rule->up.mutex)
3558 {
3559     ovs_assert(!rule->recirc_id);
3560     rule->recirc_id = id;
3561 }
3562
3563 /* Returns 'rule''s recirculation id. */
3564 uint32_t
3565 rule_dpif_get_recirc_id(struct rule_dpif *rule)
3566     OVS_REQUIRES(rule->up.mutex)
3567 {
3568     if (!rule->recirc_id) {
3569         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3570
3571         rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto));
3572     }
3573     return rule->recirc_id;
3574 }
3575
3576 /* Sets 'rule''s recirculation id. */
3577 void
3578 rule_set_recirc_id(struct rule *rule_, uint32_t id)
3579 {
3580     struct rule_dpif *rule = rule_dpif_cast(rule_);
3581
3582     ovs_mutex_lock(&rule->up.mutex);
3583     rule_dpif_set_recirc_id(rule, id);
3584     ovs_mutex_unlock(&rule->up.mutex);
3585 }
3586
3587 /* Lookup 'flow' in table 0 of 'ofproto''s classifier.
3588  * If 'wc' is non-null, sets the fields that were relevant as part of
3589  * the lookup. Returns the table id where a match or miss occurred via
3590  * 'table_id'.  This will be zero unless there was a miss and
3591  * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables
3592  * where misses occur, or TBL_INTERNAL if the rule has a non-zero
3593  * recirculation ID, and a match was found in the internal table, or if
3594  * there was no match and one of the special rules (drop_frags_rule,
3595  * miss_rule, or no_packet_in_rule) was returned.
3596  *
3597  * The return value is the found rule, which is valid at least until the next
3598  * RCU quiescent period.  If the rule needs to stay around longer,
3599  * a non-zero 'take_ref' must be passed in to cause a reference to be taken
3600  * on it before this returns. */
3601 struct rule_dpif *
3602 rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow,
3603                  struct flow_wildcards *wc, bool take_ref,
3604                  const struct dpif_flow_stats *stats, uint8_t *table_id)
3605 {
3606     *table_id = 0;
3607
3608     if (ofproto_dpif_get_enable_recirc(ofproto)) {
3609         /* Always exactly match recirc_id since datapath supports
3610          * recirculation.  */
3611         if (wc) {
3612             wc->masks.recirc_id = UINT32_MAX;
3613         }
3614         if (flow->recirc_id) {
3615             /* Start looking up from internal table for post recirculation
3616              * flows or packets. */
3617             *table_id = TBL_INTERNAL;
3618         }
3619     }
3620
3621     return rule_dpif_lookup_from_table(ofproto, flow, wc, take_ref, stats,
3622                                        table_id, flow->in_port.ofp_port, true,
3623                                        true);
3624 }
3625
3626 /* The returned rule (if any) is valid at least until the next RCU quiescent
3627  * period.  If the rule needs to stay around longer, a non-zero 'take_ref'
3628  * must be passed in to cause a reference to be taken on it. */
3629 static struct rule_dpif *
3630 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id,
3631                           const struct flow *flow, struct flow_wildcards *wc,
3632                           bool take_ref)
3633 {
3634     struct classifier *cls = &ofproto->up.tables[table_id].cls;
3635     const struct cls_rule *cls_rule;
3636     struct rule_dpif *rule;
3637
3638     do {
3639         cls_rule = classifier_lookup(cls, flow, wc);
3640
3641         rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
3642
3643         /* Try again if the rule was released before we get the reference. */
3644     } while (rule && take_ref && !rule_dpif_try_ref(rule));
3645
3646     return rule;
3647 }
3648
/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'.
 * Returns the rule that was found, which may be one of the special rules
 * (drop_frags_rule, miss_rule, or no_packet_in_rule) according to packet miss
 * handling.  If 'may_packet_in' is false, returning of the miss_rule (which
 * issues packet ins for the controller) is avoided.  Updates 'wc', if
 * nonnull, to reflect the fields that were used during the lookup.
 *
 * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
 * if none is found then the table miss configuration for that table is
 * honored, which can result in additional lookups in other OpenFlow tables.
 * In this case the function updates '*table_id' to reflect the final OpenFlow
 * table that was searched.
 *
 * If 'honor_table_miss' is false, then only one table lookup occurs, in
 * '*table_id'.
 *
 * If 'stats' is nonnull, 'stats->n_packets' is added to the n_matched or
 * n_missed counter of each table that is consulted.
 *
 * The return value is the found rule, which is valid at least until the next
 * RCU quiescent period.  If the rule needs to stay around longer, a non-zero
 * 'take_ref' must be passed in to cause a reference to be taken on it before
 * this returns.
 *
 * 'in_port' allows the lookup to take place as if the in port had the value
 * 'in_port'.  This is needed for resubmit action support.  'flow' is
 * temporarily modified during the lookup, but its transport ports and in
 * port are restored before returning. */
struct rule_dpif *
rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow,
                            struct flow_wildcards *wc, bool take_ref,
                            const struct dpif_flow_stats *stats,
                            uint8_t *table_id, ofp_port_t in_port,
                            bool may_packet_in, bool honor_table_miss)
{
    /* Saved so that the fields modified below can be restored at 'out'. */
    ovs_be16 old_tp_src = flow->tp_src, old_tp_dst = flow->tp_dst;
    ofp_port_t old_in_port = flow->in_port.ofp_port;
    enum ofputil_table_miss miss_config;
    struct rule_dpif *rule;
    uint8_t next_id;

    /* We always unwildcard nw_frag (for IP), so they
     * need not be unwildcarded here. */
    if (flow->nw_frag & FLOW_NW_FRAG_ANY
        && ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
        if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
            /* We must pretend that transport ports are unavailable. */
            flow->tp_src = htons(0);
            flow->tp_dst = htons(0);
        } else {
            /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
             * Use the drop_frags_rule (which cannot disappear). */
            rule = ofproto->drop_frags_rule;
            if (take_ref) {
                rule_dpif_ref(rule);
            }
            if (stats) {
                /* The dropped fragments are credited as matched in the
                 * table where the lookup would have started. */
                struct oftable *tbl = &ofproto->up.tables[*table_id];
                unsigned long orig;

                atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig);
            }
            /* 'flow' has not been modified on this path, so there is nothing
             * to restore. */
            return rule;
        }
    }

    /* Look up a flow with 'in_port' as the input port.  Then restore the
     * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
     * have surprising behavior). */
    flow->in_port.ofp_port = in_port;

    /* Our current implementation depends on n_tables == N_TABLES, and
     * TBL_INTERNAL being the last table. */
    BUILD_ASSERT_DECL(N_TABLES == TBL_INTERNAL + 1);

    /* Initial value used by the miss handling below if the loop ends without
     * fetching a table's miss configuration. */
    miss_config = OFPUTIL_TABLE_MISS_CONTINUE;

    /* Walk the tables starting at '*table_id'.  The increment expression
     * steps past TBL_INTERNAL, so the internal table is only searched when
     * the lookup starts there. */
    for (next_id = *table_id;
         next_id < ofproto->up.n_tables;
         next_id++, next_id += (next_id == TBL_INTERNAL))
    {
        *table_id = next_id;
        /* On a match with 'take_ref' set, the reference has already been
         * taken by rule_dpif_lookup_in_table(). */
        rule = rule_dpif_lookup_in_table(ofproto, next_id, flow, wc, take_ref);
        if (stats) {
            struct oftable *tbl = &ofproto->up.tables[next_id];
            unsigned long orig;

            atomic_add_relaxed(rule ? &tbl->n_matched : &tbl->n_missed,
                               stats->n_packets, &orig);
        }
        if (rule) {
            goto out;   /* Match. */
        }
        if (honor_table_miss) {
            miss_config = ofproto_table_get_miss_config(&ofproto->up,
                                                        *table_id);
            if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE) {
                continue;
            }
        }
        break;
    }
    /* Miss.  Default to the rule that does not generate packet-ins, and
     * upgrade to miss_rule only when a packet-in is allowed and wanted. */
    rule = ofproto->no_packet_in_rule;
    if (may_packet_in) {
        if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE
            || miss_config == OFPUTIL_TABLE_MISS_CONTROLLER) {
            struct ofport_dpif *port;

            /* The packet-in decision is based on the flow's original in
             * port, not on the 'in_port' used for the lookup. */
            port = get_ofp_port(ofproto, old_in_port);
            if (!port) {
                VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
                             old_in_port);
            } else if (!(port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN)) {
                rule = ofproto->miss_rule;
            }
        } else if (miss_config == OFPUTIL_TABLE_MISS_DEFAULT &&
                   connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
            rule = ofproto->miss_rule;
        }
    }
    /* The special rules were not looked up through the classifier, so any
     * requested reference is taken here. */
    if (take_ref) {
        rule_dpif_ref(rule);
    }
out:
    /* Restore port numbers, as they may have been modified above. */
    flow->tp_src = old_tp_src;
    flow->tp_dst = old_tp_dst;
    /* Restore the old in port. */
    flow->in_port.ofp_port = old_in_port;

    return rule;
}
3777
3778 static void
3779 complete_operation(struct rule_dpif *rule)
3780     OVS_REQUIRES(ofproto_mutex)
3781 {
3782     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3783
3784     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3785 }
3786
3787 static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
3788 {
3789     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
3790 }
3791
3792 static struct rule *
3793 rule_alloc(void)
3794 {
3795     struct rule_dpif *rule = xmalloc(sizeof *rule);
3796     return &rule->up;
3797 }
3798
/* Frees the rule_dpif that contains 'rule_'. */
static void
rule_dealloc(struct rule *rule_)
{
    free(rule_dpif_cast(rule_));
}
3805
3806 static enum ofperr
3807 rule_construct(struct rule *rule_)
3808     OVS_NO_THREAD_SAFETY_ANALYSIS
3809 {
3810     struct rule_dpif *rule = rule_dpif_cast(rule_);
3811     ovs_mutex_init_adaptive(&rule->stats_mutex);
3812     rule->stats.n_packets = 0;
3813     rule->stats.n_bytes = 0;
3814     rule->stats.used = rule->up.modified;
3815     rule->recirc_id = 0;
3816
3817     return 0;
3818 }
3819
3820 static enum ofperr
3821 rule_insert(struct rule *rule_)
3822     OVS_REQUIRES(ofproto_mutex)
3823 {
3824     struct rule_dpif *rule = rule_dpif_cast(rule_);
3825     complete_operation(rule);
3826     return 0;
3827 }
3828
3829 static void
3830 rule_delete(struct rule *rule_)
3831     OVS_REQUIRES(ofproto_mutex)
3832 {
3833     struct rule_dpif *rule = rule_dpif_cast(rule_);
3834     complete_operation(rule);
3835 }
3836
3837 static void
3838 rule_destruct(struct rule *rule_)
3839 {
3840     struct rule_dpif *rule = rule_dpif_cast(rule_);
3841
3842     ovs_mutex_destroy(&rule->stats_mutex);
3843     if (rule->recirc_id) {
3844         struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3845
3846         ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id);
3847     }
3848 }
3849
3850 static void
3851 rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
3852                long long int *used)
3853 {
3854     struct rule_dpif *rule = rule_dpif_cast(rule_);
3855
3856     ovs_mutex_lock(&rule->stats_mutex);
3857     *packets = rule->stats.n_packets;
3858     *bytes = rule->stats.n_bytes;
3859     *used = rule->stats.used;
3860     ovs_mutex_unlock(&rule->stats_mutex);
3861 }
3862
3863 static void
3864 rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
3865                   struct ofpbuf *packet)
3866 {
3867     struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
3868
3869     ofproto_dpif_execute_actions(ofproto, flow, rule, NULL, 0, packet);
3870 }
3871
3872 static enum ofperr
3873 rule_execute(struct rule *rule, const struct flow *flow,
3874              struct ofpbuf *packet)
3875 {
3876     rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
3877     ofpbuf_delete(packet);
3878     return 0;
3879 }
3880
3881 static void
3882 rule_modify_actions(struct rule *rule_, bool reset_counters)
3883     OVS_REQUIRES(ofproto_mutex)
3884 {
3885     struct rule_dpif *rule = rule_dpif_cast(rule_);
3886
3887     if (reset_counters) {
3888         ovs_mutex_lock(&rule->stats_mutex);
3889         rule->stats.n_packets = 0;
3890         rule->stats.n_bytes = 0;
3891         ovs_mutex_unlock(&rule->stats_mutex);
3892     }
3893
3894     complete_operation(rule);
3895 }
3896
3897 static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
3898 {
3899     return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
3900 }
3901
3902 static struct ofgroup *
3903 group_alloc(void)
3904 {
3905     struct group_dpif *group = xzalloc(sizeof *group);
3906     return &group->up;
3907 }
3908
/* Frees the group_dpif that contains 'group_'. */
static void
group_dealloc(struct ofgroup *group_)
{
    free(group_dpif_cast(group_));
}
3915
3916 static void
3917 group_construct_stats(struct group_dpif *group)
3918     OVS_REQUIRES(group->stats_mutex)
3919 {
3920     struct ofputil_bucket *bucket;
3921     const struct list *buckets;
3922
3923     group->packet_count = 0;
3924     group->byte_count = 0;
3925
3926     group_dpif_get_buckets(group, &buckets);
3927     LIST_FOR_EACH (bucket, list_node, buckets) {
3928         bucket->stats.packet_count = 0;
3929         bucket->stats.byte_count = 0;
3930     }
3931 }
3932
3933 void
3934 group_dpif_credit_stats(struct group_dpif *group,
3935                         struct ofputil_bucket *bucket,
3936                         const struct dpif_flow_stats *stats)
3937 {
3938     ovs_mutex_lock(&group->stats_mutex);
3939     group->packet_count += stats->n_packets;
3940     group->byte_count += stats->n_bytes;
3941     if (bucket) {
3942         bucket->stats.packet_count += stats->n_packets;
3943         bucket->stats.byte_count += stats->n_bytes;
3944     } else { /* Credit to all buckets */
3945         const struct list *buckets;
3946
3947         group_dpif_get_buckets(group, &buckets);
3948         LIST_FOR_EACH (bucket, list_node, buckets) {
3949             bucket->stats.packet_count += stats->n_packets;
3950             bucket->stats.byte_count += stats->n_bytes;
3951         }
3952     }
3953     ovs_mutex_unlock(&group->stats_mutex);
3954 }
3955
3956 static enum ofperr
3957 group_construct(struct ofgroup *group_)
3958 {
3959     struct group_dpif *group = group_dpif_cast(group_);
3960     const struct ofputil_bucket *bucket;
3961
3962     /* Prevent group chaining because our locking structure makes it hard to
3963      * implement deadlock-free.  (See xlate_group_resource_check().) */
3964     LIST_FOR_EACH (bucket, list_node, &group->up.buckets) {
3965         const struct ofpact *a;
3966
3967         OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) {
3968             if (a->type == OFPACT_GROUP) {
3969                 return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED;
3970             }
3971         }
3972     }
3973
3974     ovs_mutex_init_adaptive(&group->stats_mutex);
3975     ovs_mutex_lock(&group->stats_mutex);
3976     group_construct_stats(group);
3977     ovs_mutex_unlock(&group->stats_mutex);
3978     return 0;
3979 }
3980
3981 static void
3982 group_destruct(struct ofgroup *group_)
3983 {
3984     struct group_dpif *group = group_dpif_cast(group_);
3985     ovs_mutex_destroy(&group->stats_mutex);
3986 }
3987
3988 static enum ofperr
3989 group_modify(struct ofgroup *group_)
3990 {
3991     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
3992
3993     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
3994
3995     return 0;
3996 }
3997
3998 static enum ofperr
3999 group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
4000 {
4001     struct group_dpif *group = group_dpif_cast(group_);
4002     struct ofputil_bucket *bucket;
4003     const struct list *buckets;
4004     struct bucket_counter *bucket_stats;
4005
4006     ovs_mutex_lock(&group->stats_mutex);
4007     ogs->packet_count = group->packet_count;
4008     ogs->byte_count = group->byte_count;
4009
4010     group_dpif_get_buckets(group, &buckets);
4011     bucket_stats = ogs->bucket_stats;
4012     LIST_FOR_EACH (bucket, list_node, buckets) {
4013         bucket_stats->packet_count = bucket->stats.packet_count;
4014         bucket_stats->byte_count = bucket->stats.byte_count;
4015         bucket_stats++;
4016     }
4017     ovs_mutex_unlock(&group->stats_mutex);
4018
4019     return 0;
4020 }
4021
4022 /* If the group exists, this function increments the groups's reference count.
4023  *
4024  * Make sure to call group_dpif_unref() after no longer needing to maintain
4025  * a reference to the group. */
4026 bool
4027 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
4028                   struct group_dpif **group)
4029 {
4030     struct ofgroup *ofgroup;
4031     bool found;
4032
4033     found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
4034     *group = found ?  group_dpif_cast(ofgroup) : NULL;
4035
4036     return found;
4037 }
4038
4039 void
4040 group_dpif_get_buckets(const struct group_dpif *group,
4041                        const struct list **buckets)
4042 {
4043     *buckets = &group->up.buckets;
4044 }
4045
4046 enum ofp11_group_type
4047 group_dpif_get_type(const struct group_dpif *group)
4048 {
4049     return group->up.type;
4050 }
4051 \f
4052 /* Sends 'packet' out 'ofport'.
4053  * May modify 'packet'.
4054  * Returns 0 if successful, otherwise a positive errno value. */
4055 int
4056 ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
4057 {
4058     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
4059     int error;
4060
4061     error = xlate_send_packet(ofport, packet);
4062
4063     ovs_mutex_lock(&ofproto->stats_mutex);
4064     ofproto->stats.tx_packets++;
4065     ofproto->stats.tx_bytes += ofpbuf_size(packet);
4066     ovs_mutex_unlock(&ofproto->stats_mutex);
4067     return error;
4068 }
4069 \f
4070 /* Return the version string of the datapath that backs up
4071  * this 'ofproto'.
4072  */
4073 static const char *
4074 get_datapath_version(const struct ofproto *ofproto_)
4075 {
4076     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4077
4078     return ofproto->backer->dp_version_string;
4079 }
4080
4081 static bool
4082 set_frag_handling(struct ofproto *ofproto_,
4083                   enum ofp_config_flags frag_handling)
4084 {
4085     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4086     if (frag_handling != OFPC_FRAG_REASM) {
4087         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4088         return true;
4089     } else {
4090         return false;
4091     }
4092 }
4093
4094 static enum ofperr
4095 packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
4096            const struct flow *flow,
4097            const struct ofpact *ofpacts, size_t ofpacts_len)
4098 {
4099     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4100
4101     ofproto_dpif_execute_actions(ofproto, flow, NULL, ofpacts,
4102                                  ofpacts_len, packet);
4103     return 0;
4104 }
4105 \f
4106 /* NetFlow. */
4107
4108 static int
4109 set_netflow(struct ofproto *ofproto_,
4110             const struct netflow_options *netflow_options)
4111 {
4112     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4113
4114     if (netflow_options) {
4115         if (!ofproto->netflow) {
4116             ofproto->netflow = netflow_create();
4117             ofproto->backer->need_revalidate = REV_RECONFIGURE;
4118         }
4119         return netflow_set_options(ofproto->netflow, netflow_options);
4120     } else if (ofproto->netflow) {
4121         ofproto->backer->need_revalidate = REV_RECONFIGURE;
4122         netflow_unref(ofproto->netflow);
4123         ofproto->netflow = NULL;
4124     }
4125
4126     return 0;
4127 }
4128
4129 static void
4130 get_netflow_ids(const struct ofproto *ofproto_,
4131                 uint8_t *engine_type, uint8_t *engine_id)
4132 {
4133     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
4134
4135     dpif_get_netflow_ids(ofproto->backer->dpif, engine_type, engine_id);
4136 }
4137 \f
4138 static struct ofproto_dpif *
4139 ofproto_dpif_lookup(const char *name)
4140 {
4141     struct ofproto_dpif *ofproto;
4142
4143     HMAP_FOR_EACH_WITH_HASH (ofproto, all_ofproto_dpifs_node,
4144                              hash_string(name, 0), &all_ofproto_dpifs) {
4145         if (!strcmp(ofproto->up.name, name)) {
4146             return ofproto;
4147         }
4148     }
4149     return NULL;
4150 }
4151
4152 static void
4153 ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc,
4154                           const char *argv[], void *aux OVS_UNUSED)
4155 {
4156     struct ofproto_dpif *ofproto;
4157
4158     if (argc > 1) {
4159         ofproto = ofproto_dpif_lookup(argv[1]);
4160         if (!ofproto) {
4161             unixctl_command_reply_error(conn, "no such bridge");
4162             return;
4163         }
4164         ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4165         mac_learning_flush(ofproto->ml);
4166         ovs_rwlock_unlock(&ofproto->ml->rwlock);
4167     } else {
4168         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4169             ovs_rwlock_wrlock(&ofproto->ml->rwlock);
4170             mac_learning_flush(ofproto->ml);
4171             ovs_rwlock_unlock(&ofproto->ml->rwlock);
4172         }
4173     }
4174
4175     unixctl_command_reply(conn, "table successfully flushed");
4176 }
4177
4178 static void
4179 ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
4180                                      const char *argv[], void *aux OVS_UNUSED)
4181 {
4182     struct ofproto_dpif *ofproto;
4183
4184     if (argc > 1) {
4185         ofproto = ofproto_dpif_lookup(argv[1]);
4186         if (!ofproto) {
4187             unixctl_command_reply_error(conn, "no such bridge");
4188             return;
4189         }
4190
4191         if (!mcast_snooping_enabled(ofproto->ms)) {
4192             unixctl_command_reply_error(conn, "multicast snooping is disabled");
4193             return;
4194         }
4195         mcast_snooping_mdb_flush(ofproto->ms);
4196     } else {
4197         HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4198             if (!mcast_snooping_enabled(ofproto->ms)) {
4199                 continue;
4200             }
4201             mcast_snooping_mdb_flush(ofproto->ms);
4202         }
4203     }
4204
4205     unixctl_command_reply(conn, "table successfully flushed");
4206 }
4207
4208 static struct ofport_dpif *
4209 ofbundle_get_a_port(const struct ofbundle *bundle)
4210 {
4211     return CONTAINER_OF(list_front(&bundle->ports), struct ofport_dpif,
4212                         bundle_node);
4213 }
4214
4215 static void
4216 ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4217                          const char *argv[], void *aux OVS_UNUSED)
4218 {
4219     struct ds ds = DS_EMPTY_INITIALIZER;
4220     const struct ofproto_dpif *ofproto;
4221     const struct mac_entry *e;
4222
4223     ofproto = ofproto_dpif_lookup(argv[1]);
4224     if (!ofproto) {
4225         unixctl_command_reply_error(conn, "no such bridge");
4226         return;
4227     }
4228
4229     ds_put_cstr(&ds, " port  VLAN  MAC                Age\n");
4230     ovs_rwlock_rdlock(&ofproto->ml->rwlock);
4231     LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
4232         struct ofbundle *bundle = e->port.p;
4233         char name[OFP_MAX_PORT_NAME_LEN];
4234
4235         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4236                                name, sizeof name);
4237         ds_put_format(&ds, "%5s  %4d  "ETH_ADDR_FMT"  %3d\n",
4238                       name, e->vlan, ETH_ADDR_ARGS(e->mac),
4239                       mac_entry_age(ofproto->ml, e));
4240     }
4241     ovs_rwlock_unlock(&ofproto->ml->rwlock);
4242     unixctl_command_reply(conn, ds_cstr(&ds));
4243     ds_destroy(&ds);
4244 }
4245
4246 static void
4247 ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
4248                                     int argc OVS_UNUSED,
4249                                     const char *argv[],
4250                                     void *aux OVS_UNUSED)
4251 {
4252     struct ds ds = DS_EMPTY_INITIALIZER;
4253     const struct ofproto_dpif *ofproto;
4254     const struct ofbundle *bundle;
4255     const struct mcast_group *grp;
4256     struct mcast_group_bundle *b;
4257     struct mcast_mrouter_bundle *mrouter;
4258
4259     ofproto = ofproto_dpif_lookup(argv[1]);
4260     if (!ofproto) {
4261         unixctl_command_reply_error(conn, "no such bridge");
4262         return;
4263     }
4264
4265     if (!mcast_snooping_enabled(ofproto->ms)) {
4266         unixctl_command_reply_error(conn, "multicast snooping is disabled");
4267         return;
4268     }
4269
4270     ds_put_cstr(&ds, " port  VLAN  GROUP                Age\n");
4271     ovs_rwlock_rdlock(&ofproto->ms->rwlock);
4272     LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
4273         LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
4274             char name[OFP_MAX_PORT_NAME_LEN];
4275
4276             bundle = b->port;
4277             ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4278                                    name, sizeof name);
4279             ds_put_format(&ds, "%5s  %4d  "IP_FMT"         %3d\n",
4280                           name, grp->vlan, IP_ARGS(grp->ip4),
4281                           mcast_bundle_age(ofproto->ms, b));
4282         }
4283     }
4284
4285     /* ports connected to multicast routers */
4286     LIST_FOR_EACH(mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
4287         char name[OFP_MAX_PORT_NAME_LEN];
4288
4289         bundle = mrouter->port;
4290         ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
4291                                name, sizeof name);
4292             ds_put_format(&ds, "%5s  %4d  querier             %3d\n",
4293                       name, mrouter->vlan,
4294                       mcast_mrouter_age(ofproto->ms, mrouter));
4295     }
4296     ovs_rwlock_unlock(&ofproto->ms->rwlock);
4297     unixctl_command_reply(conn, ds_cstr(&ds));
4298     ds_destroy(&ds);
4299 }
4300
/* State shared by the trace_*() helpers while formatting a flow trace.  The
 * helpers recover this structure from a 'struct xlate_in *' with
 * CONTAINER_OF(), which works because 'xin' is embedded here. */
struct trace_ctx {
    struct xlate_out xout;      /* Translation output; its 'odp_actions' and
                                 * 'wc' are what the helpers print. */
    struct xlate_in xin;        /* Translation input; 'xin.flow' is the flow
                                 * being reported. */
    const struct flow *key;     /* Flow that 'xin.flow' is compared against at
                                 * level 0; also the megaflow match key. */
    struct flow flow;           /* Copy of 'xin.flow' as last printed by
                                 * trace_format_flow(). */
    struct flow_wildcards wc;   /* Wildcards accumulated by
                                 * trace_format_megaflow(). */
    struct ds *result;          /* String that the trace output is appended
                                 * to. */
};
4309
4310 static void
4311 trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
4312 {
4313     const struct rule_actions *actions;
4314     ovs_be64 cookie;
4315
4316     ds_put_char_multiple(result, '\t', level);
4317     if (!rule) {
4318         ds_put_cstr(result, "No match\n");
4319         return;
4320     }
4321
4322     ovs_mutex_lock(&rule->up.mutex);
4323     cookie = rule->up.flow_cookie;
4324     ovs_mutex_unlock(&rule->up.mutex);
4325
4326     ds_put_format(result, "Rule: table=%"PRIu8" cookie=%#"PRIx64" ",
4327                   rule ? rule->up.table_id : 0, ntohll(cookie));
4328     cls_rule_format(&rule->up.cr, result);
4329     ds_put_char(result, '\n');
4330
4331     actions = rule_dpif_get_actions(rule);
4332
4333     ds_put_char_multiple(result, '\t', level);
4334     ds_put_cstr(result, "OpenFlow actions=");
4335     ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
4336     ds_put_char(result, '\n');
4337 }
4338
4339 static void
4340 trace_format_flow(struct ds *result, int level, const char *title,
4341                   struct trace_ctx *trace)
4342 {
4343     ds_put_char_multiple(result, '\t', level);
4344     ds_put_format(result, "%s: ", title);
4345     /* Do not report unchanged flows for resubmits. */
4346     if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
4347         || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
4348         ds_put_cstr(result, "unchanged");
4349     } else {
4350         flow_format(result, &trace->xin.flow);
4351         trace->flow = trace->xin.flow;
4352     }
4353     ds_put_char(result, '\n');
4354 }
4355
4356 static void
4357 trace_format_regs(struct ds *result, int level, const char *title,
4358                   struct trace_ctx *trace)
4359 {
4360     size_t i;
4361
4362     ds_put_char_multiple(result, '\t', level);
4363     ds_put_format(result, "%s:", title);
4364     for (i = 0; i < FLOW_N_REGS; i++) {
4365         ds_put_format(result, " reg%"PRIuSIZE"=0x%"PRIx32, i, trace->flow.regs[i]);
4366     }
4367     ds_put_char(result, '\n');
4368 }
4369
4370 static void
4371 trace_format_odp(struct ds *result, int level, const char *title,
4372                  struct trace_ctx *trace)
4373 {
4374     struct ofpbuf *odp_actions = trace->xout.odp_actions;
4375
4376     ds_put_char_multiple(result, '\t', level);
4377     ds_put_format(result, "%s: ", title);
4378     format_odp_actions(result, ofpbuf_data(odp_actions),
4379                                ofpbuf_size(odp_actions));
4380     ds_put_char(result, '\n');
4381 }
4382
4383 static void
4384 trace_format_megaflow(struct ds *result, int level, const char *title,
4385                       struct trace_ctx *trace)
4386 {
4387     struct match match;
4388
4389     ds_put_char_multiple(result, '\t', level);
4390     ds_put_format(result, "%s: ", title);
4391     flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc);
4392     match_init(&match, trace->key, &trace->wc);
4393     match_format(&match, result, OFP_DEFAULT_PRIORITY);
4394     ds_put_char(result, '\n');
4395 }
4396
4397 static void trace_report(struct xlate_in *xin, const char *s, int recurse);
4398
4399 static void
4400 trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
4401 {
4402     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4403     struct ds *result = trace->result;
4404
4405     if (!recurse) {
4406         if (rule == xin->ofproto->miss_rule) {
4407             trace_report(xin, "No match, flow generates \"packet in\"s.",
4408                          recurse);
4409         } else if (rule == xin->ofproto->no_packet_in_rule) {
4410             trace_report(xin, "No match, packets dropped because "
4411                          "OFPPC_NO_PACKET_IN is set on in_port.", recurse);
4412         } else if (rule == xin->ofproto->drop_frags_rule) {
4413             trace_report(xin, "Packets dropped because they are IP "
4414                          "fragments and the fragment handling mode is "
4415                          "\"drop\".", recurse);
4416         }
4417     }
4418
4419     ds_put_char(result, '\n');
4420     if (recurse) {
4421         trace_format_flow(result, recurse, "Resubmitted flow", trace);
4422         trace_format_regs(result, recurse, "Resubmitted regs", trace);
4423         trace_format_odp(result,  recurse, "Resubmitted  odp", trace);
4424         trace_format_megaflow(result, recurse, "Resubmitted megaflow", trace);
4425     }
4426     trace_format_rule(result, recurse, rule);
4427 }
4428
4429 static void
4430 trace_report(struct xlate_in *xin, const char *s, int recurse)
4431 {
4432     struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
4433     struct ds *result = trace->result;
4434
4435     ds_put_char_multiple(result, '\t', recurse);
4436     ds_put_cstr(result, s);
4437     ds_put_char(result, '\n');
4438 }
4439
/* Parses the 'argc' elements of 'argv', ignoring argv[0].  The following
 * forms are supported:
 *
 *     - [dpname] odp_flow [-generate | packet]
 *     - bridge br_flow [-generate | packet]
 *
 * On success, initializes '*ofprotop' and 'flow', stores the packet to trace
 * in '*packetp', and returns NULL.  '*packetp' is left NULL when the last
 * argument is neither "-generate" nor a parsable hex packet (this presumes
 * that eth_from_hex() nulls its output on failure -- TODO confirm).  The
 * callers in this file release a nonnull '*packetp' with ofpbuf_delete().
 *
 * On failure, sets '*packetp' to NULL and returns a nonnull malloced error
 * message. */
static char * WARN_UNUSED_RESULT
parse_flow_and_packet(int argc, const char *argv[],
                      struct ofproto_dpif **ofprotop, struct flow *flow,
                      struct ofpbuf **packetp)
{
    const struct dpif_backer *backer = NULL;
    const char *error = NULL;   /* Static error text, duplicated at 'exit'. */
    char *m_err = NULL;         /* Malloced error text, returned as is. */
    struct simap port_names = SIMAP_INITIALIZER(&port_names);
    struct ofpbuf *packet;
    struct ofpbuf odp_key;
    struct ofpbuf odp_mask;

    ofpbuf_init(&odp_key, 0);
    ofpbuf_init(&odp_mask, 0);

    /* Handle "-generate" or a hex string as the last argument. */
    if (!strcmp(argv[argc - 1], "-generate")) {
        /* An empty packet is filled in from the flow further below. */
        packet = ofpbuf_new(0);
        argc--;
    } else {
        error = eth_from_hex(argv[argc - 1], &packet);
        if (!error) {
            argc--;
        } else if (argc == 4) {
            /* The 3-argument form must end in "-generate" or a hex string. */
            goto exit;
        }
        /* Otherwise the last argument is taken to be the flow itself, so the
         * hex parsing failure is not an error. */
        error = NULL;
    }

    /* odp_flow can have its in_port specified as a name instead of port no.
     * We do not yet know whether a given flow is a odp_flow or a br_flow.
     * But, to know whether a flow is odp_flow through odp_flow_from_string(),
     * we need to create a simap of name to port no. */
    if (argc == 3) {
        const char *dp_type;
        /* Accept the datapath type with or without an "ovs-" prefix. */
        if (!strncmp(argv[1], "ovs-", 4)) {
            dp_type = argv[1] + 4;
        } else {
            dp_type = argv[1];
        }
        backer = shash_find_data(&all_dpif_backers, dp_type);
    } else if (argc == 2) {
        struct shash_node *node;
        /* No datapath was named: usable only if exactly one backer exists. */
        if (shash_count(&all_dpif_backers) == 1) {
            node = shash_first(&all_dpif_backers);
            backer = node->data;
        }
    } else {
        error = "Syntax error";
        goto exit;
    }
    if (backer && backer->dpif) {
        struct dpif_port dpif_port;
        struct dpif_port_dump port_dump;
        DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, backer->dpif) {
            simap_put(&port_names, dpif_port.name,
                      odp_to_u32(dpif_port.port_no));
        }
    }

    /* Parse the flow and determine whether a datapath or
     * bridge is specified. If function odp_flow_from_string()
     * returns 0, the flow is a odp_flow. If function
     * parse_ofp_exact_flow() returns NULL, the flow is a br_flow. */
    if (!odp_flow_from_string(argv[argc - 1], &port_names,
                              &odp_key, &odp_mask)) {
        if (!backer) {
            error = "Cannot find the datapath";
            goto exit;
        }

        if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key),
                                 flow) == ODP_FIT_ERROR) {
            error = "Failed to parse flow key";
            goto exit;
        }

        *ofprotop = xlate_lookup_ofproto(backer, flow,
                                         &flow->in_port.ofp_port);
        if (*ofprotop == NULL) {
            error = "Invalid datapath flow";
            goto exit;
        }

        vsp_adjust_flow(*ofprotop, flow, NULL);

    } else {
        char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);

        if (err) {
            m_err = xasprintf("Bad flow syntax: %s", err);
            free(err);
            goto exit;
        } else {
            /* A bridge flow is only meaningful with an explicit bridge. */
            if (argc != 3) {
                error = "Must specify bridge name";
                goto exit;
            }

            *ofprotop = ofproto_dpif_lookup(argv[1]);
            if (!*ofprotop) {
                error = "Unknown bridge name";
                goto exit;
            }
        }
    }

    /* Generate a packet, if requested. */
    if (packet) {
        if (!ofpbuf_size(packet)) {
            flow_compose(packet, flow);
        } else {
            struct pkt_metadata md = pkt_metadata_from_flow(flow);

            /* Use the metadata from the flow and the packet argument
             * to reconstruct the flow. */
            flow_extract(packet, &md, flow);
        }
    }

exit:
    /* Common cleanup for the success and failure paths. */
    if (error && !m_err) {
        m_err = xstrdup(error);
    }
    if (m_err) {
        /* Never hand a packet back to the caller together with an error. */
        ofpbuf_delete(packet);
        packet = NULL;
    }
    *packetp = packet;
    ofpbuf_uninit(&odp_key);
    ofpbuf_uninit(&odp_mask);
    simap_destroy(&port_names);
    return m_err;
}
4584
4585 static void
4586 ofproto_unixctl_trace(struct unixctl_conn *conn, int argc, const char *argv[],
4587                       void *aux OVS_UNUSED)
4588 {
4589     struct ofproto_dpif *ofproto;
4590     struct ofpbuf *packet;
4591     char *error;
4592     struct flow flow;
4593
4594     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4595     if (!error) {
4596         struct ds result;
4597
4598         ds_init(&result);
4599         ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
4600         unixctl_command_reply(conn, ds_cstr(&result));
4601         ds_destroy(&result);
4602         ofpbuf_delete(packet);
4603     } else {
4604         unixctl_command_reply_error(conn, error);
4605         free(error);
4606     }
4607 }
4608
4609 static void
4610 ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc,
4611                               const char *argv[], void *aux OVS_UNUSED)
4612 {
4613     enum ofputil_protocol usable_protocols;
4614     struct ofproto_dpif *ofproto;
4615     bool enforce_consistency;
4616     struct ofpbuf ofpacts;
4617     struct ofpbuf *packet;
4618     struct ds result;
4619     struct flow flow;
4620     uint16_t in_port;
4621
4622     /* Three kinds of error return values! */
4623     enum ofperr retval;
4624     char *error;
4625
4626     packet = NULL;
4627     ds_init(&result);
4628     ofpbuf_init(&ofpacts, 0);
4629
4630     /* Parse actions. */
4631     error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
4632     if (error) {
4633         unixctl_command_reply_error(conn, error);
4634         free(error);
4635         goto exit;
4636     }
4637
4638     /* OpenFlow 1.1 and later suggest that the switch enforces certain forms of
4639      * consistency between the flow and the actions.  With -consistent, we
4640      * enforce consistency even for a flow supported in OpenFlow 1.0. */
4641     if (!strcmp(argv[1], "-consistent")) {
4642         enforce_consistency = true;
4643         argv++;
4644         argc--;
4645     } else {
4646         enforce_consistency = false;
4647     }
4648
4649     error = parse_flow_and_packet(argc, argv, &ofproto, &flow, &packet);
4650     if (error) {
4651         unixctl_command_reply_error(conn, error);
4652         free(error);
4653         goto exit;
4654     }
4655
4656     /* Do the same checks as handle_packet_out() in ofproto.c.
4657      *
4658      * We pass a 'table_id' of 0 to ofpacts_check(), which isn't
4659      * strictly correct because these actions aren't in any table, but it's OK
4660      * because it 'table_id' is used only to check goto_table instructions, but
4661      * packet-outs take a list of actions and therefore it can't include
4662      * instructions.
4663      *
4664      * We skip the "meter" check here because meter is an instruction, not an
4665      * action, and thus cannot appear in ofpacts. */
4666     in_port = ofp_to_u16(flow.in_port.ofp_port);
4667     if (in_port >= ofproto->up.max_ports && in_port < ofp_to_u16(OFPP_MAX)) {
4668         unixctl_command_reply_error(conn, "invalid in_port");
4669         goto exit;
4670     }
4671     if (enforce_consistency) {
4672         retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts),
4673                                            &flow, u16_to_ofp(ofproto->up.max_ports),
4674                                            0, 0, usable_protocols);
4675     } else {
4676         retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow,
4677                                u16_to_ofp(ofproto->up.max_ports), 0, 0,
4678                                &usable_protocols);
4679     }
4680
4681     if (retval) {
4682         ds_clear(&result);
4683         ds_put_format(&result, "Bad actions: %s", ofperr_to_string(retval));
4684         unixctl_command_reply_error(conn, ds_cstr(&result));
4685         goto exit;
4686     }
4687
4688     ofproto_trace(ofproto, &flow, packet,
4689                   ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result);
4690     unixctl_command_reply(conn, ds_cstr(&result));
4691
4692 exit:
4693     ds_destroy(&result);
4694     ofpbuf_delete(packet);
4695     ofpbuf_uninit(&ofpacts);
4696 }
4697
4698 /* Implements a "trace" through 'ofproto''s flow table, appending a textual
4699  * description of the results to 'ds'.
4700  *
4701  * The trace follows a packet with the specified 'flow' through the flow
4702  * table.  'packet' may be nonnull to trace an actual packet, with consequent
4703  * side effects (if it is nonnull then its flow must be 'flow').
4704  *
4705  * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
4706  * trace, otherwise the actions are determined by a flow table lookup. */
4707 static void
4708 ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
4709               const struct ofpbuf *packet,
4710               const struct ofpact ofpacts[], size_t ofpacts_len,
4711               struct ds *ds)
4712 {
4713     struct trace_ctx trace;
4714
4715     ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
4716     ds_put_cstr(ds, "Flow: ");
4717     flow_format(ds, flow);
4718     ds_put_char(ds, '\n');
4719
4720     flow_wildcards_init_catchall(&trace.wc);
4721
4722     trace.result = ds;
4723     trace.key = flow; /* Original flow key, used for megaflow. */
4724     trace.flow = *flow; /* May be modified by actions. */
4725     xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, NULL,
4726                   ntohs(flow->tcp_flags), packet);
4727     trace.xin.ofpacts = ofpacts;
4728     trace.xin.ofpacts_len = ofpacts_len;
4729     trace.xin.resubmit_hook = trace_resubmit;
4730     trace.xin.report_hook = trace_report;
4731
4732     xlate_actions(&trace.xin, &trace.xout);
4733
4734     ds_put_char(ds, '\n');
4735     trace_format_flow(ds, 0, "Final flow", &trace);
4736     trace_format_megaflow(ds, 0, "Megaflow", &trace);
4737
4738     ds_put_cstr(ds, "Datapath actions: ");
4739     format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions),
4740                        ofpbuf_size(trace.xout.odp_actions));
4741
4742     if (trace.xout.slow) {
4743         enum slow_path_reason slow;
4744
4745         ds_put_cstr(ds, "\nThis flow is handled by the userspace "
4746                     "slow path because it:");
4747
4748         slow = trace.xout.slow;
4749         while (slow) {
4750             enum slow_path_reason bit = rightmost_1bit(slow);
4751
4752             ds_put_format(ds, "\n\t- %s.",
4753                           slow_path_reason_to_explanation(bit));
4754
4755             slow &= ~bit;
4756         }
4757     }
4758
4759     xlate_out_uninit(&trace.xout);
4760 }
4761
4762 /* Store the current ofprotos in 'ofproto_shash'.  Returns a sorted list
4763  * of the 'ofproto_shash' nodes.  It is the responsibility of the caller
4764  * to destroy 'ofproto_shash' and free the returned value. */
4765 static const struct shash_node **
4766 get_ofprotos(struct shash *ofproto_shash)
4767 {
4768     const struct ofproto_dpif *ofproto;
4769
4770     HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
4771         char *name = xasprintf("%s@%s", ofproto->up.type, ofproto->up.name);
4772         shash_add_nocopy(ofproto_shash, name, ofproto);
4773     }
4774
4775     return shash_sort(ofproto_shash);
4776 }
4777
4778 static void
4779 ofproto_unixctl_dpif_dump_dps(struct unixctl_conn *conn, int argc OVS_UNUSED,
4780                               const char *argv[] OVS_UNUSED,
4781                               void *aux OVS_UNUSED)
4782 {
4783     struct ds ds = DS_EMPTY_INITIALIZER;
4784     struct shash ofproto_shash;
4785     const struct shash_node **sorted_ofprotos;
4786     int i;
4787
4788     shash_init(&ofproto_shash);
4789     sorted_ofprotos = get_ofprotos(&ofproto_shash);
4790     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4791         const struct shash_node *node = sorted_ofprotos[i];
4792         ds_put_format(&ds, "%s\n", node->name);
4793     }
4794
4795     shash_destroy(&ofproto_shash);
4796     free(sorted_ofprotos);
4797
4798     unixctl_command_reply(conn, ds_cstr(&ds));
4799     ds_destroy(&ds);
4800 }
4801
4802 static void
4803 dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
4804 {
4805     const struct shash_node **ofprotos;
4806     struct dpif_dp_stats dp_stats;
4807     struct shash ofproto_shash;
4808     size_t i;
4809
4810     dpif_get_dp_stats(backer->dpif, &dp_stats);
4811
4812     ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
4813                   dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
4814
4815     shash_init(&ofproto_shash);
4816     ofprotos = get_ofprotos(&ofproto_shash);
4817     for (i = 0; i < shash_count(&ofproto_shash); i++) {
4818         struct ofproto_dpif *ofproto = ofprotos[i]->data;
4819         const struct shash_node **ports;
4820         size_t j;
4821
4822         if (ofproto->backer != backer) {
4823             continue;
4824         }
4825
4826         ds_put_format(ds, "\t%s:\n", ofproto->up.name);
4827
4828         ports = shash_sort(&ofproto->up.port_by_name);
4829         for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
4830             const struct shash_node *node = ports[j];
4831             struct ofport *ofport = node->data;
4832             struct smap config;
4833             odp_port_t odp_port;
4834
4835             ds_put_format(ds, "\t\t%s %u/", netdev_get_name(ofport->netdev),
4836                           ofport->ofp_port);
4837
4838             odp_port = ofp_port_to_odp_port(ofproto, ofport->ofp_port);
4839             if (odp_port != ODPP_NONE) {
4840                 ds_put_format(ds, "%"PRIu32":", odp_port);
4841             } else {
4842                 ds_put_cstr(ds, "none:");
4843             }
4844
4845             ds_put_format(ds, " (%s", netdev_get_type(ofport->netdev));
4846
4847             smap_init(&config);
4848             if (!netdev_get_config(ofport->netdev, &config)) {
4849                 const struct smap_node **nodes;
4850                 size_t i;
4851
4852                 nodes = smap_sort(&config);
4853                 for (i = 0; i < smap_count(&config); i++) {
4854                     const struct smap_node *node = nodes[i];
4855                     ds_put_format(ds, "%c %s=%s", i ? ',' : ':',
4856                                   node->key, node->value);
4857                 }
4858                 free(nodes);
4859             }
4860             smap_destroy(&config);
4861
4862             ds_put_char(ds, ')');
4863             ds_put_char(ds, '\n');
4864         }
4865         free(ports);
4866     }
4867     shash_destroy(&ofproto_shash);
4868     free(ofprotos);
4869 }
4870
4871 static void
4872 ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
4873                           const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
4874 {
4875     struct ds ds = DS_EMPTY_INITIALIZER;
4876     const struct shash_node **backers;
4877     int i;
4878
4879     backers = shash_sort(&all_dpif_backers);
4880     for (i = 0; i < shash_count(&all_dpif_backers); i++) {
4881         dpif_show_backer(backers[i]->data, &ds);
4882     }
4883     free(backers);
4884
4885     unixctl_command_reply(conn, ds_cstr(&ds));
4886     ds_destroy(&ds);
4887 }
4888
4889 static void
4890 ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
4891                                 int argc OVS_UNUSED, const char *argv[],
4892                                 void *aux OVS_UNUSED)
4893 {
4894     const struct ofproto_dpif *ofproto;
4895
4896     struct ds ds = DS_EMPTY_INITIALIZER;
4897     bool verbosity = false;
4898
4899     struct dpif_port dpif_port;
4900     struct dpif_port_dump port_dump;
4901     struct hmap portno_names;
4902
4903     struct dpif_flow_dump *flow_dump;
4904     struct dpif_flow_dump_thread *flow_dump_thread;
4905     struct dpif_flow f;
4906     int error;
4907
4908     ofproto = ofproto_dpif_lookup(argv[argc - 1]);
4909     if (!ofproto) {
4910         unixctl_command_reply_error(conn, "no such bridge");
4911         return;
4912     }
4913
4914     if (argc > 2 && !strcmp(argv[1], "-m")) {
4915         verbosity = true;
4916     }
4917
4918     hmap_init(&portno_names);
4919     DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
4920         odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
4921     }
4922
4923     ds_init(&ds);
4924     flow_dump = dpif_flow_dump_create(ofproto->backer->dpif);
4925     flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
4926     while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
4927         struct flow flow;
4928
4929         if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
4930             || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
4931             continue;
4932         }
4933
4934         odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
4935                         &portno_names, &ds, verbosity);
4936         ds_put_cstr(&ds, ", ");
4937         dpif_flow_stats_format(&f.stats, &ds);
4938         ds_put_cstr(&ds, ", actions:");
4939         format_odp_actions(&ds, f.actions, f.actions_len);
4940         ds_put_char(&ds, '\n');
4941     }
4942     dpif_flow_dump_thread_destroy(flow_dump_thread);
4943     error = dpif_flow_dump_destroy(flow_dump);
4944
4945     if (error) {
4946         ds_clear(&ds);
4947         ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno));
4948         unixctl_command_reply_error(conn, ds_cstr(&ds));
4949     } else {
4950         unixctl_command_reply(conn, ds_cstr(&ds));
4951     }
4952     odp_portno_names_destroy(&portno_names);
4953     hmap_destroy(&portno_names);
4954     ds_destroy(&ds);
4955 }
4956
4957 static void
4958 ofproto_dpif_unixctl_init(void)
4959 {
4960     static bool registered;
4961     if (registered) {
4962         return;
4963     }
4964     registered = true;
4965
4966     unixctl_command_register(
4967         "ofproto/trace",
4968         "{[dp_name] odp_flow | bridge br_flow} [-generate|packet]",
4969         1, 3, ofproto_unixctl_trace, NULL);
4970     unixctl_command_register(
4971         "ofproto/trace-packet-out",
4972         "[-consistent] {[dp_name] odp_flow | bridge br_flow} [-generate|packet] actions",
4973         2, 6, ofproto_unixctl_trace_actions, NULL);
4974     unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
4975                              ofproto_unixctl_fdb_flush, NULL);
4976     unixctl_command_register("fdb/show", "bridge", 1, 1,
4977                              ofproto_unixctl_fdb_show, NULL);
4978     unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
4979                              ofproto_unixctl_mcast_snooping_flush, NULL);
4980     unixctl_command_register("mdb/show", "bridge", 1, 1,
4981                              ofproto_unixctl_mcast_snooping_show, NULL);
4982     unixctl_command_register("dpif/dump-dps", "", 0, 0,
4983                              ofproto_unixctl_dpif_dump_dps, NULL);
4984     unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
4985                              NULL);
4986     unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
4987                              ofproto_unixctl_dpif_dump_flows, NULL);
4988 }
4989
4990 /* Returns true if 'table' is the table used for internal rules,
4991  * false otherwise. */
4992 bool
4993 table_is_internal(uint8_t table_id)
4994 {
4995     return table_id == TBL_INTERNAL;
4996 }
4997 \f
4998 /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
4999  *
5000  * This is deprecated.  It is only for compatibility with broken device drivers
5001  * in old versions of Linux that do not properly support VLANs when VLAN
5002  * devices are not used.  When broken device drivers are no longer in
5003  * widespread use, we will delete these interfaces. */
5004
5005 static int
5006 set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid)
5007 {
5008     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport_->ofproto);
5009     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
5010
5011     if (realdev_ofp_port == ofport->realdev_ofp_port
5012         && vid == ofport->vlandev_vid) {
5013         return 0;
5014     }
5015
5016     ofproto->backer->need_revalidate = REV_RECONFIGURE;
5017
5018     if (ofport->realdev_ofp_port) {
5019         vsp_remove(ofport);
5020     }
5021     if (realdev_ofp_port && ofport->bundle) {
5022         /* vlandevs are enslaved to their realdevs, so they are not allowed to
5023          * themselves be part of a bundle. */
5024         bundle_set(ofport_->ofproto, ofport->bundle, NULL);
5025     }
5026
5027     ofport->realdev_ofp_port = realdev_ofp_port;
5028     ofport->vlandev_vid = vid;
5029
5030     if (realdev_ofp_port) {
5031         vsp_add(ofport, realdev_ofp_port, vid);
5032     }
5033
5034     return 0;
5035 }
5036
5037 static uint32_t
5038 hash_realdev_vid(ofp_port_t realdev_ofp_port, int vid)
5039 {
5040     return hash_2words(ofp_to_u16(realdev_ofp_port), vid);
5041 }
5042
5043 bool
5044 ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
5045     OVS_EXCLUDED(ofproto->vsp_mutex)
5046 {
5047     /* hmap_is_empty is thread safe. */
5048     return !hmap_is_empty(&ofproto->realdev_vid_map);
5049 }
5050
5051
5052 static ofp_port_t
5053 vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
5054                          ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5055     OVS_REQUIRES(ofproto->vsp_mutex)
5056 {
5057     if (!hmap_is_empty(&ofproto->realdev_vid_map)) {
5058         int vid = vlan_tci_to_vid(vlan_tci);
5059         const struct vlan_splinter *vsp;
5060
5061         HMAP_FOR_EACH_WITH_HASH (vsp, realdev_vid_node,
5062                                  hash_realdev_vid(realdev_ofp_port, vid),
5063                                  &ofproto->realdev_vid_map) {
5064             if (vsp->realdev_ofp_port == realdev_ofp_port
5065                 && vsp->vid == vid) {
5066                 return vsp->vlandev_ofp_port;
5067             }
5068         }
5069     }
5070     return realdev_ofp_port;
5071 }
5072
5073 /* Returns the OFP port number of the Linux VLAN device that corresponds to
5074  * 'vlan_tci' on the network device with port number 'realdev_ofp_port' in
5075  * 'struct ofport_dpif'.  For example, given 'realdev_ofp_port' of eth0 and
5076  * 'vlan_tci' 9, it would return the port number of eth0.9.
5077  *
5078  * Unless VLAN splinters are enabled for port 'realdev_ofp_port', this
5079  * function just returns its 'realdev_ofp_port' argument. */
5080 ofp_port_t
5081 vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto,
5082                        ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
5083     OVS_EXCLUDED(ofproto->vsp_mutex)
5084 {
5085     ofp_port_t ret;
5086
5087     /* hmap_is_empty is thread safe, see if we can return immediately. */
5088     if (hmap_is_empty(&ofproto->realdev_vid_map)) {
5089         return realdev_ofp_port;
5090     }
5091     ovs_mutex_lock(&ofproto->vsp_mutex);
5092     ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
5093     ovs_mutex_unlock(&ofproto->vsp_mutex);
5094     return ret;
5095 }
5096
5097 static struct vlan_splinter *
5098 vlandev_find(const struct ofproto_dpif *ofproto, ofp_port_t vlandev_ofp_port)
5099 {
5100     struct vlan_splinter *vsp;
5101
5102     HMAP_FOR_EACH_WITH_HASH (vsp, vlandev_node,
5103                              hash_ofp_port(vlandev_ofp_port),
5104                              &ofproto->vlandev_map) {
5105         if (vsp->vlandev_ofp_port == vlandev_ofp_port) {
5106             return vsp;
5107         }
5108     }
5109
5110     return NULL;
5111 }
5112
5113 /* Returns the OpenFlow port number of the "real" device underlying the Linux
5114  * VLAN device with OpenFlow port number 'vlandev_ofp_port' and stores the
5115  * VLAN VID of the Linux VLAN device in '*vid'.  For example, given
5116  * 'vlandev_ofp_port' of eth0.9, it would return the OpenFlow port number of
5117  * eth0 and store 9 in '*vid'.
5118  *
5119  * Returns 0 and does not modify '*vid' if 'vlandev_ofp_port' is not a Linux
5120  * VLAN device.  Unless VLAN splinters are enabled, this is what this function
5121  * always does.*/
5122 static ofp_port_t
5123 vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto,
5124                        ofp_port_t vlandev_ofp_port, int *vid)
5125     OVS_REQUIRES(ofproto->vsp_mutex)
5126 {
5127     if (!hmap_is_empty(&ofproto->vlandev_map)) {
5128         const struct vlan_splinter *vsp;
5129
5130         vsp = vlandev_find(ofproto, vlandev_ofp_port);
5131         if (vsp) {
5132             if (vid) {
5133                 *vid = vsp->vid;
5134             }
5135             return vsp->realdev_ofp_port;
5136         }
5137     }
5138     return 0;
5139 }
5140
5141 /* Given 'flow', a flow representing a packet received on 'ofproto', checks
5142  * whether 'flow->in_port' represents a Linux VLAN device.  If so, changes
5143  * 'flow->in_port' to the "real" device backing the VLAN device, sets
5144  * 'flow->vlan_tci' to the VLAN VID, and returns true.  Optionally pushes the
5145  * appropriate VLAN on 'packet' if provided.  Otherwise (which is always the
5146  * case unless VLAN splinters are enabled), returns false without making any
5147  * changes. */
5148 bool
5149 vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
5150                 struct ofpbuf *packet)
5151     OVS_EXCLUDED(ofproto->vsp_mutex)
5152 {
5153     ofp_port_t realdev;
5154     int vid;
5155
5156     /* hmap_is_empty is thread safe. */
5157     if (hmap_is_empty(&ofproto->vlandev_map)) {
5158         return false;
5159     }
5160
5161     ovs_mutex_lock(&ofproto->vsp_mutex);
5162     realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
5163     ovs_mutex_unlock(&ofproto->vsp_mutex);
5164     if (!realdev) {
5165         return false;
5166     }
5167
5168     /* Cause the flow to be processed as if it came in on the real device with
5169      * the VLAN device's VLAN ID. */
5170     flow->in_port.ofp_port = realdev;
5171     flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
5172
5173     if (packet) {
5174         /* Make the packet resemble the flow, so that it gets sent to an
5175          * OpenFlow controller properly, so that it looks correct for sFlow,
5176          * and so that flow_extract() will get the correct vlan_tci if it is
5177          * called on 'packet'. */
5178         eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
5179     }
5180
5181     return true;
5182 }
5183
5184 static void
5185 vsp_remove(struct ofport_dpif *port)
5186 {
5187     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5188     struct vlan_splinter *vsp;
5189
5190     ovs_mutex_lock(&ofproto->vsp_mutex);
5191     vsp = vlandev_find(ofproto, port->up.ofp_port);
5192     if (vsp) {
5193         hmap_remove(&ofproto->vlandev_map, &vsp->vlandev_node);
5194         hmap_remove(&ofproto->realdev_vid_map, &vsp->realdev_vid_node);
5195         free(vsp);
5196
5197         port->realdev_ofp_port = 0;
5198     } else {
5199         VLOG_ERR("missing vlan device record");
5200     }
5201     ovs_mutex_unlock(&ofproto->vsp_mutex);
5202 }
5203
5204 static void
5205 vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid)
5206 {
5207     struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
5208
5209     ovs_mutex_lock(&ofproto->vsp_mutex);
5210     if (!vsp_vlandev_to_realdev(ofproto, port->up.ofp_port, NULL)
5211         && (vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, htons(vid))
5212             == realdev_ofp_port)) {
5213         struct vlan_splinter *vsp;
5214
5215         vsp = xmalloc(sizeof *vsp);
5216         vsp->realdev_ofp_port = realdev_ofp_port;
5217         vsp->vlandev_ofp_port = port->up.ofp_port;
5218         vsp->vid = vid;
5219
5220         port->realdev_ofp_port = realdev_ofp_port;
5221
5222         hmap_insert(&ofproto->vlandev_map, &vsp->vlandev_node,
5223                     hash_ofp_port(port->up.ofp_port));
5224         hmap_insert(&ofproto->realdev_vid_map, &vsp->realdev_vid_node,
5225                     hash_realdev_vid(realdev_ofp_port, vid));
5226     } else {
5227         VLOG_ERR("duplicate vlan device record");
5228     }
5229     ovs_mutex_unlock(&ofproto->vsp_mutex);
5230 }
5231
5232 static odp_port_t
5233 ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
5234 {
5235     const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
5236     return ofport ? ofport->odp_port : ODPP_NONE;
5237 }
5238
5239 struct ofport_dpif *
5240 odp_port_to_ofport(const struct dpif_backer *backer, odp_port_t odp_port)
5241 {
5242     struct ofport_dpif *port;
5243
5244     ovs_rwlock_rdlock(&backer->odp_to_ofport_lock);
5245     HMAP_FOR_EACH_IN_BUCKET (port, odp_port_node, hash_odp_port(odp_port),
5246                              &backer->odp_to_ofport_map) {
5247         if (port->odp_port == odp_port) {
5248             ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5249             return port;
5250         }
5251     }
5252
5253     ovs_rwlock_unlock(&backer->odp_to_ofport_lock);
5254     return NULL;
5255 }
5256
5257 static ofp_port_t
5258 odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port)
5259 {
5260     struct ofport_dpif *port;
5261
5262     port = odp_port_to_ofport(ofproto->backer, odp_port);
5263     if (port && &ofproto->up == port->up.ofproto) {
5264         return port->up.ofp_port;
5265     } else {
5266         return OFPP_NONE;
5267     }
5268 }
5269
5270 uint32_t
5271 ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto)
5272 {
5273     struct dpif_backer *backer = ofproto->backer;
5274
5275     return  recirc_id_alloc(backer->rid_pool);
5276 }
5277
5278 void
5279 ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id)
5280 {
5281     struct dpif_backer *backer = ofproto->backer;
5282
5283     recirc_id_free(backer->rid_pool, recirc_id);
5284 }
5285
5286 int
5287 ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
5288                                const struct match *match, int priority,
5289                                uint16_t idle_timeout,
5290                                const struct ofpbuf *ofpacts,
5291                                struct rule **rulep)
5292 {
5293     struct ofputil_flow_mod fm;
5294     struct rule_dpif *rule;
5295     int error;
5296
5297     fm.match = *match;
5298     fm.priority = priority;
5299     fm.new_cookie = htonll(0);
5300     fm.cookie = htonll(0);
5301     fm.cookie_mask = htonll(0);
5302     fm.modify_cookie = false;
5303     fm.table_id = TBL_INTERNAL;
5304     fm.command = OFPFC_ADD;
5305     fm.idle_timeout = idle_timeout;
5306     fm.hard_timeout = 0;
5307     fm.importance = 0;
5308     fm.buffer_id = 0;
5309     fm.out_port = 0;
5310     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5311     fm.ofpacts = ofpbuf_data(ofpacts);
5312     fm.ofpacts_len = ofpbuf_size(ofpacts);
5313
5314     error = ofproto_flow_mod(&ofproto->up, &fm);
5315     if (error) {
5316         VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
5317                     ofperr_to_string(error));
5318         *rulep = NULL;
5319         return error;
5320     }
5321
5322     rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow,
5323                                      &fm.match.wc, false);
5324     if (rule) {
5325         *rulep = &rule->up;
5326     } else {
5327         OVS_NOT_REACHED();
5328     }
5329     return 0;
5330 }
5331
5332 int
5333 ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
5334                                   struct match *match, int priority)
5335 {
5336     struct ofputil_flow_mod fm;
5337     int error;
5338
5339     fm.match = *match;
5340     fm.priority = priority;
5341     fm.new_cookie = htonll(0);
5342     fm.cookie = htonll(0);
5343     fm.cookie_mask = htonll(0);
5344     fm.modify_cookie = false;
5345     fm.table_id = TBL_INTERNAL;
5346     fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
5347     fm.command = OFPFC_DELETE_STRICT;
5348
5349     error = ofproto_flow_mod(&ofproto->up, &fm);
5350     if (error) {
5351         VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
5352                     ofperr_to_string(error));
5353         return error;
5354     }
5355
5356     return 0;
5357 }
5358
5359 const struct ofproto_class ofproto_dpif_class = {
5360     init,
5361     enumerate_types,
5362     enumerate_names,
5363     del,
5364     port_open_type,
5365     type_run,
5366     type_wait,
5367     alloc,
5368     construct,
5369     destruct,
5370     dealloc,
5371     run,
5372     wait,
5373     NULL,                       /* get_memory_usage. */
5374     type_get_memory_usage,
5375     flush,
5376     query_tables,
5377     port_alloc,
5378     port_construct,
5379     port_destruct,
5380     port_dealloc,
5381     port_modified,
5382     port_reconfigured,
5383     port_query_by_name,
5384     port_add,
5385     port_del,
5386     port_get_stats,
5387     port_dump_start,
5388     port_dump_next,
5389     port_dump_done,
5390     port_poll,
5391     port_poll_wait,
5392     port_is_lacp_current,
5393     port_get_lacp_stats,
5394     NULL,                       /* rule_choose_table */
5395     rule_alloc,
5396     rule_construct,
5397     rule_insert,
5398     rule_delete,
5399     rule_destruct,
5400     rule_dealloc,
5401     rule_get_stats,
5402     rule_execute,
5403     NULL,                       /* rule_premodify_actions */
5404     rule_modify_actions,
5405     set_frag_handling,
5406     packet_out,
5407     set_netflow,
5408     get_netflow_ids,
5409     set_sflow,
5410     set_ipfix,
5411     set_cfm,
5412     cfm_status_changed,
5413     get_cfm_status,
5414     set_bfd,
5415     bfd_status_changed,
5416     get_bfd_status,
5417     set_stp,
5418     get_stp_status,
5419     set_stp_port,
5420     get_stp_port_status,
5421     get_stp_port_stats,
5422     set_rstp,
5423     get_rstp_status,
5424     set_rstp_port,
5425     get_rstp_port_status,
5426     set_queues,
5427     bundle_set,
5428     bundle_remove,
5429     mirror_set__,
5430     mirror_get_stats__,
5431     set_flood_vlans,
5432     is_mirror_output_bundle,
5433     forward_bpdu_changed,
5434     set_mac_table_config,
5435     set_mcast_snooping,
5436     set_mcast_snooping_port,
5437     set_realdev,
5438     NULL,                       /* meter_get_features */
5439     NULL,                       /* meter_set */
5440     NULL,                       /* meter_get */
5441     NULL,                       /* meter_del */
5442     group_alloc,                /* group_alloc */
5443     group_construct,            /* group_construct */
5444     group_destruct,             /* group_destruct */
5445     group_dealloc,              /* group_dealloc */
5446     group_modify,               /* group_modify */
5447     group_get_stats,            /* group_get_stats */
5448     get_datapath_version,       /* get_datapath_version */
5449 };