ofproto/ofproto-dpif-xlate.c

   1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
   2  *
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License. */
  14
  15 #include <config.h>
  16
  17 #include "ofproto/ofproto-dpif-xlate.h"
  18
  19 #include <errno.h>
  20 #include <arpa/inet.h>
  21 #include <net/if.h>
  22 #include <sys/socket.h>
  23 #include <netinet/in.h>
  24
  25 #include "tnl-arp-cache.h"
  26 #include "bfd.h"
  27 #include "bitmap.h"
  28 #include "bond.h"
  29 #include "bundle.h"
  30 #include "byte-order.h"
  31 #include "cfm.h"
  32 #include "connmgr.h"
  33 #include "coverage.h"
  34 #include "dp-packet.h"
  35 #include "dpif.h"
  36 #include "dynamic-string.h"
  37 #include "in-band.h"
  38 #include "lacp.h"
  39 #include "learn.h"
  40 #include "list.h"
  41 #include "ovs-lldp.h"
  42 #include "mac-learning.h"
  43 #include "mcast-snooping.h"
  44 #include "meta-flow.h"
  45 #include "multipath.h"
  46 #include "netdev-vport.h"
  47 #include "netlink.h"
  48 #include "nx-match.h"
  49 #include "odp-execute.h"
  50 #include "ofp-actions.h"
  51 #include "ofproto/ofproto-dpif-ipfix.h"
  52 #include "ofproto/ofproto-dpif-mirror.h"
  53 #include "ofproto/ofproto-dpif-monitor.h"
  54 #include "ofproto/ofproto-dpif-sflow.h"
  55 #include "ofproto/ofproto-dpif.h"
  56 #include "ofproto/ofproto-provider.h"
  57 #include "ovs-router.h"
  58 #include "tnl-ports.h"
  59 #include "tunnel.h"
  60 #include "openvswitch/vlog.h"
  61
  62 COVERAGE_DEFINE(xlate_actions);
  63 COVERAGE_DEFINE(xlate_actions_oversize);
  64 COVERAGE_DEFINE(xlate_actions_too_many_output);
  65
  66 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
  67
  68 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  69  * flow translation. */
  70 #define MAX_RESUBMIT_RECURSION 64
  71 #define MAX_INTERNAL_RESUBMITS 1   /* Max resbmits allowed using rules in
  72                                       internal table. */
  73
  74 /* Maximum number of resubmit actions in a flow translation, whether they are
  75  * recursive or not. */
  76 #define MAX_RESUBMITS (MAX_RESUBMIT_RECURSION * MAX_RESUBMIT_RECURSION)
  77
  78 struct xbridge {
  79     struct hmap_node hmap_node;   /* Node in global 'xbridges' map. */
  80     struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
  81
  82     struct ovs_list xbundles;     /* Owned xbundles. */
  83     struct hmap xports;           /* Indexed by ofp_port. */
  84
  85     char *name;                   /* Name used in log messages. */
  86     struct dpif *dpif;            /* Datapath interface. */
  87     struct mac_learning *ml;      /* Mac learning handle. */
  88     struct mcast_snooping *ms;    /* Multicast Snooping handle. */
  89     struct mbridge *mbridge;      /* Mirroring. */
  90     struct dpif_sflow *sflow;     /* SFlow handle, or null. */
  91     struct dpif_ipfix *ipfix;     /* Ipfix handle, or null. */
  92     struct netflow *netflow;      /* Netflow handle, or null. */
  93     struct stp *stp;              /* STP or null if disabled. */
  94     struct rstp *rstp;            /* RSTP or null if disabled. */
  95
  96     bool has_in_band;             /* Bridge has in band control? */
  97     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
  98
  99     /* Datapath feature support. */
 100     struct dpif_backer_support support;
 101 };
 102
 103 struct xbundle {
 104     struct hmap_node hmap_node;    /* In global 'xbundles' map. */
 105     struct ofbundle *ofbundle;     /* Key in global 'xbundles' map. */
 106
 107     struct ovs_list list_node;     /* In parent 'xbridges' list. */
 108     struct xbridge *xbridge;       /* Parent xbridge. */
 109
 110     struct ovs_list xports;        /* Contains "struct xport"s. */
 111
 112     char *name;                    /* Name used in log messages. */
 113     struct bond *bond;             /* Nonnull iff more than one port. */
 114     struct lacp *lacp;             /* LACP handle or null. */
 115
 116     enum port_vlan_mode vlan_mode; /* VLAN mode. */
 117     int vlan;                      /* -1=trunk port, else a 12-bit VLAN ID. */
 118     unsigned long *trunks;         /* Bitmap of trunked VLANs, if 'vlan' == -1.
 119                                     * NULL if all VLANs are trunked. */
 120     bool use_priority_tags;        /* Use 802.1p tag for frames in VLAN 0? */
 121     bool floodable;                /* No port has OFPUTIL_PC_NO_FLOOD set? */
 122 };
 123
 124 struct xport {
 125     struct hmap_node hmap_node;      /* Node in global 'xports' map. */
 126     struct ofport_dpif *ofport;      /* Key in global 'xports map. */
 127
 128     struct hmap_node ofp_node;       /* Node in parent xbridge 'xports' map. */
 129     ofp_port_t ofp_port;             /* Key in parent xbridge 'xports' map. */
 130
 131     odp_port_t odp_port;             /* Datapath port number or ODPP_NONE. */
 132
 133     struct ovs_list bundle_node;     /* In parent xbundle (if it exists). */
 134     struct xbundle *xbundle;         /* Parent xbundle or null. */
 135
 136     struct netdev *netdev;           /* 'ofport''s netdev. */
 137
 138     struct xbridge *xbridge;         /* Parent bridge. */
 139     struct xport *peer;              /* Patch port peer or null. */
 140
 141     enum ofputil_port_config config; /* OpenFlow port configuration. */
 142     enum ofputil_port_state state;   /* OpenFlow port state. */
 143     int stp_port_no;                 /* STP port number or -1 if not in use. */
 144     struct rstp_port *rstp_port;     /* RSTP port or null. */
 145
 146     struct hmap skb_priorities;      /* Map of 'skb_priority_to_dscp's. */
 147
 148     bool may_enable;                 /* May be enabled in bonds. */
 149     bool is_tunnel;                  /* Is a tunnel port. */
 150
 151     struct cfm *cfm;                 /* CFM handle or null. */
 152     struct bfd *bfd;                 /* BFD handle or null. */
 153     struct lldp *lldp;               /* LLDP handle or null. */
 154 };
 155
 156 struct xlate_ctx {
 157     struct xlate_in *xin;
 158     struct xlate_out *xout;
 159
 160     const struct xbridge *xbridge;
 161
 162     /* Flow tables version at the beginning of the translation. */
 163     cls_version_t tables_version;
 164
 165     /* Flow at the last commit. */
 166     struct flow base_flow;
 167
 168     /* Tunnel IP destination address as received.  This is stored separately
 169      * as the base_flow.tunnel is cleared on init to reflect the datapath
 170      * behavior.  Used to make sure not to send tunneled output to ourselves,
 171      * which might lead to an infinite loop.  This could happen easily
 172      * if a tunnel is marked as 'ip_remote=flow', and the flow does not
 173      * actually set the tun_dst field. */
 174     ovs_be32 orig_tunnel_ip_dst;
 175
 176     /* Stack for the push and pop actions.  Each stack element is of type
 177      * "union mf_subvalue". */
 178     struct ofpbuf stack;
 179
 180     /* The rule that we are currently translating, or NULL. */
 181     struct rule_dpif *rule;
 182
 183     /* Flow translation populates this with wildcards relevant in translation.
 184      * When 'xin->wc' is nonnull, this is the same pointer.  When 'xin->wc' is
 185      * null, this is a pointer to uninitialized scratch memory.  This allows
 186      * code to blindly write to 'ctx->wc' without worrying about whether the
 187      * caller really wants wildcards. */
 188     struct flow_wildcards *wc;
 189
 190     /* Output buffer for datapath actions.  When 'xin->odp_actions' is nonnull,
 191      * this is the same pointer.  When 'xin->odp_actions' is null, this points
 192      * to a scratch ofpbuf.  This allows code to add actions to
 193      * 'ctx->odp_actions' without worrying about whether the caller really
 194      * wants actions. */
 195     struct ofpbuf *odp_actions;
 196
 197     /* Resubmit statistics, via xlate_table_action(). */
 198     int recurse;                /* Current resubmit nesting depth. */
 199     int resubmits;              /* Total number of resubmits. */
 200     bool in_group;              /* Currently translating ofgroup, if true. */
 201     bool in_action_set;         /* Currently translating action_set, if true. */
 202
 203     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
 204     ovs_be64 rule_cookie;       /* Cookie of the rule being translated. */
 205     uint32_t orig_skb_priority; /* Priority when packet arrived. */
 206     uint32_t sflow_n_outputs;   /* Number of output ports. */
 207     odp_port_t sflow_odp_port;  /* Output port for composing sFlow action. */
 208     ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
 209     bool exit;                  /* No further actions should be processed. */
 210     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 211
 212    /* These are used for non-bond recirculation.  The recirculation IDs are
 213     * stored in xout and must be associated with a datapath flow (ukey),
 214     * otherwise they will be freed when the xout is uninitialized.
 215     *
 216     *
 217     * Steps in Recirculation Translation
 218     * ==================================
 219     *
 220     * At some point during translation, the code recognizes the need for
 221     * recirculation.  For example, recirculation is necessary when, after
 222     * popping the last MPLS label, an action or a match tries to examine or
 223     * modify a field that has been newly revealed following the MPLS label.
 224     *
 225     * The simplest part of the work to be done is to commit existing changes to
 226     * the packet, which produces datapath actions corresponding to the changes,
 227     * and after this, add an OVS_ACTION_ATTR_RECIRC datapath action.
 228     *
 229     * The main problem here is preserving state.  When the datapath executes
 230     * OVS_ACTION_ATTR_RECIRC, it will upcall to userspace to get a translation
 231     * for the post-recirculation actions.  At this point userspace has to
 232     * resume the translation where it left off, which means that it has to
 233     * execute the following:
 234     *
 235     *     - The action that prompted recirculation, and any actions following
 236     *       it within the same flow.
 237     *
 238     *     - If the action that prompted recirculation was invoked within a
 239     *       NXAST_RESUBMIT, then any actions following the resubmit.  These
 240     *       "resubmit"s can be nested, so this has to go all the way up the
 241     *       control stack.
 242     *
 243     *     - The OpenFlow 1.1+ action set.
 244     *
 245     * State that actions and flow table lookups can depend on, such as the
 246     * following, must also be preserved:
 247     *
 248     *     - Metadata fields (input port, registers, OF1.1+ metadata, ...).
 249     *
 250     *     - Action set, stack
 251     *
 252     *     - The table ID and cookie of the flow being translated at each level
 253     *       of the control stack (since OFPAT_CONTROLLER actions send these to
 254     *       the controller).
 255     *
 256     * Translation allows for the control of this state preservation via these
 257     * members.  When a need for recirculation is identified, the translation
 258     * process:
 259     *
 260     * 1. Sets 'recirc_action_offset' to the current size of 'action_set'.  The
 261     *    action set is part of what needs to be preserved, so this allows the
 262     *    action set and the additional state to share the 'action_set' buffer.
 263     *    Later steps can tell that setup for recirculation is in progress from
 264     *    the nonnegative value of 'recirc_action_offset'.
 265     *
 266     * 2. Sets 'exit' to true to tell later steps that we're exiting from the
 267     *    translation process.
 268     *
 269     * 3. Adds an OFPACT_UNROLL_XLATE action to 'action_set'.  This action
 270     *    holds the current table ID and cookie so that they can be restored
 271     *    during a post-recirculation upcall translation.
 272     *
 273     * 4. Adds the action that prompted recirculation and any actions following
 274     *    it within the same flow to 'action_set', so that they can be executed
 275     *    during a post-recirculation upcall translation.
 276     *
 277     * 5. Returns.
 278     *
 279     * 6. The action that prompted recirculation might be nested in a stack of
 280     *    nested "resubmit"s that have actions remaining.  Each of these notices
 281     *    that we're exiting (from 'exit') and that recirculation setup is in
 282     *    progress (from 'recirc_action_offset') and responds by adding more
 283     *    OFPACT_UNROLL_XLATE actions to 'action_set', as necessary, and any
 284     *    actions that were yet unprocessed.
 285     *
 286     * The caller stores all the state produced by this process associated with
 287     * the recirculation ID.  For post-recirculation upcall translation, the
 288     * caller passes it back in for the new translation to execute.  The
 289     * process yielded a set of ofpacts that can be translated directly, so it
 290     * is not much of a special case at that point.
 291     */
 292     int recirc_action_offset;   /* Offset in 'action_set' to actions to be
 293                                  * executed after recirculation, or -1. */
 294     int last_unroll_offset;     /* Offset in 'action_set' to the latest unroll
 295                                  * action, or -1. */
 296
 297     /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
 298      * This is a trigger for recirculation in cases where translating an action
 299      * or looking up a flow requires access to the fields of the packet after
 300      * the MPLS label stack that was originally present. */
 301     bool was_mpls;
 302
 303     /* OpenFlow 1.1+ action set.
 304      *
 305      * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
 306      * When translation is otherwise complete, ofpacts_execute_action_set()
 307      * converts it to a set of "struct ofpact"s that can be translated into
 308      * datapath actions. */
 309     bool action_set_has_group;  /* Action set contains OFPACT_GROUP? */
 310     struct ofpbuf action_set;   /* Action set. */
 311 };
 312
 313 static void xlate_action_set(struct xlate_ctx *ctx);
 314
 315 static void
 316 ctx_trigger_recirculation(struct xlate_ctx *ctx)
 317 {
 318     ctx->exit = true;
 319     ctx->recirc_action_offset = ctx->action_set.size;
 320 }
 321
 322 static bool
 323 ctx_first_recirculation_action(const struct xlate_ctx *ctx)
 324 {
 325     return ctx->recirc_action_offset == ctx->action_set.size;
 326 }
 327
 328 static inline bool
 329 exit_recirculates(const struct xlate_ctx *ctx)
 330 {
 331     /* When recirculating the 'recirc_action_offset' has a non-negative value.
 332      */
 333     return ctx->recirc_action_offset >= 0;
 334 }
 335
 336 static void compose_recirculate_action(struct xlate_ctx *ctx);
 337
 338 /* A controller may use OFPP_NONE as the ingress port to indicate that
 339  * it did not arrive on a "real" port.  'ofpp_none_bundle' exists for
 340  * when an input bundle is needed for validation (e.g., mirroring or
 341  * OFPP_NORMAL processing).  It is not connected to an 'ofproto' or have
 342  * any 'port' structs, so care must be taken when dealing with it. */
 343 static struct xbundle ofpp_none_bundle = {
 344     .name      = "OFPP_NONE",
 345     .vlan_mode = PORT_VLAN_TRUNK
 346 };
 347
 348 /* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
 349  * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
 350  * traffic egressing the 'ofport' with that priority should be marked with. */
 351 struct skb_priority_to_dscp {
 352     struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */
 353     uint32_t skb_priority;      /* Priority of this queue (see struct flow). */
 354
 355     uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
 356 };
 357
 358 enum xc_type {
 359     XC_RULE,
 360     XC_BOND,
 361     XC_NETDEV,
 362     XC_NETFLOW,
 363     XC_MIRROR,
 364     XC_LEARN,
 365     XC_NORMAL,
 366     XC_FIN_TIMEOUT,
 367     XC_GROUP,
 368     XC_TNL_ARP,
 369 };
 370
 371 /* xlate_cache entries hold enough information to perform the side effects of
 372  * xlate_actions() for a rule, without needing to perform rule translation
 373  * from scratch. The primary usage of these is to submit statistics to objects
 374  * that a flow relates to, although they may be used for other effects as well
 375  * (for instance, refreshing hard timeouts for learned flows). */
 376 struct xc_entry {
 377     enum xc_type type;
 378     union {
 379         struct rule_dpif *rule;
 380         struct {
 381             struct netdev *tx;
 382             struct netdev *rx;
 383             struct bfd *bfd;
 384         } dev;
 385         struct {
 386             struct netflow *netflow;
 387             struct flow *flow;
 388             ofp_port_t iface;
 389         } nf;
 390         struct {
 391             struct mbridge *mbridge;
 392             mirror_mask_t mirrors;
 393         } mirror;
 394         struct {
 395             struct bond *bond;
 396             struct flow *flow;
 397             uint16_t vid;
 398         } bond;
 399         struct {
 400             struct ofproto_dpif *ofproto;
 401             struct ofputil_flow_mod *fm;
 402             struct ofpbuf *ofpacts;
 403         } learn;
 404         struct {
 405             struct ofproto_dpif *ofproto;
 406             struct flow *flow;
 407             int vlan;
 408         } normal;
 409         struct {
 410             struct rule_dpif *rule;
 411             uint16_t idle;
 412             uint16_t hard;
 413         } fin;
 414         struct {
 415             struct group_dpif *group;
 416             struct ofputil_bucket *bucket;
 417         } group;
 418         struct {
 419             char br_name[IFNAMSIZ];
 420             ovs_be32 d_ip;
 421         } tnl_arp_cache;
 422     } u;
 423 };
 424
 425 #define XC_ENTRY_FOR_EACH(entry, entries, xcache)               \
 426     entries = xcache->entries;                                  \
 427     for (entry = ofpbuf_try_pull(&entries, sizeof *entry);      \
 428          entry;                                                 \
 429          entry = ofpbuf_try_pull(&entries, sizeof *entry))
 430
 431 struct xlate_cache {
 432     struct ofpbuf entries;
 433 };
 434
 435 /* Xlate config contains hash maps of all bridges, bundles and ports.
 436  * Xcfgp contains the pointer to the current xlate configuration.
 437  * When the main thread needs to change the configuration, it copies xcfgp to
 438  * new_xcfg and edits new_xcfg. This enables the use of RCU locking which
 439  * does not block handler and revalidator threads. */
 440 struct xlate_cfg {
 441     struct hmap xbridges;
 442     struct hmap xbundles;
 443     struct hmap xports;
 444 };
 445 static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
 446 static struct xlate_cfg *new_xcfg = NULL;
 447
 448 static bool may_receive(const struct xport *, struct xlate_ctx *);
 449 static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
 450                              struct xlate_ctx *);
 451 static void xlate_normal(struct xlate_ctx *);
 452 static inline void xlate_report(struct xlate_ctx *, const char *, ...)
 453     OVS_PRINTF_FORMAT(2, 3);
 454 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
 455                                uint8_t table_id, bool may_packet_in,
 456                                bool honor_table_miss);
 457 static bool input_vid_is_valid(uint16_t vid, struct xbundle *, bool warn);
 458 static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid);
 459 static void output_normal(struct xlate_ctx *, const struct xbundle *,
 460                           uint16_t vlan);
 461
 462 /* Optional bond recirculation parameter to compose_output_action(). */
 463 struct xlate_bond_recirc {
 464     uint32_t recirc_id;  /* !0 Use recirculation instead of output. */
 465     uint8_t  hash_alg;   /* !0 Compute hash for recirc before. */
 466     uint32_t hash_basis;  /* Compute hash for recirc before. */
 467 };
 468
 469 static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
 470                                   const struct xlate_bond_recirc *xr);
 471
 472 static struct xbridge *xbridge_lookup(struct xlate_cfg *,
 473                                       const struct ofproto_dpif *);
 474 static struct xbundle *xbundle_lookup(struct xlate_cfg *,
 475                                       const struct ofbundle *);
 476 static struct xport *xport_lookup(struct xlate_cfg *,
 477                                   const struct ofport_dpif *);
 478 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
 479 static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
 480                                                      uint32_t skb_priority);
 481 static void clear_skb_priorities(struct xport *);
 482 static size_t count_skb_priorities(const struct xport *);
 483 static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
 484                                    uint8_t *dscp);
 485
 486 static struct xc_entry *xlate_cache_add_entry(struct xlate_cache *xc,
 487                                               enum xc_type type);
 488 static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
 489 static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
 490 static void xlate_xport_init(struct xlate_cfg *, struct xport *);
 491 static void xlate_xbridge_set(struct xbridge *, struct dpif *,
 492                               const struct mac_learning *, struct stp *,
 493                               struct rstp *, const struct mcast_snooping *,
 494                               const struct mbridge *,
 495                               const struct dpif_sflow *,
 496                               const struct dpif_ipfix *,
 497                               const struct netflow *,
 498                               bool forward_bpdu, bool has_in_band,
 499                               const struct dpif_backer_support *);
 500 static void xlate_xbundle_set(struct xbundle *xbundle,
 501                               enum port_vlan_mode vlan_mode, int vlan,
 502                               unsigned long *trunks, bool use_priority_tags,
 503                               const struct bond *bond, const struct lacp *lacp,
 504                               bool floodable);
 505 static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 506                             const struct netdev *netdev, const struct cfm *cfm,
 507                             const struct bfd *bfd, const struct lldp *lldp,
 508                             int stp_port_no, const struct rstp_port *rstp_port,
 509                             enum ofputil_port_config config,
 510                             enum ofputil_port_state state, bool is_tunnel,
 511                             bool may_enable);
 512 static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
 513 static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
 514 static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
 515 static void xlate_xbridge_copy(struct xbridge *);
 516 static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
 517 static void xlate_xport_copy(struct xbridge *, struct xbundle *,
 518                              struct xport *);
 519 static void xlate_xcfg_free(struct xlate_cfg *);
 520
 521 static inline void
 522 xlate_report(struct xlate_ctx *ctx, const char *format, ...)
 523 {
 524     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
 525         va_list args;
 526
 527         va_start(args, format);
 528         ctx->xin->report_hook(ctx->xin, ctx->recurse, format, args);
 529         va_end(args);
 530     }
 531 }
 532
 533 static void
 534 xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 535 {
 536     list_init(&xbridge->xbundles);
 537     hmap_init(&xbridge->xports);
 538     hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
 539                 hash_pointer(xbridge->ofproto, 0));
 540 }
 541
 542 static void
 543 xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
 544 {
 545     list_init(&xbundle->xports);
 546     list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
 547     hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
 548                 hash_pointer(xbundle->ofbundle, 0));
 549 }
 550
 551 static void
 552 xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
 553 {
 554     hmap_init(&xport->skb_priorities);
 555     hmap_insert(&xcfg->xports, &xport->hmap_node,
 556                 hash_pointer(xport->ofport, 0));
 557     hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
 558                 hash_ofp_port(xport->ofp_port));
 559 }
 560
 561 static void
 562 xlate_xbridge_set(struct xbridge *xbridge,
 563                   struct dpif *dpif,
 564                   const struct mac_learning *ml, struct stp *stp,
 565                   struct rstp *rstp, const struct mcast_snooping *ms,
 566                   const struct mbridge *mbridge,
 567                   const struct dpif_sflow *sflow,
 568                   const struct dpif_ipfix *ipfix,
 569                   const struct netflow *netflow,
 570                   bool forward_bpdu, bool has_in_band,
 571                   const struct dpif_backer_support *support)
 572 {
 573     if (xbridge->ml != ml) {
 574         mac_learning_unref(xbridge->ml);
 575         xbridge->ml = mac_learning_ref(ml);
 576     }
 577
 578     if (xbridge->ms != ms) {
 579         mcast_snooping_unref(xbridge->ms);
 580         xbridge->ms = mcast_snooping_ref(ms);
 581     }
 582
 583     if (xbridge->mbridge != mbridge) {
 584         mbridge_unref(xbridge->mbridge);
 585         xbridge->mbridge = mbridge_ref(mbridge);
 586     }
 587
 588     if (xbridge->sflow != sflow) {
 589         dpif_sflow_unref(xbridge->sflow);
 590         xbridge->sflow = dpif_sflow_ref(sflow);
 591     }
 592
 593     if (xbridge->ipfix != ipfix) {
 594         dpif_ipfix_unref(xbridge->ipfix);
 595         xbridge->ipfix = dpif_ipfix_ref(ipfix);
 596     }
 597
 598     if (xbridge->stp != stp) {
 599         stp_unref(xbridge->stp);
 600         xbridge->stp = stp_ref(stp);
 601     }
 602
 603     if (xbridge->rstp != rstp) {
 604         rstp_unref(xbridge->rstp);
 605         xbridge->rstp = rstp_ref(rstp);
 606     }
 607
 608     if (xbridge->netflow != netflow) {
 609         netflow_unref(xbridge->netflow);
 610         xbridge->netflow = netflow_ref(netflow);
 611     }
 612
 613     xbridge->dpif = dpif;
 614     xbridge->forward_bpdu = forward_bpdu;
 615     xbridge->has_in_band = has_in_band;
 616     xbridge->support = *support;
 617 }
 618
 619 static void
 620 xlate_xbundle_set(struct xbundle *xbundle,
 621                   enum port_vlan_mode vlan_mode, int vlan,
 622                   unsigned long *trunks, bool use_priority_tags,
 623                   const struct bond *bond, const struct lacp *lacp,
 624                   bool floodable)
 625 {
 626     ovs_assert(xbundle->xbridge);
 627
 628     xbundle->vlan_mode = vlan_mode;
 629     xbundle->vlan = vlan;
 630     xbundle->trunks = trunks;
 631     xbundle->use_priority_tags = use_priority_tags;
 632     xbundle->floodable = floodable;
 633
 634     if (xbundle->bond != bond) {
 635         bond_unref(xbundle->bond);
 636         xbundle->bond = bond_ref(bond);
 637     }
 638
 639     if (xbundle->lacp != lacp) {
 640         lacp_unref(xbundle->lacp);
 641         xbundle->lacp = lacp_ref(lacp);
 642     }
 643 }
 644
 645 static void
 646 xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 647                 const struct netdev *netdev, const struct cfm *cfm,
 648                 const struct bfd *bfd, const struct lldp *lldp, int stp_port_no,
 649                 const struct rstp_port* rstp_port,
 650                 enum ofputil_port_config config, enum ofputil_port_state state,
 651                 bool is_tunnel, bool may_enable)
 652 {
 653     xport->config = config;
 654     xport->state = state;
 655     xport->stp_port_no = stp_port_no;
 656     xport->is_tunnel = is_tunnel;
 657     xport->may_enable = may_enable;
 658     xport->odp_port = odp_port;
 659
 660     if (xport->rstp_port != rstp_port) {
 661         rstp_port_unref(xport->rstp_port);
 662         xport->rstp_port = rstp_port_ref(rstp_port);
 663     }
 664
 665     if (xport->cfm != cfm) {
 666         cfm_unref(xport->cfm);
 667         xport->cfm = cfm_ref(cfm);
 668     }
 669
 670     if (xport->bfd != bfd) {
 671         bfd_unref(xport->bfd);
 672         xport->bfd = bfd_ref(bfd);
 673     }
 674
 675     if (xport->lldp != lldp) {
 676         lldp_unref(xport->lldp);
 677         xport->lldp = lldp_ref(lldp);
 678     }
 679
 680     if (xport->netdev != netdev) {
 681         netdev_close(xport->netdev);
 682         xport->netdev = netdev_ref(netdev);
 683     }
 684 }
 685
 686 static void
 687 xlate_xbridge_copy(struct xbridge *xbridge)
 688 {
 689     struct xbundle *xbundle;
 690     struct xport *xport;
 691     struct xbridge *new_xbridge = xzalloc(sizeof *xbridge);
 692     new_xbridge->ofproto = xbridge->ofproto;
 693     new_xbridge->name = xstrdup(xbridge->name);
 694     xlate_xbridge_init(new_xcfg, new_xbridge);
 695
 696     xlate_xbridge_set(new_xbridge,
 697                       xbridge->dpif, xbridge->ml, xbridge->stp,
 698                       xbridge->rstp, xbridge->ms, xbridge->mbridge,
 699                       xbridge->sflow, xbridge->ipfix, xbridge->netflow,
 700                       xbridge->forward_bpdu, xbridge->has_in_band,
 701                       &xbridge->support);
 702     LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
 703         xlate_xbundle_copy(new_xbridge, xbundle);
 704     }
 705
 706     /* Copy xports which are not part of a xbundle */
 707     HMAP_FOR_EACH (xport, ofp_node, &xbridge->xports) {
 708         if (!xport->xbundle) {
 709             xlate_xport_copy(new_xbridge, NULL, xport);
 710         }
 711     }
 712 }
 713
 714 static void
 715 xlate_xbundle_copy(struct xbridge *xbridge, struct xbundle *xbundle)
 716 {
 717     struct xport *xport;
 718     struct xbundle *new_xbundle = xzalloc(sizeof *xbundle);
 719     new_xbundle->ofbundle = xbundle->ofbundle;
 720     new_xbundle->xbridge = xbridge;
 721     new_xbundle->name = xstrdup(xbundle->name);
 722     xlate_xbundle_init(new_xcfg, new_xbundle);
 723
 724     xlate_xbundle_set(new_xbundle, xbundle->vlan_mode,
 725                       xbundle->vlan, xbundle->trunks,
 726                       xbundle->use_priority_tags, xbundle->bond, xbundle->lacp,
 727                       xbundle->floodable);
 728     LIST_FOR_EACH (xport, bundle_node, &xbundle->xports) {
 729         xlate_xport_copy(xbridge, new_xbundle, xport);
 730     }
 731 }
 732
 733 static void
 734 xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
 735                  struct xport *xport)
 736 {
 737     struct skb_priority_to_dscp *pdscp, *new_pdscp;
 738     struct xport *new_xport = xzalloc(sizeof *xport);
 739     new_xport->ofport = xport->ofport;
 740     new_xport->ofp_port = xport->ofp_port;
 741     new_xport->xbridge = xbridge;
 742     xlate_xport_init(new_xcfg, new_xport);
 743
 744     xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
 745                     xport->bfd, xport->lldp, xport->stp_port_no,
 746                     xport->rstp_port, xport->config, xport->state,
 747                     xport->is_tunnel, xport->may_enable);
 748
 749     if (xport->peer) {
 750         struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
 751         if (peer) {
 752             new_xport->peer = peer;
 753             new_xport->peer->peer = new_xport;
 754         }
 755     }
 756
 757     if (xbundle) {
 758         new_xport->xbundle = xbundle;
 759         list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
 760     }
 761
 762     HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
 763         new_pdscp = xmalloc(sizeof *pdscp);
 764         new_pdscp->skb_priority = pdscp->skb_priority;
 765         new_pdscp->dscp = pdscp->dscp;
 766         hmap_insert(&new_xport->skb_priorities, &new_pdscp->hmap_node,
 767                     hash_int(new_pdscp->skb_priority, 0));
 768     }
 769 }
 770
 771 /* Sets the current xlate configuration to new_xcfg and frees the old xlate
 772  * configuration in xcfgp.
 773  *
 774  * This needs to be called after editing the xlate configuration.
 775  *
 776  * Functions that edit the new xlate configuration are
 777  * xlate_<ofport/bundle/ofport>_set and xlate_<ofport/bundle/ofport>_remove.
 778  *
 779  * A sample workflow:
 780  *
 781  * xlate_txn_start();
 782  * ...
 783  * edit_xlate_configuration();
 784  * ...
 785  * xlate_txn_commit(); */
 786 void
 787 xlate_txn_commit(void)
 788 {
 789     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 790
 791     ovsrcu_set(&xcfgp, new_xcfg);
 792     ovsrcu_synchronize();
 793     xlate_xcfg_free(xcfg);
 794     new_xcfg = NULL;
 795 }
 796
 797 /* Copies the current xlate configuration in xcfgp to new_xcfg.
 798  *
 799  * This needs to be called prior to editing the xlate configuration. */
 800 void
 801 xlate_txn_start(void)
 802 {
 803     struct xbridge *xbridge;
 804     struct xlate_cfg *xcfg;
 805
 806     ovs_assert(!new_xcfg);
 807
 808     new_xcfg = xmalloc(sizeof *new_xcfg);
 809     hmap_init(&new_xcfg->xbridges);
 810     hmap_init(&new_xcfg->xbundles);
 811     hmap_init(&new_xcfg->xports);
 812
 813     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 814     if (!xcfg) {
 815         return;
 816     }
 817
 818     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
 819         xlate_xbridge_copy(xbridge);
 820     }
 821 }
 822
 823
 824 static void
 825 xlate_xcfg_free(struct xlate_cfg *xcfg)
 826 {
 827     struct xbridge *xbridge, *next_xbridge;
 828
 829     if (!xcfg) {
 830         return;
 831     }
 832
 833     HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) {
 834         xlate_xbridge_remove(xcfg, xbridge);
 835     }
 836
 837     hmap_destroy(&xcfg->xbridges);
 838     hmap_destroy(&xcfg->xbundles);
 839     hmap_destroy(&xcfg->xports);
 840     free(xcfg);
 841 }
 842
 843 void
 844 xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
 845                   struct dpif *dpif,
 846                   const struct mac_learning *ml, struct stp *stp,
 847                   struct rstp *rstp, const struct mcast_snooping *ms,
 848                   const struct mbridge *mbridge,
 849                   const struct dpif_sflow *sflow,
 850                   const struct dpif_ipfix *ipfix,
 851                   const struct netflow *netflow,
 852                   bool forward_bpdu, bool has_in_band,
 853                   const struct dpif_backer_support *support)
 854 {
 855     struct xbridge *xbridge;
 856
 857     ovs_assert(new_xcfg);
 858
 859     xbridge = xbridge_lookup(new_xcfg, ofproto);
 860     if (!xbridge) {
 861         xbridge = xzalloc(sizeof *xbridge);
 862         xbridge->ofproto = ofproto;
 863
 864         xlate_xbridge_init(new_xcfg, xbridge);
 865     }
 866
 867     free(xbridge->name);
 868     xbridge->name = xstrdup(name);
 869
 870     xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix,
 871                       netflow, forward_bpdu, has_in_band, support);
 872 }
 873
 874 static void
 875 xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 876 {
 877     struct xbundle *xbundle, *next_xbundle;
 878     struct xport *xport, *next_xport;
 879
 880     if (!xbridge) {
 881         return;
 882     }
 883
 884     HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) {
 885         xlate_xport_remove(xcfg, xport);
 886     }
 887
 888     LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) {
 889         xlate_xbundle_remove(xcfg, xbundle);
 890     }
 891
 892     hmap_remove(&xcfg->xbridges, &xbridge->hmap_node);
 893     mac_learning_unref(xbridge->ml);
 894     mcast_snooping_unref(xbridge->ms);
 895     mbridge_unref(xbridge->mbridge);
 896     dpif_sflow_unref(xbridge->sflow);
 897     dpif_ipfix_unref(xbridge->ipfix);
 898     stp_unref(xbridge->stp);
 899     rstp_unref(xbridge->rstp);
 900     hmap_destroy(&xbridge->xports);
 901     free(xbridge->name);
 902     free(xbridge);
 903 }
 904
 905 void
 906 xlate_remove_ofproto(struct ofproto_dpif *ofproto)
 907 {
 908     struct xbridge *xbridge;
 909
 910     ovs_assert(new_xcfg);
 911
 912     xbridge = xbridge_lookup(new_xcfg, ofproto);
 913     xlate_xbridge_remove(new_xcfg, xbridge);
 914 }
 915
 916 void
 917 xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
 918                  const char *name, enum port_vlan_mode vlan_mode, int vlan,
 919                  unsigned long *trunks, bool use_priority_tags,
 920                  const struct bond *bond, const struct lacp *lacp,
 921                  bool floodable)
 922 {
 923     struct xbundle *xbundle;
 924
 925     ovs_assert(new_xcfg);
 926
 927     xbundle = xbundle_lookup(new_xcfg, ofbundle);
 928     if (!xbundle) {
 929         xbundle = xzalloc(sizeof *xbundle);
 930         xbundle->ofbundle = ofbundle;
 931         xbundle->xbridge = xbridge_lookup(new_xcfg, ofproto);
 932
 933         xlate_xbundle_init(new_xcfg, xbundle);
 934     }
 935
 936     free(xbundle->name);
 937     xbundle->name = xstrdup(name);
 938
 939     xlate_xbundle_set(xbundle, vlan_mode, vlan, trunks,
 940                       use_priority_tags, bond, lacp, floodable);
 941 }
 942
 943 static void
 944 xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle)
 945 {
 946     struct xport *xport;
 947
 948     if (!xbundle) {
 949         return;
 950     }
 951
 952     LIST_FOR_EACH_POP (xport, bundle_node, &xbundle->xports) {
 953         xport->xbundle = NULL;
 954     }
 955
 956     hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
 957     list_remove(&xbundle->list_node);
 958     bond_unref(xbundle->bond);
 959     lacp_unref(xbundle->lacp);
 960     free(xbundle->name);
 961     free(xbundle);
 962 }
 963
 964 void
 965 xlate_bundle_remove(struct ofbundle *ofbundle)
 966 {
 967     struct xbundle *xbundle;
 968
 969     ovs_assert(new_xcfg);
 970
 971     xbundle = xbundle_lookup(new_xcfg, ofbundle);
 972     xlate_xbundle_remove(new_xcfg, xbundle);
 973 }
 974
 975 void
 976 xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
 977                  struct ofport_dpif *ofport, ofp_port_t ofp_port,
 978                  odp_port_t odp_port, const struct netdev *netdev,
 979                  const struct cfm *cfm, const struct bfd *bfd,
 980                  const struct lldp *lldp, struct ofport_dpif *peer,
 981                  int stp_port_no, const struct rstp_port *rstp_port,
 982                  const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
 983                  enum ofputil_port_config config,
 984                  enum ofputil_port_state state, bool is_tunnel,
 985                  bool may_enable)
 986 {
 987     size_t i;
 988     struct xport *xport;
 989
 990     ovs_assert(new_xcfg);
 991
 992     xport = xport_lookup(new_xcfg, ofport);
 993     if (!xport) {
 994         xport = xzalloc(sizeof *xport);
 995         xport->ofport = ofport;
 996         xport->xbridge = xbridge_lookup(new_xcfg, ofproto);
 997         xport->ofp_port = ofp_port;
 998
 999         xlate_xport_init(new_xcfg, xport);
1000     }
1001
1002     ovs_assert(xport->ofp_port == ofp_port);
1003
1004     xlate_xport_set(xport, odp_port, netdev, cfm, bfd, lldp,
1005                     stp_port_no, rstp_port, config, state, is_tunnel,
1006                     may_enable);
1007
1008     if (xport->peer) {
1009         xport->peer->peer = NULL;
1010     }
1011     xport->peer = xport_lookup(new_xcfg, peer);
1012     if (xport->peer) {
1013         xport->peer->peer = xport;
1014     }
1015
1016     if (xport->xbundle) {
1017         list_remove(&xport->bundle_node);
1018     }
1019     xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
1020     if (xport->xbundle) {
1021         list_insert(&xport->xbundle->xports, &xport->bundle_node);
1022     }
1023
1024     clear_skb_priorities(xport);
1025     for (i = 0; i < n_qdscp; i++) {
1026         struct skb_priority_to_dscp *pdscp;
1027         uint32_t skb_priority;
1028
1029         if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
1030                                    &skb_priority)) {
1031             continue;
1032         }
1033
1034         pdscp = xmalloc(sizeof *pdscp);
1035         pdscp->skb_priority = skb_priority;
1036         pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
1037         hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
1038                     hash_int(pdscp->skb_priority, 0));
1039     }
1040 }
1041
1042 static void
1043 xlate_xport_remove(struct xlate_cfg *xcfg, struct xport *xport)
1044 {
1045     if (!xport) {
1046         return;
1047     }
1048
1049     if (xport->peer) {
1050         xport->peer->peer = NULL;
1051         xport->peer = NULL;
1052     }
1053
1054     if (xport->xbundle) {
1055         list_remove(&xport->bundle_node);
1056     }
1057
1058     clear_skb_priorities(xport);
1059     hmap_destroy(&xport->skb_priorities);
1060
1061     hmap_remove(&xcfg->xports, &xport->hmap_node);
1062     hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
1063
1064     netdev_close(xport->netdev);
1065     rstp_port_unref(xport->rstp_port);
1066     cfm_unref(xport->cfm);
1067     bfd_unref(xport->bfd);
1068     lldp_unref(xport->lldp);
1069     free(xport);
1070 }
1071
1072 void
1073 xlate_ofport_remove(struct ofport_dpif *ofport)
1074 {
1075     struct xport *xport;
1076
1077     ovs_assert(new_xcfg);
1078
1079     xport = xport_lookup(new_xcfg, ofport);
1080     xlate_xport_remove(new_xcfg, xport);
1081 }
1082
1083 static struct ofproto_dpif *
1084 xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
1085                       ofp_port_t *ofp_in_port, const struct xport **xportp)
1086 {
1087     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1088     const struct xport *xport;
1089
1090     xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
1091                          ? tnl_port_receive(flow)
1092                          : odp_port_to_ofport(backer, flow->in_port.odp_port));
1093     if (OVS_UNLIKELY(!xport)) {
1094         return NULL;
1095     }
1096     *xportp = xport;
1097     if (ofp_in_port) {
1098         *ofp_in_port = xport->ofp_port;
1099     }
1100     return xport->xbridge->ofproto;
1101 }
1102
1103 /* Given a datapath and flow metadata ('backer', and 'flow' respectively)
1104  * returns the corresponding struct ofproto_dpif and OpenFlow port number. */
1105 struct ofproto_dpif *
1106 xlate_lookup_ofproto(const struct dpif_backer *backer, const struct flow *flow,
1107                      ofp_port_t *ofp_in_port)
1108 {
1109     const struct xport *xport;
1110
1111     return xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1112 }
1113
1114 /* Given a datapath and flow metadata ('backer', and 'flow' respectively),
1115  * optionally populates 'ofproto' with the ofproto_dpif, 'ofp_in_port' with the
1116  * openflow in_port, and 'ipfix', 'sflow', and 'netflow' with the appropriate
1117  * handles for those protocols if they're enabled.  Caller may use the returned
1118  * pointers until quiescing, for longer term use additional references must
1119  * be taken.
1120  *
1121  * Returns 0 if successful, ENODEV if the parsed flow has no associated ofproto.
1122  */
1123 int
1124 xlate_lookup(const struct dpif_backer *backer, const struct flow *flow,
1125              struct ofproto_dpif **ofprotop, struct dpif_ipfix **ipfix,
1126              struct dpif_sflow **sflow, struct netflow **netflow,
1127              ofp_port_t *ofp_in_port)
1128 {
1129     struct ofproto_dpif *ofproto;
1130     const struct xport *xport;
1131
1132     ofproto = xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1133
1134     if (!ofproto) {
1135         return ENODEV;
1136     }
1137
1138     if (ofprotop) {
1139         *ofprotop = ofproto;
1140     }
1141
1142     if (ipfix) {
1143         *ipfix = xport ? xport->xbridge->ipfix : NULL;
1144     }
1145
1146     if (sflow) {
1147         *sflow = xport ? xport->xbridge->sflow : NULL;
1148     }
1149
1150     if (netflow) {
1151         *netflow = xport ? xport->xbridge->netflow : NULL;
1152     }
1153
1154     return 0;
1155 }
1156
1157 static struct xbridge *
1158 xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
1159 {
1160     struct hmap *xbridges;
1161     struct xbridge *xbridge;
1162
1163     if (!ofproto || !xcfg) {
1164         return NULL;
1165     }
1166
1167     xbridges = &xcfg->xbridges;
1168
1169     HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
1170                              xbridges) {
1171         if (xbridge->ofproto == ofproto) {
1172             return xbridge;
1173         }
1174     }
1175     return NULL;
1176 }
1177
1178 static struct xbundle *
1179 xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
1180 {
1181     struct hmap *xbundles;
1182     struct xbundle *xbundle;
1183
1184     if (!ofbundle || !xcfg) {
1185         return NULL;
1186     }
1187
1188     xbundles = &xcfg->xbundles;
1189
1190     HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
1191                              xbundles) {
1192         if (xbundle->ofbundle == ofbundle) {
1193             return xbundle;
1194         }
1195     }
1196     return NULL;
1197 }
1198
1199 static struct xport *
1200 xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
1201 {
1202     struct hmap *xports;
1203     struct xport *xport;
1204
1205     if (!ofport || !xcfg) {
1206         return NULL;
1207     }
1208
1209     xports = &xcfg->xports;
1210
1211     HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
1212                              xports) {
1213         if (xport->ofport == ofport) {
1214             return xport;
1215         }
1216     }
1217     return NULL;
1218 }
1219
1220 static struct stp_port *
1221 xport_get_stp_port(const struct xport *xport)
1222 {
1223     return xport->xbridge->stp && xport->stp_port_no != -1
1224         ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
1225         : NULL;
1226 }
1227
/* Returns the result of stp_learn_in_state() for the current state of
 * 'xport''s STP port.  Ports without an STP port always yield true. */
static bool
xport_stp_learn_state(const struct xport *xport)
{
    struct stp_port *sp = xport_get_stp_port(xport);

    if (!sp) {
        return true;
    }
    return stp_learn_in_state(stp_port_get_state(sp));
}
1236
/* Returns the result of stp_forward_in_state() for the current state of
 * 'xport''s STP port.  Ports without an STP port always yield true. */
static bool
xport_stp_forward_state(const struct xport *xport)
{
    struct stp_port *sp = xport_get_stp_port(xport);

    if (!sp) {
        return true;
    }
    return stp_forward_in_state(stp_port_get_state(sp));
}
1245
1246 static bool
1247 xport_stp_should_forward_bpdu(const struct xport *xport)
1248 {
1249     struct stp_port *sp = xport_get_stp_port(xport);
1250     return stp_should_forward_bpdu(sp ? stp_port_get_state(sp) : STP_DISABLED);
1251 }
1252
1253 /* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
1254  * were used to make the determination.*/
1255 static bool
1256 stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
1257 {
1258     /* is_stp() also checks dl_type, but dl_type is always set in 'wc'. */
1259     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1260     return is_stp(flow);
1261 }
1262
1263 static void
1264 stp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1265 {
1266     struct stp_port *sp = xport_get_stp_port(xport);
1267     struct dp_packet payload = *packet;
1268     struct eth_header *eth = dp_packet_data(&payload);
1269
1270     /* Sink packets on ports that have STP disabled when the bridge has
1271      * STP enabled. */
1272     if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
1273         return;
1274     }
1275
1276     /* Trim off padding on payload. */
1277     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1278         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1279     }
1280
1281     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1282         stp_received_bpdu(sp, dp_packet_data(&payload), dp_packet_size(&payload));
1283     }
1284 }
1285
1286 static enum rstp_state
1287 xport_get_rstp_port_state(const struct xport *xport)
1288 {
1289     return xport->rstp_port
1290         ? rstp_port_get_state(xport->rstp_port)
1291         : RSTP_DISABLED;
1292 }
1293
1294 static bool
1295 xport_rstp_learn_state(const struct xport *xport)
1296 {
1297     return xport->xbridge->rstp && xport->rstp_port
1298         ? rstp_learn_in_state(xport_get_rstp_port_state(xport))
1299         : true;
1300 }
1301
1302 static bool
1303 xport_rstp_forward_state(const struct xport *xport)
1304 {
1305     return xport->xbridge->rstp && xport->rstp_port
1306         ? rstp_forward_in_state(xport_get_rstp_port_state(xport))
1307         : true;
1308 }
1309
1310 static bool
1311 xport_rstp_should_manage_bpdu(const struct xport *xport)
1312 {
1313     return rstp_should_manage_bpdu(xport_get_rstp_port_state(xport));
1314 }
1315
1316 static void
1317 rstp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1318 {
1319     struct dp_packet payload = *packet;
1320     struct eth_header *eth = dp_packet_data(&payload);
1321
1322     /* Sink packets on ports that have no RSTP. */
1323     if (!xport->rstp_port) {
1324         return;
1325     }
1326
1327     /* Trim off padding on payload. */
1328     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1329         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1330     }
1331
1332     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1333         rstp_port_received_bpdu(xport->rstp_port, dp_packet_data(&payload),
1334                                 dp_packet_size(&payload));
1335     }
1336 }
1337
1338 static struct xport *
1339 get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1340 {
1341     struct xport *xport;
1342
1343     HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port),
1344                              &xbridge->xports) {
1345         if (xport->ofp_port == ofp_port) {
1346             return xport;
1347         }
1348     }
1349     return NULL;
1350 }
1351
1352 static odp_port_t
1353 ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1354 {
1355     const struct xport *xport = get_ofp_port(xbridge, ofp_port);
1356     return xport ? xport->odp_port : ODPP_NONE;
1357 }
1358
1359 static bool
1360 odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
1361 {
1362     struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
1363     return xport && xport->may_enable;
1364 }
1365
1366 static struct ofputil_bucket *
1367 group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
1368                         int depth);
1369
1370 static bool
1371 group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
1372 {
1373     struct group_dpif *group;
1374
1375     if (group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group)) {
1376         struct ofputil_bucket *bucket;
1377
1378         bucket = group_first_live_bucket(ctx, group, depth);
1379         group_dpif_unref(group);
1380         return bucket == NULL;
1381     }
1382
1383     return false;
1384 }
1385
1386 #define MAX_LIVENESS_RECURSION 128 /* Arbitrary limit */
1387
1388 static bool
1389 bucket_is_alive(const struct xlate_ctx *ctx,
1390                 struct ofputil_bucket *bucket, int depth)
1391 {
1392     if (depth >= MAX_LIVENESS_RECURSION) {
1393         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1394
1395         VLOG_WARN_RL(&rl, "bucket chaining exceeded %d links",
1396                      MAX_LIVENESS_RECURSION);
1397         return false;
1398     }
1399
1400     return (!ofputil_bucket_has_liveness(bucket)
1401             || (bucket->watch_port != OFPP_ANY
1402                && odp_port_is_alive(ctx, bucket->watch_port))
1403             || (bucket->watch_group != OFPG_ANY
1404                && group_is_alive(ctx, bucket->watch_group, depth + 1)));
1405 }
1406
1407 static struct ofputil_bucket *
1408 group_first_live_bucket(const struct xlate_ctx *ctx,
1409                         const struct group_dpif *group, int depth)
1410 {
1411     struct ofputil_bucket *bucket;
1412     const struct ovs_list *buckets;
1413
1414     group_dpif_get_buckets(group, &buckets);
1415     LIST_FOR_EACH (bucket, list_node, buckets) {
1416         if (bucket_is_alive(ctx, bucket, depth)) {
1417             return bucket;
1418         }
1419     }
1420
1421     return NULL;
1422 }
1423
1424 static struct ofputil_bucket *
1425 group_best_live_bucket(const struct xlate_ctx *ctx,
1426                        const struct group_dpif *group,
1427                        uint32_t basis)
1428 {
1429     struct ofputil_bucket *best_bucket = NULL;
1430     uint32_t best_score = 0;
1431     int i = 0;
1432
1433     struct ofputil_bucket *bucket;
1434     const struct ovs_list *buckets;
1435
1436     group_dpif_get_buckets(group, &buckets);
1437     LIST_FOR_EACH (bucket, list_node, buckets) {
1438         if (bucket_is_alive(ctx, bucket, 0)) {
1439             uint32_t score = (hash_int(i, basis) & 0xffff) * bucket->weight;
1440             if (score >= best_score) {
1441                 best_bucket = bucket;
1442                 best_score = score;
1443             }
1444         }
1445         i++;
1446     }
1447
1448     return best_bucket;
1449 }
1450
1451 static bool
1452 xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan)
1453 {
1454     return (bundle->vlan_mode != PORT_VLAN_ACCESS
1455             && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
1456 }
1457
1458 static bool
1459 xbundle_includes_vlan(const struct xbundle *xbundle, uint16_t vlan)
1460 {
1461     return vlan == xbundle->vlan || xbundle_trunks_vlan(xbundle, vlan);
1462 }
1463
1464 static mirror_mask_t
1465 xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle)
1466 {
1467     return xbundle != &ofpp_none_bundle
1468         ? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle)
1469         : 0;
1470 }
1471
1472 static mirror_mask_t
1473 xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle)
1474 {
1475     return xbundle != &ofpp_none_bundle
1476         ? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle)
1477         : 0;
1478 }
1479
1480 static mirror_mask_t
1481 xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle)
1482 {
1483     return xbundle != &ofpp_none_bundle
1484         ? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle)
1485         : 0;
1486 }
1487
1488 static struct xbundle *
1489 lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port,
1490                     bool warn, struct xport **in_xportp)
1491 {
1492     struct xport *xport;
1493
1494     /* Find the port and bundle for the received packet. */
1495     xport = get_ofp_port(xbridge, in_port);
1496     if (in_xportp) {
1497         *in_xportp = xport;
1498     }
1499     if (xport && xport->xbundle) {
1500         return xport->xbundle;
1501     }
1502
1503     /* Special-case OFPP_NONE (OF1.0) and OFPP_CONTROLLER (OF1.1+),
1504      * which a controller may use as the ingress port for traffic that
1505      * it is sourcing. */
1506     if (in_port == OFPP_CONTROLLER || in_port == OFPP_NONE) {
1507         return &ofpp_none_bundle;
1508     }
1509
1510     /* Odd.  A few possible reasons here:
1511      *
1512      * - We deleted a port but there are still a few packets queued up
1513      *   from it.
1514      *
1515      * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
1516      *   we don't know about.
1517      *
1518      * - The ofproto client didn't configure the port as part of a bundle.
1519      *   This is particularly likely to happen if a packet was received on the
1520      *   port after it was created, but before the client had a chance to
1521      *   configure its bundle.
1522      */
1523     if (warn) {
1524         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1525
1526         VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
1527                      "port %"PRIu16, xbridge->name, in_port);
1528     }
1529     return NULL;
1530 }
1531
1532 static void
1533 mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
1534               mirror_mask_t mirrors)
1535 {
1536     bool warn = ctx->xin->packet != NULL;
1537     uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci);
1538     if (!input_vid_is_valid(vid, xbundle, warn)) {
1539         return;
1540     }
1541     uint16_t vlan = input_vid_to_vlan(xbundle, vid);
1542
1543     const struct xbridge *xbridge = ctx->xbridge;
1544
1545     /* Don't mirror to destinations that we've already mirrored to. */
1546     mirrors &= ~ctx->mirrors;
1547     if (!mirrors) {
1548         return;
1549     }
1550
1551     /* Record these mirrors so that we don't mirror to them again. */
1552     ctx->mirrors |= mirrors;
1553
1554     if (ctx->xin->resubmit_stats) {
1555         mirror_update_stats(xbridge->mbridge, mirrors,
1556                             ctx->xin->resubmit_stats->n_packets,
1557                             ctx->xin->resubmit_stats->n_bytes);
1558     }
1559     if (ctx->xin->xcache) {
1560         struct xc_entry *entry;
1561
1562         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_MIRROR);
1563         entry->u.mirror.mbridge = mbridge_ref(xbridge->mbridge);
1564         entry->u.mirror.mirrors = mirrors;
1565     }
1566
1567     while (mirrors) {
1568         const unsigned long *vlans;
1569         mirror_mask_t dup_mirrors;
1570         struct ofbundle *out;
1571         int out_vlan;
1572
1573         bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
1574                                      &vlans, &dup_mirrors, &out, &out_vlan);
1575         ovs_assert(has_mirror);
1576
1577         if (vlans) {
1578             ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
1579         }
1580
1581         if (vlans && !bitmap_is_set(vlans, vlan)) {
1582             mirrors = zero_rightmost_1bit(mirrors);
1583             continue;
1584         }
1585
1586         mirrors &= ~dup_mirrors;
1587         ctx->mirrors |= dup_mirrors;
1588         if (out) {
1589             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1590             struct xbundle *out_xbundle = xbundle_lookup(xcfg, out);
1591             if (out_xbundle) {
1592                 output_normal(ctx, out_xbundle, vlan);
1593             }
1594         } else if (vlan != out_vlan
1595                    && !eth_addr_is_reserved(ctx->xin->flow.dl_dst)) {
1596             struct xbundle *xbundle;
1597
1598             LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
1599                 if (xbundle_includes_vlan(xbundle, out_vlan)
1600                     && !xbundle_mirror_out(xbridge, xbundle)) {
1601                     output_normal(ctx, xbundle, out_vlan);
1602                 }
1603             }
1604         }
1605     }
1606 }
1607
1608 static void
1609 mirror_ingress_packet(struct xlate_ctx *ctx)
1610 {
1611     if (mbridge_has_mirrors(ctx->xbridge->mbridge)) {
1612         bool warn = ctx->xin->packet != NULL;
1613         struct xbundle *xbundle = lookup_input_bundle(
1614             ctx->xbridge, ctx->xin->flow.in_port.ofp_port, warn, NULL);
1615         if (xbundle) {
1616             mirror_packet(ctx, xbundle,
1617                           xbundle_mirror_src(ctx->xbridge, xbundle));
1618         }
1619     }
1620 }
1621
1622 /* Given 'vid', the VID obtained from the 802.1Q header that was received as
1623  * part of a packet (specify 0 if there was no 802.1Q header), and 'in_xbundle',
1624  * the bundle on which the packet was received, returns the VLAN to which the
1625  * packet belongs.
1626  *
1627  * Both 'vid' and the return value are in the range 0...4095. */
1628 static uint16_t
1629 input_vid_to_vlan(const struct xbundle *in_xbundle, uint16_t vid)
1630 {
1631     switch (in_xbundle->vlan_mode) {
1632     case PORT_VLAN_ACCESS:
1633         return in_xbundle->vlan;
1634         break;
1635
1636     case PORT_VLAN_TRUNK:
1637         return vid;
1638
1639     case PORT_VLAN_NATIVE_UNTAGGED:
1640     case PORT_VLAN_NATIVE_TAGGED:
1641         return vid ? vid : in_xbundle->vlan;
1642
1643     default:
1644         OVS_NOT_REACHED();
1645     }
1646 }
1647
1648 /* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'.
1649  * If so, returns true.  Otherwise, returns false and, if 'warn' is true, logs
1650  * a warning.
1651  *
1652  * 'vid' should be the VID obtained from the 802.1Q header that was received as
1653  * part of a packet (specify 0 if there was no 802.1Q header), in the range
1654  * 0...4095. */
1655 static bool
1656 input_vid_is_valid(uint16_t vid, struct xbundle *in_xbundle, bool warn)
1657 {
1658     /* Allow any VID on the OFPP_NONE port. */
1659     if (in_xbundle == &ofpp_none_bundle) {
1660         return true;
1661     }
1662
1663     switch (in_xbundle->vlan_mode) {
1664     case PORT_VLAN_ACCESS:
1665         if (vid) {
1666             if (warn) {
1667                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1668                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" tagged "
1669                              "packet received on port %s configured as VLAN "
1670                              "%"PRIu16" access port", vid, in_xbundle->name,
1671                              in_xbundle->vlan);
1672             }
1673             return false;
1674         }
1675         return true;
1676
1677     case PORT_VLAN_NATIVE_UNTAGGED:
1678     case PORT_VLAN_NATIVE_TAGGED:
1679         if (!vid) {
1680             /* Port must always carry its native VLAN. */
1681             return true;
1682         }
1683         /* Fall through. */
1684     case PORT_VLAN_TRUNK:
1685         if (!xbundle_includes_vlan(in_xbundle, vid)) {
1686             if (warn) {
1687                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1688                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" packet "
1689                              "received on port %s not configured for trunking "
1690                              "VLAN %"PRIu16, vid, in_xbundle->name, vid);
1691             }
1692             return false;
1693         }
1694         return true;
1695
1696     default:
1697         OVS_NOT_REACHED();
1698     }
1699
1700 }
1701
1702 /* Given 'vlan', the VLAN that a packet belongs to, and
1703  * 'out_xbundle', a bundle on which the packet is to be output, returns the VID
1704  * that should be included in the 802.1Q header.  (If the return value is 0,
1705  * then the 802.1Q header should only be included in the packet if there is a
1706  * nonzero PCP.)
1707  *
1708  * Both 'vlan' and the return value are in the range 0...4095. */
1709 static uint16_t
1710 output_vlan_to_vid(const struct xbundle *out_xbundle, uint16_t vlan)
1711 {
1712     switch (out_xbundle->vlan_mode) {
1713     case PORT_VLAN_ACCESS:
1714         return 0;
1715
1716     case PORT_VLAN_TRUNK:
1717     case PORT_VLAN_NATIVE_TAGGED:
1718         return vlan;
1719
1720     case PORT_VLAN_NATIVE_UNTAGGED:
1721         return vlan == out_xbundle->vlan ? 0 : vlan;
1722
1723     default:
1724         OVS_NOT_REACHED();
1725     }
1726 }
1727
1728 static void
1729 output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
1730               uint16_t vlan)
1731 {
1732     ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci;
1733     uint16_t vid;
1734     ovs_be16 tci, old_tci;
1735     struct xport *xport;
1736     struct xlate_bond_recirc xr;
1737     bool use_recirc = false;
1738
1739     vid = output_vlan_to_vid(out_xbundle, vlan);
1740     if (list_is_empty(&out_xbundle->xports)) {
1741         /* Partially configured bundle with no slaves.  Drop the packet. */
1742         return;
1743     } else if (!out_xbundle->bond) {
1744         xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport,
1745                              bundle_node);
1746     } else {
1747         struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1748         struct flow_wildcards *wc = ctx->wc;
1749         struct ofport_dpif *ofport;
1750
1751         if (ctx->xbridge->support.odp.recirc) {
1752             use_recirc = bond_may_recirc(
1753                 out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
1754
1755             if (use_recirc) {
1756                 /* Only TCP mode uses recirculation. */
1757                 xr.hash_alg = OVS_HASH_ALG_L4;
1758                 bond_update_post_recirc_rules(out_xbundle->bond, false);
1759
1760                 /* Recirculation does not require unmasking hash fields. */
1761                 wc = NULL;
1762             }
1763         }
1764
1765         ofport = bond_choose_output_slave(out_xbundle->bond,
1766                                           &ctx->xin->flow, wc, vid);
1767         xport = xport_lookup(xcfg, ofport);
1768
1769         if (!xport) {
1770             /* No slaves enabled, so drop packet. */
1771             return;
1772         }
1773
1774         /* If use_recirc is set, the main thread will handle stats
1775          * accounting for this bond. */
1776         if (!use_recirc) {
1777             if (ctx->xin->resubmit_stats) {
1778                 bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
1779                              ctx->xin->resubmit_stats->n_bytes);
1780             }
1781             if (ctx->xin->xcache) {
1782                 struct xc_entry *entry;
1783                 struct flow *flow;
1784
1785                 flow = &ctx->xin->flow;
1786                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_BOND);
1787                 entry->u.bond.bond = bond_ref(out_xbundle->bond);
1788                 entry->u.bond.flow = xmemdup(flow, sizeof *flow);
1789                 entry->u.bond.vid = vid;
1790             }
1791         }
1792     }
1793
1794     old_tci = *flow_tci;
1795     tci = htons(vid);
1796     if (tci || out_xbundle->use_priority_tags) {
1797         tci |= *flow_tci & htons(VLAN_PCP_MASK);
1798         if (tci) {
1799             tci |= htons(VLAN_CFI);
1800         }
1801     }
1802     *flow_tci = tci;
1803
1804     compose_output_action(ctx, xport->ofp_port, use_recirc ? &xr : NULL);
1805     *flow_tci = old_tci;
1806 }
1807
1808 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
1809  * migration.  Older Citrix-patched Linux DomU used gratuitous ARP replies to
1810  * indicate this; newer upstream kernels use gratuitous ARP requests. */
1811 static bool
1812 is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
1813 {
1814     if (flow->dl_type != htons(ETH_TYPE_ARP)) {
1815         return false;
1816     }
1817
1818     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1819     if (!eth_addr_is_broadcast(flow->dl_dst)) {
1820         return false;
1821     }
1822
1823     memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
1824     if (flow->nw_proto == ARP_OP_REPLY) {
1825         return true;
1826     } else if (flow->nw_proto == ARP_OP_REQUEST) {
1827         memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
1828         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
1829
1830         return flow->nw_src == flow->nw_dst;
1831     } else {
1832         return false;
1833     }
1834 }
1835
1836 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
1837  * dropped.  Returns true if they may be forwarded, false if they should be
1838  * dropped.
1839  *
1840  * 'in_port' must be the xport that corresponds to flow->in_port.
1841  * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
1842  *
1843  * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
1844  * returned by input_vid_to_vlan().  It must be a valid VLAN for 'in_port', as
1845  * checked by input_vid_is_valid().
1846  *
1847  * May also add tags to '*tags', although the current implementation only does
1848  * so in one special case.
1849  */
1850 static bool
1851 is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
1852               uint16_t vlan)
1853 {
1854     struct xbundle *in_xbundle = in_port->xbundle;
1855     const struct xbridge *xbridge = ctx->xbridge;
1856     struct flow *flow = &ctx->xin->flow;
1857
1858     /* Drop frames for reserved multicast addresses
1859      * only if forward_bpdu option is absent. */
1860     if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
1861         xlate_report(ctx, "packet has reserved destination MAC, dropping");
1862         return false;
1863     }
1864
1865     if (in_xbundle->bond) {
1866         struct mac_entry *mac;
1867
1868         switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
1869                                          flow->dl_dst)) {
1870         case BV_ACCEPT:
1871             break;
1872
1873         case BV_DROP:
1874             xlate_report(ctx, "bonding refused admissibility, dropping");
1875             return false;
1876
1877         case BV_DROP_IF_MOVED:
1878             ovs_rwlock_rdlock(&xbridge->ml->rwlock);
1879             mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
1880             if (mac
1881                 && mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
1882                 && (!is_gratuitous_arp(flow, ctx->wc)
1883                     || mac_entry_is_grat_arp_locked(mac))) {
1884                 ovs_rwlock_unlock(&xbridge->ml->rwlock);
1885                 xlate_report(ctx, "SLB bond thinks this packet looped back, "
1886                              "dropping");
1887                 return false;
1888             }
1889             ovs_rwlock_unlock(&xbridge->ml->rwlock);
1890             break;
1891         }
1892     }
1893
1894     return true;
1895 }
1896
1897 /* Checks whether a MAC learning update is necessary for MAC learning table
1898  * 'ml' given that a packet matching 'flow' was received  on 'in_xbundle' in
1899  * 'vlan'.
1900  *
1901  * Most packets processed through the MAC learning table do not actually
1902  * change it in any way.  This function requires only a read lock on the MAC
1903  * learning table, so it is much cheaper in this common case.
1904  *
1905  * Keep the code here synchronized with that in update_learning_table__()
1906  * below. */
1907 static bool
1908 is_mac_learning_update_needed(const struct mac_learning *ml,
1909                               const struct flow *flow,
1910                               struct flow_wildcards *wc,
1911                               int vlan, struct xbundle *in_xbundle)
1912 OVS_REQ_RDLOCK(ml->rwlock)
1913 {
1914     struct mac_entry *mac;
1915
1916     if (!mac_learning_may_learn(ml, flow->dl_src, vlan)) {
1917         return false;
1918     }
1919
1920     mac = mac_learning_lookup(ml, flow->dl_src, vlan);
1921     if (!mac || mac_entry_age(ml, mac)) {
1922         return true;
1923     }
1924
1925     if (is_gratuitous_arp(flow, wc)) {
1926         /* We don't want to learn from gratuitous ARP packets that are
1927          * reflected back over bond slaves so we lock the learning table. */
1928         if (!in_xbundle->bond) {
1929             return true;
1930         } else if (mac_entry_is_grat_arp_locked(mac)) {
1931             return false;
1932         }
1933     }
1934
1935     return mac_entry_get_port(ml, mac) != in_xbundle->ofbundle;
1936 }
1937
1938
1939 /* Updates MAC learning table 'ml' given that a packet matching 'flow' was
1940  * received on 'in_xbundle' in 'vlan'.
1941  *
1942  * This code repeats all the checks in is_mac_learning_update_needed() because
1943  * the lock was released between there and here and thus the MAC learning state
1944  * could have changed.
1945  *
1946  * Keep the code here synchronized with that in is_mac_learning_update_needed()
1947  * above. */
1948 static void
1949 update_learning_table__(const struct xbridge *xbridge,
1950                         const struct flow *flow, struct flow_wildcards *wc,
1951                         int vlan, struct xbundle *in_xbundle)
1952 OVS_REQ_WRLOCK(xbridge->ml->rwlock)
1953 {
1954     struct mac_entry *mac;
1955
1956     if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
1957         return;
1958     }
1959
1960     mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
1961     if (is_gratuitous_arp(flow, wc)) {
1962         /* We don't want to learn from gratuitous ARP packets that are
1963          * reflected back over bond slaves so we lock the learning table. */
1964         if (!in_xbundle->bond) {
1965             mac_entry_set_grat_arp_lock(mac);
1966         } else if (mac_entry_is_grat_arp_locked(mac)) {
1967             return;
1968         }
1969     }
1970
1971     if (mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle) {
1972         /* The log messages here could actually be useful in debugging,
1973          * so keep the rate limit relatively high. */
1974         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
1975
1976         VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
1977                     "on port %s in VLAN %d",
1978                     xbridge->name, ETH_ADDR_ARGS(flow->dl_src),
1979                     in_xbundle->name, vlan);
1980
1981         mac_entry_set_port(xbridge->ml, mac, in_xbundle->ofbundle);
1982     }
1983 }
1984
1985 static void
1986 update_learning_table(const struct xbridge *xbridge,
1987                       const struct flow *flow, struct flow_wildcards *wc,
1988                       int vlan, struct xbundle *in_xbundle)
1989 {
1990     bool need_update;
1991
1992     /* Don't learn the OFPP_NONE port. */
1993     if (in_xbundle == &ofpp_none_bundle) {
1994         return;
1995     }
1996
1997     /* First try the common case: no change to MAC learning table. */
1998     ovs_rwlock_rdlock(&xbridge->ml->rwlock);
1999     need_update = is_mac_learning_update_needed(xbridge->ml, flow, wc, vlan,
2000                                                 in_xbundle);
2001     ovs_rwlock_unlock(&xbridge->ml->rwlock);
2002
2003     if (need_update) {
2004         /* Slow path: MAC learning table might need an update. */
2005         ovs_rwlock_wrlock(&xbridge->ml->rwlock);
2006         update_learning_table__(xbridge, flow, wc, vlan, in_xbundle);
2007         ovs_rwlock_unlock(&xbridge->ml->rwlock);
2008     }
2009 }
2010
2011 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2012  * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
2013 static void
2014 update_mcast_snooping_table4__(const struct xbridge *xbridge,
2015                                const struct flow *flow,
2016                                struct mcast_snooping *ms, int vlan,
2017                                struct xbundle *in_xbundle,
2018                                const struct dp_packet *packet)
2019     OVS_REQ_WRLOCK(ms->rwlock)
2020 {
2021     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2022     int count;
2023     ovs_be32 ip4 = flow->igmp_group_ip4;
2024
2025     switch (ntohs(flow->tp_src)) {
2026     case IGMP_HOST_MEMBERSHIP_REPORT:
2027     case IGMPV2_HOST_MEMBERSHIP_REPORT:
2028         if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2029             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
2030                         IP_FMT" is on port %s in VLAN %d",
2031                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2032         }
2033         break;
2034     case IGMP_HOST_LEAVE_MESSAGE:
2035         if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2036             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
2037                         IP_FMT" is on port %s in VLAN %d",
2038                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2039         }
2040         break;
2041     case IGMP_HOST_MEMBERSHIP_QUERY:
2042         if (flow->nw_src && mcast_snooping_add_mrouter(ms, vlan,
2043             in_xbundle->ofbundle)) {
2044             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query from "
2045                         IP_FMT" is on port %s in VLAN %d",
2046                         xbridge->name, IP_ARGS(flow->nw_src),
2047                         in_xbundle->name, vlan);
2048         }
2049         break;
2050     case IGMPV3_HOST_MEMBERSHIP_REPORT:
2051         if ((count = mcast_snooping_add_report(ms, packet, vlan,
2052                                                in_xbundle->ofbundle))) {
2053             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2054                         "addresses on port %s in VLAN %d",
2055                         xbridge->name, count, in_xbundle->name, vlan);
2056         }
2057         break;
2058     }
2059 }
2060
2061 static void
2062 update_mcast_snooping_table6__(const struct xbridge *xbridge,
2063                                const struct flow *flow,
2064                                struct mcast_snooping *ms, int vlan,
2065                                struct xbundle *in_xbundle,
2066                                const struct dp_packet *packet)
2067     OVS_REQ_WRLOCK(ms->rwlock)
2068 {
2069     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2070     int count;
2071
2072     switch (ntohs(flow->tp_src)) {
2073     case MLD_QUERY:
2074         if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
2075             && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
2076             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
2077                         "in VLAN %d",
2078                         xbridge->name, in_xbundle->name, vlan);
2079         }
2080         break;
2081     case MLD_REPORT:
2082     case MLD_DONE:
2083     case MLD2_REPORT:
2084         count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
2085         if (count) {
2086             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2087                         "addresses on port %s in VLAN %d",
2088                         xbridge->name, count, in_xbundle->name, vlan);
2089         }
2090         break;
2091     }
2092 }
2093
2094 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2095  * was received on 'in_xbundle' in 'vlan'. */
2096 static void
2097 update_mcast_snooping_table(const struct xbridge *xbridge,
2098                             const struct flow *flow, int vlan,
2099                             struct xbundle *in_xbundle,
2100                             const struct dp_packet *packet)
2101 {
2102     struct mcast_snooping *ms = xbridge->ms;
2103     struct xlate_cfg *xcfg;
2104     struct xbundle *mcast_xbundle;
2105     struct mcast_port_bundle *fport;
2106
2107     /* Don't learn the OFPP_NONE port. */
2108     if (in_xbundle == &ofpp_none_bundle) {
2109         return;
2110     }
2111
2112     /* Don't learn from flood ports */
2113     mcast_xbundle = NULL;
2114     ovs_rwlock_wrlock(&ms->rwlock);
2115     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2116     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2117         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2118         if (mcast_xbundle == in_xbundle) {
2119             break;
2120         }
2121     }
2122
2123     if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
2124         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2125             update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
2126                                            in_xbundle, packet);
2127         } else {
2128             update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
2129                                            in_xbundle, packet);
2130         }
2131     }
2132     ovs_rwlock_unlock(&ms->rwlock);
2133 }
2134
2135 /* send the packet to ports having the multicast group learned */
2136 static void
2137 xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
2138                               struct mcast_snooping *ms OVS_UNUSED,
2139                               struct mcast_group *grp,
2140                               struct xbundle *in_xbundle, uint16_t vlan)
2141     OVS_REQ_RDLOCK(ms->rwlock)
2142 {
2143     struct xlate_cfg *xcfg;
2144     struct mcast_group_bundle *b;
2145     struct xbundle *mcast_xbundle;
2146
2147     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2148     LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
2149         mcast_xbundle = xbundle_lookup(xcfg, b->port);
2150         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2151             xlate_report(ctx, "forwarding to mcast group port");
2152             output_normal(ctx, mcast_xbundle, vlan);
2153         } else if (!mcast_xbundle) {
2154             xlate_report(ctx, "mcast group port is unknown, dropping");
2155         } else {
2156             xlate_report(ctx, "mcast group port is input port, dropping");
2157         }
2158     }
2159 }
2160
2161 /* send the packet to ports connected to multicast routers */
2162 static void
2163 xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
2164                                  struct mcast_snooping *ms,
2165                                  struct xbundle *in_xbundle, uint16_t vlan)
2166     OVS_REQ_RDLOCK(ms->rwlock)
2167 {
2168     struct xlate_cfg *xcfg;
2169     struct mcast_mrouter_bundle *mrouter;
2170     struct xbundle *mcast_xbundle;
2171
2172     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2173     LIST_FOR_EACH(mrouter, mrouter_node, &ms->mrouter_lru) {
2174         mcast_xbundle = xbundle_lookup(xcfg, mrouter->port);
2175         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2176             xlate_report(ctx, "forwarding to mcast router port");
2177             output_normal(ctx, mcast_xbundle, vlan);
2178         } else if (!mcast_xbundle) {
2179             xlate_report(ctx, "mcast router port is unknown, dropping");
2180         } else {
2181             xlate_report(ctx, "mcast router port is input port, dropping");
2182         }
2183     }
2184 }
2185
2186 /* send the packet to ports flagged to be flooded */
2187 static void
2188 xlate_normal_mcast_send_fports(struct xlate_ctx *ctx,
2189                                struct mcast_snooping *ms,
2190                                struct xbundle *in_xbundle, uint16_t vlan)
2191     OVS_REQ_RDLOCK(ms->rwlock)
2192 {
2193     struct xlate_cfg *xcfg;
2194     struct mcast_port_bundle *fport;
2195     struct xbundle *mcast_xbundle;
2196
2197     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2198     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2199         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2200         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2201             xlate_report(ctx, "forwarding to mcast flood port");
2202             output_normal(ctx, mcast_xbundle, vlan);
2203         } else if (!mcast_xbundle) {
2204             xlate_report(ctx, "mcast flood port is unknown, dropping");
2205         } else {
2206             xlate_report(ctx, "mcast flood port is input port, dropping");
2207         }
2208     }
2209 }
2210
2211 /* forward the Reports to configured ports */
2212 static void
2213 xlate_normal_mcast_send_rports(struct xlate_ctx *ctx,
2214                                struct mcast_snooping *ms,
2215                                struct xbundle *in_xbundle, uint16_t vlan)
2216     OVS_REQ_RDLOCK(ms->rwlock)
2217 {
2218     struct xlate_cfg *xcfg;
2219     struct mcast_port_bundle *rport;
2220     struct xbundle *mcast_xbundle;
2221
2222     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2223     LIST_FOR_EACH(rport, node, &ms->rport_list) {
2224         mcast_xbundle = xbundle_lookup(xcfg, rport->port);
2225         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2226             xlate_report(ctx, "forwarding Report to mcast flagged port");
2227             output_normal(ctx, mcast_xbundle, vlan);
2228         } else if (!mcast_xbundle) {
2229             xlate_report(ctx, "mcast port is unknown, dropping the Report");
2230         } else {
2231             xlate_report(ctx, "mcast port is input port, dropping the Report");
2232         }
2233     }
2234 }
2235
2236 static void
2237 xlate_normal_flood(struct xlate_ctx *ctx, struct xbundle *in_xbundle,
2238                    uint16_t vlan)
2239 {
2240     struct xbundle *xbundle;
2241
2242     LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
2243         if (xbundle != in_xbundle
2244             && xbundle_includes_vlan(xbundle, vlan)
2245             && xbundle->floodable
2246             && !xbundle_mirror_out(ctx->xbridge, xbundle)) {
2247             output_normal(ctx, xbundle, vlan);
2248         }
2249     }
2250     ctx->nf_output_iface = NF_OUT_FLOOD;
2251 }
2252
2253 static void
2254 xlate_normal(struct xlate_ctx *ctx)
2255 {
2256     struct flow_wildcards *wc = ctx->wc;
2257     struct flow *flow = &ctx->xin->flow;
2258     struct xbundle *in_xbundle;
2259     struct xport *in_port;
2260     struct mac_entry *mac;
2261     void *mac_port;
2262     uint16_t vlan;
2263     uint16_t vid;
2264
2265     memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2266     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2267     wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2268
2269     in_xbundle = lookup_input_bundle(ctx->xbridge, flow->in_port.ofp_port,
2270                                      ctx->xin->packet != NULL, &in_port);
2271     if (!in_xbundle) {
2272         xlate_report(ctx, "no input bundle, dropping");
2273         return;
2274     }
2275
2276     /* Drop malformed frames. */
2277     if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
2278         !(flow->vlan_tci & htons(VLAN_CFI))) {
2279         if (ctx->xin->packet != NULL) {
2280             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2281             VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
2282                          "VLAN tag received on port %s",
2283                          ctx->xbridge->name, in_xbundle->name);
2284         }
2285         xlate_report(ctx, "partial VLAN tag, dropping");
2286         return;
2287     }
2288
2289     /* Drop frames on bundles reserved for mirroring. */
2290     if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
2291         if (ctx->xin->packet != NULL) {
2292             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2293             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
2294                          "%s, which is reserved exclusively for mirroring",
2295                          ctx->xbridge->name, in_xbundle->name);
2296         }
2297         xlate_report(ctx, "input port is mirror output port, dropping");
2298         return;
2299     }
2300
2301     /* Check VLAN. */
2302     vid = vlan_tci_to_vid(flow->vlan_tci);
2303     if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
2304         xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
2305         return;
2306     }
2307     vlan = input_vid_to_vlan(in_xbundle, vid);
2308
2309     /* Check other admissibility requirements. */
2310     if (in_port && !is_admissible(ctx, in_port, vlan)) {
2311         return;
2312     }
2313
2314     /* Learn source MAC. */
2315     if (ctx->xin->may_learn) {
2316         update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
2317     }
2318     if (ctx->xin->xcache) {
2319         struct xc_entry *entry;
2320
2321         /* Save enough info to update mac learning table later. */
2322         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
2323         entry->u.normal.ofproto = ctx->xbridge->ofproto;
2324         entry->u.normal.flow = xmemdup(flow, sizeof *flow);
2325         entry->u.normal.vlan = vlan;
2326     }
2327
2328     /* Determine output bundle. */
2329     if (mcast_snooping_enabled(ctx->xbridge->ms)
2330         && !eth_addr_is_broadcast(flow->dl_dst)
2331         && eth_addr_is_multicast(flow->dl_dst)
2332         && is_ip_any(flow)) {
2333         struct mcast_snooping *ms = ctx->xbridge->ms;
2334         struct mcast_group *grp = NULL;
2335
2336         if (is_igmp(flow)) {
2337             if (mcast_snooping_is_membership(flow->tp_src) ||
2338                 mcast_snooping_is_query(flow->tp_src)) {
2339                 if (ctx->xin->may_learn) {
2340                     update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2341                                                 in_xbundle, ctx->xin->packet);
2342                 }
2343                 /*
2344                  * IGMP packets need to take the slow path, in order to be
2345                  * processed for mdb updates. That will prevent expires
2346                  * firing off even after hosts have sent reports.
2347                  */
2348                 ctx->xout->slow |= SLOW_ACTION;
2349             }
2350
2351             if (mcast_snooping_is_membership(flow->tp_src)) {
2352                 ovs_rwlock_rdlock(&ms->rwlock);
2353                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2354                 /* RFC4541: section 2.1.1, item 1: A snooping switch should
2355                  * forward IGMP Membership Reports only to those ports where
2356                  * multicast routers are attached.  Alternatively stated: a
2357                  * snooping switch should not forward IGMP Membership Reports
2358                  * to ports on which only hosts are attached.
2359                  * An administrative control may be provided to override this
2360                  * restriction, allowing the report messages to be flooded to
2361                  * other ports. */
2362                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2363                 ovs_rwlock_unlock(&ms->rwlock);
2364             } else {
2365                 xlate_report(ctx, "multicast traffic, flooding");
2366                 xlate_normal_flood(ctx, in_xbundle, vlan);
2367             }
2368             return;
2369         } else if (is_mld(flow)) {
2370             ctx->xout->slow |= SLOW_ACTION;
2371             if (ctx->xin->may_learn) {
2372                 update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2373                                             in_xbundle, ctx->xin->packet);
2374             }
2375             if (is_mld_report(flow)) {
2376                 ovs_rwlock_rdlock(&ms->rwlock);
2377                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2378                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2379                 ovs_rwlock_unlock(&ms->rwlock);
2380             } else {
2381                 xlate_report(ctx, "MLD query, flooding");
2382                 xlate_normal_flood(ctx, in_xbundle, vlan);
2383             }
2384         } else {
2385             if ((flow->dl_type == htons(ETH_TYPE_IP)
2386                  && ip_is_local_multicast(flow->nw_dst))
2387                 || (flow->dl_type == htons(ETH_TYPE_IPV6)
2388                     && ipv6_is_all_hosts(&flow->ipv6_dst))) {
2389                 /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
2390                  * address in the 224.0.0.x range which are not IGMP must
2391                  * be forwarded on all ports */
2392                 xlate_report(ctx, "RFC4541: section 2.1.2, item 2, flooding");
2393                 xlate_normal_flood(ctx, in_xbundle, vlan);
2394                 return;
2395             }
2396         }
2397
2398         /* forwarding to group base ports */
2399         ovs_rwlock_rdlock(&ms->rwlock);
2400         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2401             grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
2402         } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2403             grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
2404         }
2405         if (grp) {
2406             xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
2407             xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2408             xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2409         } else {
2410             if (mcast_snooping_flood_unreg(ms)) {
2411                 xlate_report(ctx, "unregistered multicast, flooding");
2412                 xlate_normal_flood(ctx, in_xbundle, vlan);
2413             } else {
2414                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2415                 xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2416             }
2417         }
2418         ovs_rwlock_unlock(&ms->rwlock);
2419     } else {
2420         ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
2421         mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
2422         mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
2423         ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
2424
2425         if (mac_port) {
2426             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2427             struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
2428             if (mac_xbundle && mac_xbundle != in_xbundle) {
2429                 xlate_report(ctx, "forwarding to learned port");
2430                 output_normal(ctx, mac_xbundle, vlan);
2431             } else if (!mac_xbundle) {
2432                 xlate_report(ctx, "learned port is unknown, dropping");
2433             } else {
2434                 xlate_report(ctx, "learned port is input port, dropping");
2435             }
2436         } else {
2437             xlate_report(ctx, "no learned MAC for destination, flooding");
2438             xlate_normal_flood(ctx, in_xbundle, vlan);
2439         }
2440     }
2441 }
2442
2443 /* Appends a "sample" action for sFlow or IPFIX to 'ctx->odp_actions'.  The
2444  * 'probability' is the number of packets out of UINT32_MAX to sample.  The
2445  * 'cookie' (of length 'cookie_size' bytes) is passed back in the callback for
2446  * each sampled packet.  'tunnel_out_port', if not ODPP_NONE, is added as the
2447  * OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute.  If 'include_actions', an
2448  * OVS_USERSPACE_ATTR_ACTIONS attribute is added.
2449  */
2450 static size_t
2451 compose_sample_action(struct xlate_ctx *ctx,
2452                       const uint32_t probability,
2453                       const union user_action_cookie *cookie,
2454                       const size_t cookie_size,
2455                       const odp_port_t tunnel_out_port,
2456                       bool include_actions)
2457 {
2458     size_t sample_offset = nl_msg_start_nested(ctx->odp_actions,
2459                                                OVS_ACTION_ATTR_SAMPLE);
2460
2461     nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
2462
2463     size_t actions_offset = nl_msg_start_nested(ctx->odp_actions,
2464                                                 OVS_SAMPLE_ATTR_ACTIONS);
2465
2466     odp_port_t odp_port = ofp_port_to_odp_port(
2467         ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
2468     uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
2469                                      flow_hash_5tuple(&ctx->xin->flow, 0));
2470     int cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
2471                                                  tunnel_out_port,
2472                                                  include_actions,
2473                                                  ctx->odp_actions);
2474
2475     nl_msg_end_nested(ctx->odp_actions, actions_offset);
2476     nl_msg_end_nested(ctx->odp_actions, sample_offset);
2477
2478     return cookie_offset;
2479 }
2480
2481 /* If sFLow is not enabled, returns 0 without doing anything.
2482  *
2483  * If sFlow is enabled, appends a template "sample" action to the ODP actions
2484  * in 'ctx'.  This action is a template because some of the information needed
2485  * to fill it out is not available until flow translation is complete.  In this
2486  * case, this functions returns an offset, which is always nonzero, to pass
2487  * later to fix_sflow_action() to fill in the rest of the template. */
2488 static size_t
2489 compose_sflow_action(struct xlate_ctx *ctx)
2490 {
2491     struct dpif_sflow *sflow = ctx->xbridge->sflow;
2492     if (!sflow || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2493         return 0;
2494     }
2495
2496     union user_action_cookie cookie = { .type = USER_ACTION_COOKIE_SFLOW };
2497     return compose_sample_action(ctx, dpif_sflow_get_probability(sflow),
2498                                  &cookie, sizeof cookie.sflow, ODPP_NONE,
2499                                  true);
2500 }
2501
2502 /* If IPFIX is enabled, this appends a "sample" action to implement IPFIX to
2503  * 'ctx->odp_actions'. */
2504 static void
2505 compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
2506 {
2507     struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
2508     odp_port_t tunnel_out_port = ODPP_NONE;
2509
2510     if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2511         return;
2512     }
2513
2514     /* For input case, output_odp_port is ODPP_NONE, which is an invalid port
2515      * number. */
2516     if (output_odp_port == ODPP_NONE &&
2517         !dpif_ipfix_get_bridge_exporter_input_sampling(ipfix)) {
2518         return;
2519     }
2520
2521     /* For output case, output_odp_port is valid*/
2522     if (output_odp_port != ODPP_NONE) {
2523         if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
2524             return;
2525         }
2526         /* If tunnel sampling is enabled, put an additional option attribute:
2527          * OVS_USERSPACE_ATTR_TUNNEL_OUT_PORT
2528          */
2529         if (dpif_ipfix_get_bridge_exporter_tunnel_sampling(ipfix) &&
2530             dpif_ipfix_get_tunnel_port(ipfix, output_odp_port) ) {
2531            tunnel_out_port = output_odp_port;
2532         }
2533     }
2534
2535     union user_action_cookie cookie = {
2536         .ipfix = {
2537             .type = USER_ACTION_COOKIE_IPFIX,
2538             .output_odp_port = output_odp_port,
2539         }
2540     };
2541     compose_sample_action(ctx,
2542                           dpif_ipfix_get_bridge_exporter_probability(ipfix),
2543                           &cookie, sizeof cookie.ipfix, tunnel_out_port,
2544                           false);
2545 }
2546
2547 /* Fix "sample" action according to data collected while composing ODP actions,
2548  * as described in compose_sflow_action().
2549  *
2550  * 'user_cookie_offset' must be the offset returned by add_sflow_action(). */
2551 static void
2552 fix_sflow_action(struct xlate_ctx *ctx, unsigned int user_cookie_offset)
2553 {
2554     const struct flow *base = &ctx->base_flow;
2555     union user_action_cookie *cookie;
2556
2557     cookie = ofpbuf_at(ctx->odp_actions, user_cookie_offset,
2558                        sizeof cookie->sflow);
2559     ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
2560
2561     cookie->type = USER_ACTION_COOKIE_SFLOW;
2562     cookie->sflow.vlan_tci = base->vlan_tci;
2563
2564     /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
2565      * port information") for the interpretation of cookie->output. */
2566     switch (ctx->sflow_n_outputs) {
2567     case 0:
2568         /* 0x40000000 | 256 means "packet dropped for unknown reason". */
2569         cookie->sflow.output = 0x40000000 | 256;
2570         break;
2571
2572     case 1:
2573         cookie->sflow.output = dpif_sflow_odp_port_to_ifindex(
2574             ctx->xbridge->sflow, ctx->sflow_odp_port);
2575         if (cookie->sflow.output) {
2576             break;
2577         }
2578         /* Fall through. */
2579     default:
2580         /* 0x80000000 means "multiple output ports. */
2581         cookie->sflow.output = 0x80000000 | ctx->sflow_n_outputs;
2582         break;
2583     }
2584 }
2585
2586 static bool
2587 process_special(struct xlate_ctx *ctx, const struct xport *xport)
2588 {
2589     const struct flow *flow = &ctx->xin->flow;
2590     struct flow_wildcards *wc = ctx->wc;
2591     const struct xbridge *xbridge = ctx->xbridge;
2592     const struct dp_packet *packet = ctx->xin->packet;
2593     enum slow_path_reason slow;
2594
2595     if (!xport) {
2596         slow = 0;
2597     } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
2598         if (packet) {
2599             cfm_process_heartbeat(xport->cfm, packet);
2600         }
2601         slow = SLOW_CFM;
2602     } else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) {
2603         if (packet) {
2604             bfd_process_packet(xport->bfd, flow, packet);
2605             /* If POLL received, immediately sends FINAL back. */
2606             if (bfd_should_send_packet(xport->bfd)) {
2607                 ofproto_dpif_monitor_port_send_soon(xport->ofport);
2608             }
2609         }
2610         slow = SLOW_BFD;
2611     } else if (xport->xbundle && xport->xbundle->lacp
2612                && flow->dl_type == htons(ETH_TYPE_LACP)) {
2613         if (packet) {
2614             lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
2615         }
2616         slow = SLOW_LACP;
2617     } else if ((xbridge->stp || xbridge->rstp) &&
2618                stp_should_process_flow(flow, wc)) {
2619         if (packet) {
2620             xbridge->stp
2621                 ? stp_process_packet(xport, packet)
2622                 : rstp_process_packet(xport, packet);
2623         }
2624         slow = SLOW_STP;
2625     } else if (xport->lldp && lldp_should_process_flow(xport->lldp, flow)) {
2626         if (packet) {
2627             lldp_process_packet(xport->lldp, packet);
2628         }
2629         slow = SLOW_LLDP;
2630     } else {
2631         slow = 0;
2632     }
2633
2634     if (slow) {
2635         ctx->xout->slow |= slow;
2636         return true;
2637     } else {
2638         return false;
2639     }
2640 }
2641
2642 static int
2643 tnl_route_lookup_flow(const struct flow *oflow,
2644                       ovs_be32 *ip, struct xport **out_port)
2645 {
2646     char out_dev[IFNAMSIZ];
2647     struct xbridge *xbridge;
2648     struct xlate_cfg *xcfg;
2649     ovs_be32 gw;
2650
2651     if (!ovs_router_lookup(oflow->tunnel.ip_dst, out_dev, &gw)) {
2652         return -ENOENT;
2653     }
2654
2655     if (gw) {
2656         *ip = gw;
2657     } else {
2658         *ip = oflow->tunnel.ip_dst;
2659     }
2660
2661     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2662     ovs_assert(xcfg);
2663
2664     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
2665         if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
2666             struct xport *port;
2667
2668             HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
2669                 if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
2670                     *out_port = port;
2671                     return 0;
2672                 }
2673             }
2674         }
2675     }
2676     return -ENOENT;
2677 }
2678
2679 static int
2680 xlate_flood_packet(struct xbridge *xbridge, struct dp_packet *packet)
2681 {
2682     struct ofpact_output output;
2683     struct flow flow;
2684
2685     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
2686     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
2687     flow_extract(packet, &flow);
2688     flow.in_port.ofp_port = OFPP_NONE;
2689     output.port = OFPP_FLOOD;
2690     output.max_len = 0;
2691
2692     return ofproto_dpif_execute_actions(xbridge->ofproto, &flow, NULL,
2693                                         &output.ofpact, sizeof output,
2694                                         packet);
2695 }
2696
2697 static void
2698 tnl_send_arp_request(const struct xport *out_dev, const uint8_t eth_src[ETH_ADDR_LEN],
2699                      ovs_be32 ip_src, ovs_be32 ip_dst)
2700 {
2701     struct xbridge *xbridge = out_dev->xbridge;
2702     struct dp_packet packet;
2703
2704     dp_packet_init(&packet, 0);
2705     compose_arp(&packet, ARP_OP_REQUEST,
2706                 eth_src, eth_addr_zero, true, ip_src, ip_dst);
2707
2708     xlate_flood_packet(xbridge, &packet);
2709     dp_packet_uninit(&packet);
2710 }
2711
2712 static int
2713 build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
2714                   const struct flow *flow, odp_port_t tunnel_odp_port)
2715 {
2716     struct ovs_action_push_tnl tnl_push_data;
2717     struct xport *out_dev = NULL;
2718     ovs_be32 s_ip, d_ip = 0;
2719     uint8_t smac[ETH_ADDR_LEN];
2720     uint8_t dmac[ETH_ADDR_LEN];
2721     int err;
2722
2723     err = tnl_route_lookup_flow(flow, &d_ip, &out_dev);
2724     if (err) {
2725         xlate_report(ctx, "native tunnel routing failed");
2726         return err;
2727     }
2728     xlate_report(ctx, "tunneling to "IP_FMT" via %s",
2729                  IP_ARGS(d_ip), netdev_get_name(out_dev->netdev));
2730
2731     /* Use mac addr of bridge port of the peer. */
2732     err = netdev_get_etheraddr(out_dev->netdev, smac);
2733     if (err) {
2734         xlate_report(ctx, "tunnel output device lacks Ethernet address");
2735         return err;
2736     }
2737
2738     err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
2739     if (err) {
2740         xlate_report(ctx, "tunnel output device lacks IPv4 address");
2741         return err;
2742     }
2743
2744     err = tnl_arp_lookup(out_dev->xbridge->name, d_ip, dmac);
2745     if (err) {
2746         xlate_report(ctx, "ARP cache miss for "IP_FMT" on bridge %s, "
2747                      "sending ARP request",
2748                      IP_ARGS(d_ip), out_dev->xbridge->name);
2749         tnl_send_arp_request(out_dev, smac, s_ip, d_ip);
2750         return err;
2751     }
2752     if (ctx->xin->xcache) {
2753         struct xc_entry *entry;
2754
2755         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_ARP);
2756         ovs_strlcpy(entry->u.tnl_arp_cache.br_name, out_dev->xbridge->name,
2757                     sizeof entry->u.tnl_arp_cache.br_name);
2758         entry->u.tnl_arp_cache.d_ip = d_ip;
2759     }
2760
2761     xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" "IP_FMT
2762                  " to "ETH_ADDR_FMT" "IP_FMT,
2763                  ETH_ADDR_ARGS(smac), IP_ARGS(s_ip),
2764                  ETH_ADDR_ARGS(dmac), IP_ARGS(d_ip));
2765     err = tnl_port_build_header(xport->ofport, flow,
2766                                 dmac, smac, s_ip, &tnl_push_data);
2767     if (err) {
2768         return err;
2769     }
2770     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
2771     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
2772     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
2773     return 0;
2774 }
2775
/* Translates an output to OpenFlow port 'ofp_port' of 'ctx->xbridge' into
 * datapath actions appended to 'ctx->odp_actions'.
 *
 * Nothing is emitted (only an xlate_report() is made) if the port does not
 * exist, if it has OFPUTIL_PC_NO_FWD set, or, when 'check_stp' is true, if
 * the port's STP/RSTP state does not allow the packet out.
 *
 * If the port has a peer, translation continues in the peer's bridge,
 * starting from table 0 with the peer as the input port, and the caller's
 * translation state is restored afterward.
 *
 * Otherwise the pending flow changes are committed and one of the following
 * is appended:
 *
 *   - if 'xr' is nonnull, a hash action followed by a recirculation action
 *     built from 'xr';
 *
 *   - for a native tunnel port, whatever build_tunnel_send() appends;
 *
 *   - for OFPP_LOCAL with native tunneling on and a matching tunnel port,
 *     a tunnel pop action;
 *
 *   - otherwise an IPFIX sample action (if applicable) and an output action.
 *
 * On return, 'vlan_tci', 'pkt_mark' and 'nw_tos' of the flow have the values
 * they had on entry to the non-peer path. */
static void
compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
                        const struct xlate_bond_recirc *xr, bool check_stp)
{
    const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
    struct flow_wildcards *wc = ctx->wc;
    struct flow *flow = &ctx->xin->flow;
    struct flow_tnl flow_tnl;
    ovs_be16 flow_vlan_tci;
    uint32_t flow_pkt_mark;
    uint8_t flow_nw_tos;
    odp_port_t out_port, odp_port;
    bool tnl_push_pop_send = false;
    uint8_t dscp;

    /* If 'struct flow' gets additional metadata, we'll need to zero it out
     * before traversing a patch port. */
    BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33);
    memset(&flow_tnl, 0, sizeof flow_tnl);

    /* Checks that can veto the output altogether. */
    if (!xport) {
        xlate_report(ctx, "Nonexistent output port");
        return;
    } else if (xport->config & OFPUTIL_PC_NO_FWD) {
        xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
        return;
    } else if (check_stp) {
        if (is_stp(&ctx->base_flow)) {
            /* BPDUs are held to a different test than ordinary traffic. */
            if (!xport_stp_should_forward_bpdu(xport) &&
                !xport_rstp_should_manage_bpdu(xport)) {
                if (ctx->xbridge->stp != NULL) {
                    xlate_report(ctx, "STP not in listening state, "
                            "skipping bpdu output");
                } else if (ctx->xbridge->rstp != NULL) {
                    xlate_report(ctx, "RSTP not managing BPDU in this state, "
                            "skipping bpdu output");
                }
                return;
            }
        } else if (!xport_stp_forward_state(xport) ||
                   !xport_rstp_forward_state(xport)) {
            if (ctx->xbridge->stp != NULL) {
                xlate_report(ctx, "STP not in forwarding state, "
                        "skipping output");
            } else if (ctx->xbridge->rstp != NULL) {
                xlate_report(ctx, "RSTP not in forwarding state, "
                        "skipping output");
            }
            return;
        }
    }

    /* Port with a peer: continue translation in the peer's bridge instead of
     * emitting a datapath output. */
    if (xport->peer) {
        /* Snapshot every piece of translation state that is replaced below,
         * so that it can be put back once the peer bridge is done. */
        const struct xport *peer = xport->peer;
        struct flow old_flow = ctx->xin->flow;
        bool old_was_mpls = ctx->was_mpls;
        cls_version_t old_version = ctx->tables_version;
        struct ofpbuf old_stack = ctx->stack;
        union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
        struct ofpbuf old_action_set = ctx->action_set;
        uint64_t actset_stub[1024 / 8];

        /* The peer bridge starts with an empty stack and action set, and with
         * metadata, tunnel metadata and registers cleared. */
        ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
        ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
        ctx->xbridge = peer->xbridge;
        flow->in_port.ofp_port = peer->ofp_port;
        flow->metadata = htonll(0);
        memset(&flow->tunnel, 0, sizeof flow->tunnel);
        memset(flow->regs, 0, sizeof flow->regs);
        flow->actset_output = OFPP_UNSET;

        /* The bridge is now known so obtain its table version. */
        ctx->tables_version
            = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);

        if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
            if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
                xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
                if (ctx->action_set.size) {
                    /* Translate action set only if not dropping the packet and
                     * not recirculating. */
                    if (!exit_recirculates(ctx)) {
                        xlate_action_set(ctx);
                    }
                }
                /* Check if need to recirculate. */
                if (exit_recirculates(ctx)) {
                    compose_recirculate_action(ctx);
                }
            } else {
                /* Forwarding is disabled by STP and RSTP.  Let OFPP_NORMAL and
                 * the learning action look at the packet, then drop it. */
                struct flow old_base_flow = ctx->base_flow;
                size_t old_size = ctx->odp_actions->size;
                mirror_mask_t old_mirrors = ctx->mirrors;

                xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
                /* Discard whatever datapath actions the translation above
                 * produced by rolling back to the state saved before it. */
                ctx->mirrors = old_mirrors;
                ctx->base_flow = old_base_flow;
                ctx->odp_actions->size = old_size;

                /* Undo changes that may have been done for recirculation. */
                if (exit_recirculates(ctx)) {
                    ctx->action_set.size = ctx->recirc_action_offset;
                    ctx->recirc_action_offset = -1;
                    ctx->last_unroll_offset = -1;
                }
            }
        }

        /* Back in the original bridge: restore the saved state and release
         * the temporary stack and action set. */
        ctx->xin->flow = old_flow;
        ctx->xbridge = xport->xbridge;
        ofpbuf_uninit(&ctx->action_set);
        ctx->action_set = old_action_set;
        ofpbuf_uninit(&ctx->stack);
        ctx->stack = old_stack;

        /* Restore calling bridge's lookup version. */
        ctx->tables_version = old_version;

        /* The peer bridge popping MPLS should have no effect on the original
         * bridge. */
        ctx->was_mpls = old_was_mpls;

        /* The fact that the peer bridge exits (for any reason) does not mean
         * that the original bridge should exit.  Specifically, if the peer
         * bridge recirculates (which typically modifies the packet), the
         * original bridge must continue processing with the original, not the
         * recirculated packet! */
        ctx->exit = false;

        /* Account the packet as transmitted on this port and received on the
         * peer, immediately and/or through the xlate cache. */
        if (ctx->xin->resubmit_stats) {
            netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
            netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats);
            if (peer->bfd) {
                bfd_account_rx(peer->bfd, ctx->xin->resubmit_stats);
            }
        }
        if (ctx->xin->xcache) {
            struct xc_entry *entry;

            entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
            entry->u.dev.tx = netdev_ref(xport->netdev);
            entry->u.dev.rx = netdev_ref(peer->netdev);
            entry->u.dev.bfd = bfd_ref(peer->bfd);
        }
        return;
    }

    /* Remember the fields that may be rewritten below for the benefit of this
     * output only; they are put back at 'out'. */
    flow_vlan_tci = flow->vlan_tci;
    flow_pkt_mark = flow->pkt_mark;
    flow_nw_tos = flow->nw_tos;

    /* If the port maps skb priorities to DSCP values, the output depends on
     * skb_priority, so it must be un-wildcarded; apply the mapped DSCP. */
    if (count_skb_priorities(xport)) {
        memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
        if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
            wc->masks.nw_tos |= IP_DSCP_MASK;
            flow->nw_tos &= ~IP_DSCP_MASK;
            flow->nw_tos |= dscp;
        }
    }

    if (xport->is_tunnel) {
         /* Save tunnel metadata so that changes made due to
          * the Logical (tunnel) Port are not visible for any further
          * matches, while explicit set actions on tunnel metadata are.
          */
        flow_tnl = flow->tunnel;
        odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
        /* NOTE(review): the two 'goto out' paths below restore vlan_tci,
         * pkt_mark and nw_tos only; 'flow->tunnel' is not restored from
         * 'flow_tnl' there.  Confirm whether tnl_port_send() can have
         * modified it on these paths. */
        if (odp_port == ODPP_NONE) {
            xlate_report(ctx, "Tunneling decided against output");
            goto out; /* restore flow_nw_tos */
        }
        if (flow->tunnel.ip_dst == ctx->orig_tunnel_ip_dst) {
            xlate_report(ctx, "Not tunneling to our own address");
            goto out; /* restore flow_nw_tos */
        }
        if (ctx->xin->resubmit_stats) {
            netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
        }
        if (ctx->xin->xcache) {
            struct xc_entry *entry;

            entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
            entry->u.dev.tx = netdev_ref(xport->netdev);
        }
        out_port = odp_port;
        if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
            /* The tunnel header is pushed later, after the flow changes have
             * been committed; tunnel metadata is restored at that point. */
            xlate_report(ctx, "output to native tunnel");
            tnl_push_pop_send = true;
        } else {
            xlate_report(ctx, "output to kernel tunnel");
            commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions);
            flow->tunnel = flow_tnl; /* Restore tunnel metadata */
        }
    } else {
        odp_port = xport->odp_port;
        out_port = odp_port;
        if (ofproto_has_vlan_splinters(ctx->xbridge->ofproto)) {
            ofp_port_t vlandev_port;

            /* With VLAN splinters, the packet may have to go out of a VLAN
             * device instead of the real device, without a VLAN tag. */
            wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
            vlandev_port = vsp_realdev_to_vlandev(ctx->xbridge->ofproto,
                                                  ofp_port, flow->vlan_tci);
            if (vlandev_port != ofp_port) {
                out_port = ofp_port_to_odp_port(ctx->xbridge, vlandev_port);
                flow->vlan_tci = htons(0);
            }
        }
    }

    if (out_port != ODPP_NONE) {
        bool use_masked = ctx->xbridge->support.masked_set_action;

        /* Emit set actions for everything that differs between 'flow' and
         * 'ctx->base_flow' before the output-like action itself. */
        ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
                                              ctx->odp_actions,
                                              wc, use_masked);

        if (xr) {
            struct ovs_action_hash *act_hash;

            /* Hash action. */
            act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
                                                OVS_ACTION_ATTR_HASH,
                                                sizeof *act_hash);
            act_hash->hash_alg = xr->hash_alg;
            act_hash->hash_basis = xr->hash_basis;

            /* Recirc action. */
            nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
                           xr->recirc_id);
        } else {

            if (tnl_push_pop_send) {
                /* NOTE(review): the result of build_tunnel_send() is ignored,
                 * so the output is still counted below even when no tunnel
                 * push action was appended.  Confirm this is intended. */
                build_tunnel_send(ctx, xport, flow, odp_port);
                flow->tunnel = flow_tnl; /* Restore tunnel metadata */
            } else {
                odp_port_t odp_tnl_port = ODPP_NONE;

                /* XXX: Write better Filter for tunnel port. We can use inport
                * int tunnel-port flow to avoid these checks completely. */
                if (ofp_port == OFPP_LOCAL &&
                    ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {

                    odp_tnl_port = tnl_port_map_lookup(flow, wc);
                }

                if (odp_tnl_port != ODPP_NONE) {
                    nl_msg_put_odp_port(ctx->odp_actions,
                                        OVS_ACTION_ATTR_TUNNEL_POP,
                                        odp_tnl_port);
                } else {
                    /* Tunnel push-pop action is not compatible with
                     * IPFIX action. */
                    compose_ipfix_action(ctx, out_port);
                    nl_msg_put_odp_port(ctx->odp_actions,
                                        OVS_ACTION_ATTR_OUTPUT,
                                        out_port);
               }
           }
        }

        /* Bookkeeping consumed after translation: fix_sflow_action() reads
         * the sFlow port and output count. */
        ctx->sflow_odp_port = odp_port;
        ctx->sflow_n_outputs++;
        ctx->nf_output_iface = ofp_port;
    }

    if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) {
        mirror_packet(ctx, xport->xbundle,
                      xbundle_mirror_dst(xport->xbundle->xbridge,
                                         xport->xbundle));
    }

 out:
    /* Restore flow */
    flow->vlan_tci = flow_vlan_tci;
    flow->pkt_mark = flow_pkt_mark;
    flow->nw_tos = flow_nw_tos;
}
3055
3056 static void
3057 compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port,
3058                       const struct xlate_bond_recirc *xr)
3059 {
3060     compose_output_action__(ctx, ofp_port, xr, true);
3061 }
3062
3063 static void
3064 xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule)
3065 {
3066     struct rule_dpif *old_rule = ctx->rule;
3067     ovs_be64 old_cookie = ctx->rule_cookie;
3068     const struct rule_actions *actions;
3069
3070     if (ctx->xin->resubmit_stats) {
3071         rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
3072     }
3073
3074     ctx->resubmits++;
3075     ctx->recurse++;
3076     ctx->rule = rule;
3077     ctx->rule_cookie = rule_dpif_get_flow_cookie(rule);
3078     actions = rule_dpif_get_actions(rule);
3079     do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx);
3080     ctx->rule_cookie = old_cookie;
3081     ctx->rule = old_rule;
3082     ctx->recurse--;
3083 }
3084
3085 static bool
3086 xlate_resubmit_resource_check(struct xlate_ctx *ctx)
3087 {
3088     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3089
3090     if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) {
3091         VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times",
3092                     MAX_RESUBMIT_RECURSION);
3093     } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) {
3094         VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS);
3095     } else if (ctx->odp_actions->size > UINT16_MAX) {
3096         VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions");
3097     } else if (ctx->stack.size >= 65536) {
3098         VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of stack");
3099     } else {
3100         return true;
3101     }
3102
3103     return false;
3104 }
3105
3106 static void
3107 xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
3108                    bool may_packet_in, bool honor_table_miss)
3109 {
3110     /* Check if we need to recirculate before matching in a table. */
3111     if (ctx->was_mpls) {
3112         ctx_trigger_recirculation(ctx);
3113         return;
3114     }
3115     if (xlate_resubmit_resource_check(ctx)) {
3116         uint8_t old_table_id = ctx->table_id;
3117         struct rule_dpif *rule;
3118
3119         ctx->table_id = table_id;
3120
3121         rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
3122                                            ctx->tables_version,
3123                                            &ctx->xin->flow, ctx->xin->wc,
3124                                            ctx->xin->xcache != NULL,
3125                                            ctx->xin->resubmit_stats,
3126                                            &ctx->table_id, in_port,
3127                                            may_packet_in, honor_table_miss);
3128
3129         if (OVS_UNLIKELY(ctx->xin->resubmit_hook)) {
3130             ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse + 1);
3131         }
3132
3133         if (rule) {
3134             /* Fill in the cache entry here instead of xlate_recursively
3135              * to make the reference counting more explicit.  We take a
3136              * reference in the lookups above if we are going to cache the
3137              * rule. */
3138             if (ctx->xin->xcache) {
3139                 struct xc_entry *entry;
3140
3141                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_RULE);
3142                 entry->u.rule = rule;
3143             }
3144             xlate_recursively(ctx, rule);
3145         }
3146
3147         ctx->table_id = old_table_id;
3148         return;
3149     }
3150
3151     ctx->exit = true;
3152 }
3153
3154 static void
3155 xlate_group_stats(struct xlate_ctx *ctx, struct group_dpif *group,
3156                   struct ofputil_bucket *bucket)
3157 {
3158     if (ctx->xin->resubmit_stats) {
3159         group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
3160     }
3161     if (ctx->xin->xcache) {
3162         struct xc_entry *entry;
3163
3164         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
3165         entry->u.group.group = group_dpif_ref(group);
3166         entry->u.group.bucket = bucket;
3167     }
3168 }
3169
3170 static void
3171 xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
3172 {
3173     uint64_t action_list_stub[1024 / 8];
3174     struct ofpbuf action_list, action_set;
3175     struct flow old_flow = ctx->xin->flow;
3176     bool old_was_mpls = ctx->was_mpls;
3177
3178     ofpbuf_use_const(&action_set, bucket->ofpacts, bucket->ofpacts_len);
3179     ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
3180
3181     ofpacts_execute_action_set(&action_list, &action_set);
3182     ctx->recurse++;
3183     do_xlate_actions(action_list.data, action_list.size, ctx);
3184     ctx->recurse--;
3185
3186     ofpbuf_uninit(&action_set);
3187     ofpbuf_uninit(&action_list);
3188
3189     /* Check if need to recirculate. */
3190     if (exit_recirculates(ctx)) {
3191         compose_recirculate_action(ctx);
3192     }
3193
3194     /* Roll back flow to previous state.
3195      * This is equivalent to cloning the packet for each bucket.
3196      *
3197      * As a side effect any subsequently applied actions will
3198      * also effectively be applied to a clone of the packet taken
3199      * just before applying the all or indirect group.
3200      *
3201      * Note that group buckets are action sets, hence they cannot modify the
3202      * main action set.  Also any stack actions are ignored when executing an
3203      * action set, so group buckets cannot change the stack either.
3204      * However, we do allow resubmit actions in group buckets, which could
3205      * break the above assumptions.  It is up to the controller to not mess up
3206      * with the action_set and stack in the tables resubmitted to from
3207      * group buckets. */
3208     ctx->xin->flow = old_flow;
3209
3210     /* The group bucket popping MPLS should have no effect after bucket
3211      * execution. */
3212     ctx->was_mpls = old_was_mpls;
3213
3214     /* The fact that the group bucket exits (for any reason) does not mean that
3215      * the translation after the group action should exit.  Specifically, if
3216      * the group bucket recirculates (which typically modifies the packet), the
3217      * actions after the group action must continue processing with the
3218      * original, not the recirculated packet! */
3219     ctx->exit = false;
3220 }
3221
3222 static void
3223 xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
3224 {
3225     struct ofputil_bucket *bucket;
3226     const struct ovs_list *buckets;
3227
3228     group_dpif_get_buckets(group, &buckets);
3229
3230     LIST_FOR_EACH (bucket, list_node, buckets) {
3231         xlate_group_bucket(ctx, bucket);
3232     }
3233     xlate_group_stats(ctx, group, NULL);
3234 }
3235
/* Translates the bucket returned by group_first_live_bucket() for 'group'
 * and accounts statistics to it.  Does nothing if no bucket is returned. */
static void
xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
{
    struct ofputil_bucket *live = group_first_live_bucket(ctx, group, 0);

    if (!live) {
        return;
    }

    xlate_group_bucket(ctx, live);
    xlate_group_stats(ctx, group, live);
}
3247
3248 static void
3249 xlate_default_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3250 {
3251     struct flow_wildcards *wc = ctx->wc;
3252     struct ofputil_bucket *bucket;
3253     uint32_t basis;
3254
3255     basis = flow_hash_symmetric_l4(&ctx->xin->flow, 0);
3256     flow_mask_hash_fields(&ctx->xin->flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
3257     bucket = group_best_live_bucket(ctx, group, basis);
3258     if (bucket) {
3259         xlate_group_bucket(ctx, bucket);
3260         xlate_group_stats(ctx, group, bucket);
3261     }
3262 }
3263
3264 static void
3265 xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3266 {
3267     struct mf_bitmap hash_fields = MF_BITMAP_INITIALIZER;
3268     const struct field_array *fields;
3269     struct ofputil_bucket *bucket;
3270     uint32_t basis;
3271     int i;
3272
3273     fields = group_dpif_get_fields(group);
3274     basis = hash_uint64(group_dpif_get_selection_method_param(group));
3275
3276     /* Determine which fields to hash */
3277     for (i = 0; i < MFF_N_IDS; i++) {
3278         if (bitmap_is_set(fields->used.bm, i)) {
3279             const struct mf_field *mf;
3280
3281             /* If the field is already present in 'hash_fields' then
3282              * this loop has already checked that it and its pre-requisites
3283              * are present in the flow and its pre-requisites have
3284              * already been added to 'hash_fields'. There is nothing more
3285              * to do here and as an optimisation the loop can continue. */
3286             if (bitmap_is_set(hash_fields.bm, i)) {
3287                 continue;
3288             }
3289
3290             mf = mf_from_id(i);
3291
3292             /* Only hash a field if it and its pre-requisites are present
3293              * in the flow. */
3294             if (!mf_are_prereqs_ok(mf, &ctx->xin->flow)) {
3295                 continue;
3296             }
3297
3298             /* Hash both the field and its pre-requisites */
3299             mf_bitmap_set_field_and_prereqs(mf, &hash_fields);
3300         }
3301     }
3302
3303     /* Hash the fields */
3304     for (i = 0; i < MFF_N_IDS; i++) {
3305         if (bitmap_is_set(hash_fields.bm, i)) {
3306             const struct mf_field *mf = mf_from_id(i);
3307             union mf_value value;
3308             int j;
3309
3310             mf_get_value(mf, &ctx->xin->flow, &value);
3311             /* This seems inefficient but so does apply_mask() */
3312             for (j = 0; j < mf->n_bytes; j++) {
3313                 ((uint8_t *) &value)[j] &= ((uint8_t *) &fields->value[i])[j];
3314             }
3315             basis = hash_bytes(&value, mf->n_bytes, basis);
3316
3317             mf_mask_field(mf, &ctx->wc->masks);
3318         }
3319     }
3320
3321     bucket = group_best_live_bucket(ctx, group, basis);
3322     if (bucket) {
3323         xlate_group_bucket(ctx, bucket);
3324         xlate_group_stats(ctx, group, bucket);
3325     }
3326 }
3327
/* Dispatches translation of a select group according to its selection
 * method: the empty string selects the default method, "hash"
 * (case-insensitive) selects hashing on configured fields. */
static void
xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
{
    const char *method = group_dpif_get_selection_method(group);

    if (*method == '\0') {
        xlate_default_select_group(ctx, group);
        return;
    }

    if (strcasecmp(method, "hash") == 0) {
        xlate_hash_fields_select_group(ctx, group);
        return;
    }

    /* Parsing of groups should ensure this never happens */
    OVS_NOT_REACHED();
}
3342
3343 static void
3344 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
3345 {
3346     bool was_in_group = ctx->in_group;
3347     ctx->in_group = true;
3348
3349     switch (group_dpif_get_type(group)) {
3350     case OFPGT11_ALL:
3351     case OFPGT11_INDIRECT:
3352         xlate_all_group(ctx, group);
3353         break;
3354     case OFPGT11_SELECT:
3355         xlate_select_group(ctx, group);
3356         break;
3357     case OFPGT11_FF:
3358         xlate_ff_group(ctx, group);
3359         break;
3360     default:
3361         OVS_NOT_REACHED();
3362     }
3363     group_dpif_unref(group);
3364
3365     ctx->in_group = was_in_group;
3366 }
3367
3368 static bool
3369 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
3370 {
3371     if (xlate_resubmit_resource_check(ctx)) {
3372         struct group_dpif *group;
3373         bool got_group;
3374
3375         got_group = group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group);
3376         if (got_group) {
3377             xlate_group_action__(ctx, group);
3378         } else {
3379             return true;
3380         }
3381     }
3382
3383     return false;
3384 }
3385
3386 static void
3387 xlate_ofpact_resubmit(struct xlate_ctx *ctx,
3388                       const struct ofpact_resubmit *resubmit)
3389 {
3390     ofp_port_t in_port;
3391     uint8_t table_id;
3392     bool may_packet_in = false;
3393     bool honor_table_miss = false;
3394
3395     if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
3396         /* Still allow missed packets to be sent to the controller
3397          * if resubmitting from an internal table. */
3398         may_packet_in = true;
3399         honor_table_miss = true;
3400     }
3401
3402     in_port = resubmit->in_port;
3403     if (in_port == OFPP_IN_PORT) {
3404         in_port = ctx->xin->flow.in_port.ofp_port;
3405     }
3406
3407     table_id = resubmit->table_id;
3408     if (table_id == 255) {
3409         table_id = ctx->table_id;
3410     }
3411
3412     xlate_table_action(ctx, in_port, table_id, may_packet_in,
3413                        honor_table_miss);
3414 }
3415
3416 static void
3417 flood_packets(struct xlate_ctx *ctx, bool all)
3418 {
3419     const struct xport *xport;
3420
3421     HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
3422         if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) {
3423             continue;
3424         }
3425
3426         if (all) {
3427             compose_output_action__(ctx, xport->ofp_port, NULL, false);
3428         } else if (!(xport->config & OFPUTIL_PC_NO_FLOOD)) {
3429             compose_output_action(ctx, xport->ofp_port, NULL);
3430         }
3431     }
3432
3433     ctx->nf_output_iface = NF_OUT_FLOOD;
3434 }
3435
3436 static void
3437 execute_controller_action(struct xlate_ctx *ctx, int len,
3438                           enum ofp_packet_in_reason reason,
3439                           uint16_t controller_id)
3440 {
3441     struct ofproto_packet_in *pin;
3442     struct dp_packet *packet;
3443     bool use_masked;
3444
3445     ctx->xout->slow |= SLOW_CONTROLLER;
3446     if (!ctx->xin->packet) {
3447         return;
3448     }
3449
3450     packet = dp_packet_clone(ctx->xin->packet);
3451
3452     use_masked = ctx->xbridge->support.masked_set_action;
3453     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
3454                                           ctx->odp_actions,
3455                                           ctx->wc, use_masked);
3456
3457     odp_execute_actions(NULL, &packet, 1, false,
3458                         ctx->odp_actions->data, ctx->odp_actions->size, NULL);
3459
3460     pin = xmalloc(sizeof *pin);
3461     pin->up.packet_len = dp_packet_size(packet);
3462     pin->up.packet = dp_packet_steal_data(packet);
3463     pin->up.reason = reason;
3464     pin->up.table_id = ctx->table_id;
3465     pin->up.cookie = ctx->rule_cookie;
3466
3467     flow_get_metadata(&ctx->xin->flow, &pin->up.flow_metadata);
3468
3469     pin->controller_id = controller_id;
3470     pin->send_len = len;
3471     /* If a rule is a table-miss rule then this is
3472      * a table-miss handled by a table-miss rule.
3473      *
3474      * Else, if rule is internal and has a controller action,
3475      * the later being implied by the rule being processed here,
3476      * then this is a table-miss handled without a table-miss rule.
3477      *
3478      * Otherwise this is not a table-miss. */
3479     pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
3480     if (ctx->rule) {
3481         if (rule_dpif_is_table_miss(ctx->rule)) {
3482             pin->miss_type = OFPROTO_PACKET_IN_MISS_FLOW;
3483         } else if (rule_dpif_is_internal(ctx->rule)) {
3484             pin->miss_type = OFPROTO_PACKET_IN_MISS_WITHOUT_FLOW;
3485         }
3486     }
3487     ofproto_dpif_send_packet_in(ctx->xbridge->ofproto, pin);
3488     dp_packet_delete(packet);
3489 }
3490
3491 /* Called only when ctx->recirc_action_offset is set. */
3492 static void
3493 compose_recirculate_action(struct xlate_ctx *ctx)
3494 {
3495     struct recirc_metadata md;
3496     bool use_masked;
3497     uint32_t id;
3498
3499     use_masked = ctx->xbridge->support.masked_set_action;
3500     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
3501                                           ctx->odp_actions,
3502                                           ctx->wc, use_masked);
3503
3504     recirc_metadata_from_flow(&md, &ctx->xin->flow);
3505
3506     ovs_assert(ctx->recirc_action_offset >= 0);
3507
3508     struct recirc_state state = {
3509         .table_id = 0,
3510         .ofproto = ctx->xbridge->ofproto,
3511         .metadata = md,
3512         .stack = &ctx->stack,
3513         .action_set_len = ctx->recirc_action_offset,
3514         .ofpacts_len = ctx->action_set.size,
3515         .ofpacts = ctx->action_set.data,
3516     };
3517
3518     /* Only allocate recirculation ID if we have a packet. */
3519     if (ctx->xin->packet) {
3520         /* Allocate a unique recirc id for the given metadata state in the
3521          * flow.  The life-cycle of this recirc id is managed by associating it
3522          * with the udpif key ('ukey') created for each new datapath flow. */
3523         id = recirc_alloc_id_ctx(&state);
3524         if (!id) {
3525             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3526             VLOG_ERR_RL(&rl, "Failed to allocate recirculation id");
3527             return;
3528         }
3529         xlate_out_add_recirc(ctx->xout, id);
3530     } else {
3531         /* Look up an existing recirc id for the given metadata state in the
3532          * flow.  No new reference is taken, as the ID is RCU protected and is
3533          * only required temporarily for verification.
3534          *
3535          * This might fail and return 0.  We let zero 'id' to be used in the
3536          * RECIRC action below, which will fail all revalidations as zero is
3537          * not a valid recirculation ID. */
3538         id = recirc_find_id(&state);
3539     }
3540
3541     nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
3542
3543     /* Undo changes done by recirculation. */
3544     ctx->action_set.size = ctx->recirc_action_offset;
3545     ctx->recirc_action_offset = -1;
3546     ctx->last_unroll_offset = -1;
3547 }
3548
3549 static void
3550 compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
3551 {
3552     struct flow *flow = &ctx->xin->flow;
3553     int n;
3554
3555     ovs_assert(eth_type_mpls(mpls->ethertype));
3556
3557     n = flow_count_mpls_labels(flow, ctx->wc);
3558     if (!n) {
3559         bool use_masked = ctx->xbridge->support.masked_set_action;
3560
3561         ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow,
3562                                               ctx->odp_actions,
3563                                               ctx->wc, use_masked);
3564     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3565         if (ctx->xin->packet != NULL) {
3566             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3567             VLOG_WARN_RL(&rl, "bridge %s: dropping packet on which an "
3568                          "MPLS push action can't be performed as it would "
3569                          "have more MPLS LSEs than the %d supported.",
3570                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3571         }
3572         ctx->exit = true;
3573         return;
3574     }
3575
3576     flow_push_mpls(flow, n, mpls->ethertype, ctx->wc);
3577 }
3578
3579 static void
3580 compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
3581 {
3582     struct flow *flow = &ctx->xin->flow;
3583     int n = flow_count_mpls_labels(flow, ctx->wc);
3584
3585     if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
3586         if (ctx->xbridge->support.odp.recirc) {
3587             ctx->was_mpls = true;
3588         }
3589     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3590         if (ctx->xin->packet != NULL) {
3591             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
3592             VLOG_WARN_RL(&rl, "bridge %s: dropping packet on which an "
3593                          "MPLS pop action can't be performed as it has "
3594                          "more MPLS LSEs than the %d supported.",
3595                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3596         }
3597         ctx->exit = true;
3598         ofpbuf_clear(ctx->odp_actions);
3599     }
3600 }
3601
3602 static bool
3603 compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
3604 {
3605     struct flow *flow = &ctx->xin->flow;
3606
3607     if (!is_ip_any(flow)) {
3608         return false;
3609     }
3610
3611     ctx->wc->masks.nw_ttl = 0xff;
3612     if (flow->nw_ttl > 1) {
3613         flow->nw_ttl--;
3614         return false;
3615     } else {
3616         size_t i;
3617
3618         for (i = 0; i < ids->n_controllers; i++) {
3619             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
3620                                       ids->cnt_ids[i]);
3621         }
3622
3623         /* Stop processing for current table. */
3624         return true;
3625     }
3626 }
3627
3628 static void
3629 compose_set_mpls_label_action(struct xlate_ctx *ctx, ovs_be32 label)
3630 {
3631     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3632         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_LABEL_MASK);
3633         set_mpls_lse_label(&ctx->xin->flow.mpls_lse[0], label);
3634     }
3635 }
3636
3637 static void
3638 compose_set_mpls_tc_action(struct xlate_ctx *ctx, uint8_t tc)
3639 {
3640     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3641         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TC_MASK);
3642         set_mpls_lse_tc(&ctx->xin->flow.mpls_lse[0], tc);
3643     }
3644 }
3645
3646 static void
3647 compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
3648 {
3649     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3650         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3651         set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse[0], ttl);
3652     }
3653 }
3654
3655 static bool
3656 compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
3657 {
3658     struct flow *flow = &ctx->xin->flow;
3659
3660     if (eth_type_mpls(flow->dl_type)) {
3661         uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
3662
3663         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3664         if (ttl > 1) {
3665             ttl--;
3666             set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
3667             return false;
3668         } else {
3669             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);
3670         }
3671     }
3672
3673     /* Stop processing for current table. */
3674     return true;
3675 }
3676
3677 static void
3678 xlate_output_action(struct xlate_ctx *ctx,
3679                     ofp_port_t port, uint16_t max_len, bool may_packet_in)
3680 {
3681     ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
3682
3683     ctx->nf_output_iface = NF_OUT_DROP;
3684
3685     switch (port) {
3686     case OFPP_IN_PORT:
3687         compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
3688         break;
3689     case OFPP_TABLE:
3690         xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
3691                            0, may_packet_in, true);
3692         break;
3693     case OFPP_NORMAL:
3694         xlate_normal(ctx);
3695         break;
3696     case OFPP_FLOOD:
3697         flood_packets(ctx,  false);
3698         break;
3699     case OFPP_ALL:
3700         flood_packets(ctx, true);
3701         break;
3702     case OFPP_CONTROLLER:
3703         execute_controller_action(ctx, max_len,
3704                                   (ctx->in_group ? OFPR_GROUP
3705                                    : ctx->in_action_set ? OFPR_ACTION_SET
3706                                    : OFPR_ACTION),
3707                                   0);
3708         break;
3709     case OFPP_NONE:
3710         break;
3711     case OFPP_LOCAL:
3712     default:
3713         if (port != ctx->xin->flow.in_port.ofp_port) {
3714             compose_output_action(ctx, port, NULL);
3715         } else {
3716             xlate_report(ctx, "skipping output to input port");
3717         }
3718         break;
3719     }
3720
3721     if (prev_nf_output_iface == NF_OUT_FLOOD) {
3722         ctx->nf_output_iface = NF_OUT_FLOOD;
3723     } else if (ctx->nf_output_iface == NF_OUT_DROP) {
3724         ctx->nf_output_iface = prev_nf_output_iface;
3725     } else if (prev_nf_output_iface != NF_OUT_DROP &&
3726                ctx->nf_output_iface != NF_OUT_FLOOD) {
3727         ctx->nf_output_iface = NF_OUT_MULTI;
3728     }
3729 }
3730
3731 static void
3732 xlate_output_reg_action(struct xlate_ctx *ctx,
3733                         const struct ofpact_output_reg *or)
3734 {
3735     uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow);
3736     if (port <= UINT16_MAX) {
3737         union mf_subvalue value;
3738
3739         memset(&value, 0xff, sizeof value);
3740         mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks);
3741         xlate_output_action(ctx, u16_to_ofp(port),
3742                             or->max_len, false);
3743     }
3744 }
3745
3746 static void
3747 xlate_enqueue_action(struct xlate_ctx *ctx,
3748                      const struct ofpact_enqueue *enqueue)
3749 {
3750     ofp_port_t ofp_port = enqueue->port;
3751     uint32_t queue_id = enqueue->queue;
3752     uint32_t flow_priority, priority;
3753     int error;
3754
3755     /* Translate queue to priority. */
3756     error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
3757     if (error) {
3758         /* Fall back to ordinary output action. */
3759         xlate_output_action(ctx, enqueue->port, 0, false);
3760         return;
3761     }
3762
3763     /* Check output port. */
3764     if (ofp_port == OFPP_IN_PORT) {
3765         ofp_port = ctx->xin->flow.in_port.ofp_port;
3766     } else if (ofp_port == ctx->xin->flow.in_port.ofp_port) {
3767         return;
3768     }
3769
3770     /* Add datapath actions. */
3771     flow_priority = ctx->xin->flow.skb_priority;
3772     ctx->xin->flow.skb_priority = priority;
3773     compose_output_action(ctx, ofp_port, NULL);
3774     ctx->xin->flow.skb_priority = flow_priority;
3775
3776     /* Update NetFlow output port. */
3777     if (ctx->nf_output_iface == NF_OUT_DROP) {
3778         ctx->nf_output_iface = ofp_port;
3779     } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
3780         ctx->nf_output_iface = NF_OUT_MULTI;
3781     }
3782 }
3783
3784 static void
3785 xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
3786 {
3787     uint32_t skb_priority;
3788
3789     if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
3790         ctx->xin->flow.skb_priority = skb_priority;
3791     } else {
3792         /* Couldn't translate queue to a priority.  Nothing to do.  A warning
3793          * has already been logged. */
3794     }
3795 }
3796
3797 static bool
3798 slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
3799 {
3800     const struct xbridge *xbridge = xbridge_;
3801     struct xport *port;
3802
3803     switch (ofp_port) {
3804     case OFPP_IN_PORT:
3805     case OFPP_TABLE:
3806     case OFPP_NORMAL:
3807     case OFPP_FLOOD:
3808     case OFPP_ALL:
3809     case OFPP_NONE:
3810         return true;
3811     case OFPP_CONTROLLER: /* Not supported by the bundle action. */
3812         return false;
3813     default:
3814         port = get_ofp_port(xbridge, ofp_port);
3815         return port ? port->may_enable : false;
3816     }
3817 }
3818
3819 static void
3820 xlate_bundle_action(struct xlate_ctx *ctx,
3821                     const struct ofpact_bundle *bundle)
3822 {
3823     ofp_port_t port;
3824
3825     port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
3826                           CONST_CAST(struct xbridge *, ctx->xbridge));
3827     if (bundle->dst.field) {
3828         nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
3829     } else {
3830         xlate_output_action(ctx, port, 0, false);
3831     }
3832 }
3833
3834 static void
3835 xlate_learn_action__(struct xlate_ctx *ctx, const struct ofpact_learn *learn,
3836                      struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts)
3837 {
3838     learn_execute(learn, &ctx->xin->flow, fm, ofpacts);
3839     if (ctx->xin->may_learn) {
3840         ofproto_dpif_flow_mod(ctx->xbridge->ofproto, fm);
3841     }
3842 }
3843
3844 static void
3845 xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
3846 {
3847     learn_mask(learn, ctx->wc);
3848
3849     if (ctx->xin->xcache) {
3850         struct xc_entry *entry;
3851
3852         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_LEARN);
3853         entry->u.learn.ofproto = ctx->xbridge->ofproto;
3854         entry->u.learn.fm = xmalloc(sizeof *entry->u.learn.fm);
3855         entry->u.learn.ofpacts = ofpbuf_new(64);
3856         xlate_learn_action__(ctx, learn, entry->u.learn.fm,
3857                              entry->u.learn.ofpacts);
3858     } else if (ctx->xin->may_learn) {
3859         uint64_t ofpacts_stub[1024 / 8];
3860         struct ofputil_flow_mod fm;
3861         struct ofpbuf ofpacts;
3862
3863         ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
3864         xlate_learn_action__(ctx, learn, &fm, &ofpacts);
3865         ofpbuf_uninit(&ofpacts);
3866     }
3867 }
3868
3869 static void
3870 xlate_fin_timeout__(struct rule_dpif *rule, uint16_t tcp_flags,
3871                     uint16_t idle_timeout, uint16_t hard_timeout)
3872 {
3873     if (tcp_flags & (TCP_FIN | TCP_RST)) {
3874         rule_dpif_reduce_timeouts(rule, idle_timeout, hard_timeout);
3875     }
3876 }
3877
3878 static void
3879 xlate_fin_timeout(struct xlate_ctx *ctx,
3880                   const struct ofpact_fin_timeout *oft)
3881 {
3882     if (ctx->rule) {
3883         xlate_fin_timeout__(ctx->rule, ctx->xin->tcp_flags,
3884                             oft->fin_idle_timeout, oft->fin_hard_timeout);
3885         if (ctx->xin->xcache) {
3886             struct xc_entry *entry;
3887
3888             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_FIN_TIMEOUT);
3889             /* XC_RULE already holds a reference on the rule, none is taken
3890              * here. */
3891             entry->u.fin.rule = ctx->rule;
3892             entry->u.fin.idle = oft->fin_idle_timeout;
3893             entry->u.fin.hard = oft->fin_hard_timeout;
3894         }
3895     }
3896 }
3897
3898 static void
3899 xlate_sample_action(struct xlate_ctx *ctx,
3900                     const struct ofpact_sample *os)
3901 {
3902     /* Scale the probability from 16-bit to 32-bit while representing
3903      * the same percentage. */
3904     uint32_t probability = (os->probability << 16) | os->probability;
3905     bool use_masked;
3906
3907     if (!ctx->xbridge->support.variable_length_userdata) {
3908         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
3909
3910         VLOG_ERR_RL(&rl, "ignoring NXAST_SAMPLE action because datapath "
3911                     "lacks support (needs Linux 3.10+ or kernel module from "
3912                     "OVS 1.11+)");
3913         return;
3914     }
3915
3916     use_masked = ctx->xbridge->support.masked_set_action;
3917     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
3918                                           ctx->odp_actions,
3919                                           ctx->wc, use_masked);
3920
3921     union user_action_cookie cookie = {
3922         .flow_sample = {
3923             .type = USER_ACTION_COOKIE_FLOW_SAMPLE,
3924             .probability = os->probability,
3925             .collector_set_id = os->collector_set_id,
3926             .obs_domain_id = os->obs_domain_id,
3927             .obs_point_id = os->obs_point_id,
3928         }
3929     };
3930     compose_sample_action(ctx, probability, &cookie, sizeof cookie.flow_sample,
3931                           ODPP_NONE, false);
3932 }
3933
3934 static bool
3935 may_receive(const struct xport *xport, struct xlate_ctx *ctx)
3936 {
3937     if (xport->config & (is_stp(&ctx->xin->flow)
3938                          ? OFPUTIL_PC_NO_RECV_STP
3939                          : OFPUTIL_PC_NO_RECV)) {
3940         return false;
3941     }
3942
3943     /* Only drop packets here if both forwarding and learning are
3944      * disabled.  If just learning is enabled, we need to have
3945      * OFPP_NORMAL and the learning action have a look at the packet
3946      * before we can drop it. */
3947     if ((!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) ||
3948         (!xport_rstp_forward_state(xport) && !xport_rstp_learn_state(xport))) {
3949         return false;
3950     }
3951
3952     return true;
3953 }
3954
3955 static void
3956 xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a)
3957 {
3958     const struct ofpact_nest *on = ofpact_get_WRITE_ACTIONS(a);
3959     size_t on_len = ofpact_nest_get_action_len(on);
3960     const struct ofpact *inner;
3961
3962     /* Maintain actset_output depending on the contents of the action set:
3963      *
3964      *   - OFPP_UNSET, if there is no "output" action.
3965      *
3966      *   - The output port, if there is an "output" action and no "group"
3967      *     action.
3968      *
3969      *   - OFPP_UNSET, if there is a "group" action.
3970      */
3971     if (!ctx->action_set_has_group) {
3972         OFPACT_FOR_EACH (inner, on->actions, on_len) {
3973             if (inner->type == OFPACT_OUTPUT) {
3974                 ctx->xin->flow.actset_output = ofpact_get_OUTPUT(inner)->port;
3975             } else if (inner->type == OFPACT_GROUP) {
3976                 ctx->xin->flow.actset_output = OFPP_UNSET;
3977                 ctx->action_set_has_group = true;
3978                 break;
3979             }
3980         }
3981     }
3982
3983     ofpbuf_put(&ctx->action_set, on->actions, on_len);
3984     ofpact_pad(&ctx->action_set);
3985 }
3986
3987 static void
3988 xlate_action_set(struct xlate_ctx *ctx)
3989 {
3990     uint64_t action_list_stub[1024 / 64];
3991     struct ofpbuf action_list;
3992
3993     ctx->in_action_set = true;
3994     ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
3995     ofpacts_execute_action_set(&action_list, &ctx->action_set);
3996     /* Clear the action set, as it is not needed any more. */
3997     ofpbuf_clear(&ctx->action_set);
3998     do_xlate_actions(action_list.data, action_list.size, ctx);
3999     ctx->in_action_set = false;
4000     ofpbuf_uninit(&action_list);
4001 }
4002
4003 static void
4004 recirc_put_unroll_xlate(struct xlate_ctx *ctx)
4005 {
4006     struct ofpact_unroll_xlate *unroll;
4007
4008     unroll = ctx->last_unroll_offset < 0
4009         ? NULL
4010         : ALIGNED_CAST(struct ofpact_unroll_xlate *,
4011                        (char *)ctx->action_set.data + ctx->last_unroll_offset);
4012
4013     /* Restore the table_id and rule cookie for a potential PACKET
4014      * IN if needed. */
4015     if (!unroll ||
4016         (ctx->table_id != unroll->rule_table_id
4017          || ctx->rule_cookie != unroll->rule_cookie)) {
4018
4019         ctx->last_unroll_offset = ctx->action_set.size;
4020         unroll = ofpact_put_UNROLL_XLATE(&ctx->action_set);
4021         unroll->rule_table_id = ctx->table_id;
4022         unroll->rule_cookie = ctx->rule_cookie;
4023     }
4024 }
4025
4026
4027 /* Copy remaining actions to the action_set to be executed after recirculation.
4028  * UNROLL_XLATE action is inserted, if not already done so, before actions that
4029  * may generate PACKET_INs from the current table and without matching another
4030  * rule. */
4031 static void
4032 recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
4033                       struct xlate_ctx *ctx)
4034 {
4035     const struct ofpact *a;
4036
4037     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4038         switch (a->type) {
4039             /* May generate PACKET INs. */
4040         case OFPACT_OUTPUT_REG:
4041         case OFPACT_GROUP:
4042         case OFPACT_OUTPUT:
4043         case OFPACT_CONTROLLER:
4044         case OFPACT_DEC_MPLS_TTL:
4045         case OFPACT_DEC_TTL:
4046             recirc_put_unroll_xlate(ctx);
4047             break;
4048
4049             /* These may not generate PACKET INs. */
4050         case OFPACT_SET_TUNNEL:
4051         case OFPACT_REG_MOVE:
4052         case OFPACT_SET_FIELD:
4053         case OFPACT_STACK_PUSH:
4054         case OFPACT_STACK_POP:
4055         case OFPACT_LEARN:
4056         case OFPACT_WRITE_METADATA:
4057         case OFPACT_RESUBMIT:        /* May indirectly generate PACKET INs, */
4058         case OFPACT_GOTO_TABLE:      /* but from a different table and rule. */
4059         case OFPACT_ENQUEUE:
4060         case OFPACT_SET_VLAN_VID:
4061         case OFPACT_SET_VLAN_PCP:
4062         case OFPACT_STRIP_VLAN:
4063         case OFPACT_PUSH_VLAN:
4064         case OFPACT_SET_ETH_SRC:
4065         case OFPACT_SET_ETH_DST:
4066         case OFPACT_SET_IPV4_SRC:
4067         case OFPACT_SET_IPV4_DST:
4068         case OFPACT_SET_IP_DSCP:
4069         case OFPACT_SET_IP_ECN:
4070         case OFPACT_SET_IP_TTL:
4071         case OFPACT_SET_L4_SRC_PORT:
4072         case OFPACT_SET_L4_DST_PORT:
4073         case OFPACT_SET_QUEUE:
4074         case OFPACT_POP_QUEUE:
4075         case OFPACT_PUSH_MPLS:
4076         case OFPACT_POP_MPLS:
4077         case OFPACT_SET_MPLS_LABEL:
4078         case OFPACT_SET_MPLS_TC:
4079         case OFPACT_SET_MPLS_TTL:
4080         case OFPACT_MULTIPATH:
4081         case OFPACT_BUNDLE:
4082         case OFPACT_EXIT:
4083         case OFPACT_UNROLL_XLATE:
4084         case OFPACT_FIN_TIMEOUT:
4085         case OFPACT_CLEAR_ACTIONS:
4086         case OFPACT_WRITE_ACTIONS:
4087         case OFPACT_METER:
4088         case OFPACT_SAMPLE:
4089         case OFPACT_DEBUG_RECIRC:
4090             break;
4091
4092             /* These need not be copied for restoration. */
4093         case OFPACT_NOTE:
4094         case OFPACT_CONJUNCTION:
4095             continue;
4096         }
4097         /* Copy the action over. */
4098         ofpbuf_put(&ctx->action_set, a, OFPACT_ALIGN(a->len));
4099     }
4100 }
4101
/* Helpers for the per-action 'switch' in do_xlate_actions().  Both expect a
 * variable named 'ctx' to be in scope.
 *
 * CHECK_MPLS_RECIRCULATION_IF(COND) evaluates COND and then, if it holds and
 * 'ctx->was_mpls' is set, calls ctx_trigger_recirculation(ctx) and executes
 * 'break'.  CHECK_MPLS_RECIRCULATION() is the unconditional form.
 *
 * The 'break' is meant to leave the enclosing 'switch' case, so these macros
 * must expand to a bare 'if' statement: wrapping them in the usual
 * "do { ... } while (0)" would make the 'break' leave that loop instead. */
#define CHECK_MPLS_RECIRCULATION_IF(COND)   \
    if ((COND) && ctx->was_mpls) {          \
        ctx_trigger_recirculation(ctx);     \
        break;                              \
    }
#define CHECK_MPLS_RECIRCULATION() CHECK_MPLS_RECIRCULATION_IF(true)
4111
4112 static void
4113 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
4114                  struct xlate_ctx *ctx)
4115 {
4116     struct flow_wildcards *wc = ctx->wc;
4117     struct flow *flow = &ctx->xin->flow;
4118     const struct ofpact *a;
4119
4120     if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
4121         tnl_arp_snoop(flow, wc, ctx->xbridge->name);
4122     }
4123     /* dl_type already in the mask, not set below. */
4124
4125     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4126         struct ofpact_controller *controller;
4127         const struct ofpact_metadata *metadata;
4128         const struct ofpact_set_field *set_field;
4129         const struct mf_field *mf;
4130
4131         if (ctx->exit) {
4132             /* Check if need to store the remaining actions for later
4133              * execution. */
4134             if (exit_recirculates(ctx)) {
4135                 recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len -
4136                                                       ((uint8_t *)a -
4137                                                        (uint8_t *)ofpacts)),
4138                                       ctx);
4139             }
4140             break;
4141         }
4142
4143         switch (a->type) {
4144         case OFPACT_OUTPUT:
4145             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
4146                                 ofpact_get_OUTPUT(a)->max_len, true);
4147             break;
4148
4149         case OFPACT_GROUP:
4150             if (xlate_group_action(ctx, ofpact_get_GROUP(a)->group_id)) {
4151                 /* Group could not be found. */
4152                 return;
4153             }
4154             break;
4155
4156         case OFPACT_CONTROLLER:
4157             controller = ofpact_get_CONTROLLER(a);
4158             execute_controller_action(ctx, controller->max_len,
4159                                       controller->reason,
4160                                       controller->controller_id);
4161             break;
4162
4163         case OFPACT_ENQUEUE:
4164             memset(&wc->masks.skb_priority, 0xff,
4165                    sizeof wc->masks.skb_priority);
4166             xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
4167             break;
4168
4169         case OFPACT_SET_VLAN_VID:
4170             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
4171             if (flow->vlan_tci & htons(VLAN_CFI) ||
4172                 ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
4173                 flow->vlan_tci &= ~htons(VLAN_VID_MASK);
4174                 flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
4175                                    | htons(VLAN_CFI));
4176             }
4177             break;
4178
4179         case OFPACT_SET_VLAN_PCP:
4180             wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
4181             if (flow->vlan_tci & htons(VLAN_CFI) ||
4182                 ofpact_get_SET_VLAN_PCP(a)->push_vlan_if_needed) {
4183                 flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
4184                 flow->vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
4185                                          << VLAN_PCP_SHIFT) | VLAN_CFI);
4186             }
4187             break;
4188
4189         case OFPACT_STRIP_VLAN:
4190             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4191             flow->vlan_tci = htons(0);
4192             break;
4193
4194         case OFPACT_PUSH_VLAN:
4195             /* XXX 802.1AD(QinQ) */
4196             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4197             flow->vlan_tci = htons(VLAN_CFI);
4198             break;
4199
4200         case OFPACT_SET_ETH_SRC:
4201             memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
4202             memcpy(flow->dl_src, ofpact_get_SET_ETH_SRC(a)->mac, ETH_ADDR_LEN);
4203             break;
4204
4205         case OFPACT_SET_ETH_DST:
4206             memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
4207             memcpy(flow->dl_dst, ofpact_get_SET_ETH_DST(a)->mac, ETH_ADDR_LEN);
4208             break;
4209
4210         case OFPACT_SET_IPV4_SRC:
4211             CHECK_MPLS_RECIRCULATION();
4212             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4213                 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
4214                 flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
4215             }
4216             break;
4217
4218         case OFPACT_SET_IPV4_DST:
4219             CHECK_MPLS_RECIRCULATION();
4220             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4221                 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
4222                 flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
4223             }
4224             break;
4225
4226         case OFPACT_SET_IP_DSCP:
4227             CHECK_MPLS_RECIRCULATION();
4228             if (is_ip_any(flow)) {
4229                 wc->masks.nw_tos |= IP_DSCP_MASK;
4230                 flow->nw_tos &= ~IP_DSCP_MASK;
4231                 flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
4232             }
4233             break;
4234
4235         case OFPACT_SET_IP_ECN:
4236             CHECK_MPLS_RECIRCULATION();
4237             if (is_ip_any(flow)) {
4238                 wc->masks.nw_tos |= IP_ECN_MASK;
4239                 flow->nw_tos &= ~IP_ECN_MASK;
4240                 flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
4241             }
4242             break;
4243
4244         case OFPACT_SET_IP_TTL:
4245             CHECK_MPLS_RECIRCULATION();
4246             if (is_ip_any(flow)) {
4247                 wc->masks.nw_ttl = 0xff;
4248                 flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
4249             }
4250             break;
4251
4252         case OFPACT_SET_L4_SRC_PORT:
4253             CHECK_MPLS_RECIRCULATION();
4254             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4255                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4256                 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
4257                 flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
4258             }
4259             break;
4260
4261         case OFPACT_SET_L4_DST_PORT:
4262             CHECK_MPLS_RECIRCULATION();
4263             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4264                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4265                 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
4266                 flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
4267             }
4268             break;
4269
4270         case OFPACT_RESUBMIT:
4271             xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
4272             break;
4273
4274         case OFPACT_SET_TUNNEL:
4275             flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
4276             break;
4277
4278         case OFPACT_SET_QUEUE:
4279             memset(&wc->masks.skb_priority, 0xff,
4280                    sizeof wc->masks.skb_priority);
4281             xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
4282             break;
4283
4284         case OFPACT_POP_QUEUE:
4285             memset(&wc->masks.skb_priority, 0xff,
4286                    sizeof wc->masks.skb_priority);
4287             flow->skb_priority = ctx->orig_skb_priority;
4288             break;
4289
4290         case OFPACT_REG_MOVE:
4291             CHECK_MPLS_RECIRCULATION_IF(
4292                 mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->dst.field) ||
4293                 mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->src.field));
4294             nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
4295             break;
4296
4297         case OFPACT_SET_FIELD:
4298             CHECK_MPLS_RECIRCULATION_IF(
4299                 mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field));
4300             set_field = ofpact_get_SET_FIELD(a);
4301             mf = set_field->field;
4302
4303             /* Set field action only ever overwrites packet's outermost
4304              * applicable header fields.  Do nothing if no header exists. */
4305             if (mf->id == MFF_VLAN_VID) {
4306                 wc->masks.vlan_tci |= htons(VLAN_CFI);
4307                 if (!(flow->vlan_tci & htons(VLAN_CFI))) {
4308                     break;
4309                 }
4310             } else if ((mf->id == MFF_MPLS_LABEL || mf->id == MFF_MPLS_TC)
4311                        /* 'dl_type' is already unwildcarded. */
4312                        && !eth_type_mpls(flow->dl_type)) {
4313                 break;
4314             }
4315             /* A flow may wildcard nw_frag.  Do nothing if setting a trasport
4316              * header field on a packet that does not have them. */
4317             mf_mask_field_and_prereqs(mf, &wc->masks);
4318             if (mf_are_prereqs_ok(mf, flow)) {
4319                 mf_set_flow_value_masked(mf, &set_field->value,
4320                                          &set_field->mask, flow);
4321             }
4322             break;
4323
4324         case OFPACT_STACK_PUSH:
4325             CHECK_MPLS_RECIRCULATION_IF(
4326                 mf_is_l3_or_higher(ofpact_get_STACK_PUSH(a)->subfield.field));
4327             nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
4328                                    &ctx->stack);
4329             break;
4330
4331         case OFPACT_STACK_POP:
4332             CHECK_MPLS_RECIRCULATION_IF(
4333                 mf_is_l3_or_higher(ofpact_get_STACK_POP(a)->subfield.field));
4334             nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
4335                                   &ctx->stack);
4336             break;
4337
4338         case OFPACT_PUSH_MPLS:
4339             /* Recirculate if it is an IP packet with a zero ttl.  This may
4340              * indicate that the packet was previously MPLS and an MPLS pop
4341              * action converted it to IP. In this case recirculating should
4342              * reveal the IP TTL which is used as the basis for a new MPLS
4343              * LSE. */
4344             CHECK_MPLS_RECIRCULATION_IF(
4345                 !flow_count_mpls_labels(flow, wc)
4346                 && flow->nw_ttl == 0
4347                 && is_ip_any(flow));
4348             compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
4349             break;
4350
4351         case OFPACT_POP_MPLS:
4352             CHECK_MPLS_RECIRCULATION();
4353             compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
4354             break;
4355
4356         case OFPACT_SET_MPLS_LABEL:
4357             CHECK_MPLS_RECIRCULATION();
4358             compose_set_mpls_label_action(
4359                 ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
4360             break;
4361
4362         case OFPACT_SET_MPLS_TC:
4363             CHECK_MPLS_RECIRCULATION();
4364             compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
4365             break;
4366
4367         case OFPACT_SET_MPLS_TTL:
4368             CHECK_MPLS_RECIRCULATION();
4369             compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
4370             break;
4371
4372         case OFPACT_DEC_MPLS_TTL:
4373             CHECK_MPLS_RECIRCULATION();
4374             if (compose_dec_mpls_ttl_action(ctx)) {
4375                 return;
4376             }
4377             break;
4378
4379         case OFPACT_DEC_TTL:
4380             CHECK_MPLS_RECIRCULATION();
4381             wc->masks.nw_ttl = 0xff;
4382             if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
4383                 return;
4384             }
4385             break;
4386
4387         case OFPACT_NOTE:
4388             /* Nothing to do. */
4389             break;
4390
4391         case OFPACT_MULTIPATH:
4392             CHECK_MPLS_RECIRCULATION();
4393             multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
4394             break;
4395
4396         case OFPACT_BUNDLE:
4397             CHECK_MPLS_RECIRCULATION();
4398             xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
4399             break;
4400
4401         case OFPACT_OUTPUT_REG:
4402             xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
4403             break;
4404
4405         case OFPACT_LEARN:
4406             CHECK_MPLS_RECIRCULATION();
4407             xlate_learn_action(ctx, ofpact_get_LEARN(a));
4408             break;
4409
4410         case OFPACT_CONJUNCTION: {
4411             /* A flow with a "conjunction" action represents part of a special
4412              * kind of "set membership match".  Such a flow should not actually
4413              * get executed, but it could via, say, a "packet-out", even though
4414              * that wouldn't be useful.  Log it to help debugging. */
4415             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4416             VLOG_INFO_RL(&rl, "executing no-op conjunction action");
4417             break;
4418         }
4419
4420         case OFPACT_EXIT:
4421             ctx->exit = true;
4422             break;
4423
4424         case OFPACT_UNROLL_XLATE: {
4425             struct ofpact_unroll_xlate *unroll = ofpact_get_UNROLL_XLATE(a);
4426
4427             /* Restore translation context data that was stored earlier. */
4428             ctx->table_id = unroll->rule_table_id;
4429             ctx->rule_cookie = unroll->rule_cookie;
4430             break;
4431         }
4432         case OFPACT_FIN_TIMEOUT:
4433             CHECK_MPLS_RECIRCULATION();
4434             memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4435             xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
4436             break;
4437
4438         case OFPACT_CLEAR_ACTIONS:
4439             ofpbuf_clear(&ctx->action_set);
4440             ctx->xin->flow.actset_output = OFPP_UNSET;
4441             ctx->action_set_has_group = false;
4442             break;
4443
4444         case OFPACT_WRITE_ACTIONS:
4445             xlate_write_actions(ctx, a);
4446             break;
4447
4448         case OFPACT_WRITE_METADATA:
4449             metadata = ofpact_get_WRITE_METADATA(a);
4450             flow->metadata &= ~metadata->mask;
4451             flow->metadata |= metadata->metadata & metadata->mask;
4452             break;
4453
4454         case OFPACT_METER:
4455             /* Not implemented yet. */
4456             break;
4457
4458         case OFPACT_GOTO_TABLE: {
4459             struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
4460
4461             /* Allow ctx->table_id == TBL_INTERNAL, which will be greater
4462              * than ogt->table_id. This is to allow goto_table actions that
4463              * triggered recirculation: ctx->table_id will be TBL_INTERNAL
4464              * after recirculation. */
4465             ovs_assert(ctx->table_id == TBL_INTERNAL
4466                        || ctx->table_id < ogt->table_id);
4467             xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
4468                                ogt->table_id, true, true);
4469             break;
4470         }
4471
4472         case OFPACT_SAMPLE:
4473             xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
4474             break;
4475
4476         case OFPACT_DEBUG_RECIRC:
4477             ctx_trigger_recirculation(ctx);
4478             a = ofpact_next(a);
4479             break;
4480         }
4481
4482         /* Check if need to store this and the remaining actions for later
4483          * execution. */
4484         if (ctx->exit && ctx_first_recirculation_action(ctx)) {
4485             recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len -
4486                                                   ((uint8_t *)a -
4487                                                    (uint8_t *)ofpacts)),
4488                                   ctx);
4489             break;
4490         }
4491     }
4492 }
4493
4494 void
4495 xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
4496               const struct flow *flow, ofp_port_t in_port,
4497               struct rule_dpif *rule, uint16_t tcp_flags,
4498               const struct dp_packet *packet, struct flow_wildcards *wc,
4499               struct ofpbuf *odp_actions)
4500 {
4501     xin->ofproto = ofproto;
4502     xin->flow = *flow;
4503     xin->flow.in_port.ofp_port = in_port;
4504     xin->flow.actset_output = OFPP_UNSET;
4505     xin->packet = packet;
4506     xin->may_learn = packet != NULL;
4507     xin->rule = rule;
4508     xin->xcache = NULL;
4509     xin->ofpacts = NULL;
4510     xin->ofpacts_len = 0;
4511     xin->tcp_flags = tcp_flags;
4512     xin->resubmit_hook = NULL;
4513     xin->report_hook = NULL;
4514     xin->resubmit_stats = NULL;
4515     xin->wc = wc;
4516     xin->odp_actions = odp_actions;
4517
4518     /* Do recirc lookup. */
4519     xin->recirc = flow->recirc_id
4520         ? recirc_id_node_find(flow->recirc_id)
4521         : NULL;
4522 }
4523
/* Releases the recirculation state held by 'xout' by calling
 * xlate_out_free_recircs().  A null 'xout' is accepted and ignored. */
void
xlate_out_uninit(struct xlate_out *xout)
{
    if (!xout) {
        return;
    }
    xlate_out_free_recircs(xout);
}
4531
4532 /* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
4533  * into datapath actions, using 'ctx', and discards the datapath actions. */
4534 void
4535 xlate_actions_for_side_effects(struct xlate_in *xin)
4536 {
4537     struct xlate_out xout;
4538
4539     xlate_actions(xin, &xout);
4540     xlate_out_uninit(&xout);
4541 }
4542 \f
4543 static struct skb_priority_to_dscp *
4544 get_skb_priority(const struct xport *xport, uint32_t skb_priority)
4545 {
4546     struct skb_priority_to_dscp *pdscp;
4547     uint32_t hash;
4548
4549     hash = hash_int(skb_priority, 0);
4550     HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
4551         if (pdscp->skb_priority == skb_priority) {
4552             return pdscp;
4553         }
4554     }
4555     return NULL;
4556 }
4557
4558 static bool
4559 dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
4560                        uint8_t *dscp)
4561 {
4562     struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
4563     *dscp = pdscp ? pdscp->dscp : 0;
4564     return pdscp != NULL;
4565 }
4566
4567 static size_t
4568 count_skb_priorities(const struct xport *xport)
4569 {
4570     return hmap_count(&xport->skb_priorities);
4571 }
4572
4573 static void
4574 clear_skb_priorities(struct xport *xport)
4575 {
4576     struct skb_priority_to_dscp *pdscp, *next;
4577
4578     HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) {
4579         hmap_remove(&xport->skb_priorities, &pdscp->hmap_node);
4580         free(pdscp);
4581     }
4582 }
4583
4584 static bool
4585 actions_output_to_local_port(const struct xlate_ctx *ctx)
4586 {
4587     odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL);
4588     const struct nlattr *a;
4589     unsigned int left;
4590
4591     NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->odp_actions->data,
4592                              ctx->odp_actions->size) {
4593         if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT
4594             && nl_attr_get_odp_port(a) == local_odp_port) {
4595             return true;
4596         }
4597     }
4598     return false;
4599 }
4600
4601 #if defined(__linux__)
4602 /* Returns the maximum number of packets that the Linux kernel is willing to
4603  * queue up internally to certain kinds of software-implemented ports, or the
4604  * default (and rarely modified) value if it cannot be determined. */
4605 static int
4606 netdev_max_backlog(void)
4607 {
4608     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4609     static int max_backlog = 1000; /* The normal default value. */
4610
4611     if (ovsthread_once_start(&once)) {
4612         static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
4613         FILE *stream;
4614         int n;
4615
4616         stream = fopen(filename, "r");
4617         if (!stream) {
4618             VLOG_WARN("%s: open failed (%s)", filename, ovs_strerror(errno));
4619         } else {
4620             if (fscanf(stream, "%d", &n) != 1) {
4621                 VLOG_WARN("%s: read error", filename);
4622             } else if (n <= 100) {
4623                 VLOG_WARN("%s: unexpectedly small value %d", filename, n);
4624             } else {
4625                 max_backlog = n;
4626             }
4627             fclose(stream);
4628         }
4629         ovsthread_once_done(&once);
4630
4631         VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
4632     }
4633
4634     return max_backlog;
4635 }
4636
4637 /* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
4638  * 'odp_actions'. */
4639 static int
4640 count_output_actions(const struct ofpbuf *odp_actions)
4641 {
4642     const struct nlattr *a;
4643     size_t left;
4644     int n = 0;
4645
4646     NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
4647         if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
4648             n++;
4649         }
4650     }
4651     return n;
4652 }
4653 #endif /* defined(__linux__) */
4654
4655 /* Returns true if 'odp_actions' contains more output actions than the datapath
4656  * can reliably handle in one go.  On Linux, this is the value of the
4657  * net.core.netdev_max_backlog sysctl, which limits the maximum number of
4658  * packets that the kernel is willing to queue up for processing while the
4659  * datapath is processing a set of actions. */
4660 static bool
4661 too_many_output_actions(const struct ofpbuf *odp_actions OVS_UNUSED)
4662 {
4663 #ifdef __linux__
4664     return (odp_actions->size / NL_A_U32_SIZE > netdev_max_backlog()
4665             && count_output_actions(odp_actions) > netdev_max_backlog());
4666 #else
4667     /* OSes other than Linux might have similar limits, but we don't know how
4668      * to determine them.*/
4669     return false;
4670 #endif
4671 }
4672
4673 static void
4674 xlate_wc_init(struct xlate_ctx *ctx)
4675 {
4676     flow_wildcards_init_catchall(ctx->wc);
4677
4678     /* Some fields we consider to always be examined. */
4679     memset(&ctx->wc->masks.in_port, 0xff, sizeof ctx->wc->masks.in_port);
4680     memset(&ctx->wc->masks.dl_type, 0xff, sizeof ctx->wc->masks.dl_type);
4681     if (is_ip_any(&ctx->xin->flow)) {
4682         ctx->wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
4683     }
4684
4685     if (ctx->xbridge->support.odp.recirc) {
4686         /* Always exactly match recirc_id when datapath supports
4687          * recirculation.  */
4688         ctx->wc->masks.recirc_id = UINT32_MAX;
4689     }
4690
4691     if (ctx->xbridge->netflow) {
4692         netflow_mask_wc(&ctx->xin->flow, ctx->wc);
4693     }
4694
4695     tnl_wc_init(&ctx->xin->flow, ctx->wc);
4696 }
4697
4698 static void
4699 xlate_wc_finish(struct xlate_ctx *ctx)
4700 {
4701     /* Clear the metadata and register wildcard masks, because we won't
4702      * use non-header fields as part of the cache. */
4703     flow_wildcards_clear_non_packet_fields(ctx->wc);
4704
4705     /* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields.  struct flow
4706      * uses the low 8 bits of the 16-bit tp_src and tp_dst members to
4707      * represent these fields.  The datapath interface, on the other hand,
4708      * represents them with just 8 bits each.  This means that if the high
4709      * 8 bits of the masks for these fields somehow become set, then they
4710      * will get chopped off by a round trip through the datapath, and
4711      * revalidation will spot that as an inconsistency and delete the flow.
4712      * Avoid the problem here by making sure that only the low 8 bits of
4713      * either field can be unwildcarded for ICMP.
4714      */
4715     if (is_icmpv4(&ctx->xin->flow) || is_icmpv6(&ctx->xin->flow)) {
4716         ctx->wc->masks.tp_src &= htons(UINT8_MAX);
4717         ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
4718     }
4719     /* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */
4720     if (ctx->wc->masks.vlan_tci) {
4721         ctx->wc->masks.vlan_tci |= htons(VLAN_CFI);
4722     }
4723 }
4724
4725 /* Translates the flow, actions, or rule in 'xin' into datapath actions in
4726  * 'xout'.
4727  * The caller must take responsibility for eventually freeing 'xout', with
4728  * xlate_out_uninit(). */
4729 void
4730 xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
4731 {
4732     *xout = (struct xlate_out) {
4733         .slow = 0,
4734         .fail_open = false,
4735         .n_recircs = 0,
4736     };
4737
4738     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
4739     struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto);
4740     if (!xbridge) {
4741         return;
4742     }
4743
4744     struct flow *flow = &xin->flow;
4745
4746     union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
4747     uint64_t action_set_stub[1024 / 8];
4748     struct flow_wildcards scratch_wc;
4749     uint64_t actions_stub[256 / 8];
4750     struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
4751     struct xlate_ctx ctx = {
4752         .xin = xin,
4753         .xout = xout,
4754         .base_flow = *flow,
4755         .orig_tunnel_ip_dst = flow->tunnel.ip_dst,
4756         .xbridge = xbridge,
4757         .stack = OFPBUF_STUB_INITIALIZER(stack_stub),
4758         .rule = xin->rule,
4759         .wc = xin->wc ? xin->wc : &scratch_wc,
4760         .odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
4761
4762         .recurse = 0,
4763         .resubmits = 0,
4764         .in_group = false,
4765         .in_action_set = false,
4766
4767         .table_id = 0,
4768         .rule_cookie = OVS_BE64_MAX,
4769         .orig_skb_priority = flow->skb_priority,
4770         .sflow_n_outputs = 0,
4771         .sflow_odp_port = 0,
4772         .nf_output_iface = NF_OUT_DROP,
4773         .exit = false,
4774         .mirrors = 0,
4775
4776         .recirc_action_offset = -1,
4777         .last_unroll_offset = -1,
4778
4779         .was_mpls = false,
4780
4781         .action_set_has_group = false,
4782         .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
4783     };
4784
4785     /* 'base_flow' reflects the packet as it came in, but we need it to reflect
4786      * the packet as the datapath will treat it for output actions:
4787      *
4788      *     - Our datapath doesn't retain tunneling information without us
4789      *       re-setting it, so clear the tunnel data.
4790      *
4791      *     - For VLAN splinters, a higher layer may pretend that the packet
4792      *       came in on 'flow->in_port.ofp_port' with 'flow->vlan_tci'
4793      *       attached, because that's how we want to treat it from an OpenFlow
4794      *       perspective.  But from the datapath's perspective it actually came
4795      *       in on a VLAN device without any VLAN attached.  So here we put the
4796      *       datapath's view of the VLAN information in 'base_flow' to ensure
4797      *       correct treatment.
4798      */
4799     memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
4800     if (flow->in_port.ofp_port
4801         != vsp_realdev_to_vlandev(xbridge->ofproto,
4802                                   flow->in_port.ofp_port,
4803                                   flow->vlan_tci)) {
4804         ctx.base_flow.vlan_tci = 0;
4805     }
4806
4807     ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
4808     if (xin->wc) {
4809         xlate_wc_init(&ctx);
4810     }
4811
4812     COVERAGE_INC(xlate_actions);
4813
4814     if (xin->recirc) {
4815         const struct recirc_state *state = &xin->recirc->state;
4816
4817         if (xin->ofpacts_len > 0 || ctx.rule) {
4818             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4819
4820             VLOG_WARN_RL(&rl, "Recirculation conflict (%s)!",
4821                          xin->ofpacts_len > 0
4822                          ? "actions"
4823                          : "rule");
4824             goto exit;
4825         }
4826
4827         /* Set the bridge for post-recirculation processing if needed. */
4828         if (ctx.xbridge->ofproto != state->ofproto) {
4829             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
4830             const struct xbridge *new_bridge
4831                 = xbridge_lookup(xcfg, state->ofproto);
4832
4833             if (OVS_UNLIKELY(!new_bridge)) {
4834                 /* Drop the packet if the bridge cannot be found. */
4835                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4836                 VLOG_WARN_RL(&rl, "Recirculation bridge no longer exists.");
4837                 goto exit;
4838             }
4839             ctx.xbridge = new_bridge;
4840         }
4841
4842         /* Set the post-recirculation table id.  Note: A table lookup is done
4843          * only if there are no post-recirculation actions. */
4844         ctx.table_id = state->table_id;
4845
4846         /* Restore pipeline metadata. May change flow's in_port and other
4847          * metadata to the values that existed when recirculation was
4848          * triggered. */
4849         recirc_metadata_to_flow(&state->metadata, flow);
4850
4851         /* Restore stack, if any. */
4852         if (state->stack) {
4853             ofpbuf_put(&ctx.stack, state->stack->data, state->stack->size);
4854         }
4855
4856         /* Restore action set, if any. */
4857         if (state->action_set_len) {
4858             const struct ofpact *a;
4859
4860             ofpbuf_put(&ctx.action_set, state->ofpacts, state->action_set_len);
4861
4862             OFPACT_FOR_EACH(a, state->ofpacts, state->action_set_len) {
4863                 if (a->type == OFPACT_GROUP) {
4864                     ctx.action_set_has_group = true;
4865                     break;
4866                 }
4867             }
4868         }
4869
4870         /* Restore recirculation actions.  If there are no actions, processing
4871          * will start with a lookup in the table set above. */
4872         if (state->ofpacts_len > state->action_set_len) {
4873             xin->ofpacts_len = state->ofpacts_len - state->action_set_len;
4874             xin->ofpacts = state->ofpacts +
4875                 state->action_set_len / sizeof *state->ofpacts;
4876         }
4877     } else if (OVS_UNLIKELY(flow->recirc_id)) {
4878         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4879
4880         VLOG_WARN_RL(&rl, "Recirculation context not found for ID %"PRIx32,
4881                      flow->recirc_id);
4882         goto exit;
4883     }
4884     /* The bridge is now known so obtain its table version. */
4885     ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
4886
4887     if (!xin->ofpacts && !ctx.rule) {
4888         ctx.rule = rule_dpif_lookup_from_table(
4889             ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
4890             ctx.xin->xcache != NULL, ctx.xin->resubmit_stats, &ctx.table_id,
4891             flow->in_port.ofp_port, true, true);
4892         if (ctx.xin->resubmit_stats) {
4893             rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
4894         }
4895         if (ctx.xin->xcache) {
4896             struct xc_entry *entry;
4897
4898             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
4899             entry->u.rule = ctx.rule;
4900         }
4901
4902         if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
4903             ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
4904         }
4905     }
4906     xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule);
4907
4908     /* Get the proximate input port of the packet.  (If xin->recirc,
4909      * flow->in_port is the ultimate input port of the packet.) */
4910     struct xport *in_port = get_ofp_port(xbridge,
4911                                          ctx.base_flow.in_port.ofp_port);
4912
4913     /* Tunnel stats only for non-recirculated packets. */
4914     if (!xin->recirc && in_port && in_port->is_tunnel) {
4915         if (ctx.xin->resubmit_stats) {
4916             netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
4917             if (in_port->bfd) {
4918                 bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
4919             }
4920         }
4921         if (ctx.xin->xcache) {
4922             struct xc_entry *entry;
4923
4924             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
4925             entry->u.dev.rx = netdev_ref(in_port->netdev);
4926             entry->u.dev.bfd = bfd_ref(in_port->bfd);
4927         }
4928     }
4929
4930     if (!xin->recirc && process_special(&ctx, in_port)) {
4931         /* process_special() did all the processing for this packet.
4932          *
4933          * We do not perform special processing on recirculated packets, as
4934          * recirculated packets are not really received by the bridge.*/
4935     } else if (in_port && in_port->xbundle
4936                && xbundle_mirror_out(xbridge, in_port->xbundle)) {
4937         if (ctx.xin->packet != NULL) {
4938             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4939             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
4940                          "%s, which is reserved exclusively for mirroring",
4941                          ctx.xbridge->name, in_port->xbundle->name);
4942         }
4943     } else {
4944         /* Sampling is done only for packets really received by the bridge. */
4945         unsigned int user_cookie_offset = 0;
4946         if (!xin->recirc) {
4947             user_cookie_offset = compose_sflow_action(&ctx);
4948             compose_ipfix_action(&ctx, ODPP_NONE);
4949         }
4950         size_t sample_actions_len = ctx.odp_actions->size;
4951
4952         if (tnl_process_ecn(flow)
4953             && (!in_port || may_receive(in_port, &ctx))) {
4954             const struct ofpact *ofpacts;
4955             size_t ofpacts_len;
4956
4957             if (xin->ofpacts) {
4958                 ofpacts = xin->ofpacts;
4959                 ofpacts_len = xin->ofpacts_len;
4960             } else if (ctx.rule) {
4961                 const struct rule_actions *actions
4962                     = rule_dpif_get_actions(ctx.rule);
4963                 ofpacts = actions->ofpacts;
4964                 ofpacts_len = actions->ofpacts_len;
4965                 ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
4966             } else {
4967                 OVS_NOT_REACHED();
4968             }
4969
4970             mirror_ingress_packet(&ctx);
4971             do_xlate_actions(ofpacts, ofpacts_len, &ctx);
4972
4973             /* We've let OFPP_NORMAL and the learning action look at the
4974              * packet, so drop it now if forwarding is disabled. */
4975             if (in_port && (!xport_stp_forward_state(in_port) ||
4976                             !xport_rstp_forward_state(in_port))) {
4977                 /* Drop all actions added by do_xlate_actions() above. */
4978                 ctx.odp_actions->size = sample_actions_len;
4979
4980                 /* Undo changes that may have been done for recirculation. */
4981                 if (exit_recirculates(&ctx)) {
4982                     ctx.action_set.size = ctx.recirc_action_offset;
4983                     ctx.recirc_action_offset = -1;
4984                     ctx.last_unroll_offset = -1;
4985                 }
4986             } else if (ctx.action_set.size) {
4987                 /* Translate action set only if not dropping the packet and
4988                  * not recirculating. */
4989                 if (!exit_recirculates(&ctx)) {
4990                     xlate_action_set(&ctx);
4991                 }
4992             }
4993             /* Check if need to recirculate. */
4994             if (exit_recirculates(&ctx)) {
4995                 compose_recirculate_action(&ctx);
4996             }
4997         }
4998
4999         /* Output only fully processed packets. */
5000         if (!exit_recirculates(&ctx)
5001             && xbridge->has_in_band
5002             && in_band_must_output_to_local_port(flow)
5003             && !actions_output_to_local_port(&ctx)) {
5004             compose_output_action(&ctx, OFPP_LOCAL, NULL);
5005         }
5006
5007         if (user_cookie_offset) {
5008             fix_sflow_action(&ctx, user_cookie_offset);
5009         }
5010     }
5011
5012     if (nl_attr_oversized(ctx.odp_actions->size)) {
5013         /* These datapath actions are too big for a Netlink attribute, so we
5014          * can't hand them to the kernel directly.  dpif_execute() can execute
5015          * them one by one with help, so just mark the result as SLOW_ACTION to
5016          * prevent the flow from being installed. */
5017         COVERAGE_INC(xlate_actions_oversize);
5018         ctx.xout->slow |= SLOW_ACTION;
5019     } else if (too_many_output_actions(ctx.odp_actions)) {
5020         COVERAGE_INC(xlate_actions_too_many_output);
5021         ctx.xout->slow |= SLOW_ACTION;
5022     }
5023
5024     /* Do netflow only for packets really received by the bridge and not sent
5025      * to the controller.  We consider packets sent to the controller to be
5026      * part of the control plane rather than the data plane. */
5027     if (!xin->recirc && xbridge->netflow && !(xout->slow & SLOW_CONTROLLER)) {
5028         if (ctx.xin->resubmit_stats) {
5029             netflow_flow_update(xbridge->netflow, flow,
5030                                 ctx.nf_output_iface,
5031                                 ctx.xin->resubmit_stats);
5032         }
5033         if (ctx.xin->xcache) {
5034             struct xc_entry *entry;
5035
5036             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETFLOW);
5037             entry->u.nf.netflow = netflow_ref(xbridge->netflow);
5038             entry->u.nf.flow = xmemdup(flow, sizeof *flow);
5039             entry->u.nf.iface = ctx.nf_output_iface;
5040         }
5041     }
5042
5043     if (xin->wc) {
5044         xlate_wc_finish(&ctx);
5045     }
5046
5047 exit:
5048     ofpbuf_uninit(&ctx.stack);
5049     ofpbuf_uninit(&ctx.action_set);
5050     ofpbuf_uninit(&scratch_actions);
5051 }
5052
5053 /* Sends 'packet' out 'ofport'.
5054  * May modify 'packet'.
5055  * Returns 0 if successful, otherwise a positive errno value. */
5056 int
5057 xlate_send_packet(const struct ofport_dpif *ofport, struct dp_packet *packet)
5058 {
5059     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5060     struct xport *xport;
5061     struct ofpact_output output;
5062     struct flow flow;
5063
5064     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
5065     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
5066     flow_extract(packet, &flow);
5067     flow.in_port.ofp_port = OFPP_NONE;
5068
5069     xport = xport_lookup(xcfg, ofport);
5070     if (!xport) {
5071         return EINVAL;
5072     }
5073     output.port = xport->ofp_port;
5074     output.max_len = 0;
5075
5076     return ofproto_dpif_execute_actions(xport->xbridge->ofproto, &flow, NULL,
5077                                         &output.ofpact, sizeof output,
5078                                         packet);
5079 }
5080
5081 struct xlate_cache *
5082 xlate_cache_new(void)
5083 {
5084     struct xlate_cache *xcache = xmalloc(sizeof *xcache);
5085
5086     ofpbuf_init(&xcache->entries, 512);
5087     return xcache;
5088 }
5089
5090 static struct xc_entry *
5091 xlate_cache_add_entry(struct xlate_cache *xcache, enum xc_type type)
5092 {
5093     struct xc_entry *entry;
5094
5095     entry = ofpbuf_put_zeros(&xcache->entries, sizeof *entry);
5096     entry->type = type;
5097
5098     return entry;
5099 }
5100
5101 static void
5102 xlate_cache_netdev(struct xc_entry *entry, const struct dpif_flow_stats *stats)
5103 {
5104     if (entry->u.dev.tx) {
5105         netdev_vport_inc_tx(entry->u.dev.tx, stats);
5106     }
5107     if (entry->u.dev.rx) {
5108         netdev_vport_inc_rx(entry->u.dev.rx, stats);
5109     }
5110     if (entry->u.dev.bfd) {
5111         bfd_account_rx(entry->u.dev.bfd, stats);
5112     }
5113 }
5114
5115 static void
5116 xlate_cache_normal(struct ofproto_dpif *ofproto, struct flow *flow, int vlan)
5117 {
5118     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5119     struct xbridge *xbridge;
5120     struct xbundle *xbundle;
5121     struct flow_wildcards wc;
5122
5123     xbridge = xbridge_lookup(xcfg, ofproto);
5124     if (!xbridge) {
5125         return;
5126     }
5127
5128     xbundle = lookup_input_bundle(xbridge, flow->in_port.ofp_port, false,
5129                                   NULL);
5130     if (!xbundle) {
5131         return;
5132     }
5133
5134     update_learning_table(xbridge, flow, &wc, vlan, xbundle);
5135 }
5136
5137 /* Push stats and perform side effects of flow translation. */
5138 void
5139 xlate_push_stats(struct xlate_cache *xcache,
5140                  const struct dpif_flow_stats *stats)
5141 {
5142     struct xc_entry *entry;
5143     struct ofpbuf entries = xcache->entries;
5144     uint8_t dmac[ETH_ADDR_LEN];
5145
5146     if (!stats->n_packets) {
5147         return;
5148     }
5149
5150     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5151         switch (entry->type) {
5152         case XC_RULE:
5153             rule_dpif_credit_stats(entry->u.rule, stats);
5154             break;
5155         case XC_BOND:
5156             bond_account(entry->u.bond.bond, entry->u.bond.flow,
5157                          entry->u.bond.vid, stats->n_bytes);
5158             break;
5159         case XC_NETDEV:
5160             xlate_cache_netdev(entry, stats);
5161             break;
5162         case XC_NETFLOW:
5163             netflow_flow_update(entry->u.nf.netflow, entry->u.nf.flow,
5164                                 entry->u.nf.iface, stats);
5165             break;
5166         case XC_MIRROR:
5167             mirror_update_stats(entry->u.mirror.mbridge,
5168                                 entry->u.mirror.mirrors,
5169                                 stats->n_packets, stats->n_bytes);
5170             break;
5171         case XC_LEARN:
5172             ofproto_dpif_flow_mod(entry->u.learn.ofproto, entry->u.learn.fm);
5173             break;
5174         case XC_NORMAL:
5175             xlate_cache_normal(entry->u.normal.ofproto, entry->u.normal.flow,
5176                                entry->u.normal.vlan);
5177             break;
5178         case XC_FIN_TIMEOUT:
5179             xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
5180                                 entry->u.fin.idle, entry->u.fin.hard);
5181             break;
5182         case XC_GROUP:
5183             group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
5184                                     stats);
5185             break;
5186         case XC_TNL_ARP:
5187             /* Lookup arp to avoid arp timeout. */
5188             tnl_arp_lookup(entry->u.tnl_arp_cache.br_name, entry->u.tnl_arp_cache.d_ip, dmac);
5189             break;
5190         default:
5191             OVS_NOT_REACHED();
5192         }
5193     }
5194 }
5195
5196 static void
5197 xlate_dev_unref(struct xc_entry *entry)
5198 {
5199     if (entry->u.dev.tx) {
5200         netdev_close(entry->u.dev.tx);
5201     }
5202     if (entry->u.dev.rx) {
5203         netdev_close(entry->u.dev.rx);
5204     }
5205     if (entry->u.dev.bfd) {
5206         bfd_unref(entry->u.dev.bfd);
5207     }
5208 }
5209
/* Releases the 'netflow'/'flow' pair held by an XC_NETFLOW cache entry:
 * calls netflow_flow_clear() for 'flow' on 'netflow', then drops the
 * reference on 'netflow' and frees 'flow'.
 *
 * The order matters: 'netflow' and 'flow' are both still needed by
 * netflow_flow_clear(), so they are released only after it returns. */
static void
xlate_cache_clear_netflow(struct netflow *netflow, struct flow *flow)
{
    netflow_flow_clear(netflow, flow);
    netflow_unref(netflow);
    free(flow);
}
5217
5218 void
5219 xlate_cache_clear(struct xlate_cache *xcache)
5220 {
5221     struct xc_entry *entry;
5222     struct ofpbuf entries;
5223
5224     if (!xcache) {
5225         return;
5226     }
5227
5228     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5229         switch (entry->type) {
5230         case XC_RULE:
5231             rule_dpif_unref(entry->u.rule);
5232             break;
5233         case XC_BOND:
5234             free(entry->u.bond.flow);
5235             bond_unref(entry->u.bond.bond);
5236             break;
5237         case XC_NETDEV:
5238             xlate_dev_unref(entry);
5239             break;
5240         case XC_NETFLOW:
5241             xlate_cache_clear_netflow(entry->u.nf.netflow, entry->u.nf.flow);
5242             break;
5243         case XC_MIRROR:
5244             mbridge_unref(entry->u.mirror.mbridge);
5245             break;
5246         case XC_LEARN:
5247             free(entry->u.learn.fm);
5248             ofpbuf_delete(entry->u.learn.ofpacts);
5249             break;
5250         case XC_NORMAL:
5251             free(entry->u.normal.flow);
5252             break;
5253         case XC_FIN_TIMEOUT:
5254             /* 'u.fin.rule' is always already held as a XC_RULE, which
5255              * has already released it's reference above. */
5256             break;
5257         case XC_GROUP:
5258             group_dpif_unref(entry->u.group.group);
5259             break;
5260         case XC_TNL_ARP:
5261             break;
5262         default:
5263             OVS_NOT_REACHED();
5264         }
5265     }
5266
5267     ofpbuf_clear(&xcache->entries);
5268 }
5269
5270 void
5271 xlate_cache_delete(struct xlate_cache *xcache)
5272 {
5273     xlate_cache_clear(xcache);
5274     ofpbuf_uninit(&xcache->entries);
5275     free(xcache);
5276 }