ofproto/ofproto-dpif-xlate.c

   1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
   2  *
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License. */
  14
  15 #include <config.h>
  16
  17 #include "ofproto/ofproto-dpif-xlate.h"
  18
  19 #include <errno.h>
  20 #include <arpa/inet.h>
  21 #include <net/if.h>
  22 #include <sys/socket.h>
  23 #include <netinet/in.h>
  24
  25 #include "tnl-neigh-cache.h"
  26 #include "bfd.h"
  27 #include "bitmap.h"
  28 #include "bond.h"
  29 #include "bundle.h"
  30 #include "byte-order.h"
  31 #include "cfm.h"
  32 #include "connmgr.h"
  33 #include "coverage.h"
  34 #include "dp-packet.h"
  35 #include "dpif.h"
  36 #include "dynamic-string.h"
  37 #include "in-band.h"
  38 #include "lacp.h"
  39 #include "learn.h"
  40 #include "list.h"
  41 #include "ovs-lldp.h"
  42 #include "mac-learning.h"
  43 #include "mcast-snooping.h"
  44 #include "meta-flow.h"
  45 #include "multipath.h"
  46 #include "netdev-vport.h"
  47 #include "netlink.h"
  48 #include "nx-match.h"
  49 #include "odp-execute.h"
  50 #include "ofp-actions.h"
  51 #include "ofproto/ofproto-dpif-ipfix.h"
  52 #include "ofproto/ofproto-dpif-mirror.h"
  53 #include "ofproto/ofproto-dpif-monitor.h"
  54 #include "ofproto/ofproto-dpif-sflow.h"
  55 #include "ofproto/ofproto-dpif.h"
  56 #include "ofproto/ofproto-provider.h"
  57 #include "packets.h"
  58 #include "ovs-router.h"
  59 #include "tnl-ports.h"
  60 #include "tunnel.h"
  61 #include "openvswitch/vlog.h"
  62
  63 COVERAGE_DEFINE(xlate_actions);
  64 COVERAGE_DEFINE(xlate_actions_oversize);
  65 COVERAGE_DEFINE(xlate_actions_too_many_output);
  66
  67 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
  68
  69 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  70  * flow translation. */
  71 #define MAX_RESUBMIT_RECURSION 64
  72 #define MAX_INTERNAL_RESUBMITS 1   /* Max resbmits allowed using rules in
  73                                       internal table. */
  74
  75 /* Maximum number of resubmit actions in a flow translation, whether they are
  76  * recursive or not. */
  77 #define MAX_RESUBMITS (MAX_RESUBMIT_RECURSION * MAX_RESUBMIT_RECURSION)
  78
  79 struct xbridge {
  80     struct hmap_node hmap_node;   /* Node in global 'xbridges' map. */
  81     struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
  82
  83     struct ovs_list xbundles;     /* Owned xbundles. */
  84     struct hmap xports;           /* Indexed by ofp_port. */
  85
  86     char *name;                   /* Name used in log messages. */
  87     struct dpif *dpif;            /* Datapath interface. */
  88     struct mac_learning *ml;      /* Mac learning handle. */
  89     struct mcast_snooping *ms;    /* Multicast Snooping handle. */
  90     struct mbridge *mbridge;      /* Mirroring. */
  91     struct dpif_sflow *sflow;     /* SFlow handle, or null. */
  92     struct dpif_ipfix *ipfix;     /* Ipfix handle, or null. */
  93     struct netflow *netflow;      /* Netflow handle, or null. */
  94     struct stp *stp;              /* STP or null if disabled. */
  95     struct rstp *rstp;            /* RSTP or null if disabled. */
  96
  97     bool has_in_band;             /* Bridge has in band control? */
  98     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
  99
 100     /* Datapath feature support. */
 101     struct dpif_backer_support support;
 102 };
 103
 104 struct xbundle {
 105     struct hmap_node hmap_node;    /* In global 'xbundles' map. */
 106     struct ofbundle *ofbundle;     /* Key in global 'xbundles' map. */
 107
 108     struct ovs_list list_node;     /* In parent 'xbridges' list. */
 109     struct xbridge *xbridge;       /* Parent xbridge. */
 110
 111     struct ovs_list xports;        /* Contains "struct xport"s. */
 112
 113     char *name;                    /* Name used in log messages. */
 114     struct bond *bond;             /* Nonnull iff more than one port. */
 115     struct lacp *lacp;             /* LACP handle or null. */
 116
 117     enum port_vlan_mode vlan_mode; /* VLAN mode. */
 118     int vlan;                      /* -1=trunk port, else a 12-bit VLAN ID. */
 119     unsigned long *trunks;         /* Bitmap of trunked VLANs, if 'vlan' == -1.
 120                                     * NULL if all VLANs are trunked. */
 121     bool use_priority_tags;        /* Use 802.1p tag for frames in VLAN 0? */
 122     bool floodable;                /* No port has OFPUTIL_PC_NO_FLOOD set? */
 123 };
 124
 125 struct xport {
 126     struct hmap_node hmap_node;      /* Node in global 'xports' map. */
 127     struct ofport_dpif *ofport;      /* Key in global 'xports map. */
 128
 129     struct hmap_node ofp_node;       /* Node in parent xbridge 'xports' map. */
 130     ofp_port_t ofp_port;             /* Key in parent xbridge 'xports' map. */
 131
 132     odp_port_t odp_port;             /* Datapath port number or ODPP_NONE. */
 133
 134     struct ovs_list bundle_node;     /* In parent xbundle (if it exists). */
 135     struct xbundle *xbundle;         /* Parent xbundle or null. */
 136
 137     struct netdev *netdev;           /* 'ofport''s netdev. */
 138
 139     struct xbridge *xbridge;         /* Parent bridge. */
 140     struct xport *peer;              /* Patch port peer or null. */
 141
 142     enum ofputil_port_config config; /* OpenFlow port configuration. */
 143     enum ofputil_port_state state;   /* OpenFlow port state. */
 144     int stp_port_no;                 /* STP port number or -1 if not in use. */
 145     struct rstp_port *rstp_port;     /* RSTP port or null. */
 146
 147     struct hmap skb_priorities;      /* Map of 'skb_priority_to_dscp's. */
 148
 149     bool may_enable;                 /* May be enabled in bonds. */
 150     bool is_tunnel;                  /* Is a tunnel port. */
 151
 152     struct cfm *cfm;                 /* CFM handle or null. */
 153     struct bfd *bfd;                 /* BFD handle or null. */
 154     struct lldp *lldp;               /* LLDP handle or null. */
 155 };
 156
 157 struct xlate_ctx {
 158     struct xlate_in *xin;
 159     struct xlate_out *xout;
 160
 161     const struct xbridge *xbridge;
 162
 163     /* Flow tables version at the beginning of the translation. */
 164     cls_version_t tables_version;
 165
 166     /* Flow at the last commit. */
 167     struct flow base_flow;
 168
 169     /* Tunnel IP destination address as received.  This is stored separately
 170      * as the base_flow.tunnel is cleared on init to reflect the datapath
 171      * behavior.  Used to make sure not to send tunneled output to ourselves,
 172      * which might lead to an infinite loop.  This could happen easily
 173      * if a tunnel is marked as 'ip_remote=flow', and the flow does not
 174      * actually set the tun_dst field. */
 175     struct in6_addr orig_tunnel_ipv6_dst;
 176
 177     /* Stack for the push and pop actions.  Each stack element is of type
 178      * "union mf_subvalue". */
 179     struct ofpbuf stack;
 180
 181     /* The rule that we are currently translating, or NULL. */
 182     struct rule_dpif *rule;
 183
 184     /* Flow translation populates this with wildcards relevant in translation.
 185      * When 'xin->wc' is nonnull, this is the same pointer.  When 'xin->wc' is
 186      * null, this is a pointer to uninitialized scratch memory.  This allows
 187      * code to blindly write to 'ctx->wc' without worrying about whether the
 188      * caller really wants wildcards. */
 189     struct flow_wildcards *wc;
 190
 191     /* Output buffer for datapath actions.  When 'xin->odp_actions' is nonnull,
 192      * this is the same pointer.  When 'xin->odp_actions' is null, this points
 193      * to a scratch ofpbuf.  This allows code to add actions to
 194      * 'ctx->odp_actions' without worrying about whether the caller really
 195      * wants actions. */
 196     struct ofpbuf *odp_actions;
 197
 198     /* Resubmit statistics, via xlate_table_action(). */
 199     int recurse;                /* Current resubmit nesting depth. */
 200     int resubmits;              /* Total number of resubmits. */
 201     bool in_group;              /* Currently translating ofgroup, if true. */
 202     bool in_action_set;         /* Currently translating action_set, if true. */
 203
 204     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
 205     ovs_be64 rule_cookie;       /* Cookie of the rule being translated. */
 206     uint32_t orig_skb_priority; /* Priority when packet arrived. */
 207     uint32_t sflow_n_outputs;   /* Number of output ports. */
 208     odp_port_t sflow_odp_port;  /* Output port for composing sFlow action. */
 209     ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
 210     bool exit;                  /* No further actions should be processed. */
 211     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 212
 213    /* These are used for non-bond recirculation.  The recirculation IDs are
 214     * stored in xout and must be associated with a datapath flow (ukey),
 215     * otherwise they will be freed when the xout is uninitialized.
 216     *
 217     *
 218     * Steps in Recirculation Translation
 219     * ==================================
 220     *
 221     * At some point during translation, the code recognizes the need for
 222     * recirculation.  For example, recirculation is necessary when, after
 223     * popping the last MPLS label, an action or a match tries to examine or
 224     * modify a field that has been newly revealed following the MPLS label.
 225     *
 226     * The simplest part of the work to be done is to commit existing changes to
 227     * the packet, which produces datapath actions corresponding to the changes,
 228     * and after this, add an OVS_ACTION_ATTR_RECIRC datapath action.
 229     *
 230     * The main problem here is preserving state.  When the datapath executes
 231     * OVS_ACTION_ATTR_RECIRC, it will upcall to userspace to get a translation
 232     * for the post-recirculation actions.  At this point userspace has to
 233     * resume the translation where it left off, which means that it has to
 234     * execute the following:
 235     *
 236     *     - The action that prompted recirculation, and any actions following
 237     *       it within the same flow.
 238     *
 239     *     - If the action that prompted recirculation was invoked within a
 240     *       NXAST_RESUBMIT, then any actions following the resubmit.  These
 241     *       "resubmit"s can be nested, so this has to go all the way up the
 242     *       control stack.
 243     *
 244     *     - The OpenFlow 1.1+ action set.
 245     *
 246     * State that actions and flow table lookups can depend on, such as the
 247     * following, must also be preserved:
 248     *
 249     *     - Metadata fields (input port, registers, OF1.1+ metadata, ...).
 250     *
 251     *     - Action set, stack
 252     *
 253     *     - The table ID and cookie of the flow being translated at each level
 254     *       of the control stack (since OFPAT_CONTROLLER actions send these to
 255     *       the controller).
 256     *
 257     * Translation allows for the control of this state preservation via these
 258     * members.  When a need for recirculation is identified, the translation
 259     * process:
 260     *
 261     * 1. Sets 'recirc_action_offset' to the current size of 'action_set'.  The
 262     *    action set is part of what needs to be preserved, so this allows the
 263     *    action set and the additional state to share the 'action_set' buffer.
 264     *    Later steps can tell that setup for recirculation is in progress from
 265     *    the nonnegative value of 'recirc_action_offset'.
 266     *
 267     * 2. Sets 'exit' to true to tell later steps that we're exiting from the
 268     *    translation process.
 269     *
 270     * 3. Adds an OFPACT_UNROLL_XLATE action to 'action_set'.  This action
 271     *    holds the current table ID and cookie so that they can be restored
 272     *    during a post-recirculation upcall translation.
 273     *
 274     * 4. Adds the action that prompted recirculation and any actions following
 275     *    it within the same flow to 'action_set', so that they can be executed
 276     *    during a post-recirculation upcall translation.
 277     *
 278     * 5. Returns.
 279     *
 280     * 6. The action that prompted recirculation might be nested in a stack of
 281     *    nested "resubmit"s that have actions remaining.  Each of these notices
 282     *    that we're exiting (from 'exit') and that recirculation setup is in
 283     *    progress (from 'recirc_action_offset') and responds by adding more
 284     *    OFPACT_UNROLL_XLATE actions to 'action_set', as necessary, and any
 285     *    actions that were yet unprocessed.
 286     *
 287     * The caller stores all the state produced by this process associated with
 288     * the recirculation ID.  For post-recirculation upcall translation, the
 289     * caller passes it back in for the new translation to execute.  The
 290     * process yielded a set of ofpacts that can be translated directly, so it
 291     * is not much of a special case at that point.
 292     */
 293     int recirc_action_offset;   /* Offset in 'action_set' to actions to be
 294                                  * executed after recirculation, or -1. */
 295     int last_unroll_offset;     /* Offset in 'action_set' to the latest unroll
 296                                  * action, or -1. */
 297
 298     /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
 299      * This is a trigger for recirculation in cases where translating an action
 300      * or looking up a flow requires access to the fields of the packet after
 301      * the MPLS label stack that was originally present. */
 302     bool was_mpls;
 303
 304     /* True if conntrack has been performed on this packet during processing
 305      * on the current bridge. This is used to determine whether conntrack
 306      * state from the datapath should be honored after recirculation. */
 307     bool conntracked;
 308
 309     /* Pointer to an embedded NAT action in a conntrack action, or NULL. */
 310     struct ofpact_nat *ct_nat_action;
 311
 312     /* OpenFlow 1.1+ action set.
 313      *
 314      * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
 315      * When translation is otherwise complete, ofpacts_execute_action_set()
 316      * converts it to a set of "struct ofpact"s that can be translated into
 317      * datapath actions. */
 318     bool action_set_has_group;  /* Action set contains OFPACT_GROUP? */
 319     struct ofpbuf action_set;   /* Action set. */
 320
 321     enum xlate_error error;     /* Translation failed. */
 322 };
 323
 324 const char *xlate_strerror(enum xlate_error error)
 325 {
 326     switch (error) {
 327     case XLATE_OK:
 328         return "OK";
 329     case XLATE_BRIDGE_NOT_FOUND:
 330         return "Bridge not found";
 331     case XLATE_RECURSION_TOO_DEEP:
 332         return "Recursion too deep";
 333     case XLATE_TOO_MANY_RESUBMITS:
 334         return "Too many resubmits";
 335     case XLATE_STACK_TOO_DEEP:
 336         return "Stack too deep";
 337     case XLATE_NO_RECIRCULATION_CONTEXT:
 338         return "No recirculation context";
 339     case XLATE_RECIRCULATION_CONFLICT:
 340         return "Recirculation conflict";
 341     case XLATE_TOO_MANY_MPLS_LABELS:
 342         return "Too many MPLS labels";
 343     }
 344     return "Unknown error";
 345 }
 346
 347 static void xlate_action_set(struct xlate_ctx *ctx);
 348 static void xlate_commit_actions(struct xlate_ctx *ctx);
 349
 350 static void
 351 ctx_trigger_recirculation(struct xlate_ctx *ctx)
 352 {
 353     ctx->exit = true;
 354     ctx->recirc_action_offset = ctx->action_set.size;
 355 }
 356
 357 static bool
 358 ctx_first_recirculation_action(const struct xlate_ctx *ctx)
 359 {
 360     return ctx->recirc_action_offset == ctx->action_set.size;
 361 }
 362
 363 static inline bool
 364 exit_recirculates(const struct xlate_ctx *ctx)
 365 {
 366     /* When recirculating the 'recirc_action_offset' has a non-negative value.
 367      */
 368     return ctx->recirc_action_offset >= 0;
 369 }
 370
 371 static void
 372 ctx_cancel_recirculation(struct xlate_ctx *ctx)
 373 {
 374     if (exit_recirculates(ctx)) {
 375         ctx->action_set.size = ctx->recirc_action_offset;
 376         ctx->recirc_action_offset = -1;
 377         ctx->last_unroll_offset = -1;
 378     }
 379 }
 380
 381 static void compose_recirculate_action(struct xlate_ctx *ctx);
 382
 383 /* A controller may use OFPP_NONE as the ingress port to indicate that
 384  * it did not arrive on a "real" port.  'ofpp_none_bundle' exists for
 385  * when an input bundle is needed for validation (e.g., mirroring or
 386  * OFPP_NORMAL processing).  It is not connected to an 'ofproto' or have
 387  * any 'port' structs, so care must be taken when dealing with it. */
 388 static struct xbundle ofpp_none_bundle = {
 389     .name      = "OFPP_NONE",
 390     .vlan_mode = PORT_VLAN_TRUNK
 391 };
 392
 393 /* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
 394  * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
 395  * traffic egressing the 'ofport' with that priority should be marked with. */
 396 struct skb_priority_to_dscp {
 397     struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */
 398     uint32_t skb_priority;      /* Priority of this queue (see struct flow). */
 399
 400     uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
 401 };
 402
 403 enum xc_type {
 404     XC_RULE,
 405     XC_BOND,
 406     XC_NETDEV,
 407     XC_NETFLOW,
 408     XC_MIRROR,
 409     XC_LEARN,
 410     XC_NORMAL,
 411     XC_FIN_TIMEOUT,
 412     XC_GROUP,
 413     XC_TNL_NEIGH,
 414 };
 415
 416 /* xlate_cache entries hold enough information to perform the side effects of
 417  * xlate_actions() for a rule, without needing to perform rule translation
 418  * from scratch. The primary usage of these is to submit statistics to objects
 419  * that a flow relates to, although they may be used for other effects as well
 420  * (for instance, refreshing hard timeouts for learned flows). */
 421 struct xc_entry {
 422     enum xc_type type;
 423     union {
 424         struct rule_dpif *rule;
 425         struct {
 426             struct netdev *tx;
 427             struct netdev *rx;
 428             struct bfd *bfd;
 429         } dev;
 430         struct {
 431             struct netflow *netflow;
 432             struct flow *flow;
 433             ofp_port_t iface;
 434         } nf;
 435         struct {
 436             struct mbridge *mbridge;
 437             mirror_mask_t mirrors;
 438         } mirror;
 439         struct {
 440             struct bond *bond;
 441             struct flow *flow;
 442             uint16_t vid;
 443         } bond;
 444         struct {
 445             struct ofproto_dpif *ofproto;
 446             struct ofputil_flow_mod *fm;
 447             struct ofpbuf *ofpacts;
 448         } learn;
 449         struct {
 450             struct ofproto_dpif *ofproto;
 451             struct flow *flow;
 452             int vlan;
 453         } normal;
 454         struct {
 455             struct rule_dpif *rule;
 456             uint16_t idle;
 457             uint16_t hard;
 458         } fin;
 459         struct {
 460             struct group_dpif *group;
 461             struct ofputil_bucket *bucket;
 462         } group;
 463         struct {
 464             char br_name[IFNAMSIZ];
 465             struct in6_addr d_ipv6;
 466         } tnl_neigh_cache;
 467     } u;
 468 };
 469
 470 #define XC_ENTRY_FOR_EACH(ENTRY, ENTRIES, XCACHE)               \
 471     ENTRIES = XCACHE->entries;                                  \
 472     for (ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY);      \
 473          ENTRY;                                                 \
 474          ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY))
 475
 476 struct xlate_cache {
 477     struct ofpbuf entries;
 478 };
 479
 480 /* Xlate config contains hash maps of all bridges, bundles and ports.
 481  * Xcfgp contains the pointer to the current xlate configuration.
 482  * When the main thread needs to change the configuration, it copies xcfgp to
 483  * new_xcfg and edits new_xcfg. This enables the use of RCU locking which
 484  * does not block handler and revalidator threads. */
 485 struct xlate_cfg {
 486     struct hmap xbridges;
 487     struct hmap xbundles;
 488     struct hmap xports;
 489 };
 490 static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
 491 static struct xlate_cfg *new_xcfg = NULL;
 492
 493 static bool may_receive(const struct xport *, struct xlate_ctx *);
 494 static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
 495                              struct xlate_ctx *);
 496 static void xlate_normal(struct xlate_ctx *);
 497 static inline void xlate_report(struct xlate_ctx *, const char *, ...)
 498     OVS_PRINTF_FORMAT(2, 3);
 499 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
 500                                uint8_t table_id, bool may_packet_in,
 501                                bool honor_table_miss);
 502 static bool input_vid_is_valid(uint16_t vid, struct xbundle *, bool warn);
 503 static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid);
 504 static void output_normal(struct xlate_ctx *, const struct xbundle *,
 505                           uint16_t vlan);
 506
 507 /* Optional bond recirculation parameter to compose_output_action(). */
 508 struct xlate_bond_recirc {
 509     uint32_t recirc_id;  /* !0 Use recirculation instead of output. */
 510     uint8_t  hash_alg;   /* !0 Compute hash for recirc before. */
 511     uint32_t hash_basis;  /* Compute hash for recirc before. */
 512 };
 513
 514 static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
 515                                   const struct xlate_bond_recirc *xr);
 516
 517 static struct xbridge *xbridge_lookup(struct xlate_cfg *,
 518                                       const struct ofproto_dpif *);
 519 static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
 520                                               const struct uuid *);
 521 static struct xbundle *xbundle_lookup(struct xlate_cfg *,
 522                                       const struct ofbundle *);
 523 static struct xport *xport_lookup(struct xlate_cfg *,
 524                                   const struct ofport_dpif *);
 525 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
 526 static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
 527                                                      uint32_t skb_priority);
 528 static void clear_skb_priorities(struct xport *);
 529 static size_t count_skb_priorities(const struct xport *);
 530 static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
 531                                    uint8_t *dscp);
 532
 533 static struct xc_entry *xlate_cache_add_entry(struct xlate_cache *xc,
 534                                               enum xc_type type);
 535 static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
 536 static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
 537 static void xlate_xport_init(struct xlate_cfg *, struct xport *);
 538 static void xlate_xbridge_set(struct xbridge *, struct dpif *,
 539                               const struct mac_learning *, struct stp *,
 540                               struct rstp *, const struct mcast_snooping *,
 541                               const struct mbridge *,
 542                               const struct dpif_sflow *,
 543                               const struct dpif_ipfix *,
 544                               const struct netflow *,
 545                               bool forward_bpdu, bool has_in_band,
 546                               const struct dpif_backer_support *);
 547 static void xlate_xbundle_set(struct xbundle *xbundle,
 548                               enum port_vlan_mode vlan_mode, int vlan,
 549                               unsigned long *trunks, bool use_priority_tags,
 550                               const struct bond *bond, const struct lacp *lacp,
 551                               bool floodable);
 552 static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 553                             const struct netdev *netdev, const struct cfm *cfm,
 554                             const struct bfd *bfd, const struct lldp *lldp,
 555                             int stp_port_no, const struct rstp_port *rstp_port,
 556                             enum ofputil_port_config config,
 557                             enum ofputil_port_state state, bool is_tunnel,
 558                             bool may_enable);
 559 static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
 560 static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
 561 static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
 562 static void xlate_xbridge_copy(struct xbridge *);
 563 static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
 564 static void xlate_xport_copy(struct xbridge *, struct xbundle *,
 565                              struct xport *);
 566 static void xlate_xcfg_free(struct xlate_cfg *);
 567
 568 static inline void
 569 xlate_report(struct xlate_ctx *ctx, const char *format, ...)
 570 {
 571     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
 572         va_list args;
 573
 574         va_start(args, format);
 575         ctx->xin->report_hook(ctx->xin, ctx->recurse, format, args);
 576         va_end(args);
 577     }
 578 }
 579
 580 static struct vlog_rate_limit error_report_rl = VLOG_RATE_LIMIT_INIT(1, 5);
 581
 582 #define XLATE_REPORT_ERROR(CTX, ...)                    \
 583     do {                                                \
 584         if (OVS_UNLIKELY((CTX)->xin->report_hook)) {    \
 585             xlate_report(CTX, __VA_ARGS__);             \
 586         } else {                                        \
 587             VLOG_ERR_RL(&error_report_rl, __VA_ARGS__); \
 588         }                                               \
 589     } while (0)
 590
 591 static inline void
 592 xlate_report_actions(struct xlate_ctx *ctx, const char *title,
 593                      const struct ofpact *ofpacts, size_t ofpacts_len)
 594 {
 595     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
 596         struct ds s = DS_EMPTY_INITIALIZER;
 597         ofpacts_format(ofpacts, ofpacts_len, &s);
 598         xlate_report(ctx, "%s: %s", title, ds_cstr(&s));
 599         ds_destroy(&s);
 600     }
 601 }
 602
 603 static void
 604 xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 605 {
 606     list_init(&xbridge->xbundles);
 607     hmap_init(&xbridge->xports);
 608     hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
 609                 hash_pointer(xbridge->ofproto, 0));
 610 }
 611
 612 static void
 613 xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
 614 {
 615     list_init(&xbundle->xports);
 616     list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
 617     hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
 618                 hash_pointer(xbundle->ofbundle, 0));
 619 }
 620
 621 static void
 622 xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
 623 {
 624     hmap_init(&xport->skb_priorities);
 625     hmap_insert(&xcfg->xports, &xport->hmap_node,
 626                 hash_pointer(xport->ofport, 0));
 627     hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
 628                 hash_ofp_port(xport->ofp_port));
 629 }
 630
 631 static void
 632 xlate_xbridge_set(struct xbridge *xbridge,
 633                   struct dpif *dpif,
 634                   const struct mac_learning *ml, struct stp *stp,
 635                   struct rstp *rstp, const struct mcast_snooping *ms,
 636                   const struct mbridge *mbridge,
 637                   const struct dpif_sflow *sflow,
 638                   const struct dpif_ipfix *ipfix,
 639                   const struct netflow *netflow,
 640                   bool forward_bpdu, bool has_in_band,
 641                   const struct dpif_backer_support *support)
 642 {
 643     if (xbridge->ml != ml) {
 644         mac_learning_unref(xbridge->ml);
 645         xbridge->ml = mac_learning_ref(ml);
 646     }
 647
 648     if (xbridge->ms != ms) {
 649         mcast_snooping_unref(xbridge->ms);
 650         xbridge->ms = mcast_snooping_ref(ms);
 651     }
 652
 653     if (xbridge->mbridge != mbridge) {
 654         mbridge_unref(xbridge->mbridge);
 655         xbridge->mbridge = mbridge_ref(mbridge);
 656     }
 657
 658     if (xbridge->sflow != sflow) {
 659         dpif_sflow_unref(xbridge->sflow);
 660         xbridge->sflow = dpif_sflow_ref(sflow);
 661     }
 662
 663     if (xbridge->ipfix != ipfix) {
 664         dpif_ipfix_unref(xbridge->ipfix);
 665         xbridge->ipfix = dpif_ipfix_ref(ipfix);
 666     }
 667
 668     if (xbridge->stp != stp) {
 669         stp_unref(xbridge->stp);
 670         xbridge->stp = stp_ref(stp);
 671     }
 672
 673     if (xbridge->rstp != rstp) {
 674         rstp_unref(xbridge->rstp);
 675         xbridge->rstp = rstp_ref(rstp);
 676     }
 677
 678     if (xbridge->netflow != netflow) {
 679         netflow_unref(xbridge->netflow);
 680         xbridge->netflow = netflow_ref(netflow);
 681     }
 682
 683     xbridge->dpif = dpif;
 684     xbridge->forward_bpdu = forward_bpdu;
 685     xbridge->has_in_band = has_in_band;
 686     xbridge->support = *support;
 687 }
 688
 689 static void
 690 xlate_xbundle_set(struct xbundle *xbundle,
 691                   enum port_vlan_mode vlan_mode, int vlan,
 692                   unsigned long *trunks, bool use_priority_tags,
 693                   const struct bond *bond, const struct lacp *lacp,
 694                   bool floodable)
 695 {
 696     ovs_assert(xbundle->xbridge);
 697
 698     xbundle->vlan_mode = vlan_mode;
 699     xbundle->vlan = vlan;
 700     xbundle->trunks = trunks;
 701     xbundle->use_priority_tags = use_priority_tags;
 702     xbundle->floodable = floodable;
 703
 704     if (xbundle->bond != bond) {
 705         bond_unref(xbundle->bond);
 706         xbundle->bond = bond_ref(bond);
 707     }
 708
 709     if (xbundle->lacp != lacp) {
 710         lacp_unref(xbundle->lacp);
 711         xbundle->lacp = lacp_ref(lacp);
 712     }
 713 }
 714
 715 static void
 716 xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 717                 const struct netdev *netdev, const struct cfm *cfm,
 718                 const struct bfd *bfd, const struct lldp *lldp, int stp_port_no,
 719                 const struct rstp_port* rstp_port,
 720                 enum ofputil_port_config config, enum ofputil_port_state state,
 721                 bool is_tunnel, bool may_enable)
 722 {
 723     xport->config = config;
 724     xport->state = state;
 725     xport->stp_port_no = stp_port_no;
 726     xport->is_tunnel = is_tunnel;
 727     xport->may_enable = may_enable;
 728     xport->odp_port = odp_port;
 729
 730     if (xport->rstp_port != rstp_port) {
 731         rstp_port_unref(xport->rstp_port);
 732         xport->rstp_port = rstp_port_ref(rstp_port);
 733     }
 734
 735     if (xport->cfm != cfm) {
 736         cfm_unref(xport->cfm);
 737         xport->cfm = cfm_ref(cfm);
 738     }
 739
 740     if (xport->bfd != bfd) {
 741         bfd_unref(xport->bfd);
 742         xport->bfd = bfd_ref(bfd);
 743     }
 744
 745     if (xport->lldp != lldp) {
 746         lldp_unref(xport->lldp);
 747         xport->lldp = lldp_ref(lldp);
 748     }
 749
 750     if (xport->netdev != netdev) {
 751         netdev_close(xport->netdev);
 752         xport->netdev = netdev_ref(netdev);
 753     }
 754 }
 755
 756 static void
 757 xlate_xbridge_copy(struct xbridge *xbridge)
 758 {
 759     struct xbundle *xbundle;
 760     struct xport *xport;
 761     struct xbridge *new_xbridge = xzalloc(sizeof *xbridge);
 762     new_xbridge->ofproto = xbridge->ofproto;
 763     new_xbridge->name = xstrdup(xbridge->name);
 764     xlate_xbridge_init(new_xcfg, new_xbridge);
 765
 766     xlate_xbridge_set(new_xbridge,
 767                       xbridge->dpif, xbridge->ml, xbridge->stp,
 768                       xbridge->rstp, xbridge->ms, xbridge->mbridge,
 769                       xbridge->sflow, xbridge->ipfix, xbridge->netflow,
 770                       xbridge->forward_bpdu, xbridge->has_in_band,
 771                       &xbridge->support);
 772     LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
 773         xlate_xbundle_copy(new_xbridge, xbundle);
 774     }
 775
 776     /* Copy xports which are not part of a xbundle */
 777     HMAP_FOR_EACH (xport, ofp_node, &xbridge->xports) {
 778         if (!xport->xbundle) {
 779             xlate_xport_copy(new_xbridge, NULL, xport);
 780         }
 781     }
 782 }
 783
 784 static void
 785 xlate_xbundle_copy(struct xbridge *xbridge, struct xbundle *xbundle)
 786 {
 787     struct xport *xport;
 788     struct xbundle *new_xbundle = xzalloc(sizeof *xbundle);
 789     new_xbundle->ofbundle = xbundle->ofbundle;
 790     new_xbundle->xbridge = xbridge;
 791     new_xbundle->name = xstrdup(xbundle->name);
 792     xlate_xbundle_init(new_xcfg, new_xbundle);
 793
 794     xlate_xbundle_set(new_xbundle, xbundle->vlan_mode,
 795                       xbundle->vlan, xbundle->trunks,
 796                       xbundle->use_priority_tags, xbundle->bond, xbundle->lacp,
 797                       xbundle->floodable);
 798     LIST_FOR_EACH (xport, bundle_node, &xbundle->xports) {
 799         xlate_xport_copy(xbridge, new_xbundle, xport);
 800     }
 801 }
 802
 803 static void
 804 xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
 805                  struct xport *xport)
 806 {
 807     struct skb_priority_to_dscp *pdscp, *new_pdscp;
 808     struct xport *new_xport = xzalloc(sizeof *xport);
 809     new_xport->ofport = xport->ofport;
 810     new_xport->ofp_port = xport->ofp_port;
 811     new_xport->xbridge = xbridge;
 812     xlate_xport_init(new_xcfg, new_xport);
 813
 814     xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
 815                     xport->bfd, xport->lldp, xport->stp_port_no,
 816                     xport->rstp_port, xport->config, xport->state,
 817                     xport->is_tunnel, xport->may_enable);
 818
 819     if (xport->peer) {
 820         struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
 821         if (peer) {
 822             new_xport->peer = peer;
 823             new_xport->peer->peer = new_xport;
 824         }
 825     }
 826
 827     if (xbundle) {
 828         new_xport->xbundle = xbundle;
 829         list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
 830     }
 831
 832     HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
 833         new_pdscp = xmalloc(sizeof *pdscp);
 834         new_pdscp->skb_priority = pdscp->skb_priority;
 835         new_pdscp->dscp = pdscp->dscp;
 836         hmap_insert(&new_xport->skb_priorities, &new_pdscp->hmap_node,
 837                     hash_int(new_pdscp->skb_priority, 0));
 838     }
 839 }
 840
 841 /* Sets the current xlate configuration to new_xcfg and frees the old xlate
 842  * configuration in xcfgp.
 843  *
 844  * This needs to be called after editing the xlate configuration.
 845  *
 846  * Functions that edit the new xlate configuration are
 847  * xlate_<ofport/bundle/ofport>_set and xlate_<ofport/bundle/ofport>_remove.
 848  *
 849  * A sample workflow:
 850  *
 851  * xlate_txn_start();
 852  * ...
 853  * edit_xlate_configuration();
 854  * ...
 855  * xlate_txn_commit(); */
 856 void
 857 xlate_txn_commit(void)
 858 {
 859     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 860
 861     ovsrcu_set(&xcfgp, new_xcfg);
 862     ovsrcu_synchronize();
 863     xlate_xcfg_free(xcfg);
 864     new_xcfg = NULL;
 865 }
 866
 867 /* Copies the current xlate configuration in xcfgp to new_xcfg.
 868  *
 869  * This needs to be called prior to editing the xlate configuration. */
 870 void
 871 xlate_txn_start(void)
 872 {
 873     struct xbridge *xbridge;
 874     struct xlate_cfg *xcfg;
 875
 876     ovs_assert(!new_xcfg);
 877
 878     new_xcfg = xmalloc(sizeof *new_xcfg);
 879     hmap_init(&new_xcfg->xbridges);
 880     hmap_init(&new_xcfg->xbundles);
 881     hmap_init(&new_xcfg->xports);
 882
 883     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 884     if (!xcfg) {
 885         return;
 886     }
 887
 888     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
 889         xlate_xbridge_copy(xbridge);
 890     }
 891 }
 892
 893
 894 static void
 895 xlate_xcfg_free(struct xlate_cfg *xcfg)
 896 {
 897     struct xbridge *xbridge, *next_xbridge;
 898
 899     if (!xcfg) {
 900         return;
 901     }
 902
 903     HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) {
 904         xlate_xbridge_remove(xcfg, xbridge);
 905     }
 906
 907     hmap_destroy(&xcfg->xbridges);
 908     hmap_destroy(&xcfg->xbundles);
 909     hmap_destroy(&xcfg->xports);
 910     free(xcfg);
 911 }
 912
 913 void
 914 xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
 915                   struct dpif *dpif,
 916                   const struct mac_learning *ml, struct stp *stp,
 917                   struct rstp *rstp, const struct mcast_snooping *ms,
 918                   const struct mbridge *mbridge,
 919                   const struct dpif_sflow *sflow,
 920                   const struct dpif_ipfix *ipfix,
 921                   const struct netflow *netflow,
 922                   bool forward_bpdu, bool has_in_band,
 923                   const struct dpif_backer_support *support)
 924 {
 925     struct xbridge *xbridge;
 926
 927     ovs_assert(new_xcfg);
 928
 929     xbridge = xbridge_lookup(new_xcfg, ofproto);
 930     if (!xbridge) {
 931         xbridge = xzalloc(sizeof *xbridge);
 932         xbridge->ofproto = ofproto;
 933
 934         xlate_xbridge_init(new_xcfg, xbridge);
 935     }
 936
 937     free(xbridge->name);
 938     xbridge->name = xstrdup(name);
 939
 940     xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix,
 941                       netflow, forward_bpdu, has_in_band, support);
 942 }
 943
 944 static void
 945 xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 946 {
 947     struct xbundle *xbundle, *next_xbundle;
 948     struct xport *xport, *next_xport;
 949
 950     if (!xbridge) {
 951         return;
 952     }
 953
 954     HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) {
 955         xlate_xport_remove(xcfg, xport);
 956     }
 957
 958     LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) {
 959         xlate_xbundle_remove(xcfg, xbundle);
 960     }
 961
 962     hmap_remove(&xcfg->xbridges, &xbridge->hmap_node);
 963     mac_learning_unref(xbridge->ml);
 964     mcast_snooping_unref(xbridge->ms);
 965     mbridge_unref(xbridge->mbridge);
 966     dpif_sflow_unref(xbridge->sflow);
 967     dpif_ipfix_unref(xbridge->ipfix);
 968     stp_unref(xbridge->stp);
 969     rstp_unref(xbridge->rstp);
 970     hmap_destroy(&xbridge->xports);
 971     free(xbridge->name);
 972     free(xbridge);
 973 }
 974
 975 void
 976 xlate_remove_ofproto(struct ofproto_dpif *ofproto)
 977 {
 978     struct xbridge *xbridge;
 979
 980     ovs_assert(new_xcfg);
 981
 982     xbridge = xbridge_lookup(new_xcfg, ofproto);
 983     xlate_xbridge_remove(new_xcfg, xbridge);
 984 }
 985
 986 void
 987 xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
 988                  const char *name, enum port_vlan_mode vlan_mode, int vlan,
 989                  unsigned long *trunks, bool use_priority_tags,
 990                  const struct bond *bond, const struct lacp *lacp,
 991                  bool floodable)
 992 {
 993     struct xbundle *xbundle;
 994
 995     ovs_assert(new_xcfg);
 996
 997     xbundle = xbundle_lookup(new_xcfg, ofbundle);
 998     if (!xbundle) {
 999         xbundle = xzalloc(sizeof *xbundle);
1000         xbundle->ofbundle = ofbundle;
1001         xbundle->xbridge = xbridge_lookup(new_xcfg, ofproto);
1002
1003         xlate_xbundle_init(new_xcfg, xbundle);
1004     }
1005
1006     free(xbundle->name);
1007     xbundle->name = xstrdup(name);
1008
1009     xlate_xbundle_set(xbundle, vlan_mode, vlan, trunks,
1010                       use_priority_tags, bond, lacp, floodable);
1011 }
1012
1013 static void
1014 xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle)
1015 {
1016     struct xport *xport;
1017
1018     if (!xbundle) {
1019         return;
1020     }
1021
1022     LIST_FOR_EACH_POP (xport, bundle_node, &xbundle->xports) {
1023         xport->xbundle = NULL;
1024     }
1025
1026     hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
1027     list_remove(&xbundle->list_node);
1028     bond_unref(xbundle->bond);
1029     lacp_unref(xbundle->lacp);
1030     free(xbundle->name);
1031     free(xbundle);
1032 }
1033
1034 void
1035 xlate_bundle_remove(struct ofbundle *ofbundle)
1036 {
1037     struct xbundle *xbundle;
1038
1039     ovs_assert(new_xcfg);
1040
1041     xbundle = xbundle_lookup(new_xcfg, ofbundle);
1042     xlate_xbundle_remove(new_xcfg, xbundle);
1043 }
1044
1045 void
1046 xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
1047                  struct ofport_dpif *ofport, ofp_port_t ofp_port,
1048                  odp_port_t odp_port, const struct netdev *netdev,
1049                  const struct cfm *cfm, const struct bfd *bfd,
1050                  const struct lldp *lldp, struct ofport_dpif *peer,
1051                  int stp_port_no, const struct rstp_port *rstp_port,
1052                  const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
1053                  enum ofputil_port_config config,
1054                  enum ofputil_port_state state, bool is_tunnel,
1055                  bool may_enable)
1056 {
1057     size_t i;
1058     struct xport *xport;
1059
1060     ovs_assert(new_xcfg);
1061
1062     xport = xport_lookup(new_xcfg, ofport);
1063     if (!xport) {
1064         xport = xzalloc(sizeof *xport);
1065         xport->ofport = ofport;
1066         xport->xbridge = xbridge_lookup(new_xcfg, ofproto);
1067         xport->ofp_port = ofp_port;
1068
1069         xlate_xport_init(new_xcfg, xport);
1070     }
1071
1072     ovs_assert(xport->ofp_port == ofp_port);
1073
1074     xlate_xport_set(xport, odp_port, netdev, cfm, bfd, lldp,
1075                     stp_port_no, rstp_port, config, state, is_tunnel,
1076                     may_enable);
1077
1078     if (xport->peer) {
1079         xport->peer->peer = NULL;
1080     }
1081     xport->peer = xport_lookup(new_xcfg, peer);
1082     if (xport->peer) {
1083         xport->peer->peer = xport;
1084     }
1085
1086     if (xport->xbundle) {
1087         list_remove(&xport->bundle_node);
1088     }
1089     xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
1090     if (xport->xbundle) {
1091         list_insert(&xport->xbundle->xports, &xport->bundle_node);
1092     }
1093
1094     clear_skb_priorities(xport);
1095     for (i = 0; i < n_qdscp; i++) {
1096         struct skb_priority_to_dscp *pdscp;
1097         uint32_t skb_priority;
1098
1099         if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
1100                                    &skb_priority)) {
1101             continue;
1102         }
1103
1104         pdscp = xmalloc(sizeof *pdscp);
1105         pdscp->skb_priority = skb_priority;
1106         pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
1107         hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
1108                     hash_int(pdscp->skb_priority, 0));
1109     }
1110 }
1111
1112 static void
1113 xlate_xport_remove(struct xlate_cfg *xcfg, struct xport *xport)
1114 {
1115     if (!xport) {
1116         return;
1117     }
1118
1119     if (xport->peer) {
1120         xport->peer->peer = NULL;
1121         xport->peer = NULL;
1122     }
1123
1124     if (xport->xbundle) {
1125         list_remove(&xport->bundle_node);
1126     }
1127
1128     clear_skb_priorities(xport);
1129     hmap_destroy(&xport->skb_priorities);
1130
1131     hmap_remove(&xcfg->xports, &xport->hmap_node);
1132     hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
1133
1134     netdev_close(xport->netdev);
1135     rstp_port_unref(xport->rstp_port);
1136     cfm_unref(xport->cfm);
1137     bfd_unref(xport->bfd);
1138     lldp_unref(xport->lldp);
1139     free(xport);
1140 }
1141
1142 void
1143 xlate_ofport_remove(struct ofport_dpif *ofport)
1144 {
1145     struct xport *xport;
1146
1147     ovs_assert(new_xcfg);
1148
1149     xport = xport_lookup(new_xcfg, ofport);
1150     xlate_xport_remove(new_xcfg, xport);
1151 }
1152
1153 static struct ofproto_dpif *
1154 xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
1155                       ofp_port_t *ofp_in_port, const struct xport **xportp)
1156 {
1157     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1158     const struct xport *xport;
1159
1160     xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
1161                          ? tnl_port_receive(flow)
1162                          : odp_port_to_ofport(backer, flow->in_port.odp_port));
1163     if (OVS_UNLIKELY(!xport)) {
1164         return NULL;
1165     }
1166     *xportp = xport;
1167     if (ofp_in_port) {
1168         *ofp_in_port = xport->ofp_port;
1169     }
1170     return xport->xbridge->ofproto;
1171 }
1172
1173 /* Given a datapath and flow metadata ('backer', and 'flow' respectively)
1174  * returns the corresponding struct ofproto_dpif and OpenFlow port number. */
1175 struct ofproto_dpif *
1176 xlate_lookup_ofproto(const struct dpif_backer *backer, const struct flow *flow,
1177                      ofp_port_t *ofp_in_port)
1178 {
1179     const struct xport *xport;
1180
1181     return xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1182 }
1183
1184 /* Given a datapath and flow metadata ('backer', and 'flow' respectively),
1185  * optionally populates 'ofproto' with the ofproto_dpif, 'ofp_in_port' with the
1186  * openflow in_port, and 'ipfix', 'sflow', and 'netflow' with the appropriate
1187  * handles for those protocols if they're enabled.  Caller may use the returned
1188  * pointers until quiescing, for longer term use additional references must
1189  * be taken.
1190  *
1191  * Returns 0 if successful, ENODEV if the parsed flow has no associated ofproto.
1192  */
1193 int
1194 xlate_lookup(const struct dpif_backer *backer, const struct flow *flow,
1195              struct ofproto_dpif **ofprotop, struct dpif_ipfix **ipfix,
1196              struct dpif_sflow **sflow, struct netflow **netflow,
1197              ofp_port_t *ofp_in_port)
1198 {
1199     struct ofproto_dpif *ofproto;
1200     const struct xport *xport;
1201
1202     ofproto = xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1203
1204     if (!ofproto) {
1205         return ENODEV;
1206     }
1207
1208     if (ofprotop) {
1209         *ofprotop = ofproto;
1210     }
1211
1212     if (ipfix) {
1213         *ipfix = xport ? xport->xbridge->ipfix : NULL;
1214     }
1215
1216     if (sflow) {
1217         *sflow = xport ? xport->xbridge->sflow : NULL;
1218     }
1219
1220     if (netflow) {
1221         *netflow = xport ? xport->xbridge->netflow : NULL;
1222     }
1223
1224     return 0;
1225 }
1226
1227 static struct xbridge *
1228 xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
1229 {
1230     struct hmap *xbridges;
1231     struct xbridge *xbridge;
1232
1233     if (!ofproto || !xcfg) {
1234         return NULL;
1235     }
1236
1237     xbridges = &xcfg->xbridges;
1238
1239     HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
1240                              xbridges) {
1241         if (xbridge->ofproto == ofproto) {
1242             return xbridge;
1243         }
1244     }
1245     return NULL;
1246 }
1247
1248 static struct xbridge *
1249 xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
1250 {
1251     struct xbridge *xbridge;
1252
1253     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
1254         if (uuid_equals(ofproto_dpif_get_uuid(xbridge->ofproto), uuid)) {
1255             return xbridge;
1256         }
1257     }
1258     return NULL;
1259 }
1260
1261 static struct xbundle *
1262 xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
1263 {
1264     struct hmap *xbundles;
1265     struct xbundle *xbundle;
1266
1267     if (!ofbundle || !xcfg) {
1268         return NULL;
1269     }
1270
1271     xbundles = &xcfg->xbundles;
1272
1273     HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
1274                              xbundles) {
1275         if (xbundle->ofbundle == ofbundle) {
1276             return xbundle;
1277         }
1278     }
1279     return NULL;
1280 }
1281
1282 static struct xport *
1283 xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
1284 {
1285     struct hmap *xports;
1286     struct xport *xport;
1287
1288     if (!ofport || !xcfg) {
1289         return NULL;
1290     }
1291
1292     xports = &xcfg->xports;
1293
1294     HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
1295                              xports) {
1296         if (xport->ofport == ofport) {
1297             return xport;
1298         }
1299     }
1300     return NULL;
1301 }
1302
1303 static struct stp_port *
1304 xport_get_stp_port(const struct xport *xport)
1305 {
1306     return xport->xbridge->stp && xport->stp_port_no != -1
1307         ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
1308         : NULL;
1309 }
1310
1311 static bool
1312 xport_stp_learn_state(const struct xport *xport)
1313 {
1314     struct stp_port *sp = xport_get_stp_port(xport);
1315     return sp
1316         ? stp_learn_in_state(stp_port_get_state(sp))
1317         : true;
1318 }
1319
1320 static bool
1321 xport_stp_forward_state(const struct xport *xport)
1322 {
1323     struct stp_port *sp = xport_get_stp_port(xport);
1324     return sp
1325         ? stp_forward_in_state(stp_port_get_state(sp))
1326         : true;
1327 }
1328
1329 static bool
1330 xport_stp_should_forward_bpdu(const struct xport *xport)
1331 {
1332     struct stp_port *sp = xport_get_stp_port(xport);
1333     return stp_should_forward_bpdu(sp ? stp_port_get_state(sp) : STP_DISABLED);
1334 }
1335
1336 /* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
1337  * were used to make the determination.*/
1338 static bool
1339 stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
1340 {
1341     /* is_stp() also checks dl_type, but dl_type is always set in 'wc'. */
1342     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1343     return is_stp(flow);
1344 }
1345
1346 static void
1347 stp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1348 {
1349     struct stp_port *sp = xport_get_stp_port(xport);
1350     struct dp_packet payload = *packet;
1351     struct eth_header *eth = dp_packet_data(&payload);
1352
1353     /* Sink packets on ports that have STP disabled when the bridge has
1354      * STP enabled. */
1355     if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
1356         return;
1357     }
1358
1359     /* Trim off padding on payload. */
1360     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1361         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1362     }
1363
1364     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1365         stp_received_bpdu(sp, dp_packet_data(&payload), dp_packet_size(&payload));
1366     }
1367 }
1368
1369 static enum rstp_state
1370 xport_get_rstp_port_state(const struct xport *xport)
1371 {
1372     return xport->rstp_port
1373         ? rstp_port_get_state(xport->rstp_port)
1374         : RSTP_DISABLED;
1375 }
1376
1377 static bool
1378 xport_rstp_learn_state(const struct xport *xport)
1379 {
1380     return xport->xbridge->rstp && xport->rstp_port
1381         ? rstp_learn_in_state(xport_get_rstp_port_state(xport))
1382         : true;
1383 }
1384
1385 static bool
1386 xport_rstp_forward_state(const struct xport *xport)
1387 {
1388     return xport->xbridge->rstp && xport->rstp_port
1389         ? rstp_forward_in_state(xport_get_rstp_port_state(xport))
1390         : true;
1391 }
1392
1393 static bool
1394 xport_rstp_should_manage_bpdu(const struct xport *xport)
1395 {
1396     return rstp_should_manage_bpdu(xport_get_rstp_port_state(xport));
1397 }
1398
1399 static void
1400 rstp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1401 {
1402     struct dp_packet payload = *packet;
1403     struct eth_header *eth = dp_packet_data(&payload);
1404
1405     /* Sink packets on ports that have no RSTP. */
1406     if (!xport->rstp_port) {
1407         return;
1408     }
1409
1410     /* Trim off padding on payload. */
1411     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1412         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1413     }
1414
1415     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1416         rstp_port_received_bpdu(xport->rstp_port, dp_packet_data(&payload),
1417                                 dp_packet_size(&payload));
1418     }
1419 }
1420
1421 static struct xport *
1422 get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1423 {
1424     struct xport *xport;
1425
1426     HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port),
1427                              &xbridge->xports) {
1428         if (xport->ofp_port == ofp_port) {
1429             return xport;
1430         }
1431     }
1432     return NULL;
1433 }
1434
1435 static odp_port_t
1436 ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1437 {
1438     const struct xport *xport = get_ofp_port(xbridge, ofp_port);
1439     return xport ? xport->odp_port : ODPP_NONE;
1440 }
1441
1442 static bool
1443 odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
1444 {
1445     struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
1446     return xport && xport->may_enable;
1447 }
1448
1449 static struct ofputil_bucket *
1450 group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
1451                         int depth);
1452
1453 static bool
1454 group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
1455 {
1456     struct group_dpif *group;
1457
1458     if (group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group)) {
1459         struct ofputil_bucket *bucket;
1460
1461         bucket = group_first_live_bucket(ctx, group, depth);
1462         group_dpif_unref(group);
1463         return bucket == NULL;
1464     }
1465
1466     return false;
1467 }
1468
1469 #define MAX_LIVENESS_RECURSION 128 /* Arbitrary limit */
1470
1471 static bool
1472 bucket_is_alive(const struct xlate_ctx *ctx,
1473                 struct ofputil_bucket *bucket, int depth)
1474 {
1475     if (depth >= MAX_LIVENESS_RECURSION) {
1476         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1477
1478         VLOG_WARN_RL(&rl, "bucket chaining exceeded %d links",
1479                      MAX_LIVENESS_RECURSION);
1480         return false;
1481     }
1482
1483     return (!ofputil_bucket_has_liveness(bucket)
1484             || (bucket->watch_port != OFPP_ANY
1485                && odp_port_is_alive(ctx, bucket->watch_port))
1486             || (bucket->watch_group != OFPG_ANY
1487                && group_is_alive(ctx, bucket->watch_group, depth + 1)));
1488 }
1489
1490 static struct ofputil_bucket *
1491 group_first_live_bucket(const struct xlate_ctx *ctx,
1492                         const struct group_dpif *group, int depth)
1493 {
1494     struct ofputil_bucket *bucket;
1495     const struct ovs_list *buckets;
1496
1497     group_dpif_get_buckets(group, &buckets);
1498     LIST_FOR_EACH (bucket, list_node, buckets) {
1499         if (bucket_is_alive(ctx, bucket, depth)) {
1500             return bucket;
1501         }
1502     }
1503
1504     return NULL;
1505 }
1506
1507 static struct ofputil_bucket *
1508 group_best_live_bucket(const struct xlate_ctx *ctx,
1509                        const struct group_dpif *group,
1510                        uint32_t basis)
1511 {
1512     struct ofputil_bucket *best_bucket = NULL;
1513     uint32_t best_score = 0;
1514     int i = 0;
1515
1516     struct ofputil_bucket *bucket;
1517     const struct ovs_list *buckets;
1518
1519     group_dpif_get_buckets(group, &buckets);
1520     LIST_FOR_EACH (bucket, list_node, buckets) {
1521         if (bucket_is_alive(ctx, bucket, 0)) {
1522             uint32_t score = (hash_int(i, basis) & 0xffff) * bucket->weight;
1523             if (score >= best_score) {
1524                 best_bucket = bucket;
1525                 best_score = score;
1526             }
1527         }
1528         i++;
1529     }
1530
1531     return best_bucket;
1532 }
1533
1534 static bool
1535 xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan)
1536 {
1537     return (bundle->vlan_mode != PORT_VLAN_ACCESS
1538             && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
1539 }
1540
1541 static bool
1542 xbundle_includes_vlan(const struct xbundle *xbundle, uint16_t vlan)
1543 {
1544     return vlan == xbundle->vlan || xbundle_trunks_vlan(xbundle, vlan);
1545 }
1546
1547 static mirror_mask_t
1548 xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle)
1549 {
1550     return xbundle != &ofpp_none_bundle
1551         ? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle)
1552         : 0;
1553 }
1554
1555 static mirror_mask_t
1556 xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle)
1557 {
1558     return xbundle != &ofpp_none_bundle
1559         ? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle)
1560         : 0;
1561 }
1562
1563 static mirror_mask_t
1564 xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle)
1565 {
1566     return xbundle != &ofpp_none_bundle
1567         ? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle)
1568         : 0;
1569 }
1570
1571 static struct xbundle *
1572 lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port,
1573                     bool warn, struct xport **in_xportp)
1574 {
1575     struct xport *xport;
1576
1577     /* Find the port and bundle for the received packet. */
1578     xport = get_ofp_port(xbridge, in_port);
1579     if (in_xportp) {
1580         *in_xportp = xport;
1581     }
1582     if (xport && xport->xbundle) {
1583         return xport->xbundle;
1584     }
1585
1586     /* Special-case OFPP_NONE (OF1.0) and OFPP_CONTROLLER (OF1.1+),
1587      * which a controller may use as the ingress port for traffic that
1588      * it is sourcing. */
1589     if (in_port == OFPP_CONTROLLER || in_port == OFPP_NONE) {
1590         return &ofpp_none_bundle;
1591     }
1592
1593     /* Odd.  A few possible reasons here:
1594      *
1595      * - We deleted a port but there are still a few packets queued up
1596      *   from it.
1597      *
1598      * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
1599      *   we don't know about.
1600      *
1601      * - The ofproto client didn't configure the port as part of a bundle.
1602      *   This is particularly likely to happen if a packet was received on the
1603      *   port after it was created, but before the client had a chance to
1604      *   configure its bundle.
1605      */
1606     if (warn) {
1607         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1608
1609         VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
1610                      "port %"PRIu16, xbridge->name, in_port);
1611     }
1612     return NULL;
1613 }
1614
1615 static void
1616 mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
1617               mirror_mask_t mirrors)
1618 {
1619     bool warn = ctx->xin->packet != NULL;
1620     uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci);
1621     if (!input_vid_is_valid(vid, xbundle, warn)) {
1622         return;
1623     }
1624     uint16_t vlan = input_vid_to_vlan(xbundle, vid);
1625
1626     const struct xbridge *xbridge = ctx->xbridge;
1627
1628     /* Don't mirror to destinations that we've already mirrored to. */
1629     mirrors &= ~ctx->mirrors;
1630     if (!mirrors) {
1631         return;
1632     }
1633
1634     /* Record these mirrors so that we don't mirror to them again. */
1635     ctx->mirrors |= mirrors;
1636
1637     if (ctx->xin->resubmit_stats) {
1638         mirror_update_stats(xbridge->mbridge, mirrors,
1639                             ctx->xin->resubmit_stats->n_packets,
1640                             ctx->xin->resubmit_stats->n_bytes);
1641     }
1642     if (ctx->xin->xcache) {
1643         struct xc_entry *entry;
1644
1645         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_MIRROR);
1646         entry->u.mirror.mbridge = mbridge_ref(xbridge->mbridge);
1647         entry->u.mirror.mirrors = mirrors;
1648     }
1649
1650     while (mirrors) {
1651         const unsigned long *vlans;
1652         mirror_mask_t dup_mirrors;
1653         struct ofbundle *out;
1654         int out_vlan;
1655
1656         bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
1657                                      &vlans, &dup_mirrors, &out, &out_vlan);
1658         ovs_assert(has_mirror);
1659
1660         if (vlans) {
1661             ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
1662         }
1663
1664         if (vlans && !bitmap_is_set(vlans, vlan)) {
1665             mirrors = zero_rightmost_1bit(mirrors);
1666             continue;
1667         }
1668
1669         mirrors &= ~dup_mirrors;
1670         ctx->mirrors |= dup_mirrors;
1671         if (out) {
1672             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1673             struct xbundle *out_xbundle = xbundle_lookup(xcfg, out);
1674             if (out_xbundle) {
1675                 output_normal(ctx, out_xbundle, vlan);
1676             }
1677         } else if (vlan != out_vlan
1678                    && !eth_addr_is_reserved(ctx->xin->flow.dl_dst)) {
1679             struct xbundle *xbundle;
1680
1681             LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
1682                 if (xbundle_includes_vlan(xbundle, out_vlan)
1683                     && !xbundle_mirror_out(xbridge, xbundle)) {
1684                     output_normal(ctx, xbundle, out_vlan);
1685                 }
1686             }
1687         }
1688     }
1689 }
1690
1691 static void
1692 mirror_ingress_packet(struct xlate_ctx *ctx)
1693 {
1694     if (mbridge_has_mirrors(ctx->xbridge->mbridge)) {
1695         bool warn = ctx->xin->packet != NULL;
1696         struct xbundle *xbundle = lookup_input_bundle(
1697             ctx->xbridge, ctx->xin->flow.in_port.ofp_port, warn, NULL);
1698         if (xbundle) {
1699             mirror_packet(ctx, xbundle,
1700                           xbundle_mirror_src(ctx->xbridge, xbundle));
1701         }
1702     }
1703 }
1704
1705 /* Given 'vid', the VID obtained from the 802.1Q header that was received as
1706  * part of a packet (specify 0 if there was no 802.1Q header), and 'in_xbundle',
1707  * the bundle on which the packet was received, returns the VLAN to which the
1708  * packet belongs.
1709  *
1710  * Both 'vid' and the return value are in the range 0...4095. */
1711 static uint16_t
1712 input_vid_to_vlan(const struct xbundle *in_xbundle, uint16_t vid)
1713 {
1714     switch (in_xbundle->vlan_mode) {
1715     case PORT_VLAN_ACCESS:
1716         return in_xbundle->vlan;
1717         break;
1718
1719     case PORT_VLAN_TRUNK:
1720         return vid;
1721
1722     case PORT_VLAN_NATIVE_UNTAGGED:
1723     case PORT_VLAN_NATIVE_TAGGED:
1724         return vid ? vid : in_xbundle->vlan;
1725
1726     default:
1727         OVS_NOT_REACHED();
1728     }
1729 }
1730
1731 /* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'.
1732  * If so, returns true.  Otherwise, returns false and, if 'warn' is true, logs
1733  * a warning.
1734  *
1735  * 'vid' should be the VID obtained from the 802.1Q header that was received as
1736  * part of a packet (specify 0 if there was no 802.1Q header), in the range
1737  * 0...4095. */
1738 static bool
1739 input_vid_is_valid(uint16_t vid, struct xbundle *in_xbundle, bool warn)
1740 {
1741     /* Allow any VID on the OFPP_NONE port. */
1742     if (in_xbundle == &ofpp_none_bundle) {
1743         return true;
1744     }
1745
1746     switch (in_xbundle->vlan_mode) {
1747     case PORT_VLAN_ACCESS:
1748         if (vid) {
1749             if (warn) {
1750                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1751                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" tagged "
1752                              "packet received on port %s configured as VLAN "
1753                              "%"PRIu16" access port", vid, in_xbundle->name,
1754                              in_xbundle->vlan);
1755             }
1756             return false;
1757         }
1758         return true;
1759
1760     case PORT_VLAN_NATIVE_UNTAGGED:
1761     case PORT_VLAN_NATIVE_TAGGED:
1762         if (!vid) {
1763             /* Port must always carry its native VLAN. */
1764             return true;
1765         }
1766         /* Fall through. */
1767     case PORT_VLAN_TRUNK:
1768         if (!xbundle_includes_vlan(in_xbundle, vid)) {
1769             if (warn) {
1770                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1771                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" packet "
1772                              "received on port %s not configured for trunking "
1773                              "VLAN %"PRIu16, vid, in_xbundle->name, vid);
1774             }
1775             return false;
1776         }
1777         return true;
1778
1779     default:
1780         OVS_NOT_REACHED();
1781     }
1782
1783 }
1784
1785 /* Given 'vlan', the VLAN that a packet belongs to, and
1786  * 'out_xbundle', a bundle on which the packet is to be output, returns the VID
1787  * that should be included in the 802.1Q header.  (If the return value is 0,
1788  * then the 802.1Q header should only be included in the packet if there is a
1789  * nonzero PCP.)
1790  *
1791  * Both 'vlan' and the return value are in the range 0...4095. */
1792 static uint16_t
1793 output_vlan_to_vid(const struct xbundle *out_xbundle, uint16_t vlan)
1794 {
1795     switch (out_xbundle->vlan_mode) {
1796     case PORT_VLAN_ACCESS:
1797         return 0;
1798
1799     case PORT_VLAN_TRUNK:
1800     case PORT_VLAN_NATIVE_TAGGED:
1801         return vlan;
1802
1803     case PORT_VLAN_NATIVE_UNTAGGED:
1804         return vlan == out_xbundle->vlan ? 0 : vlan;
1805
1806     default:
1807         OVS_NOT_REACHED();
1808     }
1809 }
1810
1811 static void
1812 output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
1813               uint16_t vlan)
1814 {
1815     ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci;
1816     uint16_t vid;
1817     ovs_be16 tci, old_tci;
1818     struct xport *xport;
1819     struct xlate_bond_recirc xr;
1820     bool use_recirc = false;
1821
1822     vid = output_vlan_to_vid(out_xbundle, vlan);
1823     if (list_is_empty(&out_xbundle->xports)) {
1824         /* Partially configured bundle with no slaves.  Drop the packet. */
1825         return;
1826     } else if (!out_xbundle->bond) {
1827         xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport,
1828                              bundle_node);
1829     } else {
1830         struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1831         struct flow_wildcards *wc = ctx->wc;
1832         struct ofport_dpif *ofport;
1833
1834         if (ctx->xbridge->support.odp.recirc) {
1835             use_recirc = bond_may_recirc(
1836                 out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
1837
1838             if (use_recirc) {
1839                 /* Only TCP mode uses recirculation. */
1840                 xr.hash_alg = OVS_HASH_ALG_L4;
1841                 bond_update_post_recirc_rules(out_xbundle->bond, false);
1842
1843                 /* Recirculation does not require unmasking hash fields. */
1844                 wc = NULL;
1845             }
1846         }
1847
1848         ofport = bond_choose_output_slave(out_xbundle->bond,
1849                                           &ctx->xin->flow, wc, vid);
1850         xport = xport_lookup(xcfg, ofport);
1851
1852         if (!xport) {
1853             /* No slaves enabled, so drop packet. */
1854             return;
1855         }
1856
1857         /* If use_recirc is set, the main thread will handle stats
1858          * accounting for this bond. */
1859         if (!use_recirc) {
1860             if (ctx->xin->resubmit_stats) {
1861                 bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
1862                              ctx->xin->resubmit_stats->n_bytes);
1863             }
1864             if (ctx->xin->xcache) {
1865                 struct xc_entry *entry;
1866                 struct flow *flow;
1867
1868                 flow = &ctx->xin->flow;
1869                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_BOND);
1870                 entry->u.bond.bond = bond_ref(out_xbundle->bond);
1871                 entry->u.bond.flow = xmemdup(flow, sizeof *flow);
1872                 entry->u.bond.vid = vid;
1873             }
1874         }
1875     }
1876
1877     old_tci = *flow_tci;
1878     tci = htons(vid);
1879     if (tci || out_xbundle->use_priority_tags) {
1880         tci |= *flow_tci & htons(VLAN_PCP_MASK);
1881         if (tci) {
1882             tci |= htons(VLAN_CFI);
1883         }
1884     }
1885     *flow_tci = tci;
1886
1887     compose_output_action(ctx, xport->ofp_port, use_recirc ? &xr : NULL);
1888     *flow_tci = old_tci;
1889 }
1890
1891 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
1892  * migration.  Older Citrix-patched Linux DomU used gratuitous ARP replies to
1893  * indicate this; newer upstream kernels use gratuitous ARP requests. */
1894 static bool
1895 is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
1896 {
1897     if (flow->dl_type != htons(ETH_TYPE_ARP)) {
1898         return false;
1899     }
1900
1901     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1902     if (!eth_addr_is_broadcast(flow->dl_dst)) {
1903         return false;
1904     }
1905
1906     memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
1907     if (flow->nw_proto == ARP_OP_REPLY) {
1908         return true;
1909     } else if (flow->nw_proto == ARP_OP_REQUEST) {
1910         memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
1911         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
1912
1913         return flow->nw_src == flow->nw_dst;
1914     } else {
1915         return false;
1916     }
1917 }
1918
1919 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
1920  * dropped.  Returns true if they may be forwarded, false if they should be
1921  * dropped.
1922  *
1923  * 'in_port' must be the xport that corresponds to flow->in_port.
1924  * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
1925  *
1926  * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
1927  * returned by input_vid_to_vlan().  It must be a valid VLAN for 'in_port', as
1928  * checked by input_vid_is_valid().
1929  *
1930  * May also add tags to '*tags', although the current implementation only does
1931  * so in one special case.
1932  */
1933 static bool
1934 is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
1935               uint16_t vlan)
1936 {
1937     struct xbundle *in_xbundle = in_port->xbundle;
1938     const struct xbridge *xbridge = ctx->xbridge;
1939     struct flow *flow = &ctx->xin->flow;
1940
1941     /* Drop frames for reserved multicast addresses
1942      * only if forward_bpdu option is absent. */
1943     if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
1944         xlate_report(ctx, "packet has reserved destination MAC, dropping");
1945         return false;
1946     }
1947
1948     if (in_xbundle->bond) {
1949         struct mac_entry *mac;
1950
1951         switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
1952                                          flow->dl_dst)) {
1953         case BV_ACCEPT:
1954             break;
1955
1956         case BV_DROP:
1957             xlate_report(ctx, "bonding refused admissibility, dropping");
1958             return false;
1959
1960         case BV_DROP_IF_MOVED:
1961             ovs_rwlock_rdlock(&xbridge->ml->rwlock);
1962             mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
1963             if (mac
1964                 && mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
1965                 && (!is_gratuitous_arp(flow, ctx->wc)
1966                     || mac_entry_is_grat_arp_locked(mac))) {
1967                 ovs_rwlock_unlock(&xbridge->ml->rwlock);
1968                 xlate_report(ctx, "SLB bond thinks this packet looped back, "
1969                              "dropping");
1970                 return false;
1971             }
1972             ovs_rwlock_unlock(&xbridge->ml->rwlock);
1973             break;
1974         }
1975     }
1976
1977     return true;
1978 }
1979
1980 /* Checks whether a MAC learning update is necessary for MAC learning table
1981  * 'ml' given that a packet matching 'flow' was received  on 'in_xbundle' in
1982  * 'vlan'.
1983  *
1984  * Most packets processed through the MAC learning table do not actually
1985  * change it in any way.  This function requires only a read lock on the MAC
1986  * learning table, so it is much cheaper in this common case.
1987  *
1988  * Keep the code here synchronized with that in update_learning_table__()
1989  * below. */
1990 static bool
1991 is_mac_learning_update_needed(const struct mac_learning *ml,
1992                               const struct flow *flow,
1993                               struct flow_wildcards *wc,
1994                               int vlan, struct xbundle *in_xbundle)
1995 OVS_REQ_RDLOCK(ml->rwlock)
1996 {
1997     struct mac_entry *mac;
1998
1999     if (!mac_learning_may_learn(ml, flow->dl_src, vlan)) {
2000         return false;
2001     }
2002
2003     mac = mac_learning_lookup(ml, flow->dl_src, vlan);
2004     if (!mac || mac_entry_age(ml, mac)) {
2005         return true;
2006     }
2007
2008     if (is_gratuitous_arp(flow, wc)) {
2009         /* We don't want to learn from gratuitous ARP packets that are
2010          * reflected back over bond slaves so we lock the learning table. */
2011         if (!in_xbundle->bond) {
2012             return true;
2013         } else if (mac_entry_is_grat_arp_locked(mac)) {
2014             return false;
2015         }
2016     }
2017
2018     return mac_entry_get_port(ml, mac) != in_xbundle->ofbundle;
2019 }
2020
2021
2022 /* Updates MAC learning table 'ml' given that a packet matching 'flow' was
2023  * received on 'in_xbundle' in 'vlan'.
2024  *
2025  * This code repeats all the checks in is_mac_learning_update_needed() because
2026  * the lock was released between there and here and thus the MAC learning state
2027  * could have changed.
2028  *
2029  * Keep the code here synchronized with that in is_mac_learning_update_needed()
2030  * above. */
2031 static void
2032 update_learning_table__(const struct xbridge *xbridge,
2033                         const struct flow *flow, struct flow_wildcards *wc,
2034                         int vlan, struct xbundle *in_xbundle)
2035 OVS_REQ_WRLOCK(xbridge->ml->rwlock)
2036 {
2037     struct mac_entry *mac;
2038
2039     if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
2040         return;
2041     }
2042
2043     mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
2044     if (is_gratuitous_arp(flow, wc)) {
2045         /* We don't want to learn from gratuitous ARP packets that are
2046          * reflected back over bond slaves so we lock the learning table. */
2047         if (!in_xbundle->bond) {
2048             mac_entry_set_grat_arp_lock(mac);
2049         } else if (mac_entry_is_grat_arp_locked(mac)) {
2050             return;
2051         }
2052     }
2053
2054     if (mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle) {
2055         /* The log messages here could actually be useful in debugging,
2056          * so keep the rate limit relatively high. */
2057         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
2058
2059         VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
2060                     "on port %s in VLAN %d",
2061                     xbridge->name, ETH_ADDR_ARGS(flow->dl_src),
2062                     in_xbundle->name, vlan);
2063
2064         mac_entry_set_port(xbridge->ml, mac, in_xbundle->ofbundle);
2065     }
2066 }
2067
2068 static void
2069 update_learning_table(const struct xbridge *xbridge,
2070                       const struct flow *flow, struct flow_wildcards *wc,
2071                       int vlan, struct xbundle *in_xbundle)
2072 {
2073     bool need_update;
2074
2075     /* Don't learn the OFPP_NONE port. */
2076     if (in_xbundle == &ofpp_none_bundle) {
2077         return;
2078     }
2079
2080     /* First try the common case: no change to MAC learning table. */
2081     ovs_rwlock_rdlock(&xbridge->ml->rwlock);
2082     need_update = is_mac_learning_update_needed(xbridge->ml, flow, wc, vlan,
2083                                                 in_xbundle);
2084     ovs_rwlock_unlock(&xbridge->ml->rwlock);
2085
2086     if (need_update) {
2087         /* Slow path: MAC learning table might need an update. */
2088         ovs_rwlock_wrlock(&xbridge->ml->rwlock);
2089         update_learning_table__(xbridge, flow, wc, vlan, in_xbundle);
2090         ovs_rwlock_unlock(&xbridge->ml->rwlock);
2091     }
2092 }
2093
2094 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2095  * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
2096 static void
2097 update_mcast_snooping_table4__(const struct xbridge *xbridge,
2098                                const struct flow *flow,
2099                                struct mcast_snooping *ms, int vlan,
2100                                struct xbundle *in_xbundle,
2101                                const struct dp_packet *packet)
2102     OVS_REQ_WRLOCK(ms->rwlock)
2103 {
2104     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2105     int count;
2106     ovs_be32 ip4 = flow->igmp_group_ip4;
2107
2108     switch (ntohs(flow->tp_src)) {
2109     case IGMP_HOST_MEMBERSHIP_REPORT:
2110     case IGMPV2_HOST_MEMBERSHIP_REPORT:
2111         if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2112             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
2113                         IP_FMT" is on port %s in VLAN %d",
2114                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2115         }
2116         break;
2117     case IGMP_HOST_LEAVE_MESSAGE:
2118         if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2119             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
2120                         IP_FMT" is on port %s in VLAN %d",
2121                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2122         }
2123         break;
2124     case IGMP_HOST_MEMBERSHIP_QUERY:
2125         if (flow->nw_src && mcast_snooping_add_mrouter(ms, vlan,
2126             in_xbundle->ofbundle)) {
2127             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query from "
2128                         IP_FMT" is on port %s in VLAN %d",
2129                         xbridge->name, IP_ARGS(flow->nw_src),
2130                         in_xbundle->name, vlan);
2131         }
2132         break;
2133     case IGMPV3_HOST_MEMBERSHIP_REPORT:
2134         if ((count = mcast_snooping_add_report(ms, packet, vlan,
2135                                                in_xbundle->ofbundle))) {
2136             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2137                         "addresses on port %s in VLAN %d",
2138                         xbridge->name, count, in_xbundle->name, vlan);
2139         }
2140         break;
2141     }
2142 }
2143
2144 static void
2145 update_mcast_snooping_table6__(const struct xbridge *xbridge,
2146                                const struct flow *flow,
2147                                struct mcast_snooping *ms, int vlan,
2148                                struct xbundle *in_xbundle,
2149                                const struct dp_packet *packet)
2150     OVS_REQ_WRLOCK(ms->rwlock)
2151 {
2152     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2153     int count;
2154
2155     switch (ntohs(flow->tp_src)) {
2156     case MLD_QUERY:
2157         if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
2158             && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
2159             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
2160                         "in VLAN %d",
2161                         xbridge->name, in_xbundle->name, vlan);
2162         }
2163         break;
2164     case MLD_REPORT:
2165     case MLD_DONE:
2166     case MLD2_REPORT:
2167         count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
2168         if (count) {
2169             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2170                         "addresses on port %s in VLAN %d",
2171                         xbridge->name, count, in_xbundle->name, vlan);
2172         }
2173         break;
2174     }
2175 }
2176
2177 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2178  * was received on 'in_xbundle' in 'vlan'. */
2179 static void
2180 update_mcast_snooping_table(const struct xbridge *xbridge,
2181                             const struct flow *flow, int vlan,
2182                             struct xbundle *in_xbundle,
2183                             const struct dp_packet *packet)
2184 {
2185     struct mcast_snooping *ms = xbridge->ms;
2186     struct xlate_cfg *xcfg;
2187     struct xbundle *mcast_xbundle;
2188     struct mcast_port_bundle *fport;
2189
2190     /* Don't learn the OFPP_NONE port. */
2191     if (in_xbundle == &ofpp_none_bundle) {
2192         return;
2193     }
2194
2195     /* Don't learn from flood ports */
2196     mcast_xbundle = NULL;
2197     ovs_rwlock_wrlock(&ms->rwlock);
2198     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2199     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2200         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2201         if (mcast_xbundle == in_xbundle) {
2202             break;
2203         }
2204     }
2205
2206     if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
2207         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2208             update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
2209                                            in_xbundle, packet);
2210         } else {
2211             update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
2212                                            in_xbundle, packet);
2213         }
2214     }
2215     ovs_rwlock_unlock(&ms->rwlock);
2216 }
2217
2218 /* send the packet to ports having the multicast group learned */
2219 static void
2220 xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
2221                               struct mcast_snooping *ms OVS_UNUSED,
2222                               struct mcast_group *grp,
2223                               struct xbundle *in_xbundle, uint16_t vlan)
2224     OVS_REQ_RDLOCK(ms->rwlock)
2225 {
2226     struct xlate_cfg *xcfg;
2227     struct mcast_group_bundle *b;
2228     struct xbundle *mcast_xbundle;
2229
2230     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2231     LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
2232         mcast_xbundle = xbundle_lookup(xcfg, b->port);
2233         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2234             xlate_report(ctx, "forwarding to mcast group port");
2235             output_normal(ctx, mcast_xbundle, vlan);
2236         } else if (!mcast_xbundle) {
2237             xlate_report(ctx, "mcast group port is unknown, dropping");
2238         } else {
2239             xlate_report(ctx, "mcast group port is input port, dropping");
2240         }
2241     }
2242 }
2243
2244 /* send the packet to ports connected to multicast routers */
2245 static void
2246 xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
2247                                  struct mcast_snooping *ms,
2248                                  struct xbundle *in_xbundle, uint16_t vlan)
2249     OVS_REQ_RDLOCK(ms->rwlock)
2250 {
2251     struct xlate_cfg *xcfg;
2252     struct mcast_mrouter_bundle *mrouter;
2253     struct xbundle *mcast_xbundle;
2254
2255     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2256     LIST_FOR_EACH(mrouter, mrouter_node, &ms->mrouter_lru) {
2257         mcast_xbundle = xbundle_lookup(xcfg, mrouter->port);
2258         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2259             xlate_report(ctx, "forwarding to mcast router port");
2260             output_normal(ctx, mcast_xbundle, vlan);
2261         } else if (!mcast_xbundle) {
2262             xlate_report(ctx, "mcast router port is unknown, dropping");
2263         } else {
2264             xlate_report(ctx, "mcast router port is input port, dropping");
2265         }
2266     }
2267 }
2268
2269 /* send the packet to ports flagged to be flooded */
2270 static void
2271 xlate_normal_mcast_send_fports(struct xlate_ctx *ctx,
2272                                struct mcast_snooping *ms,
2273                                struct xbundle *in_xbundle, uint16_t vlan)
2274     OVS_REQ_RDLOCK(ms->rwlock)
2275 {
2276     struct xlate_cfg *xcfg;
2277     struct mcast_port_bundle *fport;
2278     struct xbundle *mcast_xbundle;
2279
2280     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2281     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2282         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2283         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2284             xlate_report(ctx, "forwarding to mcast flood port");
2285             output_normal(ctx, mcast_xbundle, vlan);
2286         } else if (!mcast_xbundle) {
2287             xlate_report(ctx, "mcast flood port is unknown, dropping");
2288         } else {
2289             xlate_report(ctx, "mcast flood port is input port, dropping");
2290         }
2291     }
2292 }
2293
2294 /* forward the Reports to configured ports */
2295 static void
2296 xlate_normal_mcast_send_rports(struct xlate_ctx *ctx,
2297                                struct mcast_snooping *ms,
2298                                struct xbundle *in_xbundle, uint16_t vlan)
2299     OVS_REQ_RDLOCK(ms->rwlock)
2300 {
2301     struct xlate_cfg *xcfg;
2302     struct mcast_port_bundle *rport;
2303     struct xbundle *mcast_xbundle;
2304
2305     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2306     LIST_FOR_EACH(rport, node, &ms->rport_list) {
2307         mcast_xbundle = xbundle_lookup(xcfg, rport->port);
2308         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2309             xlate_report(ctx, "forwarding Report to mcast flagged port");
2310             output_normal(ctx, mcast_xbundle, vlan);
2311         } else if (!mcast_xbundle) {
2312             xlate_report(ctx, "mcast port is unknown, dropping the Report");
2313         } else {
2314             xlate_report(ctx, "mcast port is input port, dropping the Report");
2315         }
2316     }
2317 }
2318
2319 static void
2320 xlate_normal_flood(struct xlate_ctx *ctx, struct xbundle *in_xbundle,
2321                    uint16_t vlan)
2322 {
2323     struct xbundle *xbundle;
2324
2325     LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
2326         if (xbundle != in_xbundle
2327             && xbundle_includes_vlan(xbundle, vlan)
2328             && xbundle->floodable
2329             && !xbundle_mirror_out(ctx->xbridge, xbundle)) {
2330             output_normal(ctx, xbundle, vlan);
2331         }
2332     }
2333     ctx->nf_output_iface = NF_OUT_FLOOD;
2334 }
2335
2336 static void
2337 xlate_normal(struct xlate_ctx *ctx)
2338 {
2339     struct flow_wildcards *wc = ctx->wc;
2340     struct flow *flow = &ctx->xin->flow;
2341     struct xbundle *in_xbundle;
2342     struct xport *in_port;
2343     struct mac_entry *mac;
2344     void *mac_port;
2345     uint16_t vlan;
2346     uint16_t vid;
2347
2348     memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2349     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2350     wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2351
2352     in_xbundle = lookup_input_bundle(ctx->xbridge, flow->in_port.ofp_port,
2353                                      ctx->xin->packet != NULL, &in_port);
2354     if (!in_xbundle) {
2355         xlate_report(ctx, "no input bundle, dropping");
2356         return;
2357     }
2358
2359     /* Drop malformed frames. */
2360     if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
2361         !(flow->vlan_tci & htons(VLAN_CFI))) {
2362         if (ctx->xin->packet != NULL) {
2363             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2364             VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
2365                          "VLAN tag received on port %s",
2366                          ctx->xbridge->name, in_xbundle->name);
2367         }
2368         xlate_report(ctx, "partial VLAN tag, dropping");
2369         return;
2370     }
2371
2372     /* Drop frames on bundles reserved for mirroring. */
2373     if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
2374         if (ctx->xin->packet != NULL) {
2375             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2376             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
2377                          "%s, which is reserved exclusively for mirroring",
2378                          ctx->xbridge->name, in_xbundle->name);
2379         }
2380         xlate_report(ctx, "input port is mirror output port, dropping");
2381         return;
2382     }
2383
2384     /* Check VLAN. */
2385     vid = vlan_tci_to_vid(flow->vlan_tci);
2386     if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
2387         xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
2388         return;
2389     }
2390     vlan = input_vid_to_vlan(in_xbundle, vid);
2391
2392     /* Check other admissibility requirements. */
2393     if (in_port && !is_admissible(ctx, in_port, vlan)) {
2394         return;
2395     }
2396
2397     /* Learn source MAC. */
2398     if (ctx->xin->may_learn) {
2399         update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
2400     }
2401     if (ctx->xin->xcache) {
2402         struct xc_entry *entry;
2403
2404         /* Save enough info to update mac learning table later. */
2405         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
2406         entry->u.normal.ofproto = ctx->xbridge->ofproto;
2407         entry->u.normal.flow = xmemdup(flow, sizeof *flow);
2408         entry->u.normal.vlan = vlan;
2409     }
2410
2411     /* Determine output bundle. */
2412     if (mcast_snooping_enabled(ctx->xbridge->ms)
2413         && !eth_addr_is_broadcast(flow->dl_dst)
2414         && eth_addr_is_multicast(flow->dl_dst)
2415         && is_ip_any(flow)) {
2416         struct mcast_snooping *ms = ctx->xbridge->ms;
2417         struct mcast_group *grp = NULL;
2418
2419         if (is_igmp(flow)) {
2420             if (mcast_snooping_is_membership(flow->tp_src) ||
2421                 mcast_snooping_is_query(flow->tp_src)) {
2422                 if (ctx->xin->may_learn) {
2423                     update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2424                                                 in_xbundle, ctx->xin->packet);
2425                 }
2426                 /*
2427                  * IGMP packets need to take the slow path, in order to be
2428                  * processed for mdb updates. That will prevent expires
2429                  * firing off even after hosts have sent reports.
2430                  */
2431                 ctx->xout->slow |= SLOW_ACTION;
2432             }
2433
2434             if (mcast_snooping_is_membership(flow->tp_src)) {
2435                 ovs_rwlock_rdlock(&ms->rwlock);
2436                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2437                 /* RFC4541: section 2.1.1, item 1: A snooping switch should
2438                  * forward IGMP Membership Reports only to those ports where
2439                  * multicast routers are attached.  Alternatively stated: a
2440                  * snooping switch should not forward IGMP Membership Reports
2441                  * to ports on which only hosts are attached.
2442                  * An administrative control may be provided to override this
2443                  * restriction, allowing the report messages to be flooded to
2444                  * other ports. */
2445                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2446                 ovs_rwlock_unlock(&ms->rwlock);
2447             } else {
2448                 xlate_report(ctx, "multicast traffic, flooding");
2449                 xlate_normal_flood(ctx, in_xbundle, vlan);
2450             }
2451             return;
2452         } else if (is_mld(flow)) {
2453             ctx->xout->slow |= SLOW_ACTION;
2454             if (ctx->xin->may_learn) {
2455                 update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2456                                             in_xbundle, ctx->xin->packet);
2457             }
2458             if (is_mld_report(flow)) {
2459                 ovs_rwlock_rdlock(&ms->rwlock);
2460                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2461                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2462                 ovs_rwlock_unlock(&ms->rwlock);
2463             } else {
2464                 xlate_report(ctx, "MLD query, flooding");
2465                 xlate_normal_flood(ctx, in_xbundle, vlan);
2466             }
2467         } else {
2468             if ((flow->dl_type == htons(ETH_TYPE_IP)
2469                  && ip_is_local_multicast(flow->nw_dst))
2470                 || (flow->dl_type == htons(ETH_TYPE_IPV6)
2471                     && ipv6_is_all_hosts(&flow->ipv6_dst))) {
2472                 /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
2473                  * address in the 224.0.0.x range which are not IGMP must
2474                  * be forwarded on all ports */
2475                 xlate_report(ctx, "RFC4541: section 2.1.2, item 2, flooding");
2476                 xlate_normal_flood(ctx, in_xbundle, vlan);
2477                 return;
2478             }
2479         }
2480
2481         /* forwarding to group base ports */
2482         ovs_rwlock_rdlock(&ms->rwlock);
2483         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2484             grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
2485         } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2486             grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
2487         }
2488         if (grp) {
2489             xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
2490             xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2491             xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2492         } else {
2493             if (mcast_snooping_flood_unreg(ms)) {
2494                 xlate_report(ctx, "unregistered multicast, flooding");
2495                 xlate_normal_flood(ctx, in_xbundle, vlan);
2496             } else {
2497                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2498                 xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2499             }
2500         }
2501         ovs_rwlock_unlock(&ms->rwlock);
2502     } else {
2503         ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
2504         mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
2505         mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
2506         ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
2507
2508         if (mac_port) {
2509             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2510             struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
2511             if (mac_xbundle && mac_xbundle != in_xbundle) {
2512                 xlate_report(ctx, "forwarding to learned port");
2513                 output_normal(ctx, mac_xbundle, vlan);
2514             } else if (!mac_xbundle) {
2515                 xlate_report(ctx, "learned port is unknown, dropping");
2516             } else {
2517                 xlate_report(ctx, "learned port is input port, dropping");
2518             }
2519         } else {
2520             xlate_report(ctx, "no learned MAC for destination, flooding");
2521             xlate_normal_flood(ctx, in_xbundle, vlan);
2522         }
2523     }
2524 }
2525
2526 /* Appends a "sample" action for sFlow or IPFIX to 'ctx->odp_actions'.  The
2527  * 'probability' is the number of packets out of UINT32_MAX to sample.  The
2528  * 'cookie' (of length 'cookie_size' bytes) is passed back in the callback for
2529  * each sampled packet.  'tunnel_out_port', if not ODPP_NONE, is added as the
2530  * OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute.  If 'include_actions', an
2531  * OVS_USERSPACE_ATTR_ACTIONS attribute is added.
2532  */
2533 static size_t
2534 compose_sample_action(struct xlate_ctx *ctx,
2535                       const uint32_t probability,
2536                       const union user_action_cookie *cookie,
2537                       const size_t cookie_size,
2538                       const odp_port_t tunnel_out_port,
2539                       bool include_actions)
2540 {
2541     size_t sample_offset = nl_msg_start_nested(ctx->odp_actions,
2542                                                OVS_ACTION_ATTR_SAMPLE);
2543
2544     nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
2545
2546     size_t actions_offset = nl_msg_start_nested(ctx->odp_actions,
2547                                                 OVS_SAMPLE_ATTR_ACTIONS);
2548
2549     odp_port_t odp_port = ofp_port_to_odp_port(
2550         ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
2551     uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
2552                                      flow_hash_5tuple(&ctx->xin->flow, 0));
2553     int cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
2554                                                  tunnel_out_port,
2555                                                  include_actions,
2556                                                  ctx->odp_actions);
2557
2558     nl_msg_end_nested(ctx->odp_actions, actions_offset);
2559     nl_msg_end_nested(ctx->odp_actions, sample_offset);
2560
2561     return cookie_offset;
2562 }
2563
2564 /* If sFLow is not enabled, returns 0 without doing anything.
2565  *
2566  * If sFlow is enabled, appends a template "sample" action to the ODP actions
2567  * in 'ctx'.  This action is a template because some of the information needed
2568  * to fill it out is not available until flow translation is complete.  In this
2569  * case, this functions returns an offset, which is always nonzero, to pass
2570  * later to fix_sflow_action() to fill in the rest of the template. */
2571 static size_t
2572 compose_sflow_action(struct xlate_ctx *ctx)
2573 {
2574     struct dpif_sflow *sflow = ctx->xbridge->sflow;
2575     if (!sflow || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2576         return 0;
2577     }
2578
2579     union user_action_cookie cookie = { .type = USER_ACTION_COOKIE_SFLOW };
2580     return compose_sample_action(ctx, dpif_sflow_get_probability(sflow),
2581                                  &cookie, sizeof cookie.sflow, ODPP_NONE,
2582                                  true);
2583 }
2584
2585 /* If IPFIX is enabled, this appends a "sample" action to implement IPFIX to
2586  * 'ctx->odp_actions'. */
2587 static void
2588 compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
2589 {
2590     struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
2591     odp_port_t tunnel_out_port = ODPP_NONE;
2592
2593     if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2594         return;
2595     }
2596
2597     /* For input case, output_odp_port is ODPP_NONE, which is an invalid port
2598      * number. */
2599     if (output_odp_port == ODPP_NONE &&
2600         !dpif_ipfix_get_bridge_exporter_input_sampling(ipfix)) {
2601         return;
2602     }
2603
2604     /* For output case, output_odp_port is valid*/
2605     if (output_odp_port != ODPP_NONE) {
2606         if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
2607             return;
2608         }
2609         /* If tunnel sampling is enabled, put an additional option attribute:
2610          * OVS_USERSPACE_ATTR_TUNNEL_OUT_PORT
2611          */
2612         if (dpif_ipfix_get_bridge_exporter_tunnel_sampling(ipfix) &&
2613             dpif_ipfix_get_tunnel_port(ipfix, output_odp_port) ) {
2614            tunnel_out_port = output_odp_port;
2615         }
2616     }
2617
2618     union user_action_cookie cookie = {
2619         .ipfix = {
2620             .type = USER_ACTION_COOKIE_IPFIX,
2621             .output_odp_port = output_odp_port,
2622         }
2623     };
2624     compose_sample_action(ctx,
2625                           dpif_ipfix_get_bridge_exporter_probability(ipfix),
2626                           &cookie, sizeof cookie.ipfix, tunnel_out_port,
2627                           false);
2628 }
2629
2630 /* Fix "sample" action according to data collected while composing ODP actions,
2631  * as described in compose_sflow_action().
2632  *
2633  * 'user_cookie_offset' must be the offset returned by add_sflow_action(). */
2634 static void
2635 fix_sflow_action(struct xlate_ctx *ctx, unsigned int user_cookie_offset)
2636 {
2637     const struct flow *base = &ctx->base_flow;
2638     union user_action_cookie *cookie;
2639
2640     cookie = ofpbuf_at(ctx->odp_actions, user_cookie_offset,
2641                        sizeof cookie->sflow);
2642     ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
2643
2644     cookie->type = USER_ACTION_COOKIE_SFLOW;
2645     cookie->sflow.vlan_tci = base->vlan_tci;
2646
2647     /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
2648      * port information") for the interpretation of cookie->output. */
2649     switch (ctx->sflow_n_outputs) {
2650     case 0:
2651         /* 0x40000000 | 256 means "packet dropped for unknown reason". */
2652         cookie->sflow.output = 0x40000000 | 256;
2653         break;
2654
2655     case 1:
2656         cookie->sflow.output = dpif_sflow_odp_port_to_ifindex(
2657             ctx->xbridge->sflow, ctx->sflow_odp_port);
2658         if (cookie->sflow.output) {
2659             break;
2660         }
2661         /* Fall through. */
2662     default:
2663         /* 0x80000000 means "multiple output ports. */
2664         cookie->sflow.output = 0x80000000 | ctx->sflow_n_outputs;
2665         break;
2666     }
2667 }
2668
2669 static bool
2670 process_special(struct xlate_ctx *ctx, const struct xport *xport)
2671 {
2672     const struct flow *flow = &ctx->xin->flow;
2673     struct flow_wildcards *wc = ctx->wc;
2674     const struct xbridge *xbridge = ctx->xbridge;
2675     const struct dp_packet *packet = ctx->xin->packet;
2676     enum slow_path_reason slow;
2677
2678     if (!xport) {
2679         slow = 0;
2680     } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
2681         if (packet) {
2682             cfm_process_heartbeat(xport->cfm, packet);
2683         }
2684         slow = SLOW_CFM;
2685     } else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) {
2686         if (packet) {
2687             bfd_process_packet(xport->bfd, flow, packet);
2688             /* If POLL received, immediately sends FINAL back. */
2689             if (bfd_should_send_packet(xport->bfd)) {
2690                 ofproto_dpif_monitor_port_send_soon(xport->ofport);
2691             }
2692         }
2693         slow = SLOW_BFD;
2694     } else if (xport->xbundle && xport->xbundle->lacp
2695                && flow->dl_type == htons(ETH_TYPE_LACP)) {
2696         if (packet) {
2697             lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
2698         }
2699         slow = SLOW_LACP;
2700     } else if ((xbridge->stp || xbridge->rstp) &&
2701                stp_should_process_flow(flow, wc)) {
2702         if (packet) {
2703             xbridge->stp
2704                 ? stp_process_packet(xport, packet)
2705                 : rstp_process_packet(xport, packet);
2706         }
2707         slow = SLOW_STP;
2708     } else if (xport->lldp && lldp_should_process_flow(xport->lldp, flow)) {
2709         if (packet) {
2710             lldp_process_packet(xport->lldp, packet);
2711         }
2712         slow = SLOW_LLDP;
2713     } else {
2714         slow = 0;
2715     }
2716
2717     if (slow) {
2718         ctx->xout->slow |= slow;
2719         return true;
2720     } else {
2721         return false;
2722     }
2723 }
2724
2725 static int
2726 tnl_route_lookup_flow(const struct flow *oflow,
2727                       struct in6_addr *ip, struct xport **out_port)
2728 {
2729     char out_dev[IFNAMSIZ];
2730     struct xbridge *xbridge;
2731     struct xlate_cfg *xcfg;
2732     struct in6_addr gw;
2733     struct in6_addr dst;
2734
2735     dst = flow_tnl_dst(&oflow->tunnel);
2736     if (!ovs_router_lookup(&dst, out_dev, &gw)) {
2737         return -ENOENT;
2738     }
2739
2740     if (ipv6_addr_is_set(&gw) &&
2741         (!IN6_IS_ADDR_V4MAPPED(&gw) || in6_addr_get_mapped_ipv4(&gw))) {
2742         *ip = gw;
2743     } else {
2744         *ip = dst;
2745     }
2746
2747     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2748     ovs_assert(xcfg);
2749
2750     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
2751         if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
2752             struct xport *port;
2753
2754             HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
2755                 if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
2756                     *out_port = port;
2757                     return 0;
2758                 }
2759             }
2760         }
2761     }
2762     return -ENOENT;
2763 }
2764
2765 static int
2766 compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev,
2767                     struct dp_packet *packet)
2768 {
2769     struct xbridge *xbridge = out_dev->xbridge;
2770     struct ofpact_output output;
2771     struct flow flow;
2772
2773     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
2774     flow_extract(packet, &flow);
2775     flow.in_port.ofp_port = out_dev->ofp_port;
2776     output.port = OFPP_TABLE;
2777     output.max_len = 0;
2778
2779     return ofproto_dpif_execute_actions__(xbridge->ofproto, &flow, NULL,
2780                                           &output.ofpact, sizeof output,
2781                                           ctx->recurse, ctx->resubmits, packet);
2782 }
2783
2784 static void
2785 tnl_send_nd_request(struct xlate_ctx *ctx, const struct xport *out_dev,
2786                      const struct eth_addr eth_src,
2787                      struct in6_addr * ipv6_src, struct in6_addr * ipv6_dst)
2788 {
2789     struct dp_packet packet;
2790
2791     dp_packet_init(&packet, 0);
2792     compose_nd(&packet, eth_src, ipv6_src, ipv6_dst);
2793     compose_table_xlate(ctx, out_dev, &packet);
2794     dp_packet_uninit(&packet);
2795 }
2796
2797 static void
2798 tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev,
2799                      const struct eth_addr eth_src,
2800                      ovs_be32 ip_src, ovs_be32 ip_dst)
2801 {
2802     struct dp_packet packet;
2803
2804     dp_packet_init(&packet, 0);
2805     compose_arp(&packet, ARP_OP_REQUEST,
2806                 eth_src, eth_addr_zero, true, ip_src, ip_dst);
2807
2808     compose_table_xlate(ctx, out_dev, &packet);
2809     dp_packet_uninit(&packet);
2810 }
2811
2812 static int
2813 build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
2814                   const struct flow *flow, odp_port_t tunnel_odp_port)
2815 {
2816     struct ovs_action_push_tnl tnl_push_data;
2817     struct xport *out_dev = NULL;
2818     ovs_be32 s_ip = 0, d_ip = 0;
2819     struct in6_addr s_ip6 = in6addr_any;
2820     struct in6_addr d_ip6 = in6addr_any;
2821     struct eth_addr smac;
2822     struct eth_addr dmac;
2823     int err;
2824     char buf_sip6[INET6_ADDRSTRLEN];
2825     char buf_dip6[INET6_ADDRSTRLEN];
2826
2827     err = tnl_route_lookup_flow(flow, &d_ip6, &out_dev);
2828     if (err) {
2829         xlate_report(ctx, "native tunnel routing failed");
2830         return err;
2831     }
2832
2833     xlate_report(ctx, "tunneling to %s via %s",
2834                  ipv6_string_mapped(buf_dip6, &d_ip6),
2835                  netdev_get_name(out_dev->netdev));
2836
2837     /* Use mac addr of bridge port of the peer. */
2838     err = netdev_get_etheraddr(out_dev->netdev, &smac);
2839     if (err) {
2840         xlate_report(ctx, "tunnel output device lacks Ethernet address");
2841         return err;
2842     }
2843
2844     d_ip = in6_addr_get_mapped_ipv4(&d_ip6);
2845     if (d_ip) {
2846         err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
2847         if (err) {
2848             xlate_report(ctx, "tunnel output device lacks IPv4 address");
2849             return err;
2850         }
2851         in6_addr_set_mapped_ipv4(&s_ip6, s_ip);
2852     } else {
2853         err = netdev_get_in6(out_dev->netdev, &s_ip6);
2854         if (err) {
2855             xlate_report(ctx, "tunnel output device lacks IPv6 address");
2856             return err;
2857         }
2858     }
2859
2860     err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
2861     if (err) {
2862         xlate_report(ctx, "neighbor cache miss for %s on bridge %s, "
2863                      "sending %s request",
2864                      buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
2865         if (d_ip) {
2866             tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip);
2867         } else {
2868             tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6);
2869         }
2870         return err;
2871     }
2872
2873     if (ctx->xin->xcache) {
2874         struct xc_entry *entry;
2875
2876         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_NEIGH);
2877         ovs_strlcpy(entry->u.tnl_neigh_cache.br_name, out_dev->xbridge->name,
2878                     sizeof entry->u.tnl_neigh_cache.br_name);
2879         entry->u.tnl_neigh_cache.d_ipv6 = d_ip6;
2880     }
2881
2882     xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" %s"
2883                  " to "ETH_ADDR_FMT" %s",
2884                  ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6),
2885                  ETH_ADDR_ARGS(dmac), buf_dip6);
2886
2887     err = tnl_port_build_header(xport->ofport, flow,
2888                                 dmac, smac, &s_ip6, &tnl_push_data);
2889     if (err) {
2890         return err;
2891     }
2892     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
2893     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
2894     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
2895     return 0;
2896 }
2897
2898 static void
2899 xlate_commit_actions(struct xlate_ctx *ctx)
2900 {
2901     bool use_masked = ctx->xbridge->support.masked_set_action;
2902
2903     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
2904                                           ctx->odp_actions, ctx->wc,
2905                                           use_masked);
2906 }
2907
2908 static void
2909 clear_conntrack(struct flow *flow)
2910 {
2911     flow->ct_state = 0;
2912     flow->ct_zone = 0;
2913     flow->ct_mark = 0;
2914     memset(&flow->ct_label, 0, sizeof flow->ct_label);
2915 }
2916
2917 static void
2918 compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
2919                         const struct xlate_bond_recirc *xr, bool check_stp)
2920 {
2921     const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
2922     struct flow_wildcards *wc = ctx->wc;
2923     struct flow *flow = &ctx->xin->flow;
2924     struct flow_tnl flow_tnl;
2925     ovs_be16 flow_vlan_tci;
2926     uint32_t flow_pkt_mark;
2927     uint8_t flow_nw_tos;
2928     odp_port_t out_port, odp_port;
2929     bool tnl_push_pop_send = false;
2930     uint8_t dscp;
2931
2932     /* If 'struct flow' gets additional metadata, we'll need to zero it out
2933      * before traversing a patch port. */
2934     BUILD_ASSERT_DECL(FLOW_WC_SEQ == 35);
2935     memset(&flow_tnl, 0, sizeof flow_tnl);
2936
2937     if (!xport) {
2938         xlate_report(ctx, "Nonexistent output port");
2939         return;
2940     } else if (xport->config & OFPUTIL_PC_NO_FWD) {
2941         xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
2942         return;
2943     } else if (check_stp) {
2944         if (is_stp(&ctx->base_flow)) {
2945             if (!xport_stp_should_forward_bpdu(xport) &&
2946                 !xport_rstp_should_manage_bpdu(xport)) {
2947                 if (ctx->xbridge->stp != NULL) {
2948                     xlate_report(ctx, "STP not in listening state, "
2949                             "skipping bpdu output");
2950                 } else if (ctx->xbridge->rstp != NULL) {
2951                     xlate_report(ctx, "RSTP not managing BPDU in this state, "
2952                             "skipping bpdu output");
2953                 }
2954                 return;
2955             }
2956         } else if (!xport_stp_forward_state(xport) ||
2957                    !xport_rstp_forward_state(xport)) {
2958             if (ctx->xbridge->stp != NULL) {
2959                 xlate_report(ctx, "STP not in forwarding state, "
2960                         "skipping output");
2961             } else if (ctx->xbridge->rstp != NULL) {
2962                 xlate_report(ctx, "RSTP not in forwarding state, "
2963                         "skipping output");
2964             }
2965             return;
2966         }
2967     }
2968
2969     if (xport->peer) {
2970         const struct xport *peer = xport->peer;
2971         struct flow old_flow = ctx->xin->flow;
2972         bool old_conntrack = ctx->conntracked;
2973         bool old_was_mpls = ctx->was_mpls;
2974         cls_version_t old_version = ctx->tables_version;
2975         struct ofpbuf old_stack = ctx->stack;
2976         union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
2977         struct ofpbuf old_action_set = ctx->action_set;
2978         uint64_t actset_stub[1024 / 8];
2979
2980         ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
2981         ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
2982         ctx->xbridge = peer->xbridge;
2983         flow->in_port.ofp_port = peer->ofp_port;
2984         flow->metadata = htonll(0);
2985         memset(&flow->tunnel, 0, sizeof flow->tunnel);
2986         memset(flow->regs, 0, sizeof flow->regs);
2987         flow->actset_output = OFPP_UNSET;
2988         ctx->conntracked = false;
2989         clear_conntrack(flow);
2990
2991         /* The bridge is now known so obtain its table version. */
2992         ctx->tables_version
2993             = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
2994
2995         if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
2996             if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
2997                 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
2998                 if (ctx->action_set.size) {
2999                     /* Translate action set only if not dropping the packet and
3000                      * not recirculating. */
3001                     if (!exit_recirculates(ctx)) {
3002                         xlate_action_set(ctx);
3003                     }
3004                 }
3005                 /* Check if need to recirculate. */
3006                 if (exit_recirculates(ctx)) {
3007                     compose_recirculate_action(ctx);
3008                 }
3009             } else {
3010                 /* Forwarding is disabled by STP and RSTP.  Let OFPP_NORMAL and
3011                  * the learning action look at the packet, then drop it. */
3012                 struct flow old_base_flow = ctx->base_flow;
3013                 size_t old_size = ctx->odp_actions->size;
3014                 mirror_mask_t old_mirrors = ctx->mirrors;
3015
3016                 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
3017                 ctx->mirrors = old_mirrors;
3018                 ctx->base_flow = old_base_flow;
3019                 ctx->odp_actions->size = old_size;
3020
3021                 /* Undo changes that may have been done for recirculation. */
3022                 ctx_cancel_recirculation(ctx);
3023             }
3024         }
3025
3026         ctx->xin->flow = old_flow;
3027         ctx->xbridge = xport->xbridge;
3028         ofpbuf_uninit(&ctx->action_set);
3029         ctx->action_set = old_action_set;
3030         ofpbuf_uninit(&ctx->stack);
3031         ctx->stack = old_stack;
3032
3033         /* Restore calling bridge's lookup version. */
3034         ctx->tables_version = old_version;
3035
3036         /* The peer bridge popping MPLS should have no effect on the original
3037          * bridge. */
3038         ctx->was_mpls = old_was_mpls;
3039
3040         /* The peer bridge's conntrack execution should have no effect on the
3041          * original bridge. */
3042         ctx->conntracked = old_conntrack;
3043
3044         /* The fact that the peer bridge exits (for any reason) does not mean
3045          * that the original bridge should exit.  Specifically, if the peer
3046          * bridge recirculates (which typically modifies the packet), the
3047          * original bridge must continue processing with the original, not the
3048          * recirculated packet! */
3049         ctx->exit = false;
3050
3051         /* Peer bridge errors do not propagate back. */
3052         ctx->error = XLATE_OK;
3053
3054         if (ctx->xin->resubmit_stats) {
3055             netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
3056             netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats);
3057             if (peer->bfd) {
3058                 bfd_account_rx(peer->bfd, ctx->xin->resubmit_stats);
3059             }
3060         }
3061         if (ctx->xin->xcache) {
3062             struct xc_entry *entry;
3063
3064             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
3065             entry->u.dev.tx = netdev_ref(xport->netdev);
3066             entry->u.dev.rx = netdev_ref(peer->netdev);
3067             entry->u.dev.bfd = bfd_ref(peer->bfd);
3068         }
3069         return;
3070     }
3071
3072     flow_vlan_tci = flow->vlan_tci;
3073     flow_pkt_mark = flow->pkt_mark;
3074     flow_nw_tos = flow->nw_tos;
3075
3076     if (count_skb_priorities(xport)) {
3077         memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
3078         if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
3079             wc->masks.nw_tos |= IP_DSCP_MASK;
3080             flow->nw_tos &= ~IP_DSCP_MASK;
3081             flow->nw_tos |= dscp;
3082         }
3083     }
3084
3085     if (xport->is_tunnel) {
3086         struct in6_addr dst;
3087          /* Save tunnel metadata so that changes made due to
3088           * the Logical (tunnel) Port are not visible for any further
3089           * matches, while explicit set actions on tunnel metadata are.
3090           */
3091         flow_tnl = flow->tunnel;
3092         odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
3093         if (odp_port == ODPP_NONE) {
3094             xlate_report(ctx, "Tunneling decided against output");
3095             goto out; /* restore flow_nw_tos */
3096         }
3097         dst = flow_tnl_dst(&flow->tunnel);
3098         if (ipv6_addr_equals(&dst, &ctx->orig_tunnel_ipv6_dst)) {
3099             xlate_report(ctx, "Not tunneling to our own address");
3100             goto out; /* restore flow_nw_tos */
3101         }
3102         if (ctx->xin->resubmit_stats) {
3103             netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
3104         }
3105         if (ctx->xin->xcache) {
3106             struct xc_entry *entry;
3107
3108             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
3109             entry->u.dev.tx = netdev_ref(xport->netdev);
3110         }
3111         out_port = odp_port;
3112         if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
3113             xlate_report(ctx, "output to native tunnel");
3114             tnl_push_pop_send = true;
3115         } else {
3116             xlate_report(ctx, "output to kernel tunnel");
3117             commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions);
3118             flow->tunnel = flow_tnl; /* Restore tunnel metadata */
3119         }
3120     } else {
3121         odp_port = xport->odp_port;
3122         out_port = odp_port;
3123         if (ofproto_has_vlan_splinters(ctx->xbridge->ofproto)) {
3124             ofp_port_t vlandev_port;
3125
3126             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
3127             vlandev_port = vsp_realdev_to_vlandev(ctx->xbridge->ofproto,
3128                                                   ofp_port, flow->vlan_tci);
3129             if (vlandev_port != ofp_port) {
3130                 out_port = ofp_port_to_odp_port(ctx->xbridge, vlandev_port);
3131                 flow->vlan_tci = htons(0);
3132             }
3133         }
3134     }
3135
3136     if (out_port != ODPP_NONE) {
3137         xlate_commit_actions(ctx);
3138
3139         if (xr) {
3140             struct ovs_action_hash *act_hash;
3141
3142             /* Hash action. */
3143             act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
3144                                                 OVS_ACTION_ATTR_HASH,
3145                                                 sizeof *act_hash);
3146             act_hash->hash_alg = xr->hash_alg;
3147             act_hash->hash_basis = xr->hash_basis;
3148
3149             /* Recirc action. */
3150             nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
3151                            xr->recirc_id);
3152         } else {
3153
3154             if (tnl_push_pop_send) {
3155                 build_tunnel_send(ctx, xport, flow, odp_port);
3156                 flow->tunnel = flow_tnl; /* Restore tunnel metadata */
3157             } else {
3158                 odp_port_t odp_tnl_port = ODPP_NONE;
3159
3160                 /* XXX: Write better Filter for tunnel port. We can use inport
3161                 * int tunnel-port flow to avoid these checks completely. */
3162                 if (ofp_port == OFPP_LOCAL &&
3163                     ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
3164
3165                     odp_tnl_port = tnl_port_map_lookup(flow, wc);
3166                 }
3167
3168                 if (odp_tnl_port != ODPP_NONE) {
3169                     nl_msg_put_odp_port(ctx->odp_actions,
3170                                         OVS_ACTION_ATTR_TUNNEL_POP,
3171                                         odp_tnl_port);
3172                 } else {
3173                     /* Tunnel push-pop action is not compatible with
3174                      * IPFIX action. */
3175                     compose_ipfix_action(ctx, out_port);
3176                     nl_msg_put_odp_port(ctx->odp_actions,
3177                                         OVS_ACTION_ATTR_OUTPUT,
3178                                         out_port);
3179                }
3180            }
3181         }
3182
3183         ctx->sflow_odp_port = odp_port;
3184         ctx->sflow_n_outputs++;
3185         ctx->nf_output_iface = ofp_port;
3186     }
3187
3188     if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) {
3189         mirror_packet(ctx, xport->xbundle,
3190                       xbundle_mirror_dst(xport->xbundle->xbridge,
3191                                          xport->xbundle));
3192     }
3193
3194  out:
3195     /* Restore flow */
3196     flow->vlan_tci = flow_vlan_tci;
3197     flow->pkt_mark = flow_pkt_mark;
3198     flow->nw_tos = flow_nw_tos;
3199 }
3200
3201 static void
3202 compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port,
3203                       const struct xlate_bond_recirc *xr)
3204 {
3205     compose_output_action__(ctx, ofp_port, xr, true);
3206 }
3207
3208 static void
3209 xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule)
3210 {
3211     struct rule_dpif *old_rule = ctx->rule;
3212     ovs_be64 old_cookie = ctx->rule_cookie;
3213     const struct rule_actions *actions;
3214
3215     if (ctx->xin->resubmit_stats) {
3216         rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
3217     }
3218
3219     ctx->resubmits++;
3220     ctx->recurse++;
3221     ctx->rule = rule;
3222     ctx->rule_cookie = rule_dpif_get_flow_cookie(rule);
3223     actions = rule_dpif_get_actions(rule);
3224     do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx);
3225     ctx->rule_cookie = old_cookie;
3226     ctx->rule = old_rule;
3227     ctx->recurse--;
3228 }
3229
3230 static bool
3231 xlate_resubmit_resource_check(struct xlate_ctx *ctx)
3232 {
3233     if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) {
3234         XLATE_REPORT_ERROR(ctx, "resubmit actions recursed over %d times",
3235                            MAX_RESUBMIT_RECURSION);
3236         ctx->error = XLATE_RECURSION_TOO_DEEP;
3237     } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) {
3238         XLATE_REPORT_ERROR(ctx, "over %d resubmit actions", MAX_RESUBMITS);
3239         ctx->error = XLATE_TOO_MANY_RESUBMITS;
3240     } else if (ctx->odp_actions->size > UINT16_MAX) {
3241         XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of actions");
3242         /* NOT an error, as we'll be slow-pathing the flow in this case? */
3243         ctx->exit = true; /* XXX: translation still terminated! */
3244     } else if (ctx->stack.size >= 65536) {
3245         XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of stack");
3246         ctx->error = XLATE_STACK_TOO_DEEP;
3247     } else {
3248         return true;
3249     }
3250
3251     return false;
3252 }
3253
3254 static void
3255 xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
3256                    bool may_packet_in, bool honor_table_miss)
3257 {
3258     /* Check if we need to recirculate before matching in a table. */
3259     if (ctx->was_mpls) {
3260         ctx_trigger_recirculation(ctx);
3261         return;
3262     }
3263     if (xlate_resubmit_resource_check(ctx)) {
3264         uint8_t old_table_id = ctx->table_id;
3265         struct rule_dpif *rule;
3266
3267         ctx->table_id = table_id;
3268
3269         rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
3270                                            ctx->tables_version,
3271                                            &ctx->xin->flow, ctx->xin->wc,
3272                                            ctx->xin->resubmit_stats,
3273                                            &ctx->table_id, in_port,
3274                                            may_packet_in, honor_table_miss);
3275
3276         if (OVS_UNLIKELY(ctx->xin->resubmit_hook)) {
3277             ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse + 1);
3278         }
3279
3280         if (rule) {
3281             /* Fill in the cache entry here instead of xlate_recursively
3282              * to make the reference counting more explicit.  We take a
3283              * reference in the lookups above if we are going to cache the
3284              * rule. */
3285             if (ctx->xin->xcache) {
3286                 struct xc_entry *entry;
3287
3288                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_RULE);
3289                 entry->u.rule = rule;
3290                 rule_dpif_ref(rule);
3291             }
3292             xlate_recursively(ctx, rule);
3293         }
3294
3295         ctx->table_id = old_table_id;
3296         return;
3297     }
3298 }
3299
3300 static void
3301 xlate_group_stats(struct xlate_ctx *ctx, struct group_dpif *group,
3302                   struct ofputil_bucket *bucket)
3303 {
3304     if (ctx->xin->resubmit_stats) {
3305         group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
3306     }
3307     if (ctx->xin->xcache) {
3308         struct xc_entry *entry;
3309
3310         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
3311         entry->u.group.group = group_dpif_ref(group);
3312         entry->u.group.bucket = bucket;
3313     }
3314 }
3315
3316 static void
3317 xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
3318 {
3319     uint64_t action_list_stub[1024 / 8];
3320     struct ofpbuf action_list, action_set;
3321     struct flow old_flow = ctx->xin->flow;
3322     bool old_was_mpls = ctx->was_mpls;
3323
3324     ofpbuf_use_const(&action_set, bucket->ofpacts, bucket->ofpacts_len);
3325     ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
3326
3327     ofpacts_execute_action_set(&action_list, &action_set);
3328     ctx->recurse++;
3329     do_xlate_actions(action_list.data, action_list.size, ctx);
3330     ctx->recurse--;
3331
3332     ofpbuf_uninit(&action_set);
3333     ofpbuf_uninit(&action_list);
3334
3335     /* Check if need to recirculate. */
3336     if (exit_recirculates(ctx)) {
3337         compose_recirculate_action(ctx);
3338     }
3339
3340     /* Roll back flow to previous state.
3341      * This is equivalent to cloning the packet for each bucket.
3342      *
3343      * As a side effect any subsequently applied actions will
3344      * also effectively be applied to a clone of the packet taken
3345      * just before applying the all or indirect group.
3346      *
3347      * Note that group buckets are action sets, hence they cannot modify the
3348      * main action set.  Also any stack actions are ignored when executing an
3349      * action set, so group buckets cannot change the stack either.
3350      * However, we do allow resubmit actions in group buckets, which could
3351      * break the above assumptions.  It is up to the controller to not mess up
3352      * with the action_set and stack in the tables resubmitted to from
3353      * group buckets. */
3354     ctx->xin->flow = old_flow;
3355
3356     /* The group bucket popping MPLS should have no effect after bucket
3357      * execution. */
3358     ctx->was_mpls = old_was_mpls;
3359
3360     /* The fact that the group bucket exits (for any reason) does not mean that
3361      * the translation after the group action should exit.  Specifically, if
3362      * the group bucket recirculates (which typically modifies the packet), the
3363      * actions after the group action must continue processing with the
3364      * original, not the recirculated packet! */
3365     ctx->exit = false;
3366 }
3367
3368 static void
3369 xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
3370 {
3371     struct ofputil_bucket *bucket;
3372     const struct ovs_list *buckets;
3373
3374     group_dpif_get_buckets(group, &buckets);
3375
3376     LIST_FOR_EACH (bucket, list_node, buckets) {
3377         xlate_group_bucket(ctx, bucket);
3378     }
3379     xlate_group_stats(ctx, group, NULL);
3380 }
3381
3382 static void
3383 xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
3384 {
3385     struct ofputil_bucket *bucket;
3386
3387     bucket = group_first_live_bucket(ctx, group, 0);
3388     if (bucket) {
3389         xlate_group_bucket(ctx, bucket);
3390         xlate_group_stats(ctx, group, bucket);
3391     }
3392 }
3393
3394 static void
3395 xlate_default_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3396 {
3397     struct flow_wildcards *wc = ctx->wc;
3398     struct ofputil_bucket *bucket;
3399     uint32_t basis;
3400
3401     basis = flow_hash_symmetric_l4(&ctx->xin->flow, 0);
3402     flow_mask_hash_fields(&ctx->xin->flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
3403     bucket = group_best_live_bucket(ctx, group, basis);
3404     if (bucket) {
3405         xlate_group_bucket(ctx, bucket);
3406         xlate_group_stats(ctx, group, bucket);
3407     }
3408 }
3409
3410 static void
3411 xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3412 {
3413     struct mf_bitmap hash_fields = MF_BITMAP_INITIALIZER;
3414     const struct field_array *fields;
3415     struct ofputil_bucket *bucket;
3416     uint32_t basis;
3417     int i;
3418
3419     fields = group_dpif_get_fields(group);
3420     basis = hash_uint64(group_dpif_get_selection_method_param(group));
3421
3422     /* Determine which fields to hash */
3423     for (i = 0; i < MFF_N_IDS; i++) {
3424         if (bitmap_is_set(fields->used.bm, i)) {
3425             const struct mf_field *mf;
3426
3427             /* If the field is already present in 'hash_fields' then
3428              * this loop has already checked that it and its pre-requisites
3429              * are present in the flow and its pre-requisites have
3430              * already been added to 'hash_fields'. There is nothing more
3431              * to do here and as an optimisation the loop can continue. */
3432             if (bitmap_is_set(hash_fields.bm, i)) {
3433                 continue;
3434             }
3435
3436             mf = mf_from_id(i);
3437
3438             /* Only hash a field if it and its pre-requisites are present
3439              * in the flow. */
3440             if (!mf_are_prereqs_ok(mf, &ctx->xin->flow)) {
3441                 continue;
3442             }
3443
3444             /* Hash both the field and its pre-requisites */
3445             mf_bitmap_set_field_and_prereqs(mf, &hash_fields);
3446         }
3447     }
3448
3449     /* Hash the fields */
3450     for (i = 0; i < MFF_N_IDS; i++) {
3451         if (bitmap_is_set(hash_fields.bm, i)) {
3452             const struct mf_field *mf = mf_from_id(i);
3453             union mf_value value;
3454             int j;
3455
3456             mf_get_value(mf, &ctx->xin->flow, &value);
3457             /* This seems inefficient but so does apply_mask() */
3458             for (j = 0; j < mf->n_bytes; j++) {
3459                 ((uint8_t *) &value)[j] &= ((uint8_t *) &fields->value[i])[j];
3460             }
3461             basis = hash_bytes(&value, mf->n_bytes, basis);
3462
3463             /* For tunnels, hash in whether the field is present. */
3464             if (mf_is_tun_metadata(mf)) {
3465                 basis = hash_boolean(mf_is_set(mf, &ctx->xin->flow), basis);
3466             }
3467
3468             mf_mask_field(mf, &ctx->wc->masks);
3469         }
3470     }
3471
3472     bucket = group_best_live_bucket(ctx, group, basis);
3473     if (bucket) {
3474         xlate_group_bucket(ctx, bucket);
3475         xlate_group_stats(ctx, group, bucket);
3476     }
3477 }
3478
3479 static void
3480 xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3481 {
3482     const char *selection_method = group_dpif_get_selection_method(group);
3483
3484     if (selection_method[0] == '\0') {
3485         xlate_default_select_group(ctx, group);
3486     } else if (!strcasecmp("hash", selection_method)) {
3487         xlate_hash_fields_select_group(ctx, group);
3488     } else {
3489         /* Parsing of groups should ensure this never happens */
3490         OVS_NOT_REACHED();
3491     }
3492 }
3493
3494 static void
3495 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
3496 {
3497     bool was_in_group = ctx->in_group;
3498     ctx->in_group = true;
3499
3500     switch (group_dpif_get_type(group)) {
3501     case OFPGT11_ALL:
3502     case OFPGT11_INDIRECT:
3503         xlate_all_group(ctx, group);
3504         break;
3505     case OFPGT11_SELECT:
3506         xlate_select_group(ctx, group);
3507         break;
3508     case OFPGT11_FF:
3509         xlate_ff_group(ctx, group);
3510         break;
3511     default:
3512         OVS_NOT_REACHED();
3513     }
3514     group_dpif_unref(group);
3515
3516     ctx->in_group = was_in_group;
3517 }
3518
3519 static bool
3520 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
3521 {
3522     if (xlate_resubmit_resource_check(ctx)) {
3523         struct group_dpif *group;
3524         bool got_group;
3525
3526         got_group = group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group);
3527         if (got_group) {
3528             xlate_group_action__(ctx, group);
3529         } else {
3530             return true;
3531         }
3532     }
3533
3534     return false;
3535 }
3536
3537 static void
3538 xlate_ofpact_resubmit(struct xlate_ctx *ctx,
3539                       const struct ofpact_resubmit *resubmit)
3540 {
3541     ofp_port_t in_port;
3542     uint8_t table_id;
3543     bool may_packet_in = false;
3544     bool honor_table_miss = false;
3545
3546     if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
3547         /* Still allow missed packets to be sent to the controller
3548          * if resubmitting from an internal table. */
3549         may_packet_in = true;
3550         honor_table_miss = true;
3551     }
3552
3553     in_port = resubmit->in_port;
3554     if (in_port == OFPP_IN_PORT) {
3555         in_port = ctx->xin->flow.in_port.ofp_port;
3556     }
3557
3558     table_id = resubmit->table_id;
3559     if (table_id == 255) {
3560         table_id = ctx->table_id;
3561     }
3562
3563     xlate_table_action(ctx, in_port, table_id, may_packet_in,
3564                        honor_table_miss);
3565 }
3566
3567 static void
3568 flood_packets(struct xlate_ctx *ctx, bool all)
3569 {
3570     const struct xport *xport;
3571
3572     HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
3573         if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) {
3574             continue;
3575         }
3576
3577         if (all) {
3578             compose_output_action__(ctx, xport->ofp_port, NULL, false);
3579         } else if (!(xport->config & OFPUTIL_PC_NO_FLOOD)) {
3580             compose_output_action(ctx, xport->ofp_port, NULL);
3581         }
3582     }
3583
3584     ctx->nf_output_iface = NF_OUT_FLOOD;
3585 }
3586
3587 static void
3588 execute_controller_action(struct xlate_ctx *ctx, int len,
3589                           enum ofp_packet_in_reason reason,
3590                           uint16_t controller_id)
3591 {
3592     struct dp_packet *packet;
3593
3594     ctx->xout->slow |= SLOW_CONTROLLER;
3595     xlate_commit_actions(ctx);
3596     if (!ctx->xin->packet) {
3597         return;
3598     }
3599
3600     packet = dp_packet_clone(ctx->xin->packet);
3601
3602     odp_execute_actions(NULL, &packet, 1, false,
3603                         ctx->odp_actions->data, ctx->odp_actions->size, NULL);
3604
3605     /* A packet sent by an action in a table-miss rule is considered an
3606      * explicit table miss.  OpenFlow before 1.3 doesn't have that concept so
3607      * it will get translated back to OFPR_ACTION for those versions. */
3608     if (reason == OFPR_ACTION
3609         && ctx->rule && rule_dpif_is_table_miss(ctx->rule)) {
3610         reason = OFPR_EXPLICIT_MISS;
3611     }
3612
3613     size_t packet_len = dp_packet_size(packet);
3614
3615     struct ofproto_async_msg *am = xmalloc(sizeof *am);
3616     *am = (struct ofproto_async_msg) {
3617         .controller_id = controller_id,
3618         .oam = OAM_PACKET_IN,
3619         .pin = {
3620             .up = {
3621                 .packet = dp_packet_steal_data(packet),
3622                 .len = packet_len,
3623                 .reason = reason,
3624                 .table_id = ctx->table_id,
3625                 .cookie = ctx->rule_cookie,
3626             },
3627             .max_len = len,
3628         },
3629     };
3630     flow_get_metadata(&ctx->xin->flow, &am->pin.up.flow_metadata);
3631
3632     ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
3633     dp_packet_delete(packet);
3634 }
3635
3636 static void
3637 compose_recirculate_action__(struct xlate_ctx *ctx, uint8_t table)
3638 {
3639     struct recirc_metadata md;
3640     uint32_t id;
3641
3642     recirc_metadata_from_flow(&md, &ctx->xin->flow);
3643
3644     ovs_assert(ctx->recirc_action_offset >= 0);
3645
3646     struct recirc_state state = {
3647         .table_id = table,
3648         .ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
3649         .metadata = md,
3650         .stack = ctx->stack.data,
3651         .n_stack = ctx->stack.size / sizeof(union mf_subvalue),
3652         .mirrors = ctx->mirrors,
3653         .conntracked = ctx->conntracked,
3654         .ofpacts = ((struct ofpact *) ctx->action_set.data
3655                     + ctx->recirc_action_offset / sizeof(struct ofpact)),
3656         .ofpacts_len = ctx->action_set.size - ctx->recirc_action_offset,
3657         .action_set = ctx->action_set.data,
3658         .action_set_len = ctx->recirc_action_offset,
3659     };
3660
3661     /* Allocate a unique recirc id for the given metadata state in the
3662      * flow.  An existing id, with a new reference to the corresponding
3663      * recirculation context, will be returned if possible.
3664      * The life-cycle of this recirc id is managed by associating it
3665      * with the udpif key ('ukey') created for each new datapath flow. */
3666     id = recirc_alloc_id_ctx(&state);
3667     if (!id) {
3668         XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
3669         ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
3670         return;
3671     }
3672     recirc_refs_add(&ctx->xout->recircs, id);
3673
3674     nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
3675
3676     /* Undo changes done by recirculation. */
3677     ctx_cancel_recirculation(ctx);
3678 }
3679
3680 /* Called only when ctx->recirc_action_offset is set. */
3681 static void
3682 compose_recirculate_action(struct xlate_ctx *ctx)
3683 {
3684     xlate_commit_actions(ctx);
3685     compose_recirculate_action__(ctx, 0);
3686 }
3687
3688 /* Fork the pipeline here. The current packet will continue processing the
3689  * current action list. A clone of the current packet will recirculate, skip
3690  * the remainder of the current action list and asynchronously resume pipeline
3691  * processing in 'table' with the current metadata and action set. */
3692 static void
3693 compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
3694 {
3695     ctx->recirc_action_offset = ctx->action_set.size;
3696     compose_recirculate_action__(ctx, table);
3697 }
3698
3699 static void
3700 compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
3701 {
3702     struct flow *flow = &ctx->xin->flow;
3703     int n;
3704
3705     ovs_assert(eth_type_mpls(mpls->ethertype));
3706
3707     n = flow_count_mpls_labels(flow, ctx->wc);
3708     if (!n) {
3709         xlate_commit_actions(ctx);
3710     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3711         if (ctx->xin->packet != NULL) {
3712             XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
3713                          "MPLS push action can't be performed as it would "
3714                          "have more MPLS LSEs than the %d supported.",
3715                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3716         }
3717         ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
3718         return;
3719     }
3720
3721     flow_push_mpls(flow, n, mpls->ethertype, ctx->wc);
3722 }
3723
3724 static void
3725 compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
3726 {
3727     struct flow *flow = &ctx->xin->flow;
3728     int n = flow_count_mpls_labels(flow, ctx->wc);
3729
3730     if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
3731         if (ctx->xbridge->support.odp.recirc) {
3732             ctx->was_mpls = true;
3733         }
3734     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3735         if (ctx->xin->packet != NULL) {
3736             XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
3737                          "MPLS pop action can't be performed as it has "
3738                          "more MPLS LSEs than the %d supported.",
3739                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3740         }
3741         ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
3742         ofpbuf_clear(ctx->odp_actions);
3743     }
3744 }
3745
3746 static bool
3747 compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
3748 {
3749     struct flow *flow = &ctx->xin->flow;
3750
3751     if (!is_ip_any(flow)) {
3752         return false;
3753     }
3754
3755     ctx->wc->masks.nw_ttl = 0xff;
3756     if (flow->nw_ttl > 1) {
3757         flow->nw_ttl--;
3758         return false;
3759     } else {
3760         size_t i;
3761
3762         for (i = 0; i < ids->n_controllers; i++) {
3763             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
3764                                       ids->cnt_ids[i]);
3765         }
3766
3767         /* Stop processing for current table. */
3768         return true;
3769     }
3770 }
3771
3772 static void
3773 compose_set_mpls_label_action(struct xlate_ctx *ctx, ovs_be32 label)
3774 {
3775     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3776         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_LABEL_MASK);
3777         set_mpls_lse_label(&ctx->xin->flow.mpls_lse[0], label);
3778     }
3779 }
3780
3781 static void
3782 compose_set_mpls_tc_action(struct xlate_ctx *ctx, uint8_t tc)
3783 {
3784     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3785         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TC_MASK);
3786         set_mpls_lse_tc(&ctx->xin->flow.mpls_lse[0], tc);
3787     }
3788 }
3789
3790 static void
3791 compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
3792 {
3793     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3794         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3795         set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse[0], ttl);
3796     }
3797 }
3798
3799 static bool
3800 compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
3801 {
3802     struct flow *flow = &ctx->xin->flow;
3803
3804     if (eth_type_mpls(flow->dl_type)) {
3805         uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
3806
3807         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3808         if (ttl > 1) {
3809             ttl--;
3810             set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
3811             return false;
3812         } else {
3813             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);
3814         }
3815     }
3816
3817     /* Stop processing for current table. */
3818     return true;
3819 }
3820
3821 static void
3822 xlate_output_action(struct xlate_ctx *ctx,
3823                     ofp_port_t port, uint16_t max_len, bool may_packet_in)
3824 {
3825     ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
3826
3827     ctx->nf_output_iface = NF_OUT_DROP;
3828
3829     switch (port) {
3830     case OFPP_IN_PORT:
3831         compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
3832         break;
3833     case OFPP_TABLE:
3834         xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
3835                            0, may_packet_in, true);
3836         break;
3837     case OFPP_NORMAL:
3838         xlate_normal(ctx);
3839         break;
3840     case OFPP_FLOOD:
3841         flood_packets(ctx,  false);
3842         break;
3843     case OFPP_ALL:
3844         flood_packets(ctx, true);
3845         break;
3846     case OFPP_CONTROLLER:
3847         execute_controller_action(ctx, max_len,
3848                                   (ctx->in_group ? OFPR_GROUP
3849                                    : ctx->in_action_set ? OFPR_ACTION_SET
3850                                    : OFPR_ACTION),
3851                                   0);
3852         break;
3853     case OFPP_NONE:
3854         break;
3855     case OFPP_LOCAL:
3856     default:
3857         if (port != ctx->xin->flow.in_port.ofp_port) {
3858             compose_output_action(ctx, port, NULL);
3859         } else {
3860             xlate_report(ctx, "skipping output to input port");
3861         }
3862         break;
3863     }
3864
3865     if (prev_nf_output_iface == NF_OUT_FLOOD) {
3866         ctx->nf_output_iface = NF_OUT_FLOOD;
3867     } else if (ctx->nf_output_iface == NF_OUT_DROP) {
3868         ctx->nf_output_iface = prev_nf_output_iface;
3869     } else if (prev_nf_output_iface != NF_OUT_DROP &&
3870                ctx->nf_output_iface != NF_OUT_FLOOD) {
3871         ctx->nf_output_iface = NF_OUT_MULTI;
3872     }
3873 }
3874
3875 static void
3876 xlate_output_reg_action(struct xlate_ctx *ctx,
3877                         const struct ofpact_output_reg *or)
3878 {
3879     uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow);
3880     if (port <= UINT16_MAX) {
3881         union mf_subvalue value;
3882
3883         memset(&value, 0xff, sizeof value);
3884         mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks);
3885         xlate_output_action(ctx, u16_to_ofp(port),
3886                             or->max_len, false);
3887     }
3888 }
3889
3890 static void
3891 xlate_enqueue_action(struct xlate_ctx *ctx,
3892                      const struct ofpact_enqueue *enqueue)
3893 {
3894     ofp_port_t ofp_port = enqueue->port;
3895     uint32_t queue_id = enqueue->queue;
3896     uint32_t flow_priority, priority;
3897     int error;
3898
3899     /* Translate queue to priority. */
3900     error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
3901     if (error) {
3902         /* Fall back to ordinary output action. */
3903         xlate_output_action(ctx, enqueue->port, 0, false);
3904         return;
3905     }
3906
3907     /* Check output port. */
3908     if (ofp_port == OFPP_IN_PORT) {
3909         ofp_port = ctx->xin->flow.in_port.ofp_port;
3910     } else if (ofp_port == ctx->xin->flow.in_port.ofp_port) {
3911         return;
3912     }
3913
3914     /* Add datapath actions. */
3915     flow_priority = ctx->xin->flow.skb_priority;
3916     ctx->xin->flow.skb_priority = priority;
3917     compose_output_action(ctx, ofp_port, NULL);
3918     ctx->xin->flow.skb_priority = flow_priority;
3919
3920     /* Update NetFlow output port. */
3921     if (ctx->nf_output_iface == NF_OUT_DROP) {
3922         ctx->nf_output_iface = ofp_port;
3923     } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
3924         ctx->nf_output_iface = NF_OUT_MULTI;
3925     }
3926 }
3927
3928 static void
3929 xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
3930 {
3931     uint32_t skb_priority;
3932
3933     if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
3934         ctx->xin->flow.skb_priority = skb_priority;
3935     } else {
3936         /* Couldn't translate queue to a priority.  Nothing to do.  A warning
3937          * has already been logged. */
3938     }
3939 }
3940
3941 static bool
3942 slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
3943 {
3944     const struct xbridge *xbridge = xbridge_;
3945     struct xport *port;
3946
3947     switch (ofp_port) {
3948     case OFPP_IN_PORT:
3949     case OFPP_TABLE:
3950     case OFPP_NORMAL:
3951     case OFPP_FLOOD:
3952     case OFPP_ALL:
3953     case OFPP_NONE:
3954         return true;
3955     case OFPP_CONTROLLER: /* Not supported by the bundle action. */
3956         return false;
3957     default:
3958         port = get_ofp_port(xbridge, ofp_port);
3959         return port ? port->may_enable : false;
3960     }
3961 }
3962
3963 static void
3964 xlate_bundle_action(struct xlate_ctx *ctx,
3965                     const struct ofpact_bundle *bundle)
3966 {
3967     ofp_port_t port;
3968
3969     port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
3970                           CONST_CAST(struct xbridge *, ctx->xbridge));
3971     if (bundle->dst.field) {
3972         nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
3973     } else {
3974         xlate_output_action(ctx, port, 0, false);
3975     }
3976 }
3977
3978 static void
3979 xlate_learn_action__(struct xlate_ctx *ctx, const struct ofpact_learn *learn,
3980                      struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts)
3981 {
3982     learn_execute(learn, &ctx->xin->flow, fm, ofpacts);
3983     if (ctx->xin->may_learn) {
3984         ofproto_dpif_flow_mod(ctx->xbridge->ofproto, fm);
3985     }
3986 }
3987
3988 static void
3989 xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
3990 {
3991     learn_mask(learn, ctx->wc);
3992
3993     if (ctx->xin->xcache) {
3994         struct xc_entry *entry;
3995
3996         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_LEARN);
3997         entry->u.learn.ofproto = ctx->xbridge->ofproto;
3998         entry->u.learn.fm = xmalloc(sizeof *entry->u.learn.fm);
3999         entry->u.learn.ofpacts = ofpbuf_new(64);
4000         xlate_learn_action__(ctx, learn, entry->u.learn.fm,
4001                              entry->u.learn.ofpacts);
4002     } else if (ctx->xin->may_learn) {
4003         uint64_t ofpacts_stub[1024 / 8];
4004         struct ofputil_flow_mod fm;
4005         struct ofpbuf ofpacts;
4006
4007         ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
4008         xlate_learn_action__(ctx, learn, &fm, &ofpacts);
4009         ofpbuf_uninit(&ofpacts);
4010     }
4011 }
4012
4013 static void
4014 xlate_fin_timeout__(struct rule_dpif *rule, uint16_t tcp_flags,
4015                     uint16_t idle_timeout, uint16_t hard_timeout)
4016 {
4017     if (tcp_flags & (TCP_FIN | TCP_RST)) {
4018         rule_dpif_reduce_timeouts(rule, idle_timeout, hard_timeout);
4019     }
4020 }
4021
4022 static void
4023 xlate_fin_timeout(struct xlate_ctx *ctx,
4024                   const struct ofpact_fin_timeout *oft)
4025 {
4026     if (ctx->rule) {
4027         xlate_fin_timeout__(ctx->rule, ctx->xin->tcp_flags,
4028                             oft->fin_idle_timeout, oft->fin_hard_timeout);
4029         if (ctx->xin->xcache) {
4030             struct xc_entry *entry;
4031
4032             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_FIN_TIMEOUT);
4033             /* XC_RULE already holds a reference on the rule, none is taken
4034              * here. */
4035             entry->u.fin.rule = ctx->rule;
4036             entry->u.fin.idle = oft->fin_idle_timeout;
4037             entry->u.fin.hard = oft->fin_hard_timeout;
4038         }
4039     }
4040 }
4041
4042 static void
4043 xlate_sample_action(struct xlate_ctx *ctx,
4044                     const struct ofpact_sample *os)
4045 {
4046     /* Scale the probability from 16-bit to 32-bit while representing
4047      * the same percentage. */
4048     uint32_t probability = (os->probability << 16) | os->probability;
4049
4050     if (!ctx->xbridge->support.variable_length_userdata) {
4051         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4052
4053         VLOG_ERR_RL(&rl, "ignoring NXAST_SAMPLE action because datapath "
4054                     "lacks support (needs Linux 3.10+ or kernel module from "
4055                     "OVS 1.11+)");
4056         return;
4057     }
4058
4059     xlate_commit_actions(ctx);
4060
4061     union user_action_cookie cookie = {
4062         .flow_sample = {
4063             .type = USER_ACTION_COOKIE_FLOW_SAMPLE,
4064             .probability = os->probability,
4065             .collector_set_id = os->collector_set_id,
4066             .obs_domain_id = os->obs_domain_id,
4067             .obs_point_id = os->obs_point_id,
4068         }
4069     };
4070     compose_sample_action(ctx, probability, &cookie, sizeof cookie.flow_sample,
4071                           ODPP_NONE, false);
4072 }
4073
4074 static bool
4075 may_receive(const struct xport *xport, struct xlate_ctx *ctx)
4076 {
4077     if (xport->config & (is_stp(&ctx->xin->flow)
4078                          ? OFPUTIL_PC_NO_RECV_STP
4079                          : OFPUTIL_PC_NO_RECV)) {
4080         return false;
4081     }
4082
4083     /* Only drop packets here if both forwarding and learning are
4084      * disabled.  If just learning is enabled, we need to have
4085      * OFPP_NORMAL and the learning action have a look at the packet
4086      * before we can drop it. */
4087     if ((!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) ||
4088         (!xport_rstp_forward_state(xport) && !xport_rstp_learn_state(xport))) {
4089         return false;
4090     }
4091
4092     return true;
4093 }
4094
4095 static void
4096 xlate_write_actions__(struct xlate_ctx *ctx,
4097                       const struct ofpact *ofpacts, size_t ofpacts_len)
4098 {
4099     /* Maintain actset_output depending on the contents of the action set:
4100      *
4101      *   - OFPP_UNSET, if there is no "output" action.
4102      *
4103      *   - The output port, if there is an "output" action and no "group"
4104      *     action.
4105      *
4106      *   - OFPP_UNSET, if there is a "group" action.
4107      */
4108     if (!ctx->action_set_has_group) {
4109         const struct ofpact *a;
4110         OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4111             if (a->type == OFPACT_OUTPUT) {
4112                 ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port;
4113             } else if (a->type == OFPACT_GROUP) {
4114                 ctx->xin->flow.actset_output = OFPP_UNSET;
4115                 ctx->action_set_has_group = true;
4116                 break;
4117             }
4118         }
4119     }
4120
4121     ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len);
4122 }
4123
4124 static void
4125 xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a)
4126 {
4127     xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a));
4128 }
4129
4130 static void
4131 xlate_action_set(struct xlate_ctx *ctx)
4132 {
4133     uint64_t action_list_stub[1024 / 64];
4134     struct ofpbuf action_list;
4135
4136     ctx->in_action_set = true;
4137     ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
4138     ofpacts_execute_action_set(&action_list, &ctx->action_set);
4139     /* Clear the action set, as it is not needed any more. */
4140     ofpbuf_clear(&ctx->action_set);
4141     do_xlate_actions(action_list.data, action_list.size, ctx);
4142     ctx->in_action_set = false;
4143     ofpbuf_uninit(&action_list);
4144 }
4145
4146 static void
4147 recirc_put_unroll_xlate(struct xlate_ctx *ctx)
4148 {
4149     struct ofpact_unroll_xlate *unroll;
4150
4151     unroll = ctx->last_unroll_offset < 0
4152         ? NULL
4153         : ALIGNED_CAST(struct ofpact_unroll_xlate *,
4154                        (char *)ctx->action_set.data + ctx->last_unroll_offset);
4155
4156     /* Restore the table_id and rule cookie for a potential PACKET
4157      * IN if needed. */
4158     if (!unroll ||
4159         (ctx->table_id != unroll->rule_table_id
4160          || ctx->rule_cookie != unroll->rule_cookie)) {
4161
4162         ctx->last_unroll_offset = ctx->action_set.size;
4163         unroll = ofpact_put_UNROLL_XLATE(&ctx->action_set);
4164         unroll->rule_table_id = ctx->table_id;
4165         unroll->rule_cookie = ctx->rule_cookie;
4166     }
4167 }
4168
4169
4170 /* Copy actions 'a' through 'end' to the action_set to be executed after
4171  * recirculation.  UNROLL_XLATE action is inserted, if not already done so,
4172  * before actions that may depend on the current table ID or flow cookie. */
4173 static void
4174 recirc_unroll_actions(const struct ofpact *a, const struct ofpact *end,
4175                       struct xlate_ctx *ctx)
4176 {
4177     for (; a < end; a = ofpact_next(a)) {
4178         switch (a->type) {
4179         case OFPACT_OUTPUT_REG:
4180         case OFPACT_GROUP:
4181         case OFPACT_OUTPUT:
4182         case OFPACT_CONTROLLER:
4183         case OFPACT_DEC_MPLS_TTL:
4184         case OFPACT_DEC_TTL:
4185             /* These actions may generate asynchronous messages, which include
4186              * table ID and flow cookie information. */
4187             recirc_put_unroll_xlate(ctx);
4188             break;
4189
4190         case OFPACT_RESUBMIT:
4191             if (ofpact_get_RESUBMIT(a)->table_id == 0xff) {
4192                 /* This resubmit action is relative to the current table, so we
4193                  * need to track what table that is.*/
4194                 recirc_put_unroll_xlate(ctx);
4195             }
4196             break;
4197
4198         case OFPACT_SET_TUNNEL:
4199         case OFPACT_REG_MOVE:
4200         case OFPACT_SET_FIELD:
4201         case OFPACT_STACK_PUSH:
4202         case OFPACT_STACK_POP:
4203         case OFPACT_LEARN:
4204         case OFPACT_WRITE_METADATA:
4205         case OFPACT_GOTO_TABLE:
4206         case OFPACT_ENQUEUE:
4207         case OFPACT_SET_VLAN_VID:
4208         case OFPACT_SET_VLAN_PCP:
4209         case OFPACT_STRIP_VLAN:
4210         case OFPACT_PUSH_VLAN:
4211         case OFPACT_SET_ETH_SRC:
4212         case OFPACT_SET_ETH_DST:
4213         case OFPACT_SET_IPV4_SRC:
4214         case OFPACT_SET_IPV4_DST:
4215         case OFPACT_SET_IP_DSCP:
4216         case OFPACT_SET_IP_ECN:
4217         case OFPACT_SET_IP_TTL:
4218         case OFPACT_SET_L4_SRC_PORT:
4219         case OFPACT_SET_L4_DST_PORT:
4220         case OFPACT_SET_QUEUE:
4221         case OFPACT_POP_QUEUE:
4222         case OFPACT_PUSH_MPLS:
4223         case OFPACT_POP_MPLS:
4224         case OFPACT_SET_MPLS_LABEL:
4225         case OFPACT_SET_MPLS_TC:
4226         case OFPACT_SET_MPLS_TTL:
4227         case OFPACT_MULTIPATH:
4228         case OFPACT_BUNDLE:
4229         case OFPACT_EXIT:
4230         case OFPACT_UNROLL_XLATE:
4231         case OFPACT_FIN_TIMEOUT:
4232         case OFPACT_CLEAR_ACTIONS:
4233         case OFPACT_WRITE_ACTIONS:
4234         case OFPACT_METER:
4235         case OFPACT_SAMPLE:
4236         case OFPACT_DEBUG_RECIRC:
4237         case OFPACT_CT:
4238         case OFPACT_NAT:
4239             /* These may not generate PACKET INs. */
4240             break;
4241
4242         case OFPACT_NOTE:
4243         case OFPACT_CONJUNCTION:
4244             /* These need not be copied for restoration. */
4245             continue;
4246         }
4247         /* Copy the action over. */
4248         ofpbuf_put(&ctx->action_set, a, OFPACT_ALIGN(a->len));
4249     }
4250 }
4251
4252 #define CHECK_MPLS_RECIRCULATION()      \
4253     if (ctx->was_mpls) {                \
4254         ctx_trigger_recirculation(ctx); \
4255         break;                          \
4256     }
4257 #define CHECK_MPLS_RECIRCULATION_IF(COND) \
4258     if (COND) {                           \
4259         CHECK_MPLS_RECIRCULATION();       \
4260     }
4261
4262 static void
4263 put_ct_mark(const struct flow *flow, struct flow *base_flow,
4264             struct ofpbuf *odp_actions, struct flow_wildcards *wc)
4265 {
4266     struct {
4267         uint32_t key;
4268         uint32_t mask;
4269     } odp_attr;
4270
4271     odp_attr.key = flow->ct_mark;
4272     odp_attr.mask = wc->masks.ct_mark;
4273
4274     if (odp_attr.mask && odp_attr.key != base_flow->ct_mark) {
4275         nl_msg_put_unspec(odp_actions, OVS_CT_ATTR_MARK, &odp_attr,
4276                           sizeof(odp_attr));
4277     }
4278 }
4279
4280 static void
4281 put_ct_label(const struct flow *flow, struct flow *base_flow,
4282              struct ofpbuf *odp_actions, struct flow_wildcards *wc)
4283 {
4284     if (!ovs_u128_is_zero(&wc->masks.ct_label)
4285         && !ovs_u128_equals(&flow->ct_label, &base_flow->ct_label)) {
4286         struct {
4287             ovs_u128 key;
4288             ovs_u128 mask;
4289         } *odp_ct_label;
4290
4291         odp_ct_label = nl_msg_put_unspec_uninit(odp_actions,
4292                                                 OVS_CT_ATTR_LABELS,
4293                                                 sizeof(*odp_ct_label));
4294         odp_ct_label->key = flow->ct_label;
4295         odp_ct_label->mask = wc->masks.ct_label;
4296     }
4297 }
4298
4299 static void
4300 put_ct_helper(struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc)
4301 {
4302     if (ofc->alg) {
4303         if (ofc->alg == IPPORT_FTP) {
4304             nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "ftp");
4305         } else {
4306             VLOG_WARN("Cannot serialize ct_helper %d\n", ofc->alg);
4307         }
4308     }
4309 }
4310
4311 static void
4312 put_ct_nat(struct xlate_ctx *ctx)
4313 {
4314     struct ofpact_nat *ofn = ctx->ct_nat_action;
4315     size_t nat_offset;
4316
4317     if (!ofn) {
4318         return;
4319     }
4320
4321     nat_offset = nl_msg_start_nested(ctx->odp_actions, OVS_CT_ATTR_NAT);
4322     if (ofn->flags & NX_NAT_F_SRC || ofn->flags & NX_NAT_F_DST) {
4323         nl_msg_put_flag(ctx->odp_actions, ofn->flags & NX_NAT_F_SRC
4324                         ? OVS_NAT_ATTR_SRC : OVS_NAT_ATTR_DST);
4325         if (ofn->flags & NX_NAT_F_PERSISTENT) {
4326             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PERSISTENT);
4327         }
4328         if (ofn->flags & NX_NAT_F_PROTO_HASH) {
4329             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_HASH);
4330         } else if (ofn->flags & NX_NAT_F_PROTO_RANDOM) {
4331             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_RANDOM);
4332         }
4333         if (ofn->range_af == AF_INET) {
4334             nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
4335                            ofn->range.addr.ipv4.min);
4336             if (ofn->range.addr.ipv4.max &&
4337                 (ntohl(ofn->range.addr.ipv4.max)
4338                  > ntohl(ofn->range.addr.ipv4.min))) {
4339                 nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
4340                                 ofn->range.addr.ipv4.max);
4341             }
4342         } else if (ofn->range_af == AF_INET6) {
4343             nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
4344                               &ofn->range.addr.ipv6.min,
4345                               sizeof ofn->range.addr.ipv6.min);
4346             if (!ipv6_mask_is_any(&ofn->range.addr.ipv6.max) &&
4347                 memcmp(&ofn->range.addr.ipv6.max, &ofn->range.addr.ipv6.min,
4348                        sizeof ofn->range.addr.ipv6.max) > 0) {
4349                 nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
4350                                   &ofn->range.addr.ipv6.max,
4351                                   sizeof ofn->range.addr.ipv6.max);
4352             }
4353         }
4354         if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
4355             nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MIN,
4356                            ofn->range.proto.min);
4357             if (ofn->range.proto.max &&
4358                 ofn->range.proto.max > ofn->range.proto.min) {
4359                 nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MAX,
4360                                ofn->range.proto.max);
4361             }
4362         }
4363     }
4364     nl_msg_end_nested(ctx->odp_actions, nat_offset);
4365 }
4366
4367 static void
4368 compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
4369 {
4370     ovs_u128 old_ct_label = ctx->base_flow.ct_label;
4371     uint32_t old_ct_mark = ctx->base_flow.ct_mark;
4372     size_t ct_offset;
4373     uint16_t zone;
4374
4375     /* Ensure that any prior actions are applied before composing the new
4376      * conntrack action. */
4377     xlate_commit_actions(ctx);
4378
4379     /* Process nested actions first, to populate the key. */
4380     ctx->ct_nat_action = NULL;
4381     do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx);
4382
4383     if (ofc->zone_src.field) {
4384         zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
4385     } else {
4386         zone = ofc->zone_imm;
4387     }
4388
4389     ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
4390     if (ofc->flags & NX_CT_F_COMMIT) {
4391         nl_msg_put_flag(ctx->odp_actions, OVS_CT_ATTR_COMMIT);
4392     }
4393     nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
4394     put_ct_mark(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc);
4395     put_ct_label(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc);
4396     put_ct_helper(ctx->odp_actions, ofc);
4397     put_ct_nat(ctx);
4398     ctx->ct_nat_action = NULL;
4399     nl_msg_end_nested(ctx->odp_actions, ct_offset);
4400
4401     /* Restore the original ct fields in the key. These should only be exposed
4402      * after recirculation to another table. */
4403     ctx->base_flow.ct_mark = old_ct_mark;
4404     ctx->base_flow.ct_label = old_ct_label;
4405
4406     if (ofc->recirc_table == NX_CT_RECIRC_NONE) {
4407         /* If we do not recirculate as part of this action, hide the results of
4408          * connection tracking from subsequent recirculations. */
4409         ctx->conntracked = false;
4410     } else {
4411         /* Use ct_* fields from datapath during recirculation upcall. */
4412         ctx->conntracked = true;
4413         compose_recirculate_and_fork(ctx, ofc->recirc_table);
4414     }
4415 }
4416
4417 static void
4418 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
4419                  struct xlate_ctx *ctx)
4420 {
4421     struct flow_wildcards *wc = ctx->wc;
4422     struct flow *flow = &ctx->xin->flow;
4423     const struct ofpact *a;
4424
4425     if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
4426         tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
4427     }
4428     /* dl_type already in the mask, not set below. */
4429
4430     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4431         struct ofpact_controller *controller;
4432         const struct ofpact_metadata *metadata;
4433         const struct ofpact_set_field *set_field;
4434         const struct mf_field *mf;
4435
4436         if (ctx->error) {
4437             break;
4438         }
4439
4440         if (ctx->exit) {
4441             /* Check if need to store the remaining actions for later
4442              * execution. */
4443             if (exit_recirculates(ctx)) {
4444                 recirc_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len),
4445                                       ctx);
4446             }
4447             break;
4448         }
4449
4450         switch (a->type) {
4451         case OFPACT_OUTPUT:
4452             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
4453                                 ofpact_get_OUTPUT(a)->max_len, true);
4454             break;
4455
4456         case OFPACT_GROUP:
4457             if (xlate_group_action(ctx, ofpact_get_GROUP(a)->group_id)) {
4458                 /* Group could not be found. */
4459                 return;
4460             }
4461             break;
4462
4463         case OFPACT_CONTROLLER:
4464             controller = ofpact_get_CONTROLLER(a);
4465             execute_controller_action(ctx, controller->max_len,
4466                                       controller->reason,
4467                                       controller->controller_id);
4468             break;
4469
4470         case OFPACT_ENQUEUE:
4471             memset(&wc->masks.skb_priority, 0xff,
4472                    sizeof wc->masks.skb_priority);
4473             xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
4474             break;
4475
4476         case OFPACT_SET_VLAN_VID:
4477             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
4478             if (flow->vlan_tci & htons(VLAN_CFI) ||
4479                 ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
4480                 flow->vlan_tci &= ~htons(VLAN_VID_MASK);
4481                 flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
4482                                    | htons(VLAN_CFI));
4483             }
4484             break;
4485
4486         case OFPACT_SET_VLAN_PCP:
4487             wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
4488             if (flow->vlan_tci & htons(VLAN_CFI) ||
4489                 ofpact_get_SET_VLAN_PCP(a)->push_vlan_if_needed) {
4490                 flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
4491                 flow->vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
4492                                          << VLAN_PCP_SHIFT) | VLAN_CFI);
4493             }
4494             break;
4495
4496         case OFPACT_STRIP_VLAN:
4497             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4498             flow->vlan_tci = htons(0);
4499             break;
4500
4501         case OFPACT_PUSH_VLAN:
4502             /* XXX 802.1AD(QinQ) */
4503             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4504             flow->vlan_tci = htons(VLAN_CFI);
4505             break;
4506
4507         case OFPACT_SET_ETH_SRC:
4508             WC_MASK_FIELD(wc, dl_src);
4509             flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
4510             break;
4511
4512         case OFPACT_SET_ETH_DST:
4513             WC_MASK_FIELD(wc, dl_dst);
4514             flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
4515             break;
4516
4517         case OFPACT_SET_IPV4_SRC:
4518             CHECK_MPLS_RECIRCULATION();
4519             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4520                 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
4521                 flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
4522             }
4523             break;
4524
4525         case OFPACT_SET_IPV4_DST:
4526             CHECK_MPLS_RECIRCULATION();
4527             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4528                 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
4529                 flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
4530             }
4531             break;
4532
4533         case OFPACT_SET_IP_DSCP:
4534             CHECK_MPLS_RECIRCULATION();
4535             if (is_ip_any(flow)) {
4536                 wc->masks.nw_tos |= IP_DSCP_MASK;
4537                 flow->nw_tos &= ~IP_DSCP_MASK;
4538                 flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
4539             }
4540             break;
4541
4542         case OFPACT_SET_IP_ECN:
4543             CHECK_MPLS_RECIRCULATION();
4544             if (is_ip_any(flow)) {
4545                 wc->masks.nw_tos |= IP_ECN_MASK;
4546                 flow->nw_tos &= ~IP_ECN_MASK;
4547                 flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
4548             }
4549             break;
4550
4551         case OFPACT_SET_IP_TTL:
4552             CHECK_MPLS_RECIRCULATION();
4553             if (is_ip_any(flow)) {
4554                 wc->masks.nw_ttl = 0xff;
4555                 flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
4556             }
4557             break;
4558
4559         case OFPACT_SET_L4_SRC_PORT:
4560             CHECK_MPLS_RECIRCULATION();
4561             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4562                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4563                 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
4564                 flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
4565             }
4566             break;
4567
4568         case OFPACT_SET_L4_DST_PORT:
4569             CHECK_MPLS_RECIRCULATION();
4570             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4571                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4572                 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
4573                 flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
4574             }
4575             break;
4576
4577         case OFPACT_RESUBMIT:
4578             /* Recirculation complicates resubmit.  There are two cases:
4579              *
4580              *     - If mpls_pop has been executed, then the flow table lookup
4581              *       as part of resubmit might depend on fields that can only
4582              *       be obtained via recirculation, so the resubmit itself
4583              *       triggers recirculation and we need to make sure that the
4584              *       resubmit is executed again after recirculation.
4585              *       Therefore, in this case we trigger recirculation and let
4586              *       the code following this "switch" append the resubmit to
4587              *       the post-recirculation actions.
4588              *
4589              *     - Otherwise, some action in the flow entry found by resubmit
4590              *       might trigger recirculation.  If that happens, then we do
4591              *       not want to execute the resubmit again after
4592              *       recirculation, so we want to skip back to the head of the
4593              *       loop to avoid that, only adding any actions that follow
4594              *       the resubmit to the post-recirculation actions.
4595              */
4596             if (ctx->was_mpls) {
4597                 ctx_trigger_recirculation(ctx);
4598                 break;
4599             }
4600             xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
4601             continue;
4602
4603         case OFPACT_SET_TUNNEL:
4604             flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
4605             break;
4606
4607         case OFPACT_SET_QUEUE:
4608             memset(&wc->masks.skb_priority, 0xff,
4609                    sizeof wc->masks.skb_priority);
4610             xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
4611             break;
4612
4613         case OFPACT_POP_QUEUE:
4614             memset(&wc->masks.skb_priority, 0xff,
4615                    sizeof wc->masks.skb_priority);
4616             flow->skb_priority = ctx->orig_skb_priority;
4617             break;
4618
4619         case OFPACT_REG_MOVE:
4620             CHECK_MPLS_RECIRCULATION_IF(
4621                 mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->dst.field) ||
4622                 mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->src.field));
4623             nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
4624             break;
4625
4626         case OFPACT_SET_FIELD:
4627             CHECK_MPLS_RECIRCULATION_IF(
4628                 mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field));
4629             set_field = ofpact_get_SET_FIELD(a);
4630             mf = set_field->field;
4631
4632             /* Set field action only ever overwrites packet's outermost
4633              * applicable header fields.  Do nothing if no header exists. */
4634             if (mf->id == MFF_VLAN_VID) {
4635                 wc->masks.vlan_tci |= htons(VLAN_CFI);
4636                 if (!(flow->vlan_tci & htons(VLAN_CFI))) {
4637                     break;
4638                 }
4639             } else if ((mf->id == MFF_MPLS_LABEL || mf->id == MFF_MPLS_TC)
4640                        /* 'dl_type' is already unwildcarded. */
4641                        && !eth_type_mpls(flow->dl_type)) {
4642                 break;
4643             }
4644             /* A flow may wildcard nw_frag.  Do nothing if setting a transport
4645              * header field on a packet that does not have them. */
4646             mf_mask_field_and_prereqs(mf, wc);
4647             if (mf_are_prereqs_ok(mf, flow)) {
4648                 mf_set_flow_value_masked(mf, &set_field->value,
4649                                          &set_field->mask, flow);
4650             }
4651             break;
4652
4653         case OFPACT_STACK_PUSH:
4654             CHECK_MPLS_RECIRCULATION_IF(
4655                 mf_is_l3_or_higher(ofpact_get_STACK_PUSH(a)->subfield.field));
4656             nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
4657                                    &ctx->stack);
4658             break;
4659
4660         case OFPACT_STACK_POP:
4661             CHECK_MPLS_RECIRCULATION_IF(
4662                 mf_is_l3_or_higher(ofpact_get_STACK_POP(a)->subfield.field));
4663             nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
4664                                   &ctx->stack);
4665             break;
4666
4667         case OFPACT_PUSH_MPLS:
4668             /* Recirculate if it is an IP packet with a zero ttl.  This may
4669              * indicate that the packet was previously MPLS and an MPLS pop
4670              * action converted it to IP. In this case recirculating should
4671              * reveal the IP TTL which is used as the basis for a new MPLS
4672              * LSE. */
4673             CHECK_MPLS_RECIRCULATION_IF(
4674                 !flow_count_mpls_labels(flow, wc)
4675                 && flow->nw_ttl == 0
4676                 && is_ip_any(flow));
4677             compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
4678             break;
4679
4680         case OFPACT_POP_MPLS:
4681             CHECK_MPLS_RECIRCULATION();
4682             compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
4683             break;
4684
4685         case OFPACT_SET_MPLS_LABEL:
4686             CHECK_MPLS_RECIRCULATION();
4687             compose_set_mpls_label_action(
4688                 ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
4689             break;
4690
4691         case OFPACT_SET_MPLS_TC:
4692             CHECK_MPLS_RECIRCULATION();
4693             compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
4694             break;
4695
4696         case OFPACT_SET_MPLS_TTL:
4697             CHECK_MPLS_RECIRCULATION();
4698             compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
4699             break;
4700
4701         case OFPACT_DEC_MPLS_TTL:
4702             CHECK_MPLS_RECIRCULATION();
4703             if (compose_dec_mpls_ttl_action(ctx)) {
4704                 return;
4705             }
4706             break;
4707
4708         case OFPACT_DEC_TTL:
4709             CHECK_MPLS_RECIRCULATION();
4710             wc->masks.nw_ttl = 0xff;
4711             if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
4712                 return;
4713             }
4714             break;
4715
4716         case OFPACT_NOTE:
4717             /* Nothing to do. */
4718             break;
4719
4720         case OFPACT_MULTIPATH:
4721             CHECK_MPLS_RECIRCULATION();
4722             multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
4723             break;
4724
4725         case OFPACT_BUNDLE:
4726             CHECK_MPLS_RECIRCULATION();
4727             xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
4728             break;
4729
4730         case OFPACT_OUTPUT_REG:
4731             xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
4732             break;
4733
4734         case OFPACT_LEARN:
4735             CHECK_MPLS_RECIRCULATION();
4736             xlate_learn_action(ctx, ofpact_get_LEARN(a));
4737             break;
4738
4739         case OFPACT_CONJUNCTION: {
4740             /* A flow with a "conjunction" action represents part of a special
4741              * kind of "set membership match".  Such a flow should not actually
4742              * get executed, but it could via, say, a "packet-out", even though
4743              * that wouldn't be useful.  Log it to help debugging. */
4744             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4745             VLOG_INFO_RL(&rl, "executing no-op conjunction action");
4746             break;
4747         }
4748
4749         case OFPACT_EXIT:
4750             ctx->exit = true;
4751             break;
4752
4753         case OFPACT_UNROLL_XLATE: {
4754             struct ofpact_unroll_xlate *unroll = ofpact_get_UNROLL_XLATE(a);
4755
4756             /* Restore translation context data that was stored earlier. */
4757             ctx->table_id = unroll->rule_table_id;
4758             ctx->rule_cookie = unroll->rule_cookie;
4759             break;
4760         }
4761         case OFPACT_FIN_TIMEOUT:
4762             CHECK_MPLS_RECIRCULATION();
4763             memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4764             xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
4765             break;
4766
4767         case OFPACT_CLEAR_ACTIONS:
4768             ofpbuf_clear(&ctx->action_set);
4769             ctx->xin->flow.actset_output = OFPP_UNSET;
4770             ctx->action_set_has_group = false;
4771             break;
4772
4773         case OFPACT_WRITE_ACTIONS:
4774             xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a));
4775             break;
4776
4777         case OFPACT_WRITE_METADATA:
4778             metadata = ofpact_get_WRITE_METADATA(a);
4779             flow->metadata &= ~metadata->mask;
4780             flow->metadata |= metadata->metadata & metadata->mask;
4781             break;
4782
4783         case OFPACT_METER:
4784             /* Not implemented yet. */
4785             break;
4786
4787         case OFPACT_GOTO_TABLE: {
4788             struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
4789
4790             ovs_assert(ctx->table_id < ogt->table_id);
4791
4792             xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
4793                                ogt->table_id, true, true);
4794             break;
4795         }
4796
4797         case OFPACT_SAMPLE:
4798             xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
4799             break;
4800
4801         case OFPACT_CT:
4802             CHECK_MPLS_RECIRCULATION();
4803             compose_conntrack_action(ctx, ofpact_get_CT(a));
4804             break;
4805
4806         case OFPACT_NAT:
4807             /* This will be processed by compose_conntrack_action(). */
4808             ctx->ct_nat_action = ofpact_get_NAT(a);
4809             break;
4810
4811         case OFPACT_DEBUG_RECIRC:
4812             ctx_trigger_recirculation(ctx);
4813             a = ofpact_next(a);
4814             break;
4815         }
4816
4817         /* Check if need to store this and the remaining actions for later
4818          * execution. */
4819         if (!ctx->error && ctx->exit && ctx_first_recirculation_action(ctx)) {
4820             recirc_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx);
4821             break;
4822         }
4823     }
4824 }
4825
4826 void
4827 xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
4828               const struct flow *flow, ofp_port_t in_port,
4829               struct rule_dpif *rule, uint16_t tcp_flags,
4830               const struct dp_packet *packet, struct flow_wildcards *wc,
4831               struct ofpbuf *odp_actions)
4832 {
4833     xin->ofproto = ofproto;
4834     xin->flow = *flow;
4835     xin->flow.in_port.ofp_port = in_port;
4836     xin->flow.actset_output = OFPP_UNSET;
4837     xin->packet = packet;
4838     xin->may_learn = packet != NULL;
4839     xin->rule = rule;
4840     xin->xcache = NULL;
4841     xin->ofpacts = NULL;
4842     xin->ofpacts_len = 0;
4843     xin->tcp_flags = tcp_flags;
4844     xin->resubmit_hook = NULL;
4845     xin->report_hook = NULL;
4846     xin->resubmit_stats = NULL;
4847     xin->recurse = 0;
4848     xin->resubmits = 0;
4849     xin->wc = wc;
4850     xin->odp_actions = odp_actions;
4851
4852     /* Do recirc lookup. */
4853     xin->recirc = NULL;
4854     if (flow->recirc_id) {
4855         const struct recirc_id_node *node
4856             = recirc_id_node_find(flow->recirc_id);
4857         if (node) {
4858             xin->recirc = &node->state;
4859         }
4860     }
4861 }
4862
4863 void
4864 xlate_out_uninit(struct xlate_out *xout)
4865 {
4866     if (xout) {
4867         recirc_refs_unref(&xout->recircs);
4868     }
4869 }
4870
4871 /* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
4872  * into datapath actions, using 'ctx', and discards the datapath actions. */
4873 void
4874 xlate_actions_for_side_effects(struct xlate_in *xin)
4875 {
4876     struct xlate_out xout;
4877     enum xlate_error error;
4878
4879     error = xlate_actions(xin, &xout);
4880     if (error) {
4881         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
4882
4883         VLOG_WARN_RL(&rl, "xlate_actions failed (%s)!", xlate_strerror(error));
4884     }
4885
4886     xlate_out_uninit(&xout);
4887 }
4888 \f
4889 static struct skb_priority_to_dscp *
4890 get_skb_priority(const struct xport *xport, uint32_t skb_priority)
4891 {
4892     struct skb_priority_to_dscp *pdscp;
4893     uint32_t hash;
4894
4895     hash = hash_int(skb_priority, 0);
4896     HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
4897         if (pdscp->skb_priority == skb_priority) {
4898             return pdscp;
4899         }
4900     }
4901     return NULL;
4902 }
4903
4904 static bool
4905 dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
4906                        uint8_t *dscp)
4907 {
4908     struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
4909     *dscp = pdscp ? pdscp->dscp : 0;
4910     return pdscp != NULL;
4911 }
4912
4913 static size_t
4914 count_skb_priorities(const struct xport *xport)
4915 {
4916     return hmap_count(&xport->skb_priorities);
4917 }
4918
4919 static void
4920 clear_skb_priorities(struct xport *xport)
4921 {
4922     struct skb_priority_to_dscp *pdscp, *next;
4923
4924     HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) {
4925         hmap_remove(&xport->skb_priorities, &pdscp->hmap_node);
4926         free(pdscp);
4927     }
4928 }
4929
4930 static bool
4931 actions_output_to_local_port(const struct xlate_ctx *ctx)
4932 {
4933     odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL);
4934     const struct nlattr *a;
4935     unsigned int left;
4936
4937     NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->odp_actions->data,
4938                              ctx->odp_actions->size) {
4939         if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT
4940             && nl_attr_get_odp_port(a) == local_odp_port) {
4941             return true;
4942         }
4943     }
4944     return false;
4945 }
4946
4947 #if defined(__linux__)
4948 /* Returns the maximum number of packets that the Linux kernel is willing to
4949  * queue up internally to certain kinds of software-implemented ports, or the
4950  * default (and rarely modified) value if it cannot be determined. */
4951 static int
4952 netdev_max_backlog(void)
4953 {
4954     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
4955     static int max_backlog = 1000; /* The normal default value. */
4956
4957     if (ovsthread_once_start(&once)) {
4958         static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
4959         FILE *stream;
4960         int n;
4961
4962         stream = fopen(filename, "r");
4963         if (!stream) {
4964             VLOG_INFO("%s: open failed (%s)", filename, ovs_strerror(errno));
4965         } else {
4966             if (fscanf(stream, "%d", &n) != 1) {
4967                 VLOG_WARN("%s: read error", filename);
4968             } else if (n <= 100) {
4969                 VLOG_WARN("%s: unexpectedly small value %d", filename, n);
4970             } else {
4971                 max_backlog = n;
4972             }
4973             fclose(stream);
4974         }
4975         ovsthread_once_done(&once);
4976
4977         VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
4978     }
4979
4980     return max_backlog;
4981 }
4982
4983 /* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
4984  * 'odp_actions'. */
4985 static int
4986 count_output_actions(const struct ofpbuf *odp_actions)
4987 {
4988     const struct nlattr *a;
4989     size_t left;
4990     int n = 0;
4991
4992     NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
4993         if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
4994             n++;
4995         }
4996     }
4997     return n;
4998 }
4999 #endif /* defined(__linux__) */
5000
5001 /* Returns true if 'odp_actions' contains more output actions than the datapath
5002  * can reliably handle in one go.  On Linux, this is the value of the
5003  * net.core.netdev_max_backlog sysctl, which limits the maximum number of
5004  * packets that the kernel is willing to queue up for processing while the
5005  * datapath is processing a set of actions. */
5006 static bool
5007 too_many_output_actions(const struct ofpbuf *odp_actions OVS_UNUSED)
5008 {
5009 #ifdef __linux__
5010     return (odp_actions->size / NL_A_U32_SIZE > netdev_max_backlog()
5011             && count_output_actions(odp_actions) > netdev_max_backlog());
5012 #else
5013     /* OSes other than Linux might have similar limits, but we don't know how
5014      * to determine them.*/
5015     return false;
5016 #endif
5017 }
5018
5019 static void
5020 xlate_wc_init(struct xlate_ctx *ctx)
5021 {
5022     flow_wildcards_init_catchall(ctx->wc);
5023
5024     /* Some fields we consider to always be examined. */
5025     WC_MASK_FIELD(ctx->wc, in_port);
5026     WC_MASK_FIELD(ctx->wc, dl_type);
5027     if (is_ip_any(&ctx->xin->flow)) {
5028         WC_MASK_FIELD_MASK(ctx->wc, nw_frag, FLOW_NW_FRAG_MASK);
5029     }
5030
5031     if (ctx->xbridge->support.odp.recirc) {
5032         /* Always exactly match recirc_id when datapath supports
5033          * recirculation.  */
5034         WC_MASK_FIELD(ctx->wc, recirc_id);
5035     }
5036
5037     if (ctx->xbridge->netflow) {
5038         netflow_mask_wc(&ctx->xin->flow, ctx->wc);
5039     }
5040
5041     tnl_wc_init(&ctx->xin->flow, ctx->wc);
5042 }
5043
5044 static void
5045 xlate_wc_finish(struct xlate_ctx *ctx)
5046 {
5047     /* Clear the metadata and register wildcard masks, because we won't
5048      * use non-header fields as part of the cache. */
5049     flow_wildcards_clear_non_packet_fields(ctx->wc);
5050
5051     /* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields.  struct flow
5052      * uses the low 8 bits of the 16-bit tp_src and tp_dst members to
5053      * represent these fields.  The datapath interface, on the other hand,
5054      * represents them with just 8 bits each.  This means that if the high
5055      * 8 bits of the masks for these fields somehow become set, then they
5056      * will get chopped off by a round trip through the datapath, and
5057      * revalidation will spot that as an inconsistency and delete the flow.
5058      * Avoid the problem here by making sure that only the low 8 bits of
5059      * either field can be unwildcarded for ICMP.
5060      */
5061     if (is_icmpv4(&ctx->xin->flow) || is_icmpv6(&ctx->xin->flow)) {
5062         ctx->wc->masks.tp_src &= htons(UINT8_MAX);
5063         ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
5064     }
5065     /* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */
5066     if (ctx->wc->masks.vlan_tci) {
5067         ctx->wc->masks.vlan_tci |= htons(VLAN_CFI);
5068     }
5069 }
5070
5071 /* Translates the flow, actions, or rule in 'xin' into datapath actions in
5072  * 'xout'.
5073  * The caller must take responsibility for eventually freeing 'xout', with
5074  * xlate_out_uninit().
5075  * Returns 'XLATE_OK' if translation was successful.  In case of an error an
5076  * empty set of actions will be returned in 'xin->odp_actions' (if non-NULL),
5077  * so that most callers may ignore the return value and transparently install a
5078  * drop flow when the translation fails. */
5079 enum xlate_error
5080 xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
5081 {
5082     *xout = (struct xlate_out) {
5083         .slow = 0,
5084         .recircs = RECIRC_REFS_EMPTY_INITIALIZER,
5085     };
5086
5087     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5088     struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto);
5089     if (!xbridge) {
5090         return XLATE_BRIDGE_NOT_FOUND;
5091     }
5092
5093     struct flow *flow = &xin->flow;
5094
5095     union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
5096     uint64_t action_set_stub[1024 / 8];
5097     struct flow_wildcards scratch_wc;
5098     uint64_t actions_stub[256 / 8];
5099     struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
5100     struct xlate_ctx ctx = {
5101         .xin = xin,
5102         .xout = xout,
5103         .base_flow = *flow,
5104         .orig_tunnel_ipv6_dst = flow_tnl_dst(&flow->tunnel),
5105         .xbridge = xbridge,
5106         .stack = OFPBUF_STUB_INITIALIZER(stack_stub),
5107         .rule = xin->rule,
5108         .wc = xin->wc ? xin->wc : &scratch_wc,
5109         .odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
5110
5111         .recurse = xin->recurse,
5112         .resubmits = xin->resubmits,
5113         .in_group = false,
5114         .in_action_set = false,
5115
5116         .table_id = 0,
5117         .rule_cookie = OVS_BE64_MAX,
5118         .orig_skb_priority = flow->skb_priority,
5119         .sflow_n_outputs = 0,
5120         .sflow_odp_port = 0,
5121         .nf_output_iface = NF_OUT_DROP,
5122         .exit = false,
5123         .error = XLATE_OK,
5124         .mirrors = 0,
5125
5126         .recirc_action_offset = -1,
5127         .last_unroll_offset = -1,
5128
5129         .was_mpls = false,
5130         .conntracked = false,
5131
5132         .ct_nat_action = NULL,
5133
5134         .action_set_has_group = false,
5135         .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
5136     };
5137
5138     /* 'base_flow' reflects the packet as it came in, but we need it to reflect
5139      * the packet as the datapath will treat it for output actions:
5140      *
5141      *     - Our datapath doesn't retain tunneling information without us
5142      *       re-setting it, so clear the tunnel data.
5143      *
5144      *     - For VLAN splinters, a higher layer may pretend that the packet
5145      *       came in on 'flow->in_port.ofp_port' with 'flow->vlan_tci'
5146      *       attached, because that's how we want to treat it from an OpenFlow
5147      *       perspective.  But from the datapath's perspective it actually came
5148      *       in on a VLAN device without any VLAN attached.  So here we put the
5149      *       datapath's view of the VLAN information in 'base_flow' to ensure
5150      *       correct treatment.
5151      */
5152     memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
5153     if (flow->in_port.ofp_port
5154         != vsp_realdev_to_vlandev(xbridge->ofproto,
5155                                   flow->in_port.ofp_port,
5156                                   flow->vlan_tci)) {
5157         ctx.base_flow.vlan_tci = 0;
5158     }
5159
5160     ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
5161     if (xin->wc) {
5162         xlate_wc_init(&ctx);
5163     }
5164
5165     COVERAGE_INC(xlate_actions);
5166
5167     if (xin->recirc) {
5168         const struct recirc_state *state = xin->recirc;
5169
5170         xlate_report(&ctx, "Restoring state post-recirculation:");
5171
5172         if (xin->ofpacts_len > 0 || ctx.rule) {
5173             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5174             const char *conflict = xin->ofpacts_len ? "actions" : "rule";
5175
5176             VLOG_WARN_RL(&rl, "Recirculation conflict (%s)!", conflict);
5177             xlate_report(&ctx, "- Recirculation conflict (%s)!", conflict);
5178             ctx.error = XLATE_RECIRCULATION_CONFLICT;
5179             goto exit;
5180         }
5181
5182         /* Set the bridge for post-recirculation processing if needed. */
5183         if (!uuid_equals(ofproto_dpif_get_uuid(ctx.xbridge->ofproto),
5184                          &state->ofproto_uuid)) {
5185             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5186             const struct xbridge *new_bridge
5187                 = xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid);
5188
5189             if (OVS_UNLIKELY(!new_bridge)) {
5190                 /* Drop the packet if the bridge cannot be found. */
5191                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5192                 VLOG_WARN_RL(&rl, "Recirculation bridge no longer exists.");
5193                 xlate_report(&ctx, "- Recirculation bridge no longer exists.");
5194                 ctx.error = XLATE_BRIDGE_NOT_FOUND;
5195                 goto exit;
5196             }
5197             ctx.xbridge = new_bridge;
5198         }
5199
5200         /* Set the post-recirculation table id.  Note: A table lookup is done
5201          * only if there are no post-recirculation actions. */
5202         ctx.table_id = state->table_id;
5203         xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id);
5204
5205         if (!state->conntracked) {
5206             clear_conntrack(flow);
5207         }
5208
5209         /* Restore pipeline metadata. May change flow's in_port and other
5210          * metadata to the values that existed when recirculation was
5211          * triggered. */
5212         recirc_metadata_to_flow(&state->metadata, flow);
5213
5214         /* Restore stack, if any. */
5215         if (state->stack) {
5216             ofpbuf_put(&ctx.stack, state->stack,
5217                        state->n_stack * sizeof *state->stack);
5218         }
5219
5220         /* Restore mirror state. */
5221         ctx.mirrors = state->mirrors;
5222
5223         /* Restore action set, if any. */
5224         if (state->action_set_len) {
5225             xlate_report_actions(&ctx, "- Restoring action set",
5226                                  state->action_set, state->action_set_len);
5227
5228             flow->actset_output = OFPP_UNSET;
5229             xlate_write_actions__(&ctx, state->action_set,
5230                                   state->action_set_len);
5231         }
5232
5233         /* Restore recirculation actions.  If there are no actions, processing
5234          * will start with a lookup in the table set above. */
5235         xin->ofpacts = state->ofpacts;
5236         xin->ofpacts_len = state->ofpacts_len;
5237         if (state->ofpacts_len) {
5238             xlate_report_actions(&ctx, "- Restoring actions",
5239                                  xin->ofpacts, xin->ofpacts_len);
5240         }
5241     } else if (OVS_UNLIKELY(flow->recirc_id)) {
5242         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5243
5244         VLOG_WARN_RL(&rl, "Recirculation context not found for ID %"PRIx32,
5245                      flow->recirc_id);
5246         ctx.error = XLATE_NO_RECIRCULATION_CONTEXT;
5247         goto exit;
5248     }
5249     /* The bridge is now known so obtain its table version. */
5250     ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
5251
5252     if (!xin->ofpacts && !ctx.rule) {
5253         ctx.rule = rule_dpif_lookup_from_table(
5254             ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
5255             ctx.xin->resubmit_stats, &ctx.table_id,
5256             flow->in_port.ofp_port, true, true);
5257         if (ctx.xin->resubmit_stats) {
5258             rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
5259         }
5260         if (ctx.xin->xcache) {
5261             struct xc_entry *entry;
5262
5263             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
5264             entry->u.rule = ctx.rule;
5265             rule_dpif_ref(ctx.rule);
5266         }
5267
5268         if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
5269             ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
5270         }
5271     }
5272
5273     /* Get the proximate input port of the packet.  (If xin->recirc,
5274      * flow->in_port is the ultimate input port of the packet.) */
5275     struct xport *in_port = get_ofp_port(xbridge,
5276                                          ctx.base_flow.in_port.ofp_port);
5277
5278     /* Tunnel stats only for non-recirculated packets. */
5279     if (!xin->recirc && in_port && in_port->is_tunnel) {
5280         if (ctx.xin->resubmit_stats) {
5281             netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
5282             if (in_port->bfd) {
5283                 bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
5284             }
5285         }
5286         if (ctx.xin->xcache) {
5287             struct xc_entry *entry;
5288
5289             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
5290             entry->u.dev.rx = netdev_ref(in_port->netdev);
5291             entry->u.dev.bfd = bfd_ref(in_port->bfd);
5292         }
5293     }
5294
5295     if (!xin->recirc && process_special(&ctx, in_port)) {
5296         /* process_special() did all the processing for this packet.
5297          *
5298          * We do not perform special processing on recirculated packets, as
5299          * recirculated packets are not really received by the bridge.*/
5300     } else if (in_port && in_port->xbundle
5301                && xbundle_mirror_out(xbridge, in_port->xbundle)) {
5302         if (ctx.xin->packet != NULL) {
5303             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5304             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
5305                          "%s, which is reserved exclusively for mirroring",
5306                          ctx.xbridge->name, in_port->xbundle->name);
5307         }
5308     } else {
5309         /* Sampling is done only for packets really received by the bridge. */
5310         unsigned int user_cookie_offset = 0;
5311         if (!xin->recirc) {
5312             user_cookie_offset = compose_sflow_action(&ctx);
5313             compose_ipfix_action(&ctx, ODPP_NONE);
5314         }
5315         size_t sample_actions_len = ctx.odp_actions->size;
5316
5317         if (tnl_process_ecn(flow)
5318             && (!in_port || may_receive(in_port, &ctx))) {
5319             const struct ofpact *ofpacts;
5320             size_t ofpacts_len;
5321
5322             if (xin->ofpacts) {
5323                 ofpacts = xin->ofpacts;
5324                 ofpacts_len = xin->ofpacts_len;
5325             } else if (ctx.rule) {
5326                 const struct rule_actions *actions
5327                     = rule_dpif_get_actions(ctx.rule);
5328                 ofpacts = actions->ofpacts;
5329                 ofpacts_len = actions->ofpacts_len;
5330                 ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
5331             } else {
5332                 OVS_NOT_REACHED();
5333             }
5334
5335             mirror_ingress_packet(&ctx);
5336             do_xlate_actions(ofpacts, ofpacts_len, &ctx);
5337             if (ctx.error) {
5338                 goto exit;
5339             }
5340
5341             /* We've let OFPP_NORMAL and the learning action look at the
5342              * packet, so drop it now if forwarding is disabled. */
5343             if (in_port && (!xport_stp_forward_state(in_port) ||
5344                             !xport_rstp_forward_state(in_port))) {
5345                 /* Drop all actions added by do_xlate_actions() above. */
5346                 ctx.odp_actions->size = sample_actions_len;
5347
5348                 /* Undo changes that may have been done for recirculation. */
5349                 ctx_cancel_recirculation(&ctx);
5350             } else if (ctx.action_set.size) {
5351                 /* Translate action set only if not dropping the packet and
5352                  * not recirculating. */
5353                 if (!exit_recirculates(&ctx)) {
5354                     xlate_action_set(&ctx);
5355                 }
5356             }
5357             /* Check if need to recirculate. */
5358             if (exit_recirculates(&ctx)) {
5359                 compose_recirculate_action(&ctx);
5360             }
5361         }
5362
5363         /* Output only fully processed packets. */
5364         if (!exit_recirculates(&ctx)
5365             && xbridge->has_in_band
5366             && in_band_must_output_to_local_port(flow)
5367             && !actions_output_to_local_port(&ctx)) {
5368             compose_output_action(&ctx, OFPP_LOCAL, NULL);
5369         }
5370
5371         if (user_cookie_offset) {
5372             fix_sflow_action(&ctx, user_cookie_offset);
5373         }
5374     }
5375
5376     if (nl_attr_oversized(ctx.odp_actions->size)) {
5377         /* These datapath actions are too big for a Netlink attribute, so we
5378          * can't hand them to the kernel directly.  dpif_execute() can execute
5379          * them one by one with help, so just mark the result as SLOW_ACTION to
5380          * prevent the flow from being installed. */
5381         COVERAGE_INC(xlate_actions_oversize);
5382         ctx.xout->slow |= SLOW_ACTION;
5383     } else if (too_many_output_actions(ctx.odp_actions)) {
5384         COVERAGE_INC(xlate_actions_too_many_output);
5385         ctx.xout->slow |= SLOW_ACTION;
5386     }
5387
5388     /* Do netflow only for packets really received by the bridge and not sent
5389      * to the controller.  We consider packets sent to the controller to be
5390      * part of the control plane rather than the data plane. */
5391     if (!xin->recirc && xbridge->netflow && !(xout->slow & SLOW_CONTROLLER)) {
5392         if (ctx.xin->resubmit_stats) {
5393             netflow_flow_update(xbridge->netflow, flow,
5394                                 ctx.nf_output_iface,
5395                                 ctx.xin->resubmit_stats);
5396         }
5397         if (ctx.xin->xcache) {
5398             struct xc_entry *entry;
5399
5400             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETFLOW);
5401             entry->u.nf.netflow = netflow_ref(xbridge->netflow);
5402             entry->u.nf.flow = xmemdup(flow, sizeof *flow);
5403             entry->u.nf.iface = ctx.nf_output_iface;
5404         }
5405     }
5406
5407     if (xin->wc) {
5408         xlate_wc_finish(&ctx);
5409     }
5410
5411 exit:
5412     ofpbuf_uninit(&ctx.stack);
5413     ofpbuf_uninit(&ctx.action_set);
5414     ofpbuf_uninit(&scratch_actions);
5415
5416     /* Make sure we return a "drop flow" in case of an error. */
5417     if (ctx.error) {
5418         xout->slow = 0;
5419         if (xin->odp_actions) {
5420             ofpbuf_clear(xin->odp_actions);
5421         }
5422     }
5423     return ctx.error;
5424 }
5425
5426 /* Sends 'packet' out 'ofport'.
5427  * May modify 'packet'.
5428  * Returns 0 if successful, otherwise a positive errno value. */
5429 int
5430 xlate_send_packet(const struct ofport_dpif *ofport, struct dp_packet *packet)
5431 {
5432     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5433     struct xport *xport;
5434     struct ofpact_output output;
5435     struct flow flow;
5436
5437     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
5438     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
5439     flow_extract(packet, &flow);
5440     flow.in_port.ofp_port = OFPP_NONE;
5441
5442     xport = xport_lookup(xcfg, ofport);
5443     if (!xport) {
5444         return EINVAL;
5445     }
5446     output.port = xport->ofp_port;
5447     output.max_len = 0;
5448
5449     return ofproto_dpif_execute_actions(xport->xbridge->ofproto, &flow, NULL,
5450                                         &output.ofpact, sizeof output,
5451                                         packet);
5452 }
5453
5454 struct xlate_cache *
5455 xlate_cache_new(void)
5456 {
5457     struct xlate_cache *xcache = xmalloc(sizeof *xcache);
5458
5459     ofpbuf_init(&xcache->entries, 512);
5460     return xcache;
5461 }
5462
5463 static struct xc_entry *
5464 xlate_cache_add_entry(struct xlate_cache *xcache, enum xc_type type)
5465 {
5466     struct xc_entry *entry;
5467
5468     entry = ofpbuf_put_zeros(&xcache->entries, sizeof *entry);
5469     entry->type = type;
5470
5471     return entry;
5472 }
5473
5474 static void
5475 xlate_cache_netdev(struct xc_entry *entry, const struct dpif_flow_stats *stats)
5476 {
5477     if (entry->u.dev.tx) {
5478         netdev_vport_inc_tx(entry->u.dev.tx, stats);
5479     }
5480     if (entry->u.dev.rx) {
5481         netdev_vport_inc_rx(entry->u.dev.rx, stats);
5482     }
5483     if (entry->u.dev.bfd) {
5484         bfd_account_rx(entry->u.dev.bfd, stats);
5485     }
5486 }
5487
5488 static void
5489 xlate_cache_normal(struct ofproto_dpif *ofproto, struct flow *flow, int vlan)
5490 {
5491     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5492     struct xbridge *xbridge;
5493     struct xbundle *xbundle;
5494     struct flow_wildcards wc;
5495
5496     xbridge = xbridge_lookup(xcfg, ofproto);
5497     if (!xbridge) {
5498         return;
5499     }
5500
5501     xbundle = lookup_input_bundle(xbridge, flow->in_port.ofp_port, false,
5502                                   NULL);
5503     if (!xbundle) {
5504         return;
5505     }
5506
5507     update_learning_table(xbridge, flow, &wc, vlan, xbundle);
5508 }
5509
5510 /* Push stats and perform side effects of flow translation. */
5511 void
5512 xlate_push_stats(struct xlate_cache *xcache,
5513                  const struct dpif_flow_stats *stats)
5514 {
5515     struct xc_entry *entry;
5516     struct ofpbuf entries = xcache->entries;
5517     struct eth_addr dmac;
5518
5519     if (!stats->n_packets) {
5520         return;
5521     }
5522
5523     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5524         switch (entry->type) {
5525         case XC_RULE:
5526             rule_dpif_credit_stats(entry->u.rule, stats);
5527             break;
5528         case XC_BOND:
5529             bond_account(entry->u.bond.bond, entry->u.bond.flow,
5530                          entry->u.bond.vid, stats->n_bytes);
5531             break;
5532         case XC_NETDEV:
5533             xlate_cache_netdev(entry, stats);
5534             break;
5535         case XC_NETFLOW:
5536             netflow_flow_update(entry->u.nf.netflow, entry->u.nf.flow,
5537                                 entry->u.nf.iface, stats);
5538             break;
5539         case XC_MIRROR:
5540             mirror_update_stats(entry->u.mirror.mbridge,
5541                                 entry->u.mirror.mirrors,
5542                                 stats->n_packets, stats->n_bytes);
5543             break;
5544         case XC_LEARN:
5545             ofproto_dpif_flow_mod(entry->u.learn.ofproto, entry->u.learn.fm);
5546             break;
5547         case XC_NORMAL:
5548             xlate_cache_normal(entry->u.normal.ofproto, entry->u.normal.flow,
5549                                entry->u.normal.vlan);
5550             break;
5551         case XC_FIN_TIMEOUT:
5552             xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
5553                                 entry->u.fin.idle, entry->u.fin.hard);
5554             break;
5555         case XC_GROUP:
5556             group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
5557                                     stats);
5558             break;
5559         case XC_TNL_NEIGH:
5560             /* Lookup neighbor to avoid timeout. */
5561             tnl_neigh_lookup(entry->u.tnl_neigh_cache.br_name,
5562                              &entry->u.tnl_neigh_cache.d_ipv6, &dmac);
5563             break;
5564         default:
5565             OVS_NOT_REACHED();
5566         }
5567     }
5568 }
5569
5570 static void
5571 xlate_dev_unref(struct xc_entry *entry)
5572 {
5573     if (entry->u.dev.tx) {
5574         netdev_close(entry->u.dev.tx);
5575     }
5576     if (entry->u.dev.rx) {
5577         netdev_close(entry->u.dev.rx);
5578     }
5579     if (entry->u.dev.bfd) {
5580         bfd_unref(entry->u.dev.bfd);
5581     }
5582 }
5583
5584 static void
5585 xlate_cache_clear_netflow(struct netflow *netflow, struct flow *flow)
5586 {
5587     netflow_flow_clear(netflow, flow);
5588     netflow_unref(netflow);
5589     free(flow);
5590 }
5591
5592 void
5593 xlate_cache_clear(struct xlate_cache *xcache)
5594 {
5595     struct xc_entry *entry;
5596     struct ofpbuf entries;
5597
5598     if (!xcache) {
5599         return;
5600     }
5601
5602     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5603         switch (entry->type) {
5604         case XC_RULE:
5605             rule_dpif_unref(entry->u.rule);
5606             break;
5607         case XC_BOND:
5608             free(entry->u.bond.flow);
5609             bond_unref(entry->u.bond.bond);
5610             break;
5611         case XC_NETDEV:
5612             xlate_dev_unref(entry);
5613             break;
5614         case XC_NETFLOW:
5615             xlate_cache_clear_netflow(entry->u.nf.netflow, entry->u.nf.flow);
5616             break;
5617         case XC_MIRROR:
5618             mbridge_unref(entry->u.mirror.mbridge);
5619             break;
5620         case XC_LEARN:
5621             free(entry->u.learn.fm);
5622             ofpbuf_delete(entry->u.learn.ofpacts);
5623             break;
5624         case XC_NORMAL:
5625             free(entry->u.normal.flow);
5626             break;
5627         case XC_FIN_TIMEOUT:
5628             /* 'u.fin.rule' is always already held as a XC_RULE, which
5629              * has already released it's reference above. */
5630             break;
5631         case XC_GROUP:
5632             group_dpif_unref(entry->u.group.group);
5633             break;
5634         case XC_TNL_NEIGH:
5635             break;
5636         default:
5637             OVS_NOT_REACHED();
5638         }
5639     }
5640
5641     ofpbuf_clear(&xcache->entries);
5642 }
5643
5644 void
5645 xlate_cache_delete(struct xlate_cache *xcache)
5646 {
5647     xlate_cache_clear(xcache);
5648     ofpbuf_uninit(&xcache->entries);
5649     free(xcache);
5650 }