ofproto/ofproto-dpif-xlate.c

   1 /* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
   2  *
   3  * Licensed under the Apache License, Version 2.0 (the "License");
   4  * you may not use this file except in compliance with the License.
   5  * You may obtain a copy of the License at:
   6  *
   7  *     http://www.apache.org/licenses/LICENSE-2.0
   8  *
   9  * Unless required by applicable law or agreed to in writing, software
  10  * distributed under the License is distributed on an "AS IS" BASIS,
  11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12  * See the License for the specific language governing permissions and
  13  * limitations under the License. */
  14
  15 #include <config.h>
  16
  17 #include "ofproto/ofproto-dpif-xlate.h"
  18
  19 #include <errno.h>
  20 #include <arpa/inet.h>
  21 #include <net/if.h>
  22 #include <sys/socket.h>
  23 #include <netinet/in.h>
  24
  25 #include "bfd.h"
  26 #include "bitmap.h"
  27 #include "bond.h"
  28 #include "bundle.h"
  29 #include "byte-order.h"
  30 #include "cfm.h"
  31 #include "connmgr.h"
  32 #include "coverage.h"
  33 #include "dp-packet.h"
  34 #include "dpif.h"
  35 #include "in-band.h"
  36 #include "lacp.h"
  37 #include "learn.h"
  38 #include "mac-learning.h"
  39 #include "mcast-snooping.h"
  40 #include "multipath.h"
  41 #include "netdev-vport.h"
  42 #include "netlink.h"
  43 #include "nx-match.h"
  44 #include "odp-execute.h"
  45 #include "ofproto/ofproto-dpif-ipfix.h"
  46 #include "ofproto/ofproto-dpif-mirror.h"
  47 #include "ofproto/ofproto-dpif-monitor.h"
  48 #include "ofproto/ofproto-dpif-sflow.h"
  49 #include "ofproto/ofproto-dpif.h"
  50 #include "ofproto/ofproto-provider.h"
  51 #include "openvswitch/dynamic-string.h"
  52 #include "openvswitch/meta-flow.h"
  53 #include "openvswitch/list.h"
  54 #include "openvswitch/ofp-actions.h"
  55 #include "openvswitch/vlog.h"
  56 #include "ovs-lldp.h"
  57 #include "ovs-router.h"
  58 #include "packets.h"
  59 #include "tnl-neigh-cache.h"
  60 #include "tnl-ports.h"
  61 #include "tunnel.h"
  62
  63 COVERAGE_DEFINE(xlate_actions);
  64 COVERAGE_DEFINE(xlate_actions_oversize);
  65 COVERAGE_DEFINE(xlate_actions_too_many_output);
  66
  67 VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
  68
  69 /* Maximum depth of flow table recursion (due to resubmit actions) in a
  70  * flow translation.
  71  *
  72  * The goal of limiting the depth of resubmits is to ensure that flow
  73  * translation eventually terminates.  Only resubmits to the same table or an
  74  * earlier table count against the maximum depth.  This is because resubmits to
  75  * strictly monotonically increasing table IDs will eventually terminate, since
  76  * any OpenFlow switch has a finite number of tables.  OpenFlow tables are most
  77  * commonly traversed in numerically increasing order, so this limit has little
  78  * effect on conventionally designed OpenFlow pipelines.
  79  *
  80  * Outputs to patch ports and to groups also count against the depth limit. */
  81 #define MAX_DEPTH 64
  82
  83 /* Maximum number of resubmit actions in a flow translation, whether they are
  84  * recursive or not. */
  85 #define MAX_RESUBMITS (MAX_DEPTH * MAX_DEPTH)
  86
  87 struct xbridge {
  88     struct hmap_node hmap_node;   /* Node in global 'xbridges' map. */
  89     struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
  90
  91     struct ovs_list xbundles;     /* Owned xbundles. */
  92     struct hmap xports;           /* Indexed by ofp_port. */
  93
  94     char *name;                   /* Name used in log messages. */
  95     struct dpif *dpif;            /* Datapath interface. */
  96     struct mac_learning *ml;      /* Mac learning handle. */
  97     struct mcast_snooping *ms;    /* Multicast Snooping handle. */
  98     struct mbridge *mbridge;      /* Mirroring. */
  99     struct dpif_sflow *sflow;     /* SFlow handle, or null. */
 100     struct dpif_ipfix *ipfix;     /* Ipfix handle, or null. */
 101     struct netflow *netflow;      /* Netflow handle, or null. */
 102     struct stp *stp;              /* STP or null if disabled. */
 103     struct rstp *rstp;            /* RSTP or null if disabled. */
 104
 105     bool has_in_band;             /* Bridge has in band control? */
 106     bool forward_bpdu;            /* Bridge forwards STP BPDUs? */
 107
 108     /* Datapath feature support. */
 109     struct dpif_backer_support support;
 110 };
 111
 112 struct xbundle {
 113     struct hmap_node hmap_node;    /* In global 'xbundles' map. */
 114     struct ofbundle *ofbundle;     /* Key in global 'xbundles' map. */
 115
 116     struct ovs_list list_node;     /* In parent 'xbridges' list. */
 117     struct xbridge *xbridge;       /* Parent xbridge. */
 118
 119     struct ovs_list xports;        /* Contains "struct xport"s. */
 120
 121     char *name;                    /* Name used in log messages. */
 122     struct bond *bond;             /* Nonnull iff more than one port. */
 123     struct lacp *lacp;             /* LACP handle or null. */
 124
 125     enum port_vlan_mode vlan_mode; /* VLAN mode. */
 126     int vlan;                      /* -1=trunk port, else a 12-bit VLAN ID. */
 127     unsigned long *trunks;         /* Bitmap of trunked VLANs, if 'vlan' == -1.
 128                                     * NULL if all VLANs are trunked. */
 129     bool use_priority_tags;        /* Use 802.1p tag for frames in VLAN 0? */
 130     bool floodable;                /* No port has OFPUTIL_PC_NO_FLOOD set? */
 131 };
 132
 133 struct xport {
 134     struct hmap_node hmap_node;      /* Node in global 'xports' map. */
 135     struct ofport_dpif *ofport;      /* Key in global 'xports map. */
 136
 137     struct hmap_node ofp_node;       /* Node in parent xbridge 'xports' map. */
 138     ofp_port_t ofp_port;             /* Key in parent xbridge 'xports' map. */
 139
 140     odp_port_t odp_port;             /* Datapath port number or ODPP_NONE. */
 141
 142     struct ovs_list bundle_node;     /* In parent xbundle (if it exists). */
 143     struct xbundle *xbundle;         /* Parent xbundle or null. */
 144
 145     struct netdev *netdev;           /* 'ofport''s netdev. */
 146
 147     struct xbridge *xbridge;         /* Parent bridge. */
 148     struct xport *peer;              /* Patch port peer or null. */
 149
 150     enum ofputil_port_config config; /* OpenFlow port configuration. */
 151     enum ofputil_port_state state;   /* OpenFlow port state. */
 152     int stp_port_no;                 /* STP port number or -1 if not in use. */
 153     struct rstp_port *rstp_port;     /* RSTP port or null. */
 154
 155     struct hmap skb_priorities;      /* Map of 'skb_priority_to_dscp's. */
 156
 157     bool may_enable;                 /* May be enabled in bonds. */
 158     bool is_tunnel;                  /* Is a tunnel port. */
 159
 160     struct cfm *cfm;                 /* CFM handle or null. */
 161     struct bfd *bfd;                 /* BFD handle or null. */
 162     struct lldp *lldp;               /* LLDP handle or null. */
 163 };
 164
 165 struct xlate_ctx {
 166     struct xlate_in *xin;
 167     struct xlate_out *xout;
 168
 169     const struct xbridge *xbridge;
 170
 171     /* Flow tables version at the beginning of the translation. */
 172     cls_version_t tables_version;
 173
 174     /* Flow at the last commit. */
 175     struct flow base_flow;
 176
 177     /* Tunnel IP destination address as received.  This is stored separately
 178      * as the base_flow.tunnel is cleared on init to reflect the datapath
 179      * behavior.  Used to make sure not to send tunneled output to ourselves,
 180      * which might lead to an infinite loop.  This could happen easily
 181      * if a tunnel is marked as 'ip_remote=flow', and the flow does not
 182      * actually set the tun_dst field. */
 183     struct in6_addr orig_tunnel_ipv6_dst;
 184
 185     /* Stack for the push and pop actions.  Each stack element is of type
 186      * "union mf_subvalue". */
 187     struct ofpbuf stack;
 188
 189     /* The rule that we are currently translating, or NULL. */
 190     struct rule_dpif *rule;
 191
 192     /* Flow translation populates this with wildcards relevant in translation.
 193      * When 'xin->wc' is nonnull, this is the same pointer.  When 'xin->wc' is
 194      * null, this is a pointer to a temporary buffer. */
 195     struct flow_wildcards *wc;
 196
 197     /* Output buffer for datapath actions.  When 'xin->odp_actions' is nonnull,
 198      * this is the same pointer.  When 'xin->odp_actions' is null, this points
 199      * to a scratch ofpbuf.  This allows code to add actions to
 200      * 'ctx->odp_actions' without worrying about whether the caller really
 201      * wants actions. */
 202     struct ofpbuf *odp_actions;
 203
 204     /* Statistics maintained by xlate_table_action().
 205      *
 206      * 'indentation' is the nesting level for resubmits.  It is used to indent
 207      * the output of resubmit_hook (e.g. for the "ofproto/trace" feature).
 208      *
 209      * The other statistics limit the amount of work that a single flow
 210      * translation can perform.  The goal of the first of these, 'depth', is
 211      * primarily to prevent translation from performing an infinite amount of
 212      * work.  It counts the current depth of nested "resubmit"s (and a few
 213      * other activities); when a resubmit returns, it decreases.  Resubmits to
 214      * tables in strictly monotonically increasing order don't contribute to
 215      * 'depth' because they cannot cause a flow translation to take an infinite
 216      * amount of time (because the number of tables is finite).  Translation
 217      * aborts when 'depth' exceeds MAX_DEPTH.
 218      *
 219      * 'resubmits', on the other hand, prevents flow translation from
 220      * performing an extraordinarily large while still finite amount of work.
 221      * It counts the total number of resubmits (and a few other activities)
 222      * that have been executed.  Returning from a resubmit does not affect this
 223      * counter.  Thus, this limits the amount of work that a particular
 224      * translation can perform.  Translation aborts when 'resubmits' exceeds
 225      * MAX_RESUBMITS (which is much larger than MAX_DEPTH).
 226      */
 227     int indentation;            /* Indentation level for resubmit_hook. */
 228     int depth;                  /* Current resubmit nesting depth. */
 229     int resubmits;              /* Total number of resubmits. */
 230     bool in_group;              /* Currently translating ofgroup, if true. */
 231     bool in_action_set;         /* Currently translating action_set, if true. */
 232
 233     uint8_t table_id;           /* OpenFlow table ID where flow was found. */
 234     ovs_be64 rule_cookie;       /* Cookie of the rule being translated. */
 235     uint32_t orig_skb_priority; /* Priority when packet arrived. */
 236     uint32_t sflow_n_outputs;   /* Number of output ports. */
 237     odp_port_t sflow_odp_port;  /* Output port for composing sFlow action. */
 238     ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
 239     bool exit;                  /* No further actions should be processed. */
 240     mirror_mask_t mirrors;      /* Bitmap of associated mirrors. */
 241     int mirror_snaplen;         /* Max size of a mirror packet in byte. */
 242
 243    /* Freezing Translation
 244     * ====================
 245     *
 246     * At some point during translation, the code may recognize the need to halt
 247     * and checkpoint the translation in a way that it can be restarted again
 248     * later.  We call the checkpointing process "freezing" and the restarting
 249     * process "thawing".
 250     *
 251     * The use cases for freezing are:
 252     *
 253     *     - "Recirculation", where the translation process discovers that it
 254     *       doesn't have enough information to complete translation without
 255     *       actually executing the actions that have already been translated,
 256     *       which provides the additionally needed information.  In these
 257     *       situations, translation freezes translation and assigns the frozen
 258     *       data a unique "recirculation ID", which it associates with the data
 259     *       in a table in userspace (see ofproto-dpif-rid.h).  It also adds a
 260     *       OVS_ACTION_ATTR_RECIRC action specifying that ID to the datapath
 261     *       actions.  When a packet hits that action, the datapath looks its
 262     *       flow up again using the ID.  If there's a miss, it comes back to
 263     *       userspace, which find the recirculation table entry for the ID,
 264     *       thaws the associated frozen data, and continues translation from
 265     *       that point given the additional information that is now known.
 266     *
 267     *       The archetypal example is MPLS.  As MPLS is implemented in
 268     *       OpenFlow, the protocol that follows the last MPLS label becomes
 269     *       known only when that label is popped by an OpenFlow action.  That
 270     *       means that Open vSwitch can't extract the headers beyond the MPLS
 271     *       labels until the pop action is executed.  Thus, at that point
 272     *       translation uses the recirculation process to extract the headers
 273     *       beyond the MPLS labels.
 274     *
 275     *       (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for
 276     *       output to bonds.  OVS pre-populates all the datapath flows for bond
 277     *       output in the datapath, though, which means that the elaborate
 278     *       process of coming back to userspace for a second round of
 279     *       translation isn't needed, and so bonds don't follow the above
 280     *       process.)
 281     *
 282     *     - "Continuation".  A continuation is a way for an OpenFlow controller
 283     *       to interpose on a packet's traversal of the OpenFlow tables.  When
 284     *       the translation process encounters a "controller" action with the
 285     *       "pause" flag, it freezes translation, serializes the frozen data,
 286     *       and sends it to an OpenFlow controller.  The controller then
 287     *       examines and possibly modifies the frozen data and eventually sends
 288     *       it back to the switch, which thaws it and continues translation.
 289     *
 290     * The main problem of freezing translation is preserving state, so that
 291     * when the translation is thawed later it resumes from where it left off,
 292     * without disruption.  In particular, actions must be preserved as follows:
 293     *
 294     *     - If we're freezing because an action needed more information, the
 295     *       action that prompted it.
 296     *
 297     *     - Any actions remaining to be translated within the current flow.
 298     *
 299     *     - If translation was frozen within a NXAST_RESUBMIT, then any actions
 300     *       following the resubmit action.  Resubmit actions can be nested, so
 301     *       this has to go all the way up the control stack.
 302     *
 303     *     - The OpenFlow 1.1+ action set.
 304     *
 305     * State that actions and flow table lookups can depend on, such as the
 306     * following, must also be preserved:
 307     *
 308     *     - Metadata fields (input port, registers, OF1.1+ metadata, ...).
 309     *
 310     *     - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions.
 311     *
 312     *     - The table ID and cookie of the flow being translated at each level
 313     *       of the control stack, because these can become visible through
 314     *       OFPAT_CONTROLLER actions (and other ways).
 315     *
 316     * Translation allows for the control of this state preservation via these
 317     * members.  When a need to freeze translation is identified, the
 318     * translation process:
 319     *
 320     * 1. Sets 'freezing' to true.
 321     *
 322     * 2. Sets 'exit' to true to tell later steps that we're exiting from the
 323     *    translation process.
 324     *
 325     * 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points
 326     *    frozen_actions.header to the action to make it easy to find it later.
 327     *    This action holds the current table ID and cookie so that they can be
 328     *    restored during a post-recirculation upcall translation.
 329     *
 330     * 4. Adds the action that prompted recirculation and any actions following
 331     *    it within the same flow to 'frozen_actions', so that they can be
 332     *    executed during a post-recirculation upcall translation.
 333     *
 334     * 5. Returns.
 335     *
 336     * 6. The action that prompted recirculation might be nested in a stack of
 337     *    nested "resubmit"s that have actions remaining.  Each of these notices
 338     *    that we're exiting and freezing and responds by adding more
 339     *    OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary,
 340     *    followed by any actions that were yet unprocessed.
 341     *
 342     * If we're freezing because of recirculation, the caller generates a
 343     * recirculation ID and associates all the state produced by this process
 344     * with it.  For post-recirculation upcall translation, the caller passes it
 345     * back in for the new translation to execute.  The process yielded a set of
 346     * ofpacts that can be translated directly, so it is not much of a special
 347     * case at that point.
 348     */
 349     bool freezing;
 350     struct ofpbuf frozen_actions;
 351     const struct ofpact_controller *pause;
 352
 353     /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
 354      * This is a trigger for recirculation in cases where translating an action
 355      * or looking up a flow requires access to the fields of the packet after
 356      * the MPLS label stack that was originally present. */
 357     bool was_mpls;
 358
 359     /* True if conntrack has been performed on this packet during processing
 360      * on the current bridge. This is used to determine whether conntrack
 361      * state from the datapath should be honored after thawing. */
 362     bool conntracked;
 363
 364     /* Pointer to an embedded NAT action in a conntrack action, or NULL. */
 365     struct ofpact_nat *ct_nat_action;
 366
 367     /* OpenFlow 1.1+ action set.
 368      *
 369      * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
 370      * When translation is otherwise complete, ofpacts_execute_action_set()
 371      * converts it to a set of "struct ofpact"s that can be translated into
 372      * datapath actions. */
 373     bool action_set_has_group;  /* Action set contains OFPACT_GROUP? */
 374     struct ofpbuf action_set;   /* Action set. */
 375
 376     enum xlate_error error;     /* Translation failed. */
 377 };
 378
 379 const char *xlate_strerror(enum xlate_error error)
 380 {
 381     switch (error) {
 382     case XLATE_OK:
 383         return "OK";
 384     case XLATE_BRIDGE_NOT_FOUND:
 385         return "Bridge not found";
 386     case XLATE_RECURSION_TOO_DEEP:
 387         return "Recursion too deep";
 388     case XLATE_TOO_MANY_RESUBMITS:
 389         return "Too many resubmits";
 390     case XLATE_STACK_TOO_DEEP:
 391         return "Stack too deep";
 392     case XLATE_NO_RECIRCULATION_CONTEXT:
 393         return "No recirculation context";
 394     case XLATE_RECIRCULATION_CONFLICT:
 395         return "Recirculation conflict";
 396     case XLATE_TOO_MANY_MPLS_LABELS:
 397         return "Too many MPLS labels";
 398     }
 399     return "Unknown error";
 400 }
 401
 402 static void xlate_action_set(struct xlate_ctx *ctx);
 403 static void xlate_commit_actions(struct xlate_ctx *ctx);
 404
 405 static void
 406 ctx_trigger_freeze(struct xlate_ctx *ctx)
 407 {
 408     ctx->exit = true;
 409     ctx->freezing = true;
 410 }
 411
 412 static bool
 413 ctx_first_frozen_action(const struct xlate_ctx *ctx)
 414 {
 415     return !ctx->frozen_actions.size;
 416 }
 417
 418 static void
 419 ctx_cancel_freeze(struct xlate_ctx *ctx)
 420 {
 421     if (ctx->freezing) {
 422         ctx->freezing = false;
 423         ofpbuf_clear(&ctx->frozen_actions);
 424         ctx->frozen_actions.header = NULL;
 425     }
 426 }
 427
 428 static void finish_freezing(struct xlate_ctx *ctx);
 429
 430 /* A controller may use OFPP_NONE as the ingress port to indicate that
 431  * it did not arrive on a "real" port.  'ofpp_none_bundle' exists for
 432  * when an input bundle is needed for validation (e.g., mirroring or
 433  * OFPP_NORMAL processing).  It is not connected to an 'ofproto' or have
 434  * any 'port' structs, so care must be taken when dealing with it. */
 435 static struct xbundle ofpp_none_bundle = {
 436     .name      = "OFPP_NONE",
 437     .vlan_mode = PORT_VLAN_TRUNK
 438 };
 439
 440 /* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
 441  * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
 442  * traffic egressing the 'ofport' with that priority should be marked with. */
 443 struct skb_priority_to_dscp {
 444     struct hmap_node hmap_node; /* Node in 'ofport_dpif''s 'skb_priorities'. */
 445     uint32_t skb_priority;      /* Priority of this queue (see struct flow). */
 446
 447     uint8_t dscp;               /* DSCP bits to mark outgoing traffic with. */
 448 };
 449
 450 enum xc_type {
 451     XC_RULE,
 452     XC_BOND,
 453     XC_NETDEV,
 454     XC_NETFLOW,
 455     XC_MIRROR,
 456     XC_LEARN,
 457     XC_NORMAL,
 458     XC_FIN_TIMEOUT,
 459     XC_GROUP,
 460     XC_TNL_NEIGH,
 461 };
 462
 463 /* xlate_cache entries hold enough information to perform the side effects of
 464  * xlate_actions() for a rule, without needing to perform rule translation
 465  * from scratch. The primary usage of these is to submit statistics to objects
 466  * that a flow relates to, although they may be used for other effects as well
 467  * (for instance, refreshing hard timeouts for learned flows). */
 468 struct xc_entry {
 469     enum xc_type type;
 470     union {
 471         struct rule_dpif *rule;
 472         struct {
 473             struct netdev *tx;
 474             struct netdev *rx;
 475             struct bfd *bfd;
 476         } dev;
 477         struct {
 478             struct netflow *netflow;
 479             struct flow *flow;
 480             ofp_port_t iface;
 481         } nf;
 482         struct {
 483             struct mbridge *mbridge;
 484             mirror_mask_t mirrors;
 485         } mirror;
 486         struct {
 487             struct bond *bond;
 488             struct flow *flow;
 489             uint16_t vid;
 490         } bond;
 491         struct {
 492             struct ofproto_dpif *ofproto;
 493             struct ofputil_flow_mod *fm;
 494             struct ofpbuf *ofpacts;
 495         } learn;
 496         struct {
 497             struct ofproto_dpif *ofproto;
 498             struct flow *flow;
 499             int vlan;
 500         } normal;
 501         struct {
 502             struct rule_dpif *rule;
 503             uint16_t idle;
 504             uint16_t hard;
 505         } fin;
 506         struct {
 507             struct group_dpif *group;
 508             struct ofputil_bucket *bucket;
 509         } group;
 510         struct {
 511             char br_name[IFNAMSIZ];
 512             struct in6_addr d_ipv6;
 513         } tnl_neigh_cache;
 514     } u;
 515 };
 516
 517 #define XC_ENTRY_FOR_EACH(ENTRY, ENTRIES, XCACHE)               \
 518     ENTRIES = XCACHE->entries;                                  \
 519     for (ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY);      \
 520          ENTRY;                                                 \
 521          ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY))
 522
 523 struct xlate_cache {
 524     struct ofpbuf entries;
 525 };
 526
 527 /* Xlate config contains hash maps of all bridges, bundles and ports.
 528  * Xcfgp contains the pointer to the current xlate configuration.
 529  * When the main thread needs to change the configuration, it copies xcfgp to
 530  * new_xcfg and edits new_xcfg. This enables the use of RCU locking which
 531  * does not block handler and revalidator threads. */
 532 struct xlate_cfg {
 533     struct hmap xbridges;
 534     struct hmap xbundles;
 535     struct hmap xports;
 536 };
 537 static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
 538 static struct xlate_cfg *new_xcfg = NULL;
 539
 540 static bool may_receive(const struct xport *, struct xlate_ctx *);
 541 static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
 542                              struct xlate_ctx *);
 543 static void xlate_normal(struct xlate_ctx *);
 544 static inline void xlate_report(struct xlate_ctx *, const char *, ...)
 545     OVS_PRINTF_FORMAT(2, 3);
 546 static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
 547                                uint8_t table_id, bool may_packet_in,
 548                                bool honor_table_miss);
 549 static bool input_vid_is_valid(uint16_t vid, struct xbundle *, bool warn);
 550 static uint16_t input_vid_to_vlan(const struct xbundle *, uint16_t vid);
 551 static void output_normal(struct xlate_ctx *, const struct xbundle *,
 552                           uint16_t vlan);
 553
 554 /* Optional bond recirculation parameter to compose_output_action(). */
 555 struct xlate_bond_recirc {
 556     uint32_t recirc_id;  /* !0 Use recirculation instead of output. */
 557     uint8_t  hash_alg;   /* !0 Compute hash for recirc before. */
 558     uint32_t hash_basis;  /* Compute hash for recirc before. */
 559 };
 560
 561 static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
 562                                   const struct xlate_bond_recirc *xr);
 563
 564 static struct xbridge *xbridge_lookup(struct xlate_cfg *,
 565                                       const struct ofproto_dpif *);
 566 static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
 567                                               const struct uuid *);
 568 static struct xbundle *xbundle_lookup(struct xlate_cfg *,
 569                                       const struct ofbundle *);
 570 static struct xport *xport_lookup(struct xlate_cfg *,
 571                                   const struct ofport_dpif *);
 572 static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
 573 static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
 574                                                      uint32_t skb_priority);
 575 static void clear_skb_priorities(struct xport *);
 576 static size_t count_skb_priorities(const struct xport *);
 577 static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
 578                                    uint8_t *dscp);
 579
 580 static struct xc_entry *xlate_cache_add_entry(struct xlate_cache *xc,
 581                                               enum xc_type type);
 582 static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
 583 static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
 584 static void xlate_xport_init(struct xlate_cfg *, struct xport *);
 585 static void xlate_xbridge_set(struct xbridge *, struct dpif *,
 586                               const struct mac_learning *, struct stp *,
 587                               struct rstp *, const struct mcast_snooping *,
 588                               const struct mbridge *,
 589                               const struct dpif_sflow *,
 590                               const struct dpif_ipfix *,
 591                               const struct netflow *,
 592                               bool forward_bpdu, bool has_in_band,
 593                               const struct dpif_backer_support *);
 594 static void xlate_xbundle_set(struct xbundle *xbundle,
 595                               enum port_vlan_mode vlan_mode, int vlan,
 596                               unsigned long *trunks, bool use_priority_tags,
 597                               const struct bond *bond, const struct lacp *lacp,
 598                               bool floodable);
 599 static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 600                             const struct netdev *netdev, const struct cfm *cfm,
 601                             const struct bfd *bfd, const struct lldp *lldp,
 602                             int stp_port_no, const struct rstp_port *rstp_port,
 603                             enum ofputil_port_config config,
 604                             enum ofputil_port_state state, bool is_tunnel,
 605                             bool may_enable);
 606 static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
 607 static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
 608 static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
 609 static void xlate_xbridge_copy(struct xbridge *);
 610 static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
 611 static void xlate_xport_copy(struct xbridge *, struct xbundle *,
 612                              struct xport *);
 613 static void xlate_xcfg_free(struct xlate_cfg *);
 614
 615 static inline void
 616 xlate_report(struct xlate_ctx *ctx, const char *format, ...)
 617 {
 618     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
 619         va_list args;
 620
 621         va_start(args, format);
 622         ctx->xin->report_hook(ctx->xin, ctx->indentation, format, args);
 623         va_end(args);
 624     }
 625 }
 626
 627 static struct vlog_rate_limit error_report_rl = VLOG_RATE_LIMIT_INIT(1, 5);
 628
 629 #define XLATE_REPORT_ERROR(CTX, ...)                    \
 630     do {                                                \
 631         if (OVS_UNLIKELY((CTX)->xin->report_hook)) {    \
 632             xlate_report(CTX, __VA_ARGS__);             \
 633         } else {                                        \
 634             VLOG_ERR_RL(&error_report_rl, __VA_ARGS__); \
 635         }                                               \
 636     } while (0)
 637
 638 static inline void
 639 xlate_report_actions(struct xlate_ctx *ctx, const char *title,
 640                      const struct ofpact *ofpacts, size_t ofpacts_len)
 641 {
 642     if (OVS_UNLIKELY(ctx->xin->report_hook)) {
 643         struct ds s = DS_EMPTY_INITIALIZER;
 644         ofpacts_format(ofpacts, ofpacts_len, &s);
 645         xlate_report(ctx, "%s: %s", title, ds_cstr(&s));
 646         ds_destroy(&s);
 647     }
 648 }
 649
 650 static void
 651 xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 652 {
 653     ovs_list_init(&xbridge->xbundles);
 654     hmap_init(&xbridge->xports);
 655     hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
 656                 hash_pointer(xbridge->ofproto, 0));
 657 }
 658
 659 static void
 660 xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
 661 {
 662     ovs_list_init(&xbundle->xports);
 663     ovs_list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
 664     hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
 665                 hash_pointer(xbundle->ofbundle, 0));
 666 }
 667
 668 static void
 669 xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
 670 {
 671     hmap_init(&xport->skb_priorities);
 672     hmap_insert(&xcfg->xports, &xport->hmap_node,
 673                 hash_pointer(xport->ofport, 0));
 674     hmap_insert(&xport->xbridge->xports, &xport->ofp_node,
 675                 hash_ofp_port(xport->ofp_port));
 676 }
 677
 678 static void
 679 xlate_xbridge_set(struct xbridge *xbridge,
 680                   struct dpif *dpif,
 681                   const struct mac_learning *ml, struct stp *stp,
 682                   struct rstp *rstp, const struct mcast_snooping *ms,
 683                   const struct mbridge *mbridge,
 684                   const struct dpif_sflow *sflow,
 685                   const struct dpif_ipfix *ipfix,
 686                   const struct netflow *netflow,
 687                   bool forward_bpdu, bool has_in_band,
 688                   const struct dpif_backer_support *support)
 689 {
 690     if (xbridge->ml != ml) {
 691         mac_learning_unref(xbridge->ml);
 692         xbridge->ml = mac_learning_ref(ml);
 693     }
 694
 695     if (xbridge->ms != ms) {
 696         mcast_snooping_unref(xbridge->ms);
 697         xbridge->ms = mcast_snooping_ref(ms);
 698     }
 699
 700     if (xbridge->mbridge != mbridge) {
 701         mbridge_unref(xbridge->mbridge);
 702         xbridge->mbridge = mbridge_ref(mbridge);
 703     }
 704
 705     if (xbridge->sflow != sflow) {
 706         dpif_sflow_unref(xbridge->sflow);
 707         xbridge->sflow = dpif_sflow_ref(sflow);
 708     }
 709
 710     if (xbridge->ipfix != ipfix) {
 711         dpif_ipfix_unref(xbridge->ipfix);
 712         xbridge->ipfix = dpif_ipfix_ref(ipfix);
 713     }
 714
 715     if (xbridge->stp != stp) {
 716         stp_unref(xbridge->stp);
 717         xbridge->stp = stp_ref(stp);
 718     }
 719
 720     if (xbridge->rstp != rstp) {
 721         rstp_unref(xbridge->rstp);
 722         xbridge->rstp = rstp_ref(rstp);
 723     }
 724
 725     if (xbridge->netflow != netflow) {
 726         netflow_unref(xbridge->netflow);
 727         xbridge->netflow = netflow_ref(netflow);
 728     }
 729
 730     xbridge->dpif = dpif;
 731     xbridge->forward_bpdu = forward_bpdu;
 732     xbridge->has_in_band = has_in_band;
 733     xbridge->support = *support;
 734 }
 735
 736 static void
 737 xlate_xbundle_set(struct xbundle *xbundle,
 738                   enum port_vlan_mode vlan_mode, int vlan,
 739                   unsigned long *trunks, bool use_priority_tags,
 740                   const struct bond *bond, const struct lacp *lacp,
 741                   bool floodable)
 742 {
 743     ovs_assert(xbundle->xbridge);
 744
 745     xbundle->vlan_mode = vlan_mode;
 746     xbundle->vlan = vlan;
 747     xbundle->trunks = trunks;
 748     xbundle->use_priority_tags = use_priority_tags;
 749     xbundle->floodable = floodable;
 750
 751     if (xbundle->bond != bond) {
 752         bond_unref(xbundle->bond);
 753         xbundle->bond = bond_ref(bond);
 754     }
 755
 756     if (xbundle->lacp != lacp) {
 757         lacp_unref(xbundle->lacp);
 758         xbundle->lacp = lacp_ref(lacp);
 759     }
 760 }
 761
 762 static void
 763 xlate_xport_set(struct xport *xport, odp_port_t odp_port,
 764                 const struct netdev *netdev, const struct cfm *cfm,
 765                 const struct bfd *bfd, const struct lldp *lldp, int stp_port_no,
 766                 const struct rstp_port* rstp_port,
 767                 enum ofputil_port_config config, enum ofputil_port_state state,
 768                 bool is_tunnel, bool may_enable)
 769 {
 770     xport->config = config;
 771     xport->state = state;
 772     xport->stp_port_no = stp_port_no;
 773     xport->is_tunnel = is_tunnel;
 774     xport->may_enable = may_enable;
 775     xport->odp_port = odp_port;
 776
 777     if (xport->rstp_port != rstp_port) {
 778         rstp_port_unref(xport->rstp_port);
 779         xport->rstp_port = rstp_port_ref(rstp_port);
 780     }
 781
 782     if (xport->cfm != cfm) {
 783         cfm_unref(xport->cfm);
 784         xport->cfm = cfm_ref(cfm);
 785     }
 786
 787     if (xport->bfd != bfd) {
 788         bfd_unref(xport->bfd);
 789         xport->bfd = bfd_ref(bfd);
 790     }
 791
 792     if (xport->lldp != lldp) {
 793         lldp_unref(xport->lldp);
 794         xport->lldp = lldp_ref(lldp);
 795     }
 796
 797     if (xport->netdev != netdev) {
 798         netdev_close(xport->netdev);
 799         xport->netdev = netdev_ref(netdev);
 800     }
 801 }
 802
 803 static void
 804 xlate_xbridge_copy(struct xbridge *xbridge)
 805 {
 806     struct xbundle *xbundle;
 807     struct xport *xport;
 808     struct xbridge *new_xbridge = xzalloc(sizeof *xbridge);
 809     new_xbridge->ofproto = xbridge->ofproto;
 810     new_xbridge->name = xstrdup(xbridge->name);
 811     xlate_xbridge_init(new_xcfg, new_xbridge);
 812
 813     xlate_xbridge_set(new_xbridge,
 814                       xbridge->dpif, xbridge->ml, xbridge->stp,
 815                       xbridge->rstp, xbridge->ms, xbridge->mbridge,
 816                       xbridge->sflow, xbridge->ipfix, xbridge->netflow,
 817                       xbridge->forward_bpdu, xbridge->has_in_band,
 818                       &xbridge->support);
 819     LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
 820         xlate_xbundle_copy(new_xbridge, xbundle);
 821     }
 822
 823     /* Copy xports which are not part of a xbundle */
 824     HMAP_FOR_EACH (xport, ofp_node, &xbridge->xports) {
 825         if (!xport->xbundle) {
 826             xlate_xport_copy(new_xbridge, NULL, xport);
 827         }
 828     }
 829 }
 830
 831 static void
 832 xlate_xbundle_copy(struct xbridge *xbridge, struct xbundle *xbundle)
 833 {
 834     struct xport *xport;
 835     struct xbundle *new_xbundle = xzalloc(sizeof *xbundle);
 836     new_xbundle->ofbundle = xbundle->ofbundle;
 837     new_xbundle->xbridge = xbridge;
 838     new_xbundle->name = xstrdup(xbundle->name);
 839     xlate_xbundle_init(new_xcfg, new_xbundle);
 840
 841     xlate_xbundle_set(new_xbundle, xbundle->vlan_mode,
 842                       xbundle->vlan, xbundle->trunks,
 843                       xbundle->use_priority_tags, xbundle->bond, xbundle->lacp,
 844                       xbundle->floodable);
 845     LIST_FOR_EACH (xport, bundle_node, &xbundle->xports) {
 846         xlate_xport_copy(xbridge, new_xbundle, xport);
 847     }
 848 }
 849
 850 static void
 851 xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
 852                  struct xport *xport)
 853 {
 854     struct skb_priority_to_dscp *pdscp, *new_pdscp;
 855     struct xport *new_xport = xzalloc(sizeof *xport);
 856     new_xport->ofport = xport->ofport;
 857     new_xport->ofp_port = xport->ofp_port;
 858     new_xport->xbridge = xbridge;
 859     xlate_xport_init(new_xcfg, new_xport);
 860
 861     xlate_xport_set(new_xport, xport->odp_port, xport->netdev, xport->cfm,
 862                     xport->bfd, xport->lldp, xport->stp_port_no,
 863                     xport->rstp_port, xport->config, xport->state,
 864                     xport->is_tunnel, xport->may_enable);
 865
 866     if (xport->peer) {
 867         struct xport *peer = xport_lookup(new_xcfg, xport->peer->ofport);
 868         if (peer) {
 869             new_xport->peer = peer;
 870             new_xport->peer->peer = new_xport;
 871         }
 872     }
 873
 874     if (xbundle) {
 875         new_xport->xbundle = xbundle;
 876         ovs_list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
 877     }
 878
 879     HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
 880         new_pdscp = xmalloc(sizeof *pdscp);
 881         new_pdscp->skb_priority = pdscp->skb_priority;
 882         new_pdscp->dscp = pdscp->dscp;
 883         hmap_insert(&new_xport->skb_priorities, &new_pdscp->hmap_node,
 884                     hash_int(new_pdscp->skb_priority, 0));
 885     }
 886 }
 887
 888 /* Sets the current xlate configuration to new_xcfg and frees the old xlate
 889  * configuration in xcfgp.
 890  *
 891  * This needs to be called after editing the xlate configuration.
 892  *
 893  * Functions that edit the new xlate configuration are
 894  * xlate_<ofproto/bundle/ofport>_set and xlate_<ofproto/bundle/ofport>_remove.
 895  *
 896  * A sample workflow:
 897  *
 898  * xlate_txn_start();
 899  * ...
 900  * edit_xlate_configuration();
 901  * ...
 902  * xlate_txn_commit(); */
 903 void
 904 xlate_txn_commit(void)
 905 {
 906     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 907
 908     ovsrcu_set(&xcfgp, new_xcfg);
 909     ovsrcu_synchronize();
 910     xlate_xcfg_free(xcfg);
 911     new_xcfg = NULL;
 912 }
 913
 914 /* Copies the current xlate configuration in xcfgp to new_xcfg.
 915  *
 916  * This needs to be called prior to editing the xlate configuration. */
 917 void
 918 xlate_txn_start(void)
 919 {
 920     struct xbridge *xbridge;
 921     struct xlate_cfg *xcfg;
 922
 923     ovs_assert(!new_xcfg);
 924
 925     new_xcfg = xmalloc(sizeof *new_xcfg);
 926     hmap_init(&new_xcfg->xbridges);
 927     hmap_init(&new_xcfg->xbundles);
 928     hmap_init(&new_xcfg->xports);
 929
 930     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
 931     if (!xcfg) {
 932         return;
 933     }
 934
 935     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
 936         xlate_xbridge_copy(xbridge);
 937     }
 938 }
 939
 940
 941 static void
 942 xlate_xcfg_free(struct xlate_cfg *xcfg)
 943 {
 944     struct xbridge *xbridge, *next_xbridge;
 945
 946     if (!xcfg) {
 947         return;
 948     }
 949
 950     HMAP_FOR_EACH_SAFE (xbridge, next_xbridge, hmap_node, &xcfg->xbridges) {
 951         xlate_xbridge_remove(xcfg, xbridge);
 952     }
 953
 954     hmap_destroy(&xcfg->xbridges);
 955     hmap_destroy(&xcfg->xbundles);
 956     hmap_destroy(&xcfg->xports);
 957     free(xcfg);
 958 }
 959
 960 void
 961 xlate_ofproto_set(struct ofproto_dpif *ofproto, const char *name,
 962                   struct dpif *dpif,
 963                   const struct mac_learning *ml, struct stp *stp,
 964                   struct rstp *rstp, const struct mcast_snooping *ms,
 965                   const struct mbridge *mbridge,
 966                   const struct dpif_sflow *sflow,
 967                   const struct dpif_ipfix *ipfix,
 968                   const struct netflow *netflow,
 969                   bool forward_bpdu, bool has_in_band,
 970                   const struct dpif_backer_support *support)
 971 {
 972     struct xbridge *xbridge;
 973
 974     ovs_assert(new_xcfg);
 975
 976     xbridge = xbridge_lookup(new_xcfg, ofproto);
 977     if (!xbridge) {
 978         xbridge = xzalloc(sizeof *xbridge);
 979         xbridge->ofproto = ofproto;
 980
 981         xlate_xbridge_init(new_xcfg, xbridge);
 982     }
 983
 984     free(xbridge->name);
 985     xbridge->name = xstrdup(name);
 986
 987     xlate_xbridge_set(xbridge, dpif, ml, stp, rstp, ms, mbridge, sflow, ipfix,
 988                       netflow, forward_bpdu, has_in_band, support);
 989 }
 990
 991 static void
 992 xlate_xbridge_remove(struct xlate_cfg *xcfg, struct xbridge *xbridge)
 993 {
 994     struct xbundle *xbundle, *next_xbundle;
 995     struct xport *xport, *next_xport;
 996
 997     if (!xbridge) {
 998         return;
 999     }
1000
1001     HMAP_FOR_EACH_SAFE (xport, next_xport, ofp_node, &xbridge->xports) {
1002         xlate_xport_remove(xcfg, xport);
1003     }
1004
1005     LIST_FOR_EACH_SAFE (xbundle, next_xbundle, list_node, &xbridge->xbundles) {
1006         xlate_xbundle_remove(xcfg, xbundle);
1007     }
1008
1009     hmap_remove(&xcfg->xbridges, &xbridge->hmap_node);
1010     mac_learning_unref(xbridge->ml);
1011     mcast_snooping_unref(xbridge->ms);
1012     mbridge_unref(xbridge->mbridge);
1013     dpif_sflow_unref(xbridge->sflow);
1014     dpif_ipfix_unref(xbridge->ipfix);
1015     stp_unref(xbridge->stp);
1016     rstp_unref(xbridge->rstp);
1017     hmap_destroy(&xbridge->xports);
1018     free(xbridge->name);
1019     free(xbridge);
1020 }
1021
1022 void
1023 xlate_remove_ofproto(struct ofproto_dpif *ofproto)
1024 {
1025     struct xbridge *xbridge;
1026
1027     ovs_assert(new_xcfg);
1028
1029     xbridge = xbridge_lookup(new_xcfg, ofproto);
1030     xlate_xbridge_remove(new_xcfg, xbridge);
1031 }
1032
1033 void
1034 xlate_bundle_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
1035                  const char *name, enum port_vlan_mode vlan_mode, int vlan,
1036                  unsigned long *trunks, bool use_priority_tags,
1037                  const struct bond *bond, const struct lacp *lacp,
1038                  bool floodable)
1039 {
1040     struct xbundle *xbundle;
1041
1042     ovs_assert(new_xcfg);
1043
1044     xbundle = xbundle_lookup(new_xcfg, ofbundle);
1045     if (!xbundle) {
1046         xbundle = xzalloc(sizeof *xbundle);
1047         xbundle->ofbundle = ofbundle;
1048         xbundle->xbridge = xbridge_lookup(new_xcfg, ofproto);
1049
1050         xlate_xbundle_init(new_xcfg, xbundle);
1051     }
1052
1053     free(xbundle->name);
1054     xbundle->name = xstrdup(name);
1055
1056     xlate_xbundle_set(xbundle, vlan_mode, vlan, trunks,
1057                       use_priority_tags, bond, lacp, floodable);
1058 }
1059
1060 static void
1061 xlate_xbundle_remove(struct xlate_cfg *xcfg, struct xbundle *xbundle)
1062 {
1063     struct xport *xport;
1064
1065     if (!xbundle) {
1066         return;
1067     }
1068
1069     LIST_FOR_EACH_POP (xport, bundle_node, &xbundle->xports) {
1070         xport->xbundle = NULL;
1071     }
1072
1073     hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
1074     ovs_list_remove(&xbundle->list_node);
1075     bond_unref(xbundle->bond);
1076     lacp_unref(xbundle->lacp);
1077     free(xbundle->name);
1078     free(xbundle);
1079 }
1080
1081 void
1082 xlate_bundle_remove(struct ofbundle *ofbundle)
1083 {
1084     struct xbundle *xbundle;
1085
1086     ovs_assert(new_xcfg);
1087
1088     xbundle = xbundle_lookup(new_xcfg, ofbundle);
1089     xlate_xbundle_remove(new_xcfg, xbundle);
1090 }
1091
1092 void
1093 xlate_ofport_set(struct ofproto_dpif *ofproto, struct ofbundle *ofbundle,
1094                  struct ofport_dpif *ofport, ofp_port_t ofp_port,
1095                  odp_port_t odp_port, const struct netdev *netdev,
1096                  const struct cfm *cfm, const struct bfd *bfd,
1097                  const struct lldp *lldp, struct ofport_dpif *peer,
1098                  int stp_port_no, const struct rstp_port *rstp_port,
1099                  const struct ofproto_port_queue *qdscp_list, size_t n_qdscp,
1100                  enum ofputil_port_config config,
1101                  enum ofputil_port_state state, bool is_tunnel,
1102                  bool may_enable)
1103 {
1104     size_t i;
1105     struct xport *xport;
1106
1107     ovs_assert(new_xcfg);
1108
1109     xport = xport_lookup(new_xcfg, ofport);
1110     if (!xport) {
1111         xport = xzalloc(sizeof *xport);
1112         xport->ofport = ofport;
1113         xport->xbridge = xbridge_lookup(new_xcfg, ofproto);
1114         xport->ofp_port = ofp_port;
1115
1116         xlate_xport_init(new_xcfg, xport);
1117     }
1118
1119     ovs_assert(xport->ofp_port == ofp_port);
1120
1121     xlate_xport_set(xport, odp_port, netdev, cfm, bfd, lldp,
1122                     stp_port_no, rstp_port, config, state, is_tunnel,
1123                     may_enable);
1124
1125     if (xport->peer) {
1126         xport->peer->peer = NULL;
1127     }
1128     xport->peer = xport_lookup(new_xcfg, peer);
1129     if (xport->peer) {
1130         xport->peer->peer = xport;
1131     }
1132
1133     if (xport->xbundle) {
1134         ovs_list_remove(&xport->bundle_node);
1135     }
1136     xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
1137     if (xport->xbundle) {
1138         ovs_list_insert(&xport->xbundle->xports, &xport->bundle_node);
1139     }
1140
1141     clear_skb_priorities(xport);
1142     for (i = 0; i < n_qdscp; i++) {
1143         struct skb_priority_to_dscp *pdscp;
1144         uint32_t skb_priority;
1145
1146         if (dpif_queue_to_priority(xport->xbridge->dpif, qdscp_list[i].queue,
1147                                    &skb_priority)) {
1148             continue;
1149         }
1150
1151         pdscp = xmalloc(sizeof *pdscp);
1152         pdscp->skb_priority = skb_priority;
1153         pdscp->dscp = (qdscp_list[i].dscp << 2) & IP_DSCP_MASK;
1154         hmap_insert(&xport->skb_priorities, &pdscp->hmap_node,
1155                     hash_int(pdscp->skb_priority, 0));
1156     }
1157 }
1158
1159 static void
1160 xlate_xport_remove(struct xlate_cfg *xcfg, struct xport *xport)
1161 {
1162     if (!xport) {
1163         return;
1164     }
1165
1166     if (xport->peer) {
1167         xport->peer->peer = NULL;
1168         xport->peer = NULL;
1169     }
1170
1171     if (xport->xbundle) {
1172         ovs_list_remove(&xport->bundle_node);
1173     }
1174
1175     clear_skb_priorities(xport);
1176     hmap_destroy(&xport->skb_priorities);
1177
1178     hmap_remove(&xcfg->xports, &xport->hmap_node);
1179     hmap_remove(&xport->xbridge->xports, &xport->ofp_node);
1180
1181     netdev_close(xport->netdev);
1182     rstp_port_unref(xport->rstp_port);
1183     cfm_unref(xport->cfm);
1184     bfd_unref(xport->bfd);
1185     lldp_unref(xport->lldp);
1186     free(xport);
1187 }
1188
1189 void
1190 xlate_ofport_remove(struct ofport_dpif *ofport)
1191 {
1192     struct xport *xport;
1193
1194     ovs_assert(new_xcfg);
1195
1196     xport = xport_lookup(new_xcfg, ofport);
1197     xlate_xport_remove(new_xcfg, xport);
1198 }
1199
1200 static struct ofproto_dpif *
1201 xlate_lookup_ofproto_(const struct dpif_backer *backer, const struct flow *flow,
1202                       ofp_port_t *ofp_in_port, const struct xport **xportp)
1203 {
1204     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1205     const struct xport *xport;
1206
1207     xport = xport_lookup(xcfg, tnl_port_should_receive(flow)
1208                          ? tnl_port_receive(flow)
1209                          : odp_port_to_ofport(backer, flow->in_port.odp_port));
1210     if (OVS_UNLIKELY(!xport)) {
1211         return NULL;
1212     }
1213     *xportp = xport;
1214     if (ofp_in_port) {
1215         *ofp_in_port = xport->ofp_port;
1216     }
1217     return xport->xbridge->ofproto;
1218 }
1219
1220 /* Given a datapath and flow metadata ('backer', and 'flow' respectively)
1221  * returns the corresponding struct ofproto_dpif and OpenFlow port number. */
1222 struct ofproto_dpif *
1223 xlate_lookup_ofproto(const struct dpif_backer *backer, const struct flow *flow,
1224                      ofp_port_t *ofp_in_port)
1225 {
1226     const struct xport *xport;
1227
1228     return xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1229 }
1230
1231 /* Given a datapath and flow metadata ('backer', and 'flow' respectively),
1232  * optionally populates 'ofproto' with the ofproto_dpif, 'ofp_in_port' with the
1233  * openflow in_port, and 'ipfix', 'sflow', and 'netflow' with the appropriate
1234  * handles for those protocols if they're enabled.  Caller may use the returned
1235  * pointers until quiescing, for longer term use additional references must
1236  * be taken.
1237  *
1238  * Returns 0 if successful, ENODEV if the parsed flow has no associated ofproto.
1239  */
1240 int
1241 xlate_lookup(const struct dpif_backer *backer, const struct flow *flow,
1242              struct ofproto_dpif **ofprotop, struct dpif_ipfix **ipfix,
1243              struct dpif_sflow **sflow, struct netflow **netflow,
1244              ofp_port_t *ofp_in_port)
1245 {
1246     struct ofproto_dpif *ofproto;
1247     const struct xport *xport;
1248
1249     ofproto = xlate_lookup_ofproto_(backer, flow, ofp_in_port, &xport);
1250
1251     if (!ofproto) {
1252         return ENODEV;
1253     }
1254
1255     if (ofprotop) {
1256         *ofprotop = ofproto;
1257     }
1258
1259     if (ipfix) {
1260         *ipfix = xport ? xport->xbridge->ipfix : NULL;
1261     }
1262
1263     if (sflow) {
1264         *sflow = xport ? xport->xbridge->sflow : NULL;
1265     }
1266
1267     if (netflow) {
1268         *netflow = xport ? xport->xbridge->netflow : NULL;
1269     }
1270
1271     return 0;
1272 }
1273
1274 static struct xbridge *
1275 xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto)
1276 {
1277     struct hmap *xbridges;
1278     struct xbridge *xbridge;
1279
1280     if (!ofproto || !xcfg) {
1281         return NULL;
1282     }
1283
1284     xbridges = &xcfg->xbridges;
1285
1286     HMAP_FOR_EACH_IN_BUCKET (xbridge, hmap_node, hash_pointer(ofproto, 0),
1287                              xbridges) {
1288         if (xbridge->ofproto == ofproto) {
1289             return xbridge;
1290         }
1291     }
1292     return NULL;
1293 }
1294
1295 static struct xbridge *
1296 xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
1297 {
1298     struct xbridge *xbridge;
1299
1300     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
1301         if (uuid_equals(ofproto_dpif_get_uuid(xbridge->ofproto), uuid)) {
1302             return xbridge;
1303         }
1304     }
1305     return NULL;
1306 }
1307
1308 static struct xbundle *
1309 xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
1310 {
1311     struct hmap *xbundles;
1312     struct xbundle *xbundle;
1313
1314     if (!ofbundle || !xcfg) {
1315         return NULL;
1316     }
1317
1318     xbundles = &xcfg->xbundles;
1319
1320     HMAP_FOR_EACH_IN_BUCKET (xbundle, hmap_node, hash_pointer(ofbundle, 0),
1321                              xbundles) {
1322         if (xbundle->ofbundle == ofbundle) {
1323             return xbundle;
1324         }
1325     }
1326     return NULL;
1327 }
1328
1329 static struct xport *
1330 xport_lookup(struct xlate_cfg *xcfg, const struct ofport_dpif *ofport)
1331 {
1332     struct hmap *xports;
1333     struct xport *xport;
1334
1335     if (!ofport || !xcfg) {
1336         return NULL;
1337     }
1338
1339     xports = &xcfg->xports;
1340
1341     HMAP_FOR_EACH_IN_BUCKET (xport, hmap_node, hash_pointer(ofport, 0),
1342                              xports) {
1343         if (xport->ofport == ofport) {
1344             return xport;
1345         }
1346     }
1347     return NULL;
1348 }
1349
1350 static struct stp_port *
1351 xport_get_stp_port(const struct xport *xport)
1352 {
1353     return xport->xbridge->stp && xport->stp_port_no != -1
1354         ? stp_get_port(xport->xbridge->stp, xport->stp_port_no)
1355         : NULL;
1356 }
1357
1358 static bool
1359 xport_stp_learn_state(const struct xport *xport)
1360 {
1361     struct stp_port *sp = xport_get_stp_port(xport);
1362     return sp
1363         ? stp_learn_in_state(stp_port_get_state(sp))
1364         : true;
1365 }
1366
1367 static bool
1368 xport_stp_forward_state(const struct xport *xport)
1369 {
1370     struct stp_port *sp = xport_get_stp_port(xport);
1371     return sp
1372         ? stp_forward_in_state(stp_port_get_state(sp))
1373         : true;
1374 }
1375
1376 static bool
1377 xport_stp_should_forward_bpdu(const struct xport *xport)
1378 {
1379     struct stp_port *sp = xport_get_stp_port(xport);
1380     return stp_should_forward_bpdu(sp ? stp_port_get_state(sp) : STP_DISABLED);
1381 }
1382
1383 /* Returns true if STP should process 'flow'.  Sets fields in 'wc' that
1384  * were used to make the determination.*/
1385 static bool
1386 stp_should_process_flow(const struct flow *flow, struct flow_wildcards *wc)
1387 {
1388     /* is_stp() also checks dl_type, but dl_type is always set in 'wc'. */
1389     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1390     return is_stp(flow);
1391 }
1392
1393 static void
1394 stp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1395 {
1396     struct stp_port *sp = xport_get_stp_port(xport);
1397     struct dp_packet payload = *packet;
1398     struct eth_header *eth = dp_packet_data(&payload);
1399
1400     /* Sink packets on ports that have STP disabled when the bridge has
1401      * STP enabled. */
1402     if (!sp || stp_port_get_state(sp) == STP_DISABLED) {
1403         return;
1404     }
1405
1406     /* Trim off padding on payload. */
1407     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1408         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1409     }
1410
1411     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1412         stp_received_bpdu(sp, dp_packet_data(&payload), dp_packet_size(&payload));
1413     }
1414 }
1415
1416 static enum rstp_state
1417 xport_get_rstp_port_state(const struct xport *xport)
1418 {
1419     return xport->rstp_port
1420         ? rstp_port_get_state(xport->rstp_port)
1421         : RSTP_DISABLED;
1422 }
1423
1424 static bool
1425 xport_rstp_learn_state(const struct xport *xport)
1426 {
1427     return xport->xbridge->rstp && xport->rstp_port
1428         ? rstp_learn_in_state(xport_get_rstp_port_state(xport))
1429         : true;
1430 }
1431
1432 static bool
1433 xport_rstp_forward_state(const struct xport *xport)
1434 {
1435     return xport->xbridge->rstp && xport->rstp_port
1436         ? rstp_forward_in_state(xport_get_rstp_port_state(xport))
1437         : true;
1438 }
1439
1440 static bool
1441 xport_rstp_should_manage_bpdu(const struct xport *xport)
1442 {
1443     return rstp_should_manage_bpdu(xport_get_rstp_port_state(xport));
1444 }
1445
1446 static void
1447 rstp_process_packet(const struct xport *xport, const struct dp_packet *packet)
1448 {
1449     struct dp_packet payload = *packet;
1450     struct eth_header *eth = dp_packet_data(&payload);
1451
1452     /* Sink packets on ports that have no RSTP. */
1453     if (!xport->rstp_port) {
1454         return;
1455     }
1456
1457     /* Trim off padding on payload. */
1458     if (dp_packet_size(&payload) > ntohs(eth->eth_type) + ETH_HEADER_LEN) {
1459         dp_packet_set_size(&payload, ntohs(eth->eth_type) + ETH_HEADER_LEN);
1460     }
1461
1462     if (dp_packet_try_pull(&payload, ETH_HEADER_LEN + LLC_HEADER_LEN)) {
1463         rstp_port_received_bpdu(xport->rstp_port, dp_packet_data(&payload),
1464                                 dp_packet_size(&payload));
1465     }
1466 }
1467
1468 static struct xport *
1469 get_ofp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1470 {
1471     struct xport *xport;
1472
1473     HMAP_FOR_EACH_IN_BUCKET (xport, ofp_node, hash_ofp_port(ofp_port),
1474                              &xbridge->xports) {
1475         if (xport->ofp_port == ofp_port) {
1476             return xport;
1477         }
1478     }
1479     return NULL;
1480 }
1481
1482 static odp_port_t
1483 ofp_port_to_odp_port(const struct xbridge *xbridge, ofp_port_t ofp_port)
1484 {
1485     const struct xport *xport = get_ofp_port(xbridge, ofp_port);
1486     return xport ? xport->odp_port : ODPP_NONE;
1487 }
1488
1489 static bool
1490 odp_port_is_alive(const struct xlate_ctx *ctx, ofp_port_t ofp_port)
1491 {
1492     struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
1493     return xport && xport->may_enable;
1494 }
1495
1496 static struct ofputil_bucket *
1497 group_first_live_bucket(const struct xlate_ctx *, const struct group_dpif *,
1498                         int depth);
1499
1500 static bool
1501 group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
1502 {
1503     struct group_dpif *group;
1504
1505     if (group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group)) {
1506         struct ofputil_bucket *bucket;
1507
1508         bucket = group_first_live_bucket(ctx, group, depth);
1509         group_dpif_unref(group);
1510         return bucket != NULL;
1511     }
1512
1513     return false;
1514 }
1515
1516 #define MAX_LIVENESS_RECURSION 128 /* Arbitrary limit */
1517
1518 static bool
1519 bucket_is_alive(const struct xlate_ctx *ctx,
1520                 struct ofputil_bucket *bucket, int depth)
1521 {
1522     if (depth >= MAX_LIVENESS_RECURSION) {
1523         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1524
1525         VLOG_WARN_RL(&rl, "bucket chaining exceeded %d links",
1526                      MAX_LIVENESS_RECURSION);
1527         return false;
1528     }
1529
1530     return (!ofputil_bucket_has_liveness(bucket)
1531             || (bucket->watch_port != OFPP_ANY
1532                && odp_port_is_alive(ctx, bucket->watch_port))
1533             || (bucket->watch_group != OFPG_ANY
1534                && group_is_alive(ctx, bucket->watch_group, depth + 1)));
1535 }
1536
1537 static struct ofputil_bucket *
1538 group_first_live_bucket(const struct xlate_ctx *ctx,
1539                         const struct group_dpif *group, int depth)
1540 {
1541     struct ofputil_bucket *bucket;
1542     const struct ovs_list *buckets;
1543
1544     group_dpif_get_buckets(group, &buckets);
1545     LIST_FOR_EACH (bucket, list_node, buckets) {
1546         if (bucket_is_alive(ctx, bucket, depth)) {
1547             return bucket;
1548         }
1549     }
1550
1551     return NULL;
1552 }
1553
1554 static struct ofputil_bucket *
1555 group_best_live_bucket(const struct xlate_ctx *ctx,
1556                        const struct group_dpif *group,
1557                        uint32_t basis)
1558 {
1559     struct ofputil_bucket *best_bucket = NULL;
1560     uint32_t best_score = 0;
1561
1562     struct ofputil_bucket *bucket;
1563     const struct ovs_list *buckets;
1564
1565     group_dpif_get_buckets(group, &buckets);
1566     LIST_FOR_EACH (bucket, list_node, buckets) {
1567         if (bucket_is_alive(ctx, bucket, 0)) {
1568             uint32_t score =
1569                 (hash_int(bucket->bucket_id, basis) & 0xffff) * bucket->weight;
1570             if (score >= best_score) {
1571                 best_bucket = bucket;
1572                 best_score = score;
1573             }
1574         }
1575     }
1576
1577     return best_bucket;
1578 }
1579
1580 static bool
1581 xbundle_trunks_vlan(const struct xbundle *bundle, uint16_t vlan)
1582 {
1583     return (bundle->vlan_mode != PORT_VLAN_ACCESS
1584             && (!bundle->trunks || bitmap_is_set(bundle->trunks, vlan)));
1585 }
1586
1587 static bool
1588 xbundle_includes_vlan(const struct xbundle *xbundle, uint16_t vlan)
1589 {
1590     return vlan == xbundle->vlan || xbundle_trunks_vlan(xbundle, vlan);
1591 }
1592
1593 static mirror_mask_t
1594 xbundle_mirror_out(const struct xbridge *xbridge, struct xbundle *xbundle)
1595 {
1596     return xbundle != &ofpp_none_bundle
1597         ? mirror_bundle_out(xbridge->mbridge, xbundle->ofbundle)
1598         : 0;
1599 }
1600
1601 static mirror_mask_t
1602 xbundle_mirror_src(const struct xbridge *xbridge, struct xbundle *xbundle)
1603 {
1604     return xbundle != &ofpp_none_bundle
1605         ? mirror_bundle_src(xbridge->mbridge, xbundle->ofbundle)
1606         : 0;
1607 }
1608
1609 static mirror_mask_t
1610 xbundle_mirror_dst(const struct xbridge *xbridge, struct xbundle *xbundle)
1611 {
1612     return xbundle != &ofpp_none_bundle
1613         ? mirror_bundle_dst(xbridge->mbridge, xbundle->ofbundle)
1614         : 0;
1615 }
1616
1617 static struct xbundle *
1618 lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port,
1619                     bool warn, struct xport **in_xportp)
1620 {
1621     struct xport *xport;
1622
1623     /* Find the port and bundle for the received packet. */
1624     xport = get_ofp_port(xbridge, in_port);
1625     if (in_xportp) {
1626         *in_xportp = xport;
1627     }
1628     if (xport && xport->xbundle) {
1629         return xport->xbundle;
1630     }
1631
1632     /* Special-case OFPP_NONE (OF1.0) and OFPP_CONTROLLER (OF1.1+),
1633      * which a controller may use as the ingress port for traffic that
1634      * it is sourcing. */
1635     if (in_port == OFPP_CONTROLLER || in_port == OFPP_NONE) {
1636         return &ofpp_none_bundle;
1637     }
1638
1639     /* Odd.  A few possible reasons here:
1640      *
1641      * - We deleted a port but there are still a few packets queued up
1642      *   from it.
1643      *
1644      * - Someone externally added a port (e.g. "ovs-dpctl add-if") that
1645      *   we don't know about.
1646      *
1647      * - The ofproto client didn't configure the port as part of a bundle.
1648      *   This is particularly likely to happen if a packet was received on the
1649      *   port after it was created, but before the client had a chance to
1650      *   configure its bundle.
1651      */
1652     if (warn) {
1653         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1654
1655         VLOG_WARN_RL(&rl, "bridge %s: received packet on unknown "
1656                      "port %"PRIu16, xbridge->name, in_port);
1657     }
1658     return NULL;
1659 }
1660
1661 /* Mirrors the packet represented by 'ctx' to appropriate mirror destinations,
1662  * given the packet is ingressing or egressing on 'xbundle', which has ingress
1663  * or egress (as appropriate) mirrors 'mirrors'. */
1664 static void
1665 mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
1666               mirror_mask_t mirrors)
1667 {
1668     /* Figure out what VLAN the packet is in (because mirrors can select
1669      * packets on basis of VLAN). */
1670     bool warn = ctx->xin->packet != NULL;
1671     uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci);
1672     if (!input_vid_is_valid(vid, xbundle, warn)) {
1673         return;
1674     }
1675     uint16_t vlan = input_vid_to_vlan(xbundle, vid);
1676
1677     const struct xbridge *xbridge = ctx->xbridge;
1678
1679     /* Don't mirror to destinations that we've already mirrored to. */
1680     mirrors &= ~ctx->mirrors;
1681     if (!mirrors) {
1682         return;
1683     }
1684
1685     if (ctx->xin->resubmit_stats) {
1686         mirror_update_stats(xbridge->mbridge, mirrors,
1687                             ctx->xin->resubmit_stats->n_packets,
1688                             ctx->xin->resubmit_stats->n_bytes);
1689     }
1690     if (ctx->xin->xcache) {
1691         struct xc_entry *entry;
1692
1693         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_MIRROR);
1694         entry->u.mirror.mbridge = mbridge_ref(xbridge->mbridge);
1695         entry->u.mirror.mirrors = mirrors;
1696     }
1697
1698     /* 'mirrors' is a bit-mask of candidates for mirroring.  Iterate as long as
1699      * some candidates remain.  */
1700     while (mirrors) {
1701         const unsigned long *vlans;
1702         mirror_mask_t dup_mirrors;
1703         struct ofbundle *out;
1704         int out_vlan;
1705         int snaplen;
1706
1707         /* Get the details of the mirror represented by the rightmost 1-bit. */
1708         bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
1709                                      &vlans, &dup_mirrors,
1710                                      &out, &snaplen, &out_vlan);
1711         ovs_assert(has_mirror);
1712
1713
1714         /* If this mirror selects on the basis of VLAN, and it does not select
1715          * 'vlan', then discard this mirror and go on to the next one. */
1716         if (vlans) {
1717             ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
1718         }
1719         if (vlans && !bitmap_is_set(vlans, vlan)) {
1720             mirrors = zero_rightmost_1bit(mirrors);
1721             continue;
1722         }
1723
1724         /* Record the mirror, and the mirrors that output to the same
1725          * destination, so that we don't mirror to them again.  This must be
1726          * done now to ensure that output_normal(), below, doesn't recursively
1727          * output to the same mirrors. */
1728         ctx->mirrors |= dup_mirrors;
1729         ctx->mirror_snaplen = snaplen;
1730
1731         /* Send the packet to the mirror. */
1732         if (out) {
1733             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1734             struct xbundle *out_xbundle = xbundle_lookup(xcfg, out);
1735             if (out_xbundle) {
1736                 output_normal(ctx, out_xbundle, vlan);
1737             }
1738         } else if (vlan != out_vlan
1739                    && !eth_addr_is_reserved(ctx->xin->flow.dl_dst)) {
1740             struct xbundle *xbundle;
1741
1742             LIST_FOR_EACH (xbundle, list_node, &xbridge->xbundles) {
1743                 if (xbundle_includes_vlan(xbundle, out_vlan)
1744                     && !xbundle_mirror_out(xbridge, xbundle)) {
1745                     output_normal(ctx, xbundle, out_vlan);
1746                 }
1747             }
1748         }
1749
1750         /* output_normal() could have recursively output (to different
1751          * mirrors), so make sure that we don't send duplicates. */
1752         mirrors &= ~ctx->mirrors;
1753         ctx->mirror_snaplen = 0;
1754     }
1755 }
1756
1757 static void
1758 mirror_ingress_packet(struct xlate_ctx *ctx)
1759 {
1760     if (mbridge_has_mirrors(ctx->xbridge->mbridge)) {
1761         bool warn = ctx->xin->packet != NULL;
1762         struct xbundle *xbundle = lookup_input_bundle(
1763             ctx->xbridge, ctx->xin->flow.in_port.ofp_port, warn, NULL);
1764         if (xbundle) {
1765             mirror_packet(ctx, xbundle,
1766                           xbundle_mirror_src(ctx->xbridge, xbundle));
1767         }
1768     }
1769 }
1770
1771 /* Given 'vid', the VID obtained from the 802.1Q header that was received as
1772  * part of a packet (specify 0 if there was no 802.1Q header), and 'in_xbundle',
1773  * the bundle on which the packet was received, returns the VLAN to which the
1774  * packet belongs.
1775  *
1776  * Both 'vid' and the return value are in the range 0...4095. */
1777 static uint16_t
1778 input_vid_to_vlan(const struct xbundle *in_xbundle, uint16_t vid)
1779 {
1780     switch (in_xbundle->vlan_mode) {
1781     case PORT_VLAN_ACCESS:
1782         return in_xbundle->vlan;
1783         break;
1784
1785     case PORT_VLAN_TRUNK:
1786         return vid;
1787
1788     case PORT_VLAN_NATIVE_UNTAGGED:
1789     case PORT_VLAN_NATIVE_TAGGED:
1790         return vid ? vid : in_xbundle->vlan;
1791
1792     default:
1793         OVS_NOT_REACHED();
1794     }
1795 }
1796
1797 /* Checks whether a packet with the given 'vid' may ingress on 'in_xbundle'.
1798  * If so, returns true.  Otherwise, returns false and, if 'warn' is true, logs
1799  * a warning.
1800  *
1801  * 'vid' should be the VID obtained from the 802.1Q header that was received as
1802  * part of a packet (specify 0 if there was no 802.1Q header), in the range
1803  * 0...4095. */
1804 static bool
1805 input_vid_is_valid(uint16_t vid, struct xbundle *in_xbundle, bool warn)
1806 {
1807     /* Allow any VID on the OFPP_NONE port. */
1808     if (in_xbundle == &ofpp_none_bundle) {
1809         return true;
1810     }
1811
1812     switch (in_xbundle->vlan_mode) {
1813     case PORT_VLAN_ACCESS:
1814         if (vid) {
1815             if (warn) {
1816                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1817                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" tagged "
1818                              "packet received on port %s configured as VLAN "
1819                              "%"PRIu16" access port", vid, in_xbundle->name,
1820                              in_xbundle->vlan);
1821             }
1822             return false;
1823         }
1824         return true;
1825
1826     case PORT_VLAN_NATIVE_UNTAGGED:
1827     case PORT_VLAN_NATIVE_TAGGED:
1828         if (!vid) {
1829             /* Port must always carry its native VLAN. */
1830             return true;
1831         }
1832         /* Fall through. */
1833     case PORT_VLAN_TRUNK:
1834         if (!xbundle_includes_vlan(in_xbundle, vid)) {
1835             if (warn) {
1836                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
1837                 VLOG_WARN_RL(&rl, "dropping VLAN %"PRIu16" packet "
1838                              "received on port %s not configured for trunking "
1839                              "VLAN %"PRIu16, vid, in_xbundle->name, vid);
1840             }
1841             return false;
1842         }
1843         return true;
1844
1845     default:
1846         OVS_NOT_REACHED();
1847     }
1848
1849 }
1850
1851 /* Given 'vlan', the VLAN that a packet belongs to, and
1852  * 'out_xbundle', a bundle on which the packet is to be output, returns the VID
1853  * that should be included in the 802.1Q header.  (If the return value is 0,
1854  * then the 802.1Q header should only be included in the packet if there is a
1855  * nonzero PCP.)
1856  *
1857  * Both 'vlan' and the return value are in the range 0...4095. */
1858 static uint16_t
1859 output_vlan_to_vid(const struct xbundle *out_xbundle, uint16_t vlan)
1860 {
1861     switch (out_xbundle->vlan_mode) {
1862     case PORT_VLAN_ACCESS:
1863         return 0;
1864
1865     case PORT_VLAN_TRUNK:
1866     case PORT_VLAN_NATIVE_TAGGED:
1867         return vlan;
1868
1869     case PORT_VLAN_NATIVE_UNTAGGED:
1870         return vlan == out_xbundle->vlan ? 0 : vlan;
1871
1872     default:
1873         OVS_NOT_REACHED();
1874     }
1875 }
1876
1877 static void
1878 output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
1879               uint16_t vlan)
1880 {
1881     ovs_be16 *flow_tci = &ctx->xin->flow.vlan_tci;
1882     uint16_t vid;
1883     ovs_be16 tci, old_tci;
1884     struct xport *xport;
1885     struct xlate_bond_recirc xr;
1886     bool use_recirc = false;
1887
1888     vid = output_vlan_to_vid(out_xbundle, vlan);
1889     if (ovs_list_is_empty(&out_xbundle->xports)) {
1890         /* Partially configured bundle with no slaves.  Drop the packet. */
1891         return;
1892     } else if (!out_xbundle->bond) {
1893         xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport,
1894                              bundle_node);
1895     } else {
1896         struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
1897         struct flow_wildcards *wc = ctx->wc;
1898         struct ofport_dpif *ofport;
1899
1900         if (ctx->xbridge->support.odp.recirc) {
1901             use_recirc = bond_may_recirc(
1902                 out_xbundle->bond, &xr.recirc_id, &xr.hash_basis);
1903
1904             if (use_recirc) {
1905                 /* Only TCP mode uses recirculation. */
1906                 xr.hash_alg = OVS_HASH_ALG_L4;
1907                 bond_update_post_recirc_rules(out_xbundle->bond, false);
1908
1909                 /* Recirculation does not require unmasking hash fields. */
1910                 wc = NULL;
1911             }
1912         }
1913
1914         ofport = bond_choose_output_slave(out_xbundle->bond,
1915                                           &ctx->xin->flow, wc, vid);
1916         xport = xport_lookup(xcfg, ofport);
1917
1918         if (!xport) {
1919             /* No slaves enabled, so drop packet. */
1920             return;
1921         }
1922
1923         /* If use_recirc is set, the main thread will handle stats
1924          * accounting for this bond. */
1925         if (!use_recirc) {
1926             if (ctx->xin->resubmit_stats) {
1927                 bond_account(out_xbundle->bond, &ctx->xin->flow, vid,
1928                              ctx->xin->resubmit_stats->n_bytes);
1929             }
1930             if (ctx->xin->xcache) {
1931                 struct xc_entry *entry;
1932                 struct flow *flow;
1933
1934                 flow = &ctx->xin->flow;
1935                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_BOND);
1936                 entry->u.bond.bond = bond_ref(out_xbundle->bond);
1937                 entry->u.bond.flow = xmemdup(flow, sizeof *flow);
1938                 entry->u.bond.vid = vid;
1939             }
1940         }
1941     }
1942
1943     old_tci = *flow_tci;
1944     tci = htons(vid);
1945     if (tci || out_xbundle->use_priority_tags) {
1946         tci |= *flow_tci & htons(VLAN_PCP_MASK);
1947         if (tci) {
1948             tci |= htons(VLAN_CFI);
1949         }
1950     }
1951     *flow_tci = tci;
1952
1953     compose_output_action(ctx, xport->ofp_port, use_recirc ? &xr : NULL);
1954     *flow_tci = old_tci;
1955 }
1956
1957 /* A VM broadcasts a gratuitous ARP to indicate that it has resumed after
1958  * migration.  Older Citrix-patched Linux DomU used gratuitous ARP replies to
1959  * indicate this; newer upstream kernels use gratuitous ARP requests. */
1960 static bool
1961 is_gratuitous_arp(const struct flow *flow, struct flow_wildcards *wc)
1962 {
1963     if (flow->dl_type != htons(ETH_TYPE_ARP)) {
1964         return false;
1965     }
1966
1967     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
1968     if (!eth_addr_is_broadcast(flow->dl_dst)) {
1969         return false;
1970     }
1971
1972     memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
1973     if (flow->nw_proto == ARP_OP_REPLY) {
1974         return true;
1975     } else if (flow->nw_proto == ARP_OP_REQUEST) {
1976         memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
1977         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
1978
1979         return flow->nw_src == flow->nw_dst;
1980     } else {
1981         return false;
1982     }
1983 }
1984
1985 /* Determines whether packets in 'flow' within 'xbridge' should be forwarded or
1986  * dropped.  Returns true if they may be forwarded, false if they should be
1987  * dropped.
1988  *
1989  * 'in_port' must be the xport that corresponds to flow->in_port.
1990  * 'in_port' must be part of a bundle (e.g. in_port->bundle must be nonnull).
1991  *
1992  * 'vlan' must be the VLAN that corresponds to flow->vlan_tci on 'in_port', as
1993  * returned by input_vid_to_vlan().  It must be a valid VLAN for 'in_port', as
1994  * checked by input_vid_is_valid().
1995  *
1996  * May also add tags to '*tags', although the current implementation only does
1997  * so in one special case.
1998  */
1999 static bool
2000 is_admissible(struct xlate_ctx *ctx, struct xport *in_port,
2001               uint16_t vlan)
2002 {
2003     struct xbundle *in_xbundle = in_port->xbundle;
2004     const struct xbridge *xbridge = ctx->xbridge;
2005     struct flow *flow = &ctx->xin->flow;
2006
2007     /* Drop frames for reserved multicast addresses
2008      * only if forward_bpdu option is absent. */
2009     if (!xbridge->forward_bpdu && eth_addr_is_reserved(flow->dl_dst)) {
2010         xlate_report(ctx, "packet has reserved destination MAC, dropping");
2011         return false;
2012     }
2013
2014     if (in_xbundle->bond) {
2015         struct mac_entry *mac;
2016
2017         switch (bond_check_admissibility(in_xbundle->bond, in_port->ofport,
2018                                          flow->dl_dst)) {
2019         case BV_ACCEPT:
2020             break;
2021
2022         case BV_DROP:
2023             xlate_report(ctx, "bonding refused admissibility, dropping");
2024             return false;
2025
2026         case BV_DROP_IF_MOVED:
2027             ovs_rwlock_rdlock(&xbridge->ml->rwlock);
2028             mac = mac_learning_lookup(xbridge->ml, flow->dl_src, vlan);
2029             if (mac
2030                 && mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle
2031                 && (!is_gratuitous_arp(flow, ctx->wc)
2032                     || mac_entry_is_grat_arp_locked(mac))) {
2033                 ovs_rwlock_unlock(&xbridge->ml->rwlock);
2034                 xlate_report(ctx, "SLB bond thinks this packet looped back, "
2035                              "dropping");
2036                 return false;
2037             }
2038             ovs_rwlock_unlock(&xbridge->ml->rwlock);
2039             break;
2040         }
2041     }
2042
2043     return true;
2044 }
2045
2046 /* Checks whether a MAC learning update is necessary for MAC learning table
2047  * 'ml' given that a packet matching 'flow' was received  on 'in_xbundle' in
2048  * 'vlan'.
2049  *
2050  * Most packets processed through the MAC learning table do not actually
2051  * change it in any way.  This function requires only a read lock on the MAC
2052  * learning table, so it is much cheaper in this common case.
2053  *
2054  * Keep the code here synchronized with that in update_learning_table__()
2055  * below. */
2056 static bool
2057 is_mac_learning_update_needed(const struct mac_learning *ml,
2058                               const struct flow *flow,
2059                               struct flow_wildcards *wc,
2060                               int vlan, struct xbundle *in_xbundle)
2061 OVS_REQ_RDLOCK(ml->rwlock)
2062 {
2063     struct mac_entry *mac;
2064
2065     if (!mac_learning_may_learn(ml, flow->dl_src, vlan)) {
2066         return false;
2067     }
2068
2069     mac = mac_learning_lookup(ml, flow->dl_src, vlan);
2070     if (!mac || mac_entry_age(ml, mac)) {
2071         return true;
2072     }
2073
2074     if (is_gratuitous_arp(flow, wc)) {
2075         /* We don't want to learn from gratuitous ARP packets that are
2076          * reflected back over bond slaves so we lock the learning table. */
2077         if (!in_xbundle->bond) {
2078             return true;
2079         } else if (mac_entry_is_grat_arp_locked(mac)) {
2080             return false;
2081         }
2082     }
2083
2084     return mac_entry_get_port(ml, mac) != in_xbundle->ofbundle;
2085 }
2086
2087
2088 /* Updates MAC learning table 'ml' given that a packet matching 'flow' was
2089  * received on 'in_xbundle' in 'vlan'.
2090  *
2091  * This code repeats all the checks in is_mac_learning_update_needed() because
2092  * the lock was released between there and here and thus the MAC learning state
2093  * could have changed.
2094  *
2095  * Keep the code here synchronized with that in is_mac_learning_update_needed()
2096  * above. */
2097 static void
2098 update_learning_table__(const struct xbridge *xbridge,
2099                         const struct flow *flow, struct flow_wildcards *wc,
2100                         int vlan, struct xbundle *in_xbundle)
2101 OVS_REQ_WRLOCK(xbridge->ml->rwlock)
2102 {
2103     struct mac_entry *mac;
2104
2105     if (!mac_learning_may_learn(xbridge->ml, flow->dl_src, vlan)) {
2106         return;
2107     }
2108
2109     mac = mac_learning_insert(xbridge->ml, flow->dl_src, vlan);
2110     if (is_gratuitous_arp(flow, wc)) {
2111         /* We don't want to learn from gratuitous ARP packets that are
2112          * reflected back over bond slaves so we lock the learning table. */
2113         if (!in_xbundle->bond) {
2114             mac_entry_set_grat_arp_lock(mac);
2115         } else if (mac_entry_is_grat_arp_locked(mac)) {
2116             return;
2117         }
2118     }
2119
2120     if (mac_entry_get_port(xbridge->ml, mac) != in_xbundle->ofbundle) {
2121         /* The log messages here could actually be useful in debugging,
2122          * so keep the rate limit relatively high. */
2123         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
2124
2125         VLOG_DBG_RL(&rl, "bridge %s: learned that "ETH_ADDR_FMT" is "
2126                     "on port %s in VLAN %d",
2127                     xbridge->name, ETH_ADDR_ARGS(flow->dl_src),
2128                     in_xbundle->name, vlan);
2129
2130         mac_entry_set_port(xbridge->ml, mac, in_xbundle->ofbundle);
2131     }
2132 }
2133
2134 static void
2135 update_learning_table(const struct xbridge *xbridge,
2136                       const struct flow *flow, struct flow_wildcards *wc,
2137                       int vlan, struct xbundle *in_xbundle)
2138 {
2139     bool need_update;
2140
2141     /* Don't learn the OFPP_NONE port. */
2142     if (in_xbundle == &ofpp_none_bundle) {
2143         return;
2144     }
2145
2146     /* First try the common case: no change to MAC learning table. */
2147     ovs_rwlock_rdlock(&xbridge->ml->rwlock);
2148     need_update = is_mac_learning_update_needed(xbridge->ml, flow, wc, vlan,
2149                                                 in_xbundle);
2150     ovs_rwlock_unlock(&xbridge->ml->rwlock);
2151
2152     if (need_update) {
2153         /* Slow path: MAC learning table might need an update. */
2154         ovs_rwlock_wrlock(&xbridge->ml->rwlock);
2155         update_learning_table__(xbridge, flow, wc, vlan, in_xbundle);
2156         ovs_rwlock_unlock(&xbridge->ml->rwlock);
2157     }
2158 }
2159
2160 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2161  * was received on 'in_xbundle' in 'vlan' and is either Report or Query. */
2162 static void
2163 update_mcast_snooping_table4__(const struct xbridge *xbridge,
2164                                const struct flow *flow,
2165                                struct mcast_snooping *ms, int vlan,
2166                                struct xbundle *in_xbundle,
2167                                const struct dp_packet *packet)
2168     OVS_REQ_WRLOCK(ms->rwlock)
2169 {
2170     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2171     int count;
2172     ovs_be32 ip4 = flow->igmp_group_ip4;
2173
2174     switch (ntohs(flow->tp_src)) {
2175     case IGMP_HOST_MEMBERSHIP_REPORT:
2176     case IGMPV2_HOST_MEMBERSHIP_REPORT:
2177         if (mcast_snooping_add_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2178             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping learned that "
2179                         IP_FMT" is on port %s in VLAN %d",
2180                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2181         }
2182         break;
2183     case IGMP_HOST_LEAVE_MESSAGE:
2184         if (mcast_snooping_leave_group4(ms, ip4, vlan, in_xbundle->ofbundle)) {
2185             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping leaving "
2186                         IP_FMT" is on port %s in VLAN %d",
2187                         xbridge->name, IP_ARGS(ip4), in_xbundle->name, vlan);
2188         }
2189         break;
2190     case IGMP_HOST_MEMBERSHIP_QUERY:
2191         if (flow->nw_src && mcast_snooping_add_mrouter(ms, vlan,
2192             in_xbundle->ofbundle)) {
2193             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query from "
2194                         IP_FMT" is on port %s in VLAN %d",
2195                         xbridge->name, IP_ARGS(flow->nw_src),
2196                         in_xbundle->name, vlan);
2197         }
2198         break;
2199     case IGMPV3_HOST_MEMBERSHIP_REPORT:
2200         if ((count = mcast_snooping_add_report(ms, packet, vlan,
2201                                                in_xbundle->ofbundle))) {
2202             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2203                         "addresses on port %s in VLAN %d",
2204                         xbridge->name, count, in_xbundle->name, vlan);
2205         }
2206         break;
2207     }
2208 }
2209
2210 static void
2211 update_mcast_snooping_table6__(const struct xbridge *xbridge,
2212                                const struct flow *flow,
2213                                struct mcast_snooping *ms, int vlan,
2214                                struct xbundle *in_xbundle,
2215                                const struct dp_packet *packet)
2216     OVS_REQ_WRLOCK(ms->rwlock)
2217 {
2218     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 30);
2219     int count;
2220
2221     switch (ntohs(flow->tp_src)) {
2222     case MLD_QUERY:
2223         if (!ipv6_addr_equals(&flow->ipv6_src, &in6addr_any)
2224             && mcast_snooping_add_mrouter(ms, vlan, in_xbundle->ofbundle)) {
2225             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping query on port %s"
2226                         "in VLAN %d",
2227                         xbridge->name, in_xbundle->name, vlan);
2228         }
2229         break;
2230     case MLD_REPORT:
2231     case MLD_DONE:
2232     case MLD2_REPORT:
2233         count = mcast_snooping_add_mld(ms, packet, vlan, in_xbundle->ofbundle);
2234         if (count) {
2235             VLOG_DBG_RL(&rl, "bridge %s: multicast snooping processed %d "
2236                         "addresses on port %s in VLAN %d",
2237                         xbridge->name, count, in_xbundle->name, vlan);
2238         }
2239         break;
2240     }
2241 }
2242
2243 /* Updates multicast snooping table 'ms' given that a packet matching 'flow'
2244  * was received on 'in_xbundle' in 'vlan'. */
2245 static void
2246 update_mcast_snooping_table(const struct xbridge *xbridge,
2247                             const struct flow *flow, int vlan,
2248                             struct xbundle *in_xbundle,
2249                             const struct dp_packet *packet)
2250 {
2251     struct mcast_snooping *ms = xbridge->ms;
2252     struct xlate_cfg *xcfg;
2253     struct xbundle *mcast_xbundle;
2254     struct mcast_port_bundle *fport;
2255
2256     /* Don't learn the OFPP_NONE port. */
2257     if (in_xbundle == &ofpp_none_bundle) {
2258         return;
2259     }
2260
2261     /* Don't learn from flood ports */
2262     mcast_xbundle = NULL;
2263     ovs_rwlock_wrlock(&ms->rwlock);
2264     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2265     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2266         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2267         if (mcast_xbundle == in_xbundle) {
2268             break;
2269         }
2270     }
2271
2272     if (!mcast_xbundle || mcast_xbundle != in_xbundle) {
2273         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2274             update_mcast_snooping_table4__(xbridge, flow, ms, vlan,
2275                                            in_xbundle, packet);
2276         } else {
2277             update_mcast_snooping_table6__(xbridge, flow, ms, vlan,
2278                                            in_xbundle, packet);
2279         }
2280     }
2281     ovs_rwlock_unlock(&ms->rwlock);
2282 }
2283
2284 /* send the packet to ports having the multicast group learned */
2285 static void
2286 xlate_normal_mcast_send_group(struct xlate_ctx *ctx,
2287                               struct mcast_snooping *ms OVS_UNUSED,
2288                               struct mcast_group *grp,
2289                               struct xbundle *in_xbundle, uint16_t vlan)
2290     OVS_REQ_RDLOCK(ms->rwlock)
2291 {
2292     struct xlate_cfg *xcfg;
2293     struct mcast_group_bundle *b;
2294     struct xbundle *mcast_xbundle;
2295
2296     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2297     LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
2298         mcast_xbundle = xbundle_lookup(xcfg, b->port);
2299         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2300             xlate_report(ctx, "forwarding to mcast group port");
2301             output_normal(ctx, mcast_xbundle, vlan);
2302         } else if (!mcast_xbundle) {
2303             xlate_report(ctx, "mcast group port is unknown, dropping");
2304         } else {
2305             xlate_report(ctx, "mcast group port is input port, dropping");
2306         }
2307     }
2308 }
2309
2310 /* send the packet to ports connected to multicast routers */
2311 static void
2312 xlate_normal_mcast_send_mrouters(struct xlate_ctx *ctx,
2313                                  struct mcast_snooping *ms,
2314                                  struct xbundle *in_xbundle, uint16_t vlan)
2315     OVS_REQ_RDLOCK(ms->rwlock)
2316 {
2317     struct xlate_cfg *xcfg;
2318     struct mcast_mrouter_bundle *mrouter;
2319     struct xbundle *mcast_xbundle;
2320
2321     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2322     LIST_FOR_EACH(mrouter, mrouter_node, &ms->mrouter_lru) {
2323         mcast_xbundle = xbundle_lookup(xcfg, mrouter->port);
2324         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2325             xlate_report(ctx, "forwarding to mcast router port");
2326             output_normal(ctx, mcast_xbundle, vlan);
2327         } else if (!mcast_xbundle) {
2328             xlate_report(ctx, "mcast router port is unknown, dropping");
2329         } else {
2330             xlate_report(ctx, "mcast router port is input port, dropping");
2331         }
2332     }
2333 }
2334
2335 /* send the packet to ports flagged to be flooded */
2336 static void
2337 xlate_normal_mcast_send_fports(struct xlate_ctx *ctx,
2338                                struct mcast_snooping *ms,
2339                                struct xbundle *in_xbundle, uint16_t vlan)
2340     OVS_REQ_RDLOCK(ms->rwlock)
2341 {
2342     struct xlate_cfg *xcfg;
2343     struct mcast_port_bundle *fport;
2344     struct xbundle *mcast_xbundle;
2345
2346     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2347     LIST_FOR_EACH(fport, node, &ms->fport_list) {
2348         mcast_xbundle = xbundle_lookup(xcfg, fport->port);
2349         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2350             xlate_report(ctx, "forwarding to mcast flood port");
2351             output_normal(ctx, mcast_xbundle, vlan);
2352         } else if (!mcast_xbundle) {
2353             xlate_report(ctx, "mcast flood port is unknown, dropping");
2354         } else {
2355             xlate_report(ctx, "mcast flood port is input port, dropping");
2356         }
2357     }
2358 }
2359
2360 /* forward the Reports to configured ports */
2361 static void
2362 xlate_normal_mcast_send_rports(struct xlate_ctx *ctx,
2363                                struct mcast_snooping *ms,
2364                                struct xbundle *in_xbundle, uint16_t vlan)
2365     OVS_REQ_RDLOCK(ms->rwlock)
2366 {
2367     struct xlate_cfg *xcfg;
2368     struct mcast_port_bundle *rport;
2369     struct xbundle *mcast_xbundle;
2370
2371     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2372     LIST_FOR_EACH(rport, node, &ms->rport_list) {
2373         mcast_xbundle = xbundle_lookup(xcfg, rport->port);
2374         if (mcast_xbundle && mcast_xbundle != in_xbundle) {
2375             xlate_report(ctx, "forwarding Report to mcast flagged port");
2376             output_normal(ctx, mcast_xbundle, vlan);
2377         } else if (!mcast_xbundle) {
2378             xlate_report(ctx, "mcast port is unknown, dropping the Report");
2379         } else {
2380             xlate_report(ctx, "mcast port is input port, dropping the Report");
2381         }
2382     }
2383 }
2384
2385 static void
2386 xlate_normal_flood(struct xlate_ctx *ctx, struct xbundle *in_xbundle,
2387                    uint16_t vlan)
2388 {
2389     struct xbundle *xbundle;
2390
2391     LIST_FOR_EACH (xbundle, list_node, &ctx->xbridge->xbundles) {
2392         if (xbundle != in_xbundle
2393             && xbundle_includes_vlan(xbundle, vlan)
2394             && xbundle->floodable
2395             && !xbundle_mirror_out(ctx->xbridge, xbundle)) {
2396             output_normal(ctx, xbundle, vlan);
2397         }
2398     }
2399     ctx->nf_output_iface = NF_OUT_FLOOD;
2400 }
2401
2402 static bool
2403 is_ip_local_multicast(const struct flow *flow, struct flow_wildcards *wc)
2404 {
2405     if (flow->dl_type == htons(ETH_TYPE_IP)) {
2406         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
2407         return ip_is_local_multicast(flow->nw_dst);
2408     } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2409         memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
2410         return ipv6_is_all_hosts(&flow->ipv6_dst);
2411     } else {
2412         return false;
2413     }
2414 }
2415
2416 static void
2417 xlate_normal(struct xlate_ctx *ctx)
2418 {
2419     struct flow_wildcards *wc = ctx->wc;
2420     struct flow *flow = &ctx->xin->flow;
2421     struct xbundle *in_xbundle;
2422     struct xport *in_port;
2423     struct mac_entry *mac;
2424     void *mac_port;
2425     uint16_t vlan;
2426     uint16_t vid;
2427
2428     memset(&wc->masks.dl_src, 0xff, sizeof wc->masks.dl_src);
2429     memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
2430     wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
2431
2432     in_xbundle = lookup_input_bundle(ctx->xbridge, flow->in_port.ofp_port,
2433                                      ctx->xin->packet != NULL, &in_port);
2434     if (!in_xbundle) {
2435         xlate_report(ctx, "no input bundle, dropping");
2436         return;
2437     }
2438
2439     /* Drop malformed frames. */
2440     if (flow->dl_type == htons(ETH_TYPE_VLAN) &&
2441         !(flow->vlan_tci & htons(VLAN_CFI))) {
2442         if (ctx->xin->packet != NULL) {
2443             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2444             VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial "
2445                          "VLAN tag received on port %s",
2446                          ctx->xbridge->name, in_xbundle->name);
2447         }
2448         xlate_report(ctx, "partial VLAN tag, dropping");
2449         return;
2450     }
2451
2452     /* Drop frames on bundles reserved for mirroring. */
2453     if (xbundle_mirror_out(ctx->xbridge, in_xbundle)) {
2454         if (ctx->xin->packet != NULL) {
2455             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
2456             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
2457                          "%s, which is reserved exclusively for mirroring",
2458                          ctx->xbridge->name, in_xbundle->name);
2459         }
2460         xlate_report(ctx, "input port is mirror output port, dropping");
2461         return;
2462     }
2463
2464     /* Check VLAN. */
2465     vid = vlan_tci_to_vid(flow->vlan_tci);
2466     if (!input_vid_is_valid(vid, in_xbundle, ctx->xin->packet != NULL)) {
2467         xlate_report(ctx, "disallowed VLAN VID for this input port, dropping");
2468         return;
2469     }
2470     vlan = input_vid_to_vlan(in_xbundle, vid);
2471
2472     /* Check other admissibility requirements. */
2473     if (in_port && !is_admissible(ctx, in_port, vlan)) {
2474         return;
2475     }
2476
2477     /* Learn source MAC. */
2478     if (ctx->xin->may_learn) {
2479         update_learning_table(ctx->xbridge, flow, wc, vlan, in_xbundle);
2480     }
2481     if (ctx->xin->xcache) {
2482         struct xc_entry *entry;
2483
2484         /* Save enough info to update mac learning table later. */
2485         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NORMAL);
2486         entry->u.normal.ofproto = ctx->xbridge->ofproto;
2487         entry->u.normal.flow = xmemdup(flow, sizeof *flow);
2488         entry->u.normal.vlan = vlan;
2489     }
2490
2491     /* Determine output bundle. */
2492     if (mcast_snooping_enabled(ctx->xbridge->ms)
2493         && !eth_addr_is_broadcast(flow->dl_dst)
2494         && eth_addr_is_multicast(flow->dl_dst)
2495         && is_ip_any(flow)) {
2496         struct mcast_snooping *ms = ctx->xbridge->ms;
2497         struct mcast_group *grp = NULL;
2498
2499         if (is_igmp(flow, wc)) {
2500             memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
2501             if (mcast_snooping_is_membership(flow->tp_src) ||
2502                 mcast_snooping_is_query(flow->tp_src)) {
2503                 if (ctx->xin->may_learn && ctx->xin->packet) {
2504                     update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2505                                                 in_xbundle, ctx->xin->packet);
2506                 }
2507                 /*
2508                  * IGMP packets need to take the slow path, in order to be
2509                  * processed for mdb updates. That will prevent expires
2510                  * firing off even after hosts have sent reports.
2511                  */
2512                 ctx->xout->slow |= SLOW_ACTION;
2513             }
2514
2515             if (mcast_snooping_is_membership(flow->tp_src)) {
2516                 ovs_rwlock_rdlock(&ms->rwlock);
2517                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2518                 /* RFC4541: section 2.1.1, item 1: A snooping switch should
2519                  * forward IGMP Membership Reports only to those ports where
2520                  * multicast routers are attached.  Alternatively stated: a
2521                  * snooping switch should not forward IGMP Membership Reports
2522                  * to ports on which only hosts are attached.
2523                  * An administrative control may be provided to override this
2524                  * restriction, allowing the report messages to be flooded to
2525                  * other ports. */
2526                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2527                 ovs_rwlock_unlock(&ms->rwlock);
2528             } else {
2529                 xlate_report(ctx, "multicast traffic, flooding");
2530                 xlate_normal_flood(ctx, in_xbundle, vlan);
2531             }
2532             return;
2533         } else if (is_mld(flow, wc)) {
2534             ctx->xout->slow |= SLOW_ACTION;
2535             if (ctx->xin->may_learn && ctx->xin->packet) {
2536                 update_mcast_snooping_table(ctx->xbridge, flow, vlan,
2537                                             in_xbundle, ctx->xin->packet);
2538             }
2539             if (is_mld_report(flow, wc)) {
2540                 ovs_rwlock_rdlock(&ms->rwlock);
2541                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2542                 xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
2543                 ovs_rwlock_unlock(&ms->rwlock);
2544             } else {
2545                 xlate_report(ctx, "MLD query, flooding");
2546                 xlate_normal_flood(ctx, in_xbundle, vlan);
2547             }
2548         } else {
2549             if (is_ip_local_multicast(flow, wc)) {
2550                 /* RFC4541: section 2.1.2, item 2: Packets with a dst IP
2551                  * address in the 224.0.0.x range which are not IGMP must
2552                  * be forwarded on all ports */
2553                 xlate_report(ctx, "RFC4541: section 2.1.2, item 2, flooding");
2554                 xlate_normal_flood(ctx, in_xbundle, vlan);
2555                 return;
2556             }
2557         }
2558
2559         /* forwarding to group base ports */
2560         ovs_rwlock_rdlock(&ms->rwlock);
2561         if (flow->dl_type == htons(ETH_TYPE_IP)) {
2562             grp = mcast_snooping_lookup4(ms, flow->nw_dst, vlan);
2563         } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
2564             grp = mcast_snooping_lookup(ms, &flow->ipv6_dst, vlan);
2565         }
2566         if (grp) {
2567             xlate_normal_mcast_send_group(ctx, ms, grp, in_xbundle, vlan);
2568             xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2569             xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2570         } else {
2571             if (mcast_snooping_flood_unreg(ms)) {
2572                 xlate_report(ctx, "unregistered multicast, flooding");
2573                 xlate_normal_flood(ctx, in_xbundle, vlan);
2574             } else {
2575                 xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
2576                 xlate_normal_mcast_send_fports(ctx, ms, in_xbundle, vlan);
2577             }
2578         }
2579         ovs_rwlock_unlock(&ms->rwlock);
2580     } else {
2581         ovs_rwlock_rdlock(&ctx->xbridge->ml->rwlock);
2582         mac = mac_learning_lookup(ctx->xbridge->ml, flow->dl_dst, vlan);
2583         mac_port = mac ? mac_entry_get_port(ctx->xbridge->ml, mac) : NULL;
2584         ovs_rwlock_unlock(&ctx->xbridge->ml->rwlock);
2585
2586         if (mac_port) {
2587             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2588             struct xbundle *mac_xbundle = xbundle_lookup(xcfg, mac_port);
2589             if (mac_xbundle && mac_xbundle != in_xbundle) {
2590                 xlate_report(ctx, "forwarding to learned port");
2591                 output_normal(ctx, mac_xbundle, vlan);
2592             } else if (!mac_xbundle) {
2593                 xlate_report(ctx, "learned port is unknown, dropping");
2594             } else {
2595                 xlate_report(ctx, "learned port is input port, dropping");
2596             }
2597         } else {
2598             xlate_report(ctx, "no learned MAC for destination, flooding");
2599             xlate_normal_flood(ctx, in_xbundle, vlan);
2600         }
2601     }
2602 }
2603
2604 /* Appends a "sample" action for sFlow or IPFIX to 'ctx->odp_actions'.  The
2605  * 'probability' is the number of packets out of UINT32_MAX to sample.  The
2606  * 'cookie' (of length 'cookie_size' bytes) is passed back in the callback for
2607  * each sampled packet.  'tunnel_out_port', if not ODPP_NONE, is added as the
2608  * OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute.  If 'include_actions', an
2609  * OVS_USERSPACE_ATTR_ACTIONS attribute is added.  If 'emit_set_tunnel',
2610  * sample(sampling_port=1) would translate into datapath sample action
2611  * set(tunnel(...)), sample(...) and it is used for sampling egress tunnel
2612  * information.
2613  */
2614 static size_t
2615 compose_sample_action(struct xlate_ctx *ctx,
2616                       const uint32_t probability,
2617                       const union user_action_cookie *cookie,
2618                       const size_t cookie_size,
2619                       const odp_port_t tunnel_out_port,
2620                       bool include_actions)
2621 {
2622     size_t sample_offset = nl_msg_start_nested(ctx->odp_actions,
2623                                                OVS_ACTION_ATTR_SAMPLE);
2624
2625     nl_msg_put_u32(ctx->odp_actions, OVS_SAMPLE_ATTR_PROBABILITY, probability);
2626
2627     size_t actions_offset = nl_msg_start_nested(ctx->odp_actions,
2628                                                 OVS_SAMPLE_ATTR_ACTIONS);
2629
2630     odp_port_t odp_port = ofp_port_to_odp_port(
2631         ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
2632     uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port,
2633                                      flow_hash_5tuple(&ctx->xin->flow, 0));
2634     int cookie_offset = odp_put_userspace_action(pid, cookie, cookie_size,
2635                                                  tunnel_out_port,
2636                                                  include_actions,
2637                                                  ctx->odp_actions);
2638
2639     nl_msg_end_nested(ctx->odp_actions, actions_offset);
2640     nl_msg_end_nested(ctx->odp_actions, sample_offset);
2641
2642     return cookie_offset;
2643 }
2644
2645 /* If sFLow is not enabled, returns 0 without doing anything.
2646  *
2647  * If sFlow is enabled, appends a template "sample" action to the ODP actions
2648  * in 'ctx'.  This action is a template because some of the information needed
2649  * to fill it out is not available until flow translation is complete.  In this
2650  * case, this functions returns an offset, which is always nonzero, to pass
2651  * later to fix_sflow_action() to fill in the rest of the template. */
2652 static size_t
2653 compose_sflow_action(struct xlate_ctx *ctx)
2654 {
2655     struct dpif_sflow *sflow = ctx->xbridge->sflow;
2656     if (!sflow || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2657         return 0;
2658     }
2659
2660     union user_action_cookie cookie = { .type = USER_ACTION_COOKIE_SFLOW };
2661     return compose_sample_action(ctx, dpif_sflow_get_probability(sflow),
2662                                  &cookie, sizeof cookie.sflow, ODPP_NONE,
2663                                  true);
2664 }
2665
2666 /* If flow IPFIX is enabled, make sure IPFIX flow sample action
2667  * at egress point of tunnel port is just in front of corresponding
2668  * output action. If bridge IPFIX is enabled, this appends an IPFIX
2669  * sample action to 'ctx->odp_actions'. */
2670 static void
2671 compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
2672 {
2673     struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
2674     odp_port_t tunnel_out_port = ODPP_NONE;
2675
2676     if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
2677         return;
2678     }
2679
2680     /* For input case, output_odp_port is ODPP_NONE, which is an invalid port
2681      * number. */
2682     if (output_odp_port == ODPP_NONE &&
2683         !dpif_ipfix_get_bridge_exporter_input_sampling(ipfix)) {
2684         return;
2685     }
2686
2687     /* For output case, output_odp_port is valid. */
2688     if (output_odp_port != ODPP_NONE) {
2689         if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
2690             return;
2691         }
2692         /* If tunnel sampling is enabled, put an additional option attribute:
2693          * OVS_USERSPACE_ATTR_TUNNEL_OUT_PORT
2694          */
2695         if (dpif_ipfix_get_bridge_exporter_tunnel_sampling(ipfix) &&
2696             dpif_ipfix_get_tunnel_port(ipfix, output_odp_port) ) {
2697            tunnel_out_port = output_odp_port;
2698         }
2699     }
2700
2701     union user_action_cookie cookie = {
2702         .ipfix = {
2703             .type = USER_ACTION_COOKIE_IPFIX,
2704             .output_odp_port = output_odp_port,
2705         }
2706     };
2707     compose_sample_action(ctx,
2708                           dpif_ipfix_get_bridge_exporter_probability(ipfix),
2709                           &cookie, sizeof cookie.ipfix, tunnel_out_port,
2710                           false);
2711 }
2712
2713 /* Fix "sample" action according to data collected while composing ODP actions,
2714  * as described in compose_sflow_action().
2715  *
2716  * 'user_cookie_offset' must be the offset returned by add_sflow_action(). */
2717 static void
2718 fix_sflow_action(struct xlate_ctx *ctx, unsigned int user_cookie_offset)
2719 {
2720     const struct flow *base = &ctx->base_flow;
2721     union user_action_cookie *cookie;
2722
2723     cookie = ofpbuf_at(ctx->odp_actions, user_cookie_offset,
2724                        sizeof cookie->sflow);
2725     ovs_assert(cookie->type == USER_ACTION_COOKIE_SFLOW);
2726
2727     cookie->type = USER_ACTION_COOKIE_SFLOW;
2728     cookie->sflow.vlan_tci = base->vlan_tci;
2729
2730     /* See http://www.sflow.org/sflow_version_5.txt (search for "Input/output
2731      * port information") for the interpretation of cookie->output. */
2732     switch (ctx->sflow_n_outputs) {
2733     case 0:
2734         /* 0x40000000 | 256 means "packet dropped for unknown reason". */
2735         cookie->sflow.output = 0x40000000 | 256;
2736         break;
2737
2738     case 1:
2739         cookie->sflow.output = dpif_sflow_odp_port_to_ifindex(
2740             ctx->xbridge->sflow, ctx->sflow_odp_port);
2741         if (cookie->sflow.output) {
2742             break;
2743         }
2744         /* Fall through. */
2745     default:
2746         /* 0x80000000 means "multiple output ports. */
2747         cookie->sflow.output = 0x80000000 | ctx->sflow_n_outputs;
2748         break;
2749     }
2750 }
2751
2752 static bool
2753 process_special(struct xlate_ctx *ctx, const struct xport *xport)
2754 {
2755     const struct flow *flow = &ctx->xin->flow;
2756     struct flow_wildcards *wc = ctx->wc;
2757     const struct xbridge *xbridge = ctx->xbridge;
2758     const struct dp_packet *packet = ctx->xin->packet;
2759     enum slow_path_reason slow;
2760
2761     if (!xport) {
2762         slow = 0;
2763     } else if (xport->cfm && cfm_should_process_flow(xport->cfm, flow, wc)) {
2764         if (packet) {
2765             cfm_process_heartbeat(xport->cfm, packet);
2766         }
2767         slow = SLOW_CFM;
2768     } else if (xport->bfd && bfd_should_process_flow(xport->bfd, flow, wc)) {
2769         if (packet) {
2770             bfd_process_packet(xport->bfd, flow, packet);
2771             /* If POLL received, immediately sends FINAL back. */
2772             if (bfd_should_send_packet(xport->bfd)) {
2773                 ofproto_dpif_monitor_port_send_soon(xport->ofport);
2774             }
2775         }
2776         slow = SLOW_BFD;
2777     } else if (xport->xbundle && xport->xbundle->lacp
2778                && flow->dl_type == htons(ETH_TYPE_LACP)) {
2779         if (packet) {
2780             lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet);
2781         }
2782         slow = SLOW_LACP;
2783     } else if ((xbridge->stp || xbridge->rstp) &&
2784                stp_should_process_flow(flow, wc)) {
2785         if (packet) {
2786             xbridge->stp
2787                 ? stp_process_packet(xport, packet)
2788                 : rstp_process_packet(xport, packet);
2789         }
2790         slow = SLOW_STP;
2791     } else if (xport->lldp && lldp_should_process_flow(xport->lldp, flow)) {
2792         if (packet) {
2793             lldp_process_packet(xport->lldp, packet);
2794         }
2795         slow = SLOW_LLDP;
2796     } else {
2797         slow = 0;
2798     }
2799
2800     if (slow) {
2801         ctx->xout->slow |= slow;
2802         return true;
2803     } else {
2804         return false;
2805     }
2806 }
2807
2808 static int
2809 tnl_route_lookup_flow(const struct flow *oflow,
2810                       struct in6_addr *ip, struct in6_addr *src,
2811                       struct xport **out_port)
2812 {
2813     char out_dev[IFNAMSIZ];
2814     struct xbridge *xbridge;
2815     struct xlate_cfg *xcfg;
2816     struct in6_addr gw;
2817     struct in6_addr dst;
2818
2819     dst = flow_tnl_dst(&oflow->tunnel);
2820     if (!ovs_router_lookup(&dst, out_dev, src, &gw)) {
2821         return -ENOENT;
2822     }
2823
2824     if (ipv6_addr_is_set(&gw) &&
2825         (!IN6_IS_ADDR_V4MAPPED(&gw) || in6_addr_get_mapped_ipv4(&gw))) {
2826         *ip = gw;
2827     } else {
2828         *ip = dst;
2829     }
2830
2831     xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
2832     ovs_assert(xcfg);
2833
2834     HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
2835         if (!strncmp(xbridge->name, out_dev, IFNAMSIZ)) {
2836             struct xport *port;
2837
2838             HMAP_FOR_EACH (port, ofp_node, &xbridge->xports) {
2839                 if (!strncmp(netdev_get_name(port->netdev), out_dev, IFNAMSIZ)) {
2840                     *out_port = port;
2841                     return 0;
2842                 }
2843             }
2844         }
2845     }
2846     return -ENOENT;
2847 }
2848
2849 static int
2850 compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev,
2851                     struct dp_packet *packet)
2852 {
2853     struct xbridge *xbridge = out_dev->xbridge;
2854     struct ofpact_output output;
2855     struct flow flow;
2856
2857     ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
2858     flow_extract(packet, &flow);
2859     flow.in_port.ofp_port = out_dev->ofp_port;
2860     output.port = OFPP_TABLE;
2861     output.max_len = 0;
2862
2863     return ofproto_dpif_execute_actions__(xbridge->ofproto, &flow, NULL,
2864                                           &output.ofpact, sizeof output,
2865                                           ctx->indentation, ctx->depth,
2866                                           ctx->resubmits, packet);
2867 }
2868
2869 static void
2870 tnl_send_nd_request(struct xlate_ctx *ctx, const struct xport *out_dev,
2871                      const struct eth_addr eth_src,
2872                      struct in6_addr * ipv6_src, struct in6_addr * ipv6_dst)
2873 {
2874     struct dp_packet packet;
2875
2876     dp_packet_init(&packet, 0);
2877     compose_nd(&packet, eth_src, ipv6_src, ipv6_dst);
2878     compose_table_xlate(ctx, out_dev, &packet);
2879     dp_packet_uninit(&packet);
2880 }
2881
2882 static void
2883 tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev,
2884                      const struct eth_addr eth_src,
2885                      ovs_be32 ip_src, ovs_be32 ip_dst)
2886 {
2887     struct dp_packet packet;
2888
2889     dp_packet_init(&packet, 0);
2890     compose_arp(&packet, ARP_OP_REQUEST,
2891                 eth_src, eth_addr_zero, true, ip_src, ip_dst);
2892
2893     compose_table_xlate(ctx, out_dev, &packet);
2894     dp_packet_uninit(&packet);
2895 }
2896
2897 static int
2898 build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
2899                   const struct flow *flow, odp_port_t tunnel_odp_port)
2900 {
2901     struct netdev_tnl_build_header_params tnl_params;
2902     struct ovs_action_push_tnl tnl_push_data;
2903     struct xport *out_dev = NULL;
2904     ovs_be32 s_ip = 0, d_ip = 0;
2905     struct in6_addr s_ip6 = in6addr_any;
2906     struct in6_addr d_ip6 = in6addr_any;
2907     struct eth_addr smac;
2908     struct eth_addr dmac;
2909     int err;
2910     char buf_sip6[INET6_ADDRSTRLEN];
2911     char buf_dip6[INET6_ADDRSTRLEN];
2912
2913     err = tnl_route_lookup_flow(flow, &d_ip6, &s_ip6, &out_dev);
2914     if (err) {
2915         xlate_report(ctx, "native tunnel routing failed");
2916         return err;
2917     }
2918
2919     xlate_report(ctx, "tunneling to %s via %s",
2920                  ipv6_string_mapped(buf_dip6, &d_ip6),
2921                  netdev_get_name(out_dev->netdev));
2922
2923     /* Use mac addr of bridge port of the peer. */
2924     err = netdev_get_etheraddr(out_dev->netdev, &smac);
2925     if (err) {
2926         xlate_report(ctx, "tunnel output device lacks Ethernet address");
2927         return err;
2928     }
2929
2930     d_ip = in6_addr_get_mapped_ipv4(&d_ip6);
2931     if (d_ip) {
2932         s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
2933     }
2934
2935     err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
2936     if (err) {
2937         xlate_report(ctx, "neighbor cache miss for %s on bridge %s, "
2938                      "sending %s request",
2939                      buf_dip6, out_dev->xbridge->name, d_ip ? "ARP" : "ND");
2940         if (d_ip) {
2941             tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip);
2942         } else {
2943             tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6);
2944         }
2945         return err;
2946     }
2947
2948     if (ctx->xin->xcache) {
2949         struct xc_entry *entry;
2950
2951         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_NEIGH);
2952         ovs_strlcpy(entry->u.tnl_neigh_cache.br_name, out_dev->xbridge->name,
2953                     sizeof entry->u.tnl_neigh_cache.br_name);
2954         entry->u.tnl_neigh_cache.d_ipv6 = d_ip6;
2955     }
2956
2957     xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" %s"
2958                  " to "ETH_ADDR_FMT" %s",
2959                  ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6),
2960                  ETH_ADDR_ARGS(dmac), buf_dip6);
2961
2962     netdev_init_tnl_build_header_params(&tnl_params, flow, &s_ip6, dmac, smac);
2963     err = tnl_port_build_header(xport->ofport, &tnl_push_data, &tnl_params);
2964     if (err) {
2965         return err;
2966     }
2967     tnl_push_data.tnl_port = odp_to_u32(tunnel_odp_port);
2968     tnl_push_data.out_port = odp_to_u32(out_dev->odp_port);
2969     odp_put_tnl_push_action(ctx->odp_actions, &tnl_push_data);
2970     return 0;
2971 }
2972
2973 static void
2974 xlate_commit_actions(struct xlate_ctx *ctx)
2975 {
2976     bool use_masked = ctx->xbridge->support.masked_set_action;
2977
2978     ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow,
2979                                           ctx->odp_actions, ctx->wc,
2980                                           use_masked);
2981 }
2982
2983 static void
2984 clear_conntrack(struct flow *flow)
2985 {
2986     flow->ct_state = 0;
2987     flow->ct_zone = 0;
2988     flow->ct_mark = 0;
2989     memset(&flow->ct_label, 0, sizeof flow->ct_label);
2990 }
2991
2992 static void
2993 compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
2994                         const struct xlate_bond_recirc *xr, bool check_stp)
2995 {
2996     const struct xport *xport = get_ofp_port(ctx->xbridge, ofp_port);
2997     struct flow_wildcards *wc = ctx->wc;
2998     struct flow *flow = &ctx->xin->flow;
2999     struct flow_tnl flow_tnl;
3000     ovs_be16 flow_vlan_tci;
3001     uint32_t flow_pkt_mark;
3002     uint8_t flow_nw_tos;
3003     odp_port_t out_port, odp_port;
3004     bool tnl_push_pop_send = false;
3005     uint8_t dscp;
3006
3007     /* If 'struct flow' gets additional metadata, we'll need to zero it out
3008      * before traversing a patch port. */
3009     BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
3010     memset(&flow_tnl, 0, sizeof flow_tnl);
3011
3012     if (!xport) {
3013         xlate_report(ctx, "Nonexistent output port");
3014         return;
3015     } else if (xport->config & OFPUTIL_PC_NO_FWD) {
3016         xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
3017         return;
3018     } else if (ctx->mirror_snaplen != 0 && xport->odp_port == ODPP_NONE) {
3019         xlate_report(ctx, "Mirror truncate to ODPP_NONE, skipping output");
3020         return;
3021     } else if (check_stp) {
3022         if (is_stp(&ctx->base_flow)) {
3023             if (!xport_stp_should_forward_bpdu(xport) &&
3024                 !xport_rstp_should_manage_bpdu(xport)) {
3025                 if (ctx->xbridge->stp != NULL) {
3026                     xlate_report(ctx, "STP not in listening state, "
3027                             "skipping bpdu output");
3028                 } else if (ctx->xbridge->rstp != NULL) {
3029                     xlate_report(ctx, "RSTP not managing BPDU in this state, "
3030                             "skipping bpdu output");
3031                 }
3032                 return;
3033             }
3034         } else if (!xport_stp_forward_state(xport) ||
3035                    !xport_rstp_forward_state(xport)) {
3036             if (ctx->xbridge->stp != NULL) {
3037                 xlate_report(ctx, "STP not in forwarding state, "
3038                         "skipping output");
3039             } else if (ctx->xbridge->rstp != NULL) {
3040                 xlate_report(ctx, "RSTP not in forwarding state, "
3041                         "skipping output");
3042             }
3043             return;
3044         }
3045     }
3046
3047     if (xport->peer) {
3048         const struct xport *peer = xport->peer;
3049         struct flow old_flow = ctx->xin->flow;
3050         bool old_conntrack = ctx->conntracked;
3051         bool old_was_mpls = ctx->was_mpls;
3052         cls_version_t old_version = ctx->tables_version;
3053         struct ofpbuf old_stack = ctx->stack;
3054         union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
3055         struct ofpbuf old_action_set = ctx->action_set;
3056         uint64_t actset_stub[1024 / 8];
3057
3058         ofpbuf_use_stub(&ctx->stack, new_stack, sizeof new_stack);
3059         ofpbuf_use_stub(&ctx->action_set, actset_stub, sizeof actset_stub);
3060         ctx->xbridge = peer->xbridge;
3061         flow->in_port.ofp_port = peer->ofp_port;
3062         flow->metadata = htonll(0);
3063         memset(&flow->tunnel, 0, sizeof flow->tunnel);
3064         memset(flow->regs, 0, sizeof flow->regs);
3065         flow->actset_output = OFPP_UNSET;
3066         ctx->conntracked = false;
3067         clear_conntrack(flow);
3068
3069         /* The bridge is now known so obtain its table version. */
3070         ctx->tables_version
3071             = ofproto_dpif_get_tables_version(ctx->xbridge->ofproto);
3072
3073         if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
3074             if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
3075                 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
3076                 if (!ctx->freezing) {
3077                     xlate_action_set(ctx);
3078                 }
3079                 if (ctx->freezing) {
3080                     finish_freezing(ctx);
3081                 }
3082             } else {
3083                 /* Forwarding is disabled by STP and RSTP.  Let OFPP_NORMAL and
3084                  * the learning action look at the packet, then drop it. */
3085                 struct flow old_base_flow = ctx->base_flow;
3086                 size_t old_size = ctx->odp_actions->size;
3087                 mirror_mask_t old_mirrors = ctx->mirrors;
3088
3089                 xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
3090                 ctx->mirrors = old_mirrors;
3091                 ctx->base_flow = old_base_flow;
3092                 ctx->odp_actions->size = old_size;
3093
3094                 /* Undo changes that may have been done for freezing. */
3095                 ctx_cancel_freeze(ctx);
3096             }
3097         }
3098
3099         ctx->xin->flow = old_flow;
3100         ctx->xbridge = xport->xbridge;
3101         ofpbuf_uninit(&ctx->action_set);
3102         ctx->action_set = old_action_set;
3103         ofpbuf_uninit(&ctx->stack);
3104         ctx->stack = old_stack;
3105
3106         /* Restore calling bridge's lookup version. */
3107         ctx->tables_version = old_version;
3108
3109         /* The peer bridge popping MPLS should have no effect on the original
3110          * bridge. */
3111         ctx->was_mpls = old_was_mpls;
3112
3113         /* The peer bridge's conntrack execution should have no effect on the
3114          * original bridge. */
3115         ctx->conntracked = old_conntrack;
3116
3117         /* The fact that the peer bridge exits (for any reason) does not mean
3118          * that the original bridge should exit.  Specifically, if the peer
3119          * bridge freezes translation, the original bridge must continue
3120          * processing with the original, not the frozen packet! */
3121         ctx->exit = false;
3122
3123         /* Peer bridge errors do not propagate back. */
3124         ctx->error = XLATE_OK;
3125
3126         if (ctx->xin->resubmit_stats) {
3127             netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
3128             netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats);
3129             if (peer->bfd) {
3130                 bfd_account_rx(peer->bfd, ctx->xin->resubmit_stats);
3131             }
3132         }
3133         if (ctx->xin->xcache) {
3134             struct xc_entry *entry;
3135
3136             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
3137             entry->u.dev.tx = netdev_ref(xport->netdev);
3138             entry->u.dev.rx = netdev_ref(peer->netdev);
3139             entry->u.dev.bfd = bfd_ref(peer->bfd);
3140         }
3141         return;
3142     }
3143
3144     flow_vlan_tci = flow->vlan_tci;
3145     flow_pkt_mark = flow->pkt_mark;
3146     flow_nw_tos = flow->nw_tos;
3147
3148     if (count_skb_priorities(xport)) {
3149         memset(&wc->masks.skb_priority, 0xff, sizeof wc->masks.skb_priority);
3150         if (dscp_from_skb_priority(xport, flow->skb_priority, &dscp)) {
3151             wc->masks.nw_tos |= IP_DSCP_MASK;
3152             flow->nw_tos &= ~IP_DSCP_MASK;
3153             flow->nw_tos |= dscp;
3154         }
3155     }
3156
3157     if (xport->is_tunnel) {
3158         struct in6_addr dst;
3159          /* Save tunnel metadata so that changes made due to
3160           * the Logical (tunnel) Port are not visible for any further
3161           * matches, while explicit set actions on tunnel metadata are.
3162           */
3163         flow_tnl = flow->tunnel;
3164         odp_port = tnl_port_send(xport->ofport, flow, ctx->wc);
3165         if (odp_port == ODPP_NONE) {
3166             xlate_report(ctx, "Tunneling decided against output");
3167             goto out; /* restore flow_nw_tos */
3168         }
3169         dst = flow_tnl_dst(&flow->tunnel);
3170         if (ipv6_addr_equals(&dst, &ctx->orig_tunnel_ipv6_dst)) {
3171             xlate_report(ctx, "Not tunneling to our own address");
3172             goto out; /* restore flow_nw_tos */
3173         }
3174         if (ctx->xin->resubmit_stats) {
3175             netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats);
3176         }
3177         if (ctx->xin->xcache) {
3178             struct xc_entry *entry;
3179
3180             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_NETDEV);
3181             entry->u.dev.tx = netdev_ref(xport->netdev);
3182         }
3183         out_port = odp_port;
3184         if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
3185             xlate_report(ctx, "output to native tunnel");
3186             tnl_push_pop_send = true;
3187         } else {
3188             xlate_report(ctx, "output to kernel tunnel");
3189             commit_odp_tunnel_action(flow, &ctx->base_flow, ctx->odp_actions);
3190             flow->tunnel = flow_tnl; /* Restore tunnel metadata */
3191         }
3192     } else {
3193         odp_port = xport->odp_port;
3194         out_port = odp_port;
3195     }
3196
3197     if (out_port != ODPP_NONE) {
3198         xlate_commit_actions(ctx);
3199
3200         if (xr) {
3201             struct ovs_action_hash *act_hash;
3202
3203             /* Hash action. */
3204             act_hash = nl_msg_put_unspec_uninit(ctx->odp_actions,
3205                                                 OVS_ACTION_ATTR_HASH,
3206                                                 sizeof *act_hash);
3207             act_hash->hash_alg = xr->hash_alg;
3208             act_hash->hash_basis = xr->hash_basis;
3209
3210             /* Recirc action. */
3211             nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC,
3212                            xr->recirc_id);
3213         } else {
3214
3215             if (tnl_push_pop_send) {
3216                 build_tunnel_send(ctx, xport, flow, odp_port);
3217                 flow->tunnel = flow_tnl; /* Restore tunnel metadata */
3218             } else {
3219                 odp_port_t odp_tnl_port = ODPP_NONE;
3220
3221                 /* XXX: Write better Filter for tunnel port. We can use inport
3222                 * int tunnel-port flow to avoid these checks completely. */
3223                 if (ofp_port == OFPP_LOCAL &&
3224                     ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
3225
3226                     odp_tnl_port = tnl_port_map_lookup(flow, wc);
3227                 }
3228
3229                 if (odp_tnl_port != ODPP_NONE) {
3230                     nl_msg_put_odp_port(ctx->odp_actions,
3231                                         OVS_ACTION_ATTR_TUNNEL_POP,
3232                                         odp_tnl_port);
3233                 } else {
3234                     /* Tunnel push-pop action is not compatible with
3235                      * IPFIX action. */
3236                     compose_ipfix_action(ctx, out_port);
3237
3238                     /* Handle truncation of the mirrored packet. */
3239                     if (ctx->mirror_snaplen > 0 &&
3240                         ctx->mirror_snaplen < UINT16_MAX) {
3241                         struct ovs_action_trunc *trunc;
3242
3243                         trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
3244                                                          OVS_ACTION_ATTR_TRUNC,
3245                                                          sizeof *trunc);
3246                         trunc->max_len = ctx->mirror_snaplen;
3247                         if (!ctx->xbridge->support.trunc) {
3248                             ctx->xout->slow |= SLOW_ACTION;
3249                         }
3250                     }
3251
3252                     nl_msg_put_odp_port(ctx->odp_actions,
3253                                         OVS_ACTION_ATTR_OUTPUT,
3254                                         out_port);
3255                 }
3256             }
3257         }
3258
3259         ctx->sflow_odp_port = odp_port;
3260         ctx->sflow_n_outputs++;
3261         ctx->nf_output_iface = ofp_port;
3262     }
3263
3264     if (mbridge_has_mirrors(ctx->xbridge->mbridge) && xport->xbundle) {
3265         mirror_packet(ctx, xport->xbundle,
3266                       xbundle_mirror_dst(xport->xbundle->xbridge,
3267                                          xport->xbundle));
3268     }
3269
3270  out:
3271     /* Restore flow */
3272     flow->vlan_tci = flow_vlan_tci;
3273     flow->pkt_mark = flow_pkt_mark;
3274     flow->nw_tos = flow_nw_tos;
3275 }
3276
3277 static void
3278 compose_output_action(struct xlate_ctx *ctx, ofp_port_t ofp_port,
3279                       const struct xlate_bond_recirc *xr)
3280 {
3281     compose_output_action__(ctx, ofp_port, xr, true);
3282 }
3283
3284 static void
3285 xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule, bool deepens)
3286 {
3287     struct rule_dpif *old_rule = ctx->rule;
3288     ovs_be64 old_cookie = ctx->rule_cookie;
3289     const struct rule_actions *actions;
3290
3291     if (ctx->xin->resubmit_stats) {
3292         rule_dpif_credit_stats(rule, ctx->xin->resubmit_stats);
3293     }
3294
3295     ctx->resubmits++;
3296
3297     ctx->indentation++;
3298     ctx->depth += deepens;
3299     ctx->rule = rule;
3300     ctx->rule_cookie = rule_dpif_get_flow_cookie(rule);
3301     actions = rule_dpif_get_actions(rule);
3302     do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx);
3303     ctx->rule_cookie = old_cookie;
3304     ctx->rule = old_rule;
3305     ctx->depth -= deepens;
3306     ctx->indentation--;
3307 }
3308
3309 static bool
3310 xlate_resubmit_resource_check(struct xlate_ctx *ctx)
3311 {
3312     if (ctx->depth >= MAX_DEPTH) {
3313         XLATE_REPORT_ERROR(ctx, "over max translation depth %d", MAX_DEPTH);
3314         ctx->error = XLATE_RECURSION_TOO_DEEP;
3315     } else if (ctx->resubmits >= MAX_RESUBMITS) {
3316         XLATE_REPORT_ERROR(ctx, "over %d resubmit actions", MAX_RESUBMITS);
3317         ctx->error = XLATE_TOO_MANY_RESUBMITS;
3318     } else if (ctx->odp_actions->size > UINT16_MAX) {
3319         XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of actions");
3320         /* NOT an error, as we'll be slow-pathing the flow in this case? */
3321         ctx->exit = true; /* XXX: translation still terminated! */
3322     } else if (ctx->stack.size >= 65536) {
3323         XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of stack");
3324         ctx->error = XLATE_STACK_TOO_DEEP;
3325     } else {
3326         return true;
3327     }
3328
3329     return false;
3330 }
3331
3332 static void
3333 xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
3334                    bool may_packet_in, bool honor_table_miss)
3335 {
3336     /* Check if we need to recirculate before matching in a table. */
3337     if (ctx->was_mpls) {
3338         ctx_trigger_freeze(ctx);
3339         return;
3340     }
3341     if (xlate_resubmit_resource_check(ctx)) {
3342         uint8_t old_table_id = ctx->table_id;
3343         struct rule_dpif *rule;
3344
3345         ctx->table_id = table_id;
3346
3347         rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
3348                                            ctx->tables_version,
3349                                            &ctx->xin->flow, ctx->wc,
3350                                            ctx->xin->resubmit_stats,
3351                                            &ctx->table_id, in_port,
3352                                            may_packet_in, honor_table_miss);
3353
3354         if (OVS_UNLIKELY(ctx->xin->resubmit_hook)) {
3355             ctx->xin->resubmit_hook(ctx->xin, rule, ctx->indentation + 1);
3356         }
3357
3358         if (rule) {
3359             /* Fill in the cache entry here instead of xlate_recursively
3360              * to make the reference counting more explicit.  We take a
3361              * reference in the lookups above if we are going to cache the
3362              * rule. */
3363             if (ctx->xin->xcache) {
3364                 struct xc_entry *entry;
3365
3366                 entry = xlate_cache_add_entry(ctx->xin->xcache, XC_RULE);
3367                 entry->u.rule = rule;
3368                 rule_dpif_ref(rule);
3369             }
3370             xlate_recursively(ctx, rule, table_id <= old_table_id);
3371         }
3372
3373         ctx->table_id = old_table_id;
3374         return;
3375     }
3376 }
3377
3378 static void
3379 xlate_group_stats(struct xlate_ctx *ctx, struct group_dpif *group,
3380                   struct ofputil_bucket *bucket)
3381 {
3382     if (ctx->xin->resubmit_stats) {
3383         group_dpif_credit_stats(group, bucket, ctx->xin->resubmit_stats);
3384     }
3385     if (ctx->xin->xcache) {
3386         struct xc_entry *entry;
3387
3388         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_GROUP);
3389         entry->u.group.group = group_dpif_ref(group);
3390         entry->u.group.bucket = bucket;
3391     }
3392 }
3393
3394 static void
3395 xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
3396 {
3397     uint64_t action_list_stub[1024 / 8];
3398     struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub);
3399     struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
3400                                                         bucket->ofpacts_len);
3401     struct flow old_flow = ctx->xin->flow;
3402     bool old_was_mpls = ctx->was_mpls;
3403
3404     ofpacts_execute_action_set(&action_list, &action_set);
3405     ctx->indentation++;
3406     ctx->depth++;
3407     do_xlate_actions(action_list.data, action_list.size, ctx);
3408     ctx->depth--;
3409     ctx->indentation--;
3410
3411     ofpbuf_uninit(&action_list);
3412
3413     /* Check if need to freeze. */
3414     if (ctx->freezing) {
3415         finish_freezing(ctx);
3416     }
3417
3418     /* Roll back flow to previous state.
3419      * This is equivalent to cloning the packet for each bucket.
3420      *
3421      * As a side effect any subsequently applied actions will
3422      * also effectively be applied to a clone of the packet taken
3423      * just before applying the all or indirect group.
3424      *
3425      * Note that group buckets are action sets, hence they cannot modify the
3426      * main action set.  Also any stack actions are ignored when executing an
3427      * action set, so group buckets cannot change the stack either.
3428      * However, we do allow resubmit actions in group buckets, which could
3429      * break the above assumptions.  It is up to the controller to not mess up
3430      * with the action_set and stack in the tables resubmitted to from
3431      * group buckets. */
3432     ctx->xin->flow = old_flow;
3433
3434     /* The group bucket popping MPLS should have no effect after bucket
3435      * execution. */
3436     ctx->was_mpls = old_was_mpls;
3437
3438     /* The fact that the group bucket exits (for any reason) does not mean that
3439      * the translation after the group action should exit.  Specifically, if
3440      * the group bucket freezes translation, the actions after the group action
3441      * must continue processing with the original, not the frozen packet! */
3442     ctx->exit = false;
3443 }
3444
3445 static void
3446 xlate_all_group(struct xlate_ctx *ctx, struct group_dpif *group)
3447 {
3448     struct ofputil_bucket *bucket;
3449     const struct ovs_list *buckets;
3450
3451     group_dpif_get_buckets(group, &buckets);
3452
3453     LIST_FOR_EACH (bucket, list_node, buckets) {
3454         xlate_group_bucket(ctx, bucket);
3455     }
3456     xlate_group_stats(ctx, group, NULL);
3457 }
3458
3459 static void
3460 xlate_ff_group(struct xlate_ctx *ctx, struct group_dpif *group)
3461 {
3462     struct ofputil_bucket *bucket;
3463
3464     bucket = group_first_live_bucket(ctx, group, 0);
3465     if (bucket) {
3466         xlate_group_bucket(ctx, bucket);
3467         xlate_group_stats(ctx, group, bucket);
3468     }
3469 }
3470
3471 static void
3472 xlate_default_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3473 {
3474     struct flow_wildcards *wc = ctx->wc;
3475     struct ofputil_bucket *bucket;
3476     uint32_t basis;
3477
3478     basis = flow_hash_symmetric_l4(&ctx->xin->flow, 0);
3479     flow_mask_hash_fields(&ctx->xin->flow, wc, NX_HASH_FIELDS_SYMMETRIC_L4);
3480     bucket = group_best_live_bucket(ctx, group, basis);
3481     if (bucket) {
3482         xlate_group_bucket(ctx, bucket);
3483         xlate_group_stats(ctx, group, bucket);
3484     }
3485 }
3486
3487 static void
3488 xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3489 {
3490     struct mf_bitmap hash_fields = MF_BITMAP_INITIALIZER;
3491     const struct field_array *fields;
3492     struct ofputil_bucket *bucket;
3493     uint32_t basis;
3494     int i;
3495
3496     fields = group_dpif_get_fields(group);
3497     basis = hash_uint64(group_dpif_get_selection_method_param(group));
3498
3499     /* Determine which fields to hash */
3500     for (i = 0; i < MFF_N_IDS; i++) {
3501         if (bitmap_is_set(fields->used.bm, i)) {
3502             const struct mf_field *mf;
3503
3504             /* If the field is already present in 'hash_fields' then
3505              * this loop has already checked that it and its pre-requisites
3506              * are present in the flow and its pre-requisites have
3507              * already been added to 'hash_fields'. There is nothing more
3508              * to do here and as an optimisation the loop can continue. */
3509             if (bitmap_is_set(hash_fields.bm, i)) {
3510                 continue;
3511             }
3512
3513             mf = mf_from_id(i);
3514
3515             /* Only hash a field if it and its pre-requisites are present
3516              * in the flow. */
3517             if (!mf_are_prereqs_ok(mf, &ctx->xin->flow)) {
3518                 continue;
3519             }
3520
3521             /* Hash both the field and its pre-requisites */
3522             mf_bitmap_set_field_and_prereqs(mf, &hash_fields);
3523         }
3524     }
3525
3526     /* Hash the fields */
3527     for (i = 0; i < MFF_N_IDS; i++) {
3528         if (bitmap_is_set(hash_fields.bm, i)) {
3529             const struct mf_field *mf = mf_from_id(i);
3530             union mf_value value;
3531             int j;
3532
3533             mf_get_value(mf, &ctx->xin->flow, &value);
3534             /* This seems inefficient but so does apply_mask() */
3535             for (j = 0; j < mf->n_bytes; j++) {
3536                 ((uint8_t *) &value)[j] &= ((uint8_t *) &fields->value[i])[j];
3537             }
3538             basis = hash_bytes(&value, mf->n_bytes, basis);
3539
3540             /* For tunnels, hash in whether the field is present. */
3541             if (mf_is_tun_metadata(mf)) {
3542                 basis = hash_boolean(mf_is_set(mf, &ctx->xin->flow), basis);
3543             }
3544
3545             mf_mask_field(mf, &ctx->wc->masks);
3546         }
3547     }
3548
3549     bucket = group_best_live_bucket(ctx, group, basis);
3550     if (bucket) {
3551         xlate_group_bucket(ctx, bucket);
3552         xlate_group_stats(ctx, group, bucket);
3553     }
3554 }
3555
3556 static void
3557 xlate_select_group(struct xlate_ctx *ctx, struct group_dpif *group)
3558 {
3559     const char *selection_method = group_dpif_get_selection_method(group);
3560
3561     /* Select groups may access flow keys beyond L2 in order to
3562      * select a bucket. Recirculate as appropriate to make this possible.
3563      */
3564     if (ctx->was_mpls) {
3565         ctx_trigger_freeze(ctx);
3566     }
3567
3568     if (selection_method[0] == '\0') {
3569         xlate_default_select_group(ctx, group);
3570     } else if (!strcasecmp("hash", selection_method)) {
3571         xlate_hash_fields_select_group(ctx, group);
3572     } else {
3573         /* Parsing of groups should ensure this never happens */
3574         OVS_NOT_REACHED();
3575     }
3576 }
3577
3578 static void
3579 xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
3580 {
3581     bool was_in_group = ctx->in_group;
3582     ctx->in_group = true;
3583
3584     switch (group_dpif_get_type(group)) {
3585     case OFPGT11_ALL:
3586     case OFPGT11_INDIRECT:
3587         xlate_all_group(ctx, group);
3588         break;
3589     case OFPGT11_SELECT:
3590         xlate_select_group(ctx, group);
3591         break;
3592     case OFPGT11_FF:
3593         xlate_ff_group(ctx, group);
3594         break;
3595     default:
3596         OVS_NOT_REACHED();
3597     }
3598     group_dpif_unref(group);
3599
3600     ctx->in_group = was_in_group;
3601 }
3602
3603 static bool
3604 xlate_group_action(struct xlate_ctx *ctx, uint32_t group_id)
3605 {
3606     if (xlate_resubmit_resource_check(ctx)) {
3607         struct group_dpif *group;
3608         bool got_group;
3609
3610         got_group = group_dpif_lookup(ctx->xbridge->ofproto, group_id, &group);
3611         if (got_group) {
3612             xlate_group_action__(ctx, group);
3613         } else {
3614             return true;
3615         }
3616     }
3617
3618     return false;
3619 }
3620
3621 static void
3622 xlate_ofpact_resubmit(struct xlate_ctx *ctx,
3623                       const struct ofpact_resubmit *resubmit)
3624 {
3625     ofp_port_t in_port;
3626     uint8_t table_id;
3627     bool may_packet_in = false;
3628     bool honor_table_miss = false;
3629
3630     if (ctx->rule && rule_dpif_is_internal(ctx->rule)) {
3631         /* Still allow missed packets to be sent to the controller
3632          * if resubmitting from an internal table. */
3633         may_packet_in = true;
3634         honor_table_miss = true;
3635     }
3636
3637     in_port = resubmit->in_port;
3638     if (in_port == OFPP_IN_PORT) {
3639         in_port = ctx->xin->flow.in_port.ofp_port;
3640     }
3641
3642     table_id = resubmit->table_id;
3643     if (table_id == 255) {
3644         table_id = ctx->table_id;
3645     }
3646
3647     xlate_table_action(ctx, in_port, table_id, may_packet_in,
3648                        honor_table_miss);
3649 }
3650
3651 static void
3652 flood_packets(struct xlate_ctx *ctx, bool all)
3653 {
3654     const struct xport *xport;
3655
3656     HMAP_FOR_EACH (xport, ofp_node, &ctx->xbridge->xports) {
3657         if (xport->ofp_port == ctx->xin->flow.in_port.ofp_port) {
3658             continue;
3659         }
3660
3661         if (all) {
3662             compose_output_action__(ctx, xport->ofp_port, NULL, false);
3663         } else if (!(xport->config & OFPUTIL_PC_NO_FLOOD)) {
3664             compose_output_action(ctx, xport->ofp_port, NULL);
3665         }
3666     }
3667
3668     ctx->nf_output_iface = NF_OUT_FLOOD;
3669 }
3670
3671 static void
3672 execute_controller_action(struct xlate_ctx *ctx, int len,
3673                           enum ofp_packet_in_reason reason,
3674                           uint16_t controller_id,
3675                           const uint8_t *userdata, size_t userdata_len)
3676 {
3677     struct dp_packet_batch batch;
3678     struct dp_packet *packet;
3679
3680     ctx->xout->slow |= SLOW_CONTROLLER;
3681     xlate_commit_actions(ctx);
3682     if (!ctx->xin->packet) {
3683         return;
3684     }
3685
3686     packet = dp_packet_clone(ctx->xin->packet);
3687     packet_batch_init_packet(&batch, packet);
3688     odp_execute_actions(NULL, &batch, false,
3689                         ctx->odp_actions->data, ctx->odp_actions->size, NULL);
3690
3691     /* A packet sent by an action in a table-miss rule is considered an
3692      * explicit table miss.  OpenFlow before 1.3 doesn't have that concept so
3693      * it will get translated back to OFPR_ACTION for those versions. */
3694     if (reason == OFPR_ACTION
3695         && ctx->rule && rule_dpif_is_table_miss(ctx->rule)) {
3696         reason = OFPR_EXPLICIT_MISS;
3697     }
3698
3699     size_t packet_len = dp_packet_size(packet);
3700
3701     struct ofproto_async_msg *am = xmalloc(sizeof *am);
3702     *am = (struct ofproto_async_msg) {
3703         .controller_id = controller_id,
3704         .oam = OAM_PACKET_IN,
3705         .pin = {
3706             .up = {
3707                 .public = {
3708                     .packet = dp_packet_steal_data(packet),
3709                     .packet_len = packet_len,
3710                     .reason = reason,
3711                     .table_id = ctx->table_id,
3712                     .cookie = ctx->rule_cookie,
3713                     .userdata = (userdata_len
3714                                  ? xmemdup(userdata, userdata_len)
3715                                  : NULL),
3716                     .userdata_len = userdata_len,
3717                 }
3718             },
3719             .max_len = len,
3720         },
3721     };
3722     flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
3723
3724     ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
3725     dp_packet_delete(packet);
3726 }
3727
3728 static void
3729 emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
3730 {
3731     struct ofproto_async_msg *am = xmalloc(sizeof *am);
3732     *am = (struct ofproto_async_msg) {
3733         .controller_id = ctx->pause->controller_id,
3734         .oam = OAM_PACKET_IN,
3735         .pin = {
3736             .up = {
3737                 .public = {
3738                     .userdata = xmemdup(ctx->pause->userdata,
3739                                         ctx->pause->userdata_len),
3740                     .userdata_len = ctx->pause->userdata_len,
3741                     .packet = xmemdup(dp_packet_data(ctx->xin->packet),
3742                                       dp_packet_size(ctx->xin->packet)),
3743                     .packet_len = dp_packet_size(ctx->xin->packet),
3744                     .reason = ctx->pause->reason,
3745                 },
3746                 .bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
3747                 .stack = xmemdup(state->stack,
3748                                  state->n_stack * sizeof *state->stack),
3749                 .n_stack = state->n_stack,
3750                 .mirrors = state->mirrors,
3751                 .conntracked = state->conntracked,
3752                 .actions = xmemdup(state->ofpacts, state->ofpacts_len),
3753                 .actions_len = state->ofpacts_len,
3754                 .action_set = xmemdup(state->action_set,
3755                                       state->action_set_len),
3756                 .action_set_len = state->action_set_len,
3757             },
3758             .max_len = UINT16_MAX,
3759         },
3760     };
3761     flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
3762     ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
3763 }
3764
3765 static void
3766 finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
3767 {
3768     ovs_assert(ctx->freezing);
3769
3770     struct frozen_state state = {
3771         .table_id = table,
3772         .ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
3773         .stack = ctx->stack.data,
3774         .n_stack = ctx->stack.size / sizeof(union mf_subvalue),
3775         .mirrors = ctx->mirrors,
3776         .conntracked = ctx->conntracked,
3777         .ofpacts = ctx->frozen_actions.data,
3778         .ofpacts_len = ctx->frozen_actions.size,
3779         .action_set = ctx->action_set.data,
3780         .action_set_len = ctx->action_set.size,
3781     };
3782     frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
3783
3784     if (ctx->pause) {
3785         if (ctx->xin->packet) {
3786             emit_continuation(ctx, &state);
3787         }
3788     } else {
3789         /* Allocate a unique recirc id for the given metadata state in the
3790          * flow.  An existing id, with a new reference to the corresponding
3791          * recirculation context, will be returned if possible.
3792          * The life-cycle of this recirc id is managed by associating it
3793          * with the udpif key ('ukey') created for each new datapath flow. */
3794         uint32_t id = recirc_alloc_id_ctx(&state);
3795         if (!id) {
3796             XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
3797             ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
3798             return;
3799         }
3800         recirc_refs_add(&ctx->xout->recircs, id);
3801
3802         nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
3803     }
3804
3805     /* Undo changes done by freezing. */
3806     ctx_cancel_freeze(ctx);
3807 }
3808
3809 /* Called only when we're freezing. */
3810 static void
3811 finish_freezing(struct xlate_ctx *ctx)
3812 {
3813     xlate_commit_actions(ctx);
3814     finish_freezing__(ctx, 0);
3815 }
3816
3817 /* Fork the pipeline here. The current packet will continue processing the
3818  * current action list. A clone of the current packet will recirculate, skip
3819  * the remainder of the current action list and asynchronously resume pipeline
3820  * processing in 'table' with the current metadata and action set. */
3821 static void
3822 compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
3823 {
3824     ctx->freezing = true;
3825     finish_freezing__(ctx, table);
3826 }
3827
3828 static void
3829 compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls)
3830 {
3831     struct flow *flow = &ctx->xin->flow;
3832     int n;
3833
3834     ovs_assert(eth_type_mpls(mpls->ethertype));
3835
3836     n = flow_count_mpls_labels(flow, ctx->wc);
3837     if (!n) {
3838         xlate_commit_actions(ctx);
3839     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3840         if (ctx->xin->packet != NULL) {
3841             XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
3842                          "MPLS push action can't be performed as it would "
3843                          "have more MPLS LSEs than the %d supported.",
3844                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3845         }
3846         ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
3847         return;
3848     }
3849
3850     flow_push_mpls(flow, n, mpls->ethertype, ctx->wc);
3851 }
3852
3853 static void
3854 compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type)
3855 {
3856     struct flow *flow = &ctx->xin->flow;
3857     int n = flow_count_mpls_labels(flow, ctx->wc);
3858
3859     if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
3860         if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
3861             ctx->was_mpls = true;
3862         }
3863     } else if (n >= FLOW_MAX_MPLS_LABELS) {
3864         if (ctx->xin->packet != NULL) {
3865             XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an "
3866                          "MPLS pop action can't be performed as it has "
3867                          "more MPLS LSEs than the %d supported.",
3868                          ctx->xbridge->name, FLOW_MAX_MPLS_LABELS);
3869         }
3870         ctx->error = XLATE_TOO_MANY_MPLS_LABELS;
3871         ofpbuf_clear(ctx->odp_actions);
3872     }
3873 }
3874
3875 static bool
3876 compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids)
3877 {
3878     struct flow *flow = &ctx->xin->flow;
3879
3880     if (!is_ip_any(flow)) {
3881         return false;
3882     }
3883
3884     ctx->wc->masks.nw_ttl = 0xff;
3885     if (flow->nw_ttl > 1) {
3886         flow->nw_ttl--;
3887         return false;
3888     } else {
3889         size_t i;
3890
3891         for (i = 0; i < ids->n_controllers; i++) {
3892             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
3893                                       ids->cnt_ids[i], NULL, 0);
3894         }
3895
3896         /* Stop processing for current table. */
3897         return true;
3898     }
3899 }
3900
3901 static void
3902 compose_set_mpls_label_action(struct xlate_ctx *ctx, ovs_be32 label)
3903 {
3904     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3905         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_LABEL_MASK);
3906         set_mpls_lse_label(&ctx->xin->flow.mpls_lse[0], label);
3907     }
3908 }
3909
3910 static void
3911 compose_set_mpls_tc_action(struct xlate_ctx *ctx, uint8_t tc)
3912 {
3913     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3914         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TC_MASK);
3915         set_mpls_lse_tc(&ctx->xin->flow.mpls_lse[0], tc);
3916     }
3917 }
3918
3919 static void
3920 compose_set_mpls_ttl_action(struct xlate_ctx *ctx, uint8_t ttl)
3921 {
3922     if (eth_type_mpls(ctx->xin->flow.dl_type)) {
3923         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3924         set_mpls_lse_ttl(&ctx->xin->flow.mpls_lse[0], ttl);
3925     }
3926 }
3927
3928 static bool
3929 compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
3930 {
3931     struct flow *flow = &ctx->xin->flow;
3932
3933     if (eth_type_mpls(flow->dl_type)) {
3934         uint8_t ttl = mpls_lse_to_ttl(flow->mpls_lse[0]);
3935
3936         ctx->wc->masks.mpls_lse[0] |= htonl(MPLS_TTL_MASK);
3937         if (ttl > 1) {
3938             ttl--;
3939             set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
3940             return false;
3941         } else {
3942             execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
3943                                       NULL, 0);
3944         }
3945     }
3946
3947     /* Stop processing for current table. */
3948     return true;
3949 }
3950
3951 static void
3952 xlate_output_action(struct xlate_ctx *ctx,
3953                     ofp_port_t port, uint16_t max_len, bool may_packet_in)
3954 {
3955     ofp_port_t prev_nf_output_iface = ctx->nf_output_iface;
3956
3957     ctx->nf_output_iface = NF_OUT_DROP;
3958
3959     switch (port) {
3960     case OFPP_IN_PORT:
3961         compose_output_action(ctx, ctx->xin->flow.in_port.ofp_port, NULL);
3962         break;
3963     case OFPP_TABLE:
3964         xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
3965                            0, may_packet_in, true);
3966         break;
3967     case OFPP_NORMAL:
3968         xlate_normal(ctx);
3969         break;
3970     case OFPP_FLOOD:
3971         flood_packets(ctx,  false);
3972         break;
3973     case OFPP_ALL:
3974         flood_packets(ctx, true);
3975         break;
3976     case OFPP_CONTROLLER:
3977         execute_controller_action(ctx, max_len,
3978                                   (ctx->in_group ? OFPR_GROUP
3979                                    : ctx->in_action_set ? OFPR_ACTION_SET
3980                                    : OFPR_ACTION),
3981                                   0, NULL, 0);
3982         break;
3983     case OFPP_NONE:
3984         break;
3985     case OFPP_LOCAL:
3986     default:
3987         if (port != ctx->xin->flow.in_port.ofp_port) {
3988             compose_output_action(ctx, port, NULL);
3989         } else {
3990             xlate_report(ctx, "skipping output to input port");
3991         }
3992         break;
3993     }
3994
3995     if (prev_nf_output_iface == NF_OUT_FLOOD) {
3996         ctx->nf_output_iface = NF_OUT_FLOOD;
3997     } else if (ctx->nf_output_iface == NF_OUT_DROP) {
3998         ctx->nf_output_iface = prev_nf_output_iface;
3999     } else if (prev_nf_output_iface != NF_OUT_DROP &&
4000                ctx->nf_output_iface != NF_OUT_FLOOD) {
4001         ctx->nf_output_iface = NF_OUT_MULTI;
4002     }
4003 }
4004
4005 static void
4006 xlate_output_reg_action(struct xlate_ctx *ctx,
4007                         const struct ofpact_output_reg *or)
4008 {
4009     uint64_t port = mf_get_subfield(&or->src, &ctx->xin->flow);
4010     if (port <= UINT16_MAX) {
4011         union mf_subvalue value;
4012
4013         memset(&value, 0xff, sizeof value);
4014         mf_write_subfield_flow(&or->src, &value, &ctx->wc->masks);
4015         xlate_output_action(ctx, u16_to_ofp(port),
4016                             or->max_len, false);
4017     }
4018 }
4019
4020 static void
4021 xlate_output_trunc_action(struct xlate_ctx *ctx,
4022                     ofp_port_t port, uint32_t max_len)
4023 {
4024     bool support_trunc = ctx->xbridge->support.trunc;
4025     struct ovs_action_trunc *trunc;
4026     char name[OFP_MAX_PORT_NAME_LEN];
4027
4028     switch (port) {
4029     case OFPP_TABLE:
4030     case OFPP_NORMAL:
4031     case OFPP_FLOOD:
4032     case OFPP_ALL:
4033     case OFPP_CONTROLLER:
4034     case OFPP_NONE:
4035         ofputil_port_to_string(port, name, sizeof name);
4036         xlate_report(ctx, "output_trunc does not support port: %s", name);
4037         break;
4038     case OFPP_LOCAL:
4039     case OFPP_IN_PORT:
4040     default:
4041         if (port != ctx->xin->flow.in_port.ofp_port) {
4042             const struct xport *xport = get_ofp_port(ctx->xbridge, port);
4043
4044             if (xport == NULL || xport->odp_port == ODPP_NONE) {
4045                 /* Since truncate happens at its following output action, if
4046                  * the output port is a patch port, the behavior is somehow
4047                  * unpredicable. For simpilicity, disallow this case. */
4048                 ofputil_port_to_string(port, name, sizeof name);
4049                 XLATE_REPORT_ERROR(ctx, "bridge %s: "
4050                          "output_trunc does not support port: %s",
4051                          ctx->xbridge->name, name);
4052                 break;
4053             }
4054
4055             trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
4056                                 OVS_ACTION_ATTR_TRUNC,
4057                                 sizeof *trunc);
4058             trunc->max_len = max_len;
4059             xlate_output_action(ctx, port, max_len, false);
4060             if (!support_trunc) {
4061                 ctx->xout->slow |= SLOW_ACTION;
4062             }
4063         } else {
4064             xlate_report(ctx, "skipping output to input port");
4065         }
4066         break;
4067     }
4068 }
4069
4070 static void
4071 xlate_enqueue_action(struct xlate_ctx *ctx,
4072                      const struct ofpact_enqueue *enqueue)
4073 {
4074     ofp_port_t ofp_port = enqueue->port;
4075     uint32_t queue_id = enqueue->queue;
4076     uint32_t flow_priority, priority;
4077     int error;
4078
4079     /* Translate queue to priority. */
4080     error = dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &priority);
4081     if (error) {
4082         /* Fall back to ordinary output action. */
4083         xlate_output_action(ctx, enqueue->port, 0, false);
4084         return;
4085     }
4086
4087     /* Check output port. */
4088     if (ofp_port == OFPP_IN_PORT) {
4089         ofp_port = ctx->xin->flow.in_port.ofp_port;
4090     } else if (ofp_port == ctx->xin->flow.in_port.ofp_port) {
4091         return;
4092     }
4093
4094     /* Add datapath actions. */
4095     flow_priority = ctx->xin->flow.skb_priority;
4096     ctx->xin->flow.skb_priority = priority;
4097     compose_output_action(ctx, ofp_port, NULL);
4098     ctx->xin->flow.skb_priority = flow_priority;
4099
4100     /* Update NetFlow output port. */
4101     if (ctx->nf_output_iface == NF_OUT_DROP) {
4102         ctx->nf_output_iface = ofp_port;
4103     } else if (ctx->nf_output_iface != NF_OUT_FLOOD) {
4104         ctx->nf_output_iface = NF_OUT_MULTI;
4105     }
4106 }
4107
4108 static void
4109 xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
4110 {
4111     uint32_t skb_priority;
4112
4113     if (!dpif_queue_to_priority(ctx->xbridge->dpif, queue_id, &skb_priority)) {
4114         ctx->xin->flow.skb_priority = skb_priority;
4115     } else {
4116         /* Couldn't translate queue to a priority.  Nothing to do.  A warning
4117          * has already been logged. */
4118     }
4119 }
4120
4121 static bool
4122 slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
4123 {
4124     const struct xbridge *xbridge = xbridge_;
4125     struct xport *port;
4126
4127     switch (ofp_port) {
4128     case OFPP_IN_PORT:
4129     case OFPP_TABLE:
4130     case OFPP_NORMAL:
4131     case OFPP_FLOOD:
4132     case OFPP_ALL:
4133     case OFPP_NONE:
4134         return true;
4135     case OFPP_CONTROLLER: /* Not supported by the bundle action. */
4136         return false;
4137     default:
4138         port = get_ofp_port(xbridge, ofp_port);
4139         return port ? port->may_enable : false;
4140     }
4141 }
4142
4143 static void
4144 xlate_bundle_action(struct xlate_ctx *ctx,
4145                     const struct ofpact_bundle *bundle)
4146 {
4147     ofp_port_t port;
4148
4149     port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
4150                           CONST_CAST(struct xbridge *, ctx->xbridge));
4151     if (bundle->dst.field) {
4152         nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
4153     } else {
4154         xlate_output_action(ctx, port, 0, false);
4155     }
4156 }
4157
4158 static void
4159 xlate_learn_action__(struct xlate_ctx *ctx, const struct ofpact_learn *learn,
4160                      struct ofputil_flow_mod *fm, struct ofpbuf *ofpacts)
4161 {
4162     learn_execute(learn, &ctx->xin->flow, fm, ofpacts);
4163     if (ctx->xin->may_learn) {
4164         ofproto_dpif_flow_mod(ctx->xbridge->ofproto, fm);
4165     }
4166 }
4167
4168 static void
4169 xlate_learn_action(struct xlate_ctx *ctx, const struct ofpact_learn *learn)
4170 {
4171     learn_mask(learn, ctx->wc);
4172
4173     if (ctx->xin->xcache) {
4174         struct xc_entry *entry;
4175
4176         entry = xlate_cache_add_entry(ctx->xin->xcache, XC_LEARN);
4177         entry->u.learn.ofproto = ctx->xbridge->ofproto;
4178         entry->u.learn.fm = xmalloc(sizeof *entry->u.learn.fm);
4179         entry->u.learn.ofpacts = ofpbuf_new(64);
4180         xlate_learn_action__(ctx, learn, entry->u.learn.fm,
4181                              entry->u.learn.ofpacts);
4182     } else if (ctx->xin->may_learn) {
4183         uint64_t ofpacts_stub[1024 / 8];
4184         struct ofputil_flow_mod fm;
4185         struct ofpbuf ofpacts;
4186
4187         ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
4188         xlate_learn_action__(ctx, learn, &fm, &ofpacts);
4189         ofpbuf_uninit(&ofpacts);
4190     }
4191 }
4192
4193 static void
4194 xlate_fin_timeout__(struct rule_dpif *rule, uint16_t tcp_flags,
4195                     uint16_t idle_timeout, uint16_t hard_timeout)
4196 {
4197     if (tcp_flags & (TCP_FIN | TCP_RST)) {
4198         rule_dpif_reduce_timeouts(rule, idle_timeout, hard_timeout);
4199     }
4200 }
4201
4202 static void
4203 xlate_fin_timeout(struct xlate_ctx *ctx,
4204                   const struct ofpact_fin_timeout *oft)
4205 {
4206     if (ctx->rule) {
4207         xlate_fin_timeout__(ctx->rule, ctx->xin->tcp_flags,
4208                             oft->fin_idle_timeout, oft->fin_hard_timeout);
4209         if (ctx->xin->xcache) {
4210             struct xc_entry *entry;
4211
4212             entry = xlate_cache_add_entry(ctx->xin->xcache, XC_FIN_TIMEOUT);
4213             /* XC_RULE already holds a reference on the rule, none is taken
4214              * here. */
4215             entry->u.fin.rule = ctx->rule;
4216             entry->u.fin.idle = oft->fin_idle_timeout;
4217             entry->u.fin.hard = oft->fin_hard_timeout;
4218         }
4219     }
4220 }
4221
4222 static void
4223 xlate_sample_action(struct xlate_ctx *ctx,
4224                     const struct ofpact_sample *os)
4225 {
4226     odp_port_t output_odp_port = ODPP_NONE;
4227     odp_port_t tunnel_out_port = ODPP_NONE;
4228     struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
4229     bool emit_set_tunnel = false;
4230
4231     if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
4232         return;
4233     }
4234
4235     /* Scale the probability from 16-bit to 32-bit while representing
4236      * the same percentage. */
4237     uint32_t probability = (os->probability << 16) | os->probability;
4238
4239     if (!ctx->xbridge->support.variable_length_userdata) {
4240         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4241
4242         VLOG_ERR_RL(&rl, "ignoring NXAST_SAMPLE action because datapath "
4243                     "lacks support (needs Linux 3.10+ or kernel module from "
4244                     "OVS 1.11+)");
4245         return;
4246     }
4247
4248     /* If ofp_port in flow sample action is equel to ofp_port,
4249      * this sample action is a input port action. */
4250     if (os->sampling_port != OFPP_NONE &&
4251         os->sampling_port != ctx->xin->flow.in_port.ofp_port) {
4252         output_odp_port = ofp_port_to_odp_port(ctx->xbridge,
4253                                                os->sampling_port);
4254         if (output_odp_port == ODPP_NONE) {
4255             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4256             VLOG_WARN_RL(&rl, "can't use unknown port %d in flow sample "
4257                          "action", os->sampling_port);
4258             return;
4259         }
4260
4261         if (dpif_ipfix_get_flow_exporter_tunnel_sampling(ipfix,
4262                                                          os->collector_set_id)
4263             && dpif_ipfix_get_tunnel_port(ipfix, output_odp_port)) {
4264             tunnel_out_port = output_odp_port;
4265             emit_set_tunnel = true;
4266         }
4267     }
4268
4269      xlate_commit_actions(ctx);
4270     /* If 'emit_set_tunnel', sample(sampling_port=1) would translate
4271      * into datapath sample action set(tunnel(...)), sample(...) and
4272      * it is used for sampling egress tunnel information. */
4273     if (emit_set_tunnel) {
4274         const struct xport *xport = get_ofp_port(ctx->xbridge,
4275                                                  os->sampling_port);
4276
4277         if (xport && xport->is_tunnel) {
4278             struct flow *flow = &ctx->xin->flow;
4279             tnl_port_send(xport->ofport, flow, ctx->wc);
4280             if (!ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
4281                 struct flow_tnl flow_tnl = flow->tunnel;
4282
4283                 commit_odp_tunnel_action(flow, &ctx->base_flow,
4284                                          ctx->odp_actions);
4285                 flow->tunnel = flow_tnl;
4286             }
4287         } else {
4288             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
4289             VLOG_WARN_RL(&rl, "sampling_port:%d should be a tunnel port.",
4290                          os->sampling_port);
4291         }
4292     }
4293
4294     union user_action_cookie cookie = {
4295         .flow_sample = {
4296             .type = USER_ACTION_COOKIE_FLOW_SAMPLE,
4297             .probability = os->probability,
4298             .collector_set_id = os->collector_set_id,
4299             .obs_domain_id = os->obs_domain_id,
4300             .obs_point_id = os->obs_point_id,
4301             .output_odp_port = output_odp_port,
4302         }
4303     };
4304     compose_sample_action(ctx, probability, &cookie, sizeof cookie.flow_sample,
4305                           tunnel_out_port, false);
4306 }
4307
4308 static bool
4309 may_receive(const struct xport *xport, struct xlate_ctx *ctx)
4310 {
4311     if (xport->config & (is_stp(&ctx->xin->flow)
4312                          ? OFPUTIL_PC_NO_RECV_STP
4313                          : OFPUTIL_PC_NO_RECV)) {
4314         return false;
4315     }
4316
4317     /* Only drop packets here if both forwarding and learning are
4318      * disabled.  If just learning is enabled, we need to have
4319      * OFPP_NORMAL and the learning action have a look at the packet
4320      * before we can drop it. */
4321     if ((!xport_stp_forward_state(xport) && !xport_stp_learn_state(xport)) ||
4322         (!xport_rstp_forward_state(xport) && !xport_rstp_learn_state(xport))) {
4323         return false;
4324     }
4325
4326     return true;
4327 }
4328
4329 static void
4330 xlate_write_actions__(struct xlate_ctx *ctx,
4331                       const struct ofpact *ofpacts, size_t ofpacts_len)
4332 {
4333     /* Maintain actset_output depending on the contents of the action set:
4334      *
4335      *   - OFPP_UNSET, if there is no "output" action.
4336      *
4337      *   - The output port, if there is an "output" action and no "group"
4338      *     action.
4339      *
4340      *   - OFPP_UNSET, if there is a "group" action.
4341      */
4342     if (!ctx->action_set_has_group) {
4343         const struct ofpact *a;
4344         OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4345             if (a->type == OFPACT_OUTPUT) {
4346                 ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port;
4347             } else if (a->type == OFPACT_GROUP) {
4348                 ctx->xin->flow.actset_output = OFPP_UNSET;
4349                 ctx->action_set_has_group = true;
4350                 break;
4351             }
4352         }
4353     }
4354
4355     ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len);
4356 }
4357
4358 static void
4359 xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a)
4360 {
4361     xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a));
4362 }
4363
4364 static void
4365 xlate_action_set(struct xlate_ctx *ctx)
4366 {
4367     uint64_t action_list_stub[1024 / 64];
4368     struct ofpbuf action_list;
4369
4370     ctx->in_action_set = true;
4371     ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
4372     ofpacts_execute_action_set(&action_list, &ctx->action_set);
4373     /* Clear the action set, as it is not needed any more. */
4374     ofpbuf_clear(&ctx->action_set);
4375     do_xlate_actions(action_list.data, action_list.size, ctx);
4376     ctx->in_action_set = false;
4377     ofpbuf_uninit(&action_list);
4378 }
4379
4380 static void
4381 freeze_put_unroll_xlate(struct xlate_ctx *ctx)
4382 {
4383     struct ofpact_unroll_xlate *unroll = ctx->frozen_actions.header;
4384
4385     /* Restore the table_id and rule cookie for a potential PACKET
4386      * IN if needed. */
4387     if (!unroll ||
4388         (ctx->table_id != unroll->rule_table_id
4389          || ctx->rule_cookie != unroll->rule_cookie)) {
4390         unroll = ofpact_put_UNROLL_XLATE(&ctx->frozen_actions);
4391         unroll->rule_table_id = ctx->table_id;
4392         unroll->rule_cookie = ctx->rule_cookie;
4393         ctx->frozen_actions.header = unroll;
4394     }
4395 }
4396
4397
4398 /* Copy actions 'a' through 'end' to ctx->frozen_actions, which will be
4399  * executed after thawing.  Inserts an UNROLL_XLATE action, if none is already
4400  * present, before any action that may depend on the current table ID or flow
4401  * cookie. */
4402 static void
4403 freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end,
4404                       struct xlate_ctx *ctx)
4405 {
4406     for (; a < end; a = ofpact_next(a)) {
4407         switch (a->type) {
4408         case OFPACT_OUTPUT_REG:
4409         case OFPACT_OUTPUT_TRUNC:
4410         case OFPACT_GROUP:
4411         case OFPACT_OUTPUT:
4412         case OFPACT_CONTROLLER:
4413         case OFPACT_DEC_MPLS_TTL:
4414         case OFPACT_DEC_TTL:
4415             /* These actions may generate asynchronous messages, which include
4416              * table ID and flow cookie information. */
4417             freeze_put_unroll_xlate(ctx);
4418             break;
4419
4420         case OFPACT_RESUBMIT:
4421             if (ofpact_get_RESUBMIT(a)->table_id == 0xff) {
4422                 /* This resubmit action is relative to the current table, so we
4423                  * need to track what table that is.*/
4424                 freeze_put_unroll_xlate(ctx);
4425             }
4426             break;
4427
4428         case OFPACT_SET_TUNNEL:
4429         case OFPACT_REG_MOVE:
4430         case OFPACT_SET_FIELD:
4431         case OFPACT_STACK_PUSH:
4432         case OFPACT_STACK_POP:
4433         case OFPACT_LEARN:
4434         case OFPACT_WRITE_METADATA:
4435         case OFPACT_GOTO_TABLE:
4436         case OFPACT_ENQUEUE:
4437         case OFPACT_SET_VLAN_VID:
4438         case OFPACT_SET_VLAN_PCP:
4439         case OFPACT_STRIP_VLAN:
4440         case OFPACT_PUSH_VLAN:
4441         case OFPACT_SET_ETH_SRC:
4442         case OFPACT_SET_ETH_DST:
4443         case OFPACT_SET_IPV4_SRC:
4444         case OFPACT_SET_IPV4_DST:
4445         case OFPACT_SET_IP_DSCP:
4446         case OFPACT_SET_IP_ECN:
4447         case OFPACT_SET_IP_TTL:
4448         case OFPACT_SET_L4_SRC_PORT:
4449         case OFPACT_SET_L4_DST_PORT:
4450         case OFPACT_SET_QUEUE:
4451         case OFPACT_POP_QUEUE:
4452         case OFPACT_PUSH_MPLS:
4453         case OFPACT_POP_MPLS:
4454         case OFPACT_SET_MPLS_LABEL:
4455         case OFPACT_SET_MPLS_TC:
4456         case OFPACT_SET_MPLS_TTL:
4457         case OFPACT_MULTIPATH:
4458         case OFPACT_BUNDLE:
4459         case OFPACT_EXIT:
4460         case OFPACT_UNROLL_XLATE:
4461         case OFPACT_FIN_TIMEOUT:
4462         case OFPACT_CLEAR_ACTIONS:
4463         case OFPACT_WRITE_ACTIONS:
4464         case OFPACT_METER:
4465         case OFPACT_SAMPLE:
4466         case OFPACT_DEBUG_RECIRC:
4467         case OFPACT_CT:
4468         case OFPACT_NAT:
4469             /* These may not generate PACKET INs. */
4470             break;
4471
4472         case OFPACT_NOTE:
4473         case OFPACT_CONJUNCTION:
4474             /* These need not be copied for restoration. */
4475             continue;
4476         }
4477         /* Copy the action over. */
4478         ofpbuf_put(&ctx->frozen_actions, a, OFPACT_ALIGN(a->len));
4479     }
4480 }
4481
4482 static void
4483 put_ct_mark(const struct flow *flow, struct ofpbuf *odp_actions,
4484             struct flow_wildcards *wc)
4485 {
4486     if (wc->masks.ct_mark) {
4487         struct {
4488             uint32_t key;
4489             uint32_t mask;
4490         } *odp_ct_mark;
4491
4492         odp_ct_mark = nl_msg_put_unspec_uninit(odp_actions, OVS_CT_ATTR_MARK,
4493                                                sizeof(*odp_ct_mark));
4494         odp_ct_mark->key = flow->ct_mark & wc->masks.ct_mark;
4495         odp_ct_mark->mask = wc->masks.ct_mark;
4496     }
4497 }
4498
4499 static void
4500 put_ct_label(const struct flow *flow, struct ofpbuf *odp_actions,
4501              struct flow_wildcards *wc)
4502 {
4503     if (!ovs_u128_is_zero(wc->masks.ct_label)) {
4504         struct {
4505             ovs_u128 key;
4506             ovs_u128 mask;
4507         } *odp_ct_label;
4508
4509         odp_ct_label = nl_msg_put_unspec_uninit(odp_actions,
4510                                                 OVS_CT_ATTR_LABELS,
4511                                                 sizeof(*odp_ct_label));
4512         odp_ct_label->key = ovs_u128_and(flow->ct_label, wc->masks.ct_label);
4513         odp_ct_label->mask = wc->masks.ct_label;
4514     }
4515 }
4516
4517 static void
4518 put_ct_helper(struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc)
4519 {
4520     if (ofc->alg) {
4521         if (ofc->alg == IPPORT_FTP) {
4522             nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "ftp");
4523         } else {
4524             VLOG_WARN("Cannot serialize ct_helper %d\n", ofc->alg);
4525         }
4526     }
4527 }
4528
4529 static void
4530 put_ct_nat(struct xlate_ctx *ctx)
4531 {
4532     struct ofpact_nat *ofn = ctx->ct_nat_action;
4533     size_t nat_offset;
4534
4535     if (!ofn) {
4536         return;
4537     }
4538
4539     nat_offset = nl_msg_start_nested(ctx->odp_actions, OVS_CT_ATTR_NAT);
4540     if (ofn->flags & NX_NAT_F_SRC || ofn->flags & NX_NAT_F_DST) {
4541         nl_msg_put_flag(ctx->odp_actions, ofn->flags & NX_NAT_F_SRC
4542                         ? OVS_NAT_ATTR_SRC : OVS_NAT_ATTR_DST);
4543         if (ofn->flags & NX_NAT_F_PERSISTENT) {
4544             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PERSISTENT);
4545         }
4546         if (ofn->flags & NX_NAT_F_PROTO_HASH) {
4547             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_HASH);
4548         } else if (ofn->flags & NX_NAT_F_PROTO_RANDOM) {
4549             nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_RANDOM);
4550         }
4551         if (ofn->range_af == AF_INET) {
4552             nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
4553                            ofn->range.addr.ipv4.min);
4554             if (ofn->range.addr.ipv4.max &&
4555                 (ntohl(ofn->range.addr.ipv4.max)
4556                  > ntohl(ofn->range.addr.ipv4.min))) {
4557                 nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
4558                                 ofn->range.addr.ipv4.max);
4559             }
4560         } else if (ofn->range_af == AF_INET6) {
4561             nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN,
4562                               &ofn->range.addr.ipv6.min,
4563                               sizeof ofn->range.addr.ipv6.min);
4564             if (!ipv6_mask_is_any(&ofn->range.addr.ipv6.max) &&
4565                 memcmp(&ofn->range.addr.ipv6.max, &ofn->range.addr.ipv6.min,
4566                        sizeof ofn->range.addr.ipv6.max) > 0) {
4567                 nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX,
4568                                   &ofn->range.addr.ipv6.max,
4569                                   sizeof ofn->range.addr.ipv6.max);
4570             }
4571         }
4572         if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
4573             nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MIN,
4574                            ofn->range.proto.min);
4575             if (ofn->range.proto.max &&
4576                 ofn->range.proto.max > ofn->range.proto.min) {
4577                 nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MAX,
4578                                ofn->range.proto.max);
4579             }
4580         }
4581     }
4582     nl_msg_end_nested(ctx->odp_actions, nat_offset);
4583 }
4584
4585 static void
4586 compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
4587 {
4588     ovs_u128 old_ct_label = ctx->base_flow.ct_label;
4589     ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
4590     uint32_t old_ct_mark = ctx->base_flow.ct_mark;
4591     uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
4592     size_t ct_offset;
4593     uint16_t zone;
4594
4595     /* Ensure that any prior actions are applied before composing the new
4596      * conntrack action. */
4597     xlate_commit_actions(ctx);
4598
4599     /* Process nested actions first, to populate the key. */
4600     ctx->ct_nat_action = NULL;
4601     ctx->wc->masks.ct_mark = 0;
4602     ctx->wc->masks.ct_label.u64.hi = ctx->wc->masks.ct_label.u64.lo = 0;
4603     do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx);
4604
4605     if (ofc->zone_src.field) {
4606         zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow);
4607     } else {
4608         zone = ofc->zone_imm;
4609     }
4610
4611     ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT);
4612     if (ofc->flags & NX_CT_F_COMMIT) {
4613         nl_msg_put_flag(ctx->odp_actions, OVS_CT_ATTR_COMMIT);
4614     }
4615     nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
4616     put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
4617     put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
4618     put_ct_helper(ctx->odp_actions, ofc);
4619     put_ct_nat(ctx);
4620     ctx->ct_nat_action = NULL;
4621     nl_msg_end_nested(ctx->odp_actions, ct_offset);
4622
4623     /* Restore the original ct fields in the key. These should only be exposed
4624      * after recirculation to another table. */
4625     ctx->base_flow.ct_mark = old_ct_mark;
4626     ctx->wc->masks.ct_mark = old_ct_mark_mask;
4627     ctx->base_flow.ct_label = old_ct_label;
4628     ctx->wc->masks.ct_label = old_ct_label_mask;
4629
4630     if (ofc->recirc_table == NX_CT_RECIRC_NONE) {
4631         /* If we do not recirculate as part of this action, hide the results of
4632          * connection tracking from subsequent recirculations. */
4633         ctx->conntracked = false;
4634     } else {
4635         /* Use ct_* fields from datapath during recirculation upcall. */
4636         ctx->conntracked = true;
4637         compose_recirculate_and_fork(ctx, ofc->recirc_table);
4638     }
4639 }
4640
4641 static void
4642 recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
4643 {
4644     /* No need to recirculate if already exiting. */
4645     if (ctx->exit) {
4646         return;
4647     }
4648
4649     /* Do not consider recirculating unless the packet was previously MPLS. */
4650     if (!ctx->was_mpls) {
4651         return;
4652     }
4653
4654     /* Special case these actions, only recirculating if necessary.
4655      * This avoids the overhead of recirculation in common use-cases.
4656      */
4657     switch (a->type) {
4658
4659     /* Output actions  do not require recirculation. */
4660     case OFPACT_OUTPUT:
4661     case OFPACT_OUTPUT_TRUNC:
4662     case OFPACT_ENQUEUE:
4663     case OFPACT_OUTPUT_REG:
4664     /* Set actions that don't touch L3+ fields do not require recirculation. */
4665     case OFPACT_SET_VLAN_VID:
4666     case OFPACT_SET_VLAN_PCP:
4667     case OFPACT_SET_ETH_SRC:
4668     case OFPACT_SET_ETH_DST:
4669     case OFPACT_SET_TUNNEL:
4670     case OFPACT_SET_QUEUE:
4671     /* If actions of a group require recirculation that can be detected
4672      * when translating them. */
4673     case OFPACT_GROUP:
4674         return;
4675
4676     /* Set field that don't touch L3+ fields don't require recirculation. */
4677     case OFPACT_SET_FIELD:
4678         if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
4679             break;
4680         }
4681         return;
4682
4683     /* For simplicity, recirculate in all other cases. */
4684     case OFPACT_CONTROLLER:
4685     case OFPACT_BUNDLE:
4686     case OFPACT_STRIP_VLAN:
4687     case OFPACT_PUSH_VLAN:
4688     case OFPACT_SET_IPV4_SRC:
4689     case OFPACT_SET_IPV4_DST:
4690     case OFPACT_SET_IP_DSCP:
4691     case OFPACT_SET_IP_ECN:
4692     case OFPACT_SET_IP_TTL:
4693     case OFPACT_SET_L4_SRC_PORT:
4694     case OFPACT_SET_L4_DST_PORT:
4695     case OFPACT_REG_MOVE:
4696     case OFPACT_STACK_PUSH:
4697     case OFPACT_STACK_POP:
4698     case OFPACT_DEC_TTL:
4699     case OFPACT_SET_MPLS_LABEL:
4700     case OFPACT_SET_MPLS_TC:
4701     case OFPACT_SET_MPLS_TTL:
4702     case OFPACT_DEC_MPLS_TTL:
4703     case OFPACT_PUSH_MPLS:
4704     case OFPACT_POP_MPLS:
4705     case OFPACT_POP_QUEUE:
4706     case OFPACT_FIN_TIMEOUT:
4707     case OFPACT_RESUBMIT:
4708     case OFPACT_LEARN:
4709     case OFPACT_CONJUNCTION:
4710     case OFPACT_MULTIPATH:
4711     case OFPACT_NOTE:
4712     case OFPACT_EXIT:
4713     case OFPACT_SAMPLE:
4714     case OFPACT_UNROLL_XLATE:
4715     case OFPACT_CT:
4716     case OFPACT_NAT:
4717     case OFPACT_DEBUG_RECIRC:
4718     case OFPACT_METER:
4719     case OFPACT_CLEAR_ACTIONS:
4720     case OFPACT_WRITE_ACTIONS:
4721     case OFPACT_WRITE_METADATA:
4722     case OFPACT_GOTO_TABLE:
4723     default:
4724         break;
4725     }
4726
4727     /* Recirculate */
4728     ctx_trigger_freeze(ctx);
4729 }
4730
4731 static void
4732 do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
4733                  struct xlate_ctx *ctx)
4734 {
4735     struct flow_wildcards *wc = ctx->wc;
4736     struct flow *flow = &ctx->xin->flow;
4737     const struct ofpact *a;
4738
4739     if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
4740         tnl_neigh_snoop(flow, wc, ctx->xbridge->name);
4741     }
4742     /* dl_type already in the mask, not set below. */
4743
4744     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
4745         struct ofpact_controller *controller;
4746         const struct ofpact_metadata *metadata;
4747         const struct ofpact_set_field *set_field;
4748         const struct mf_field *mf;
4749
4750         if (ctx->error) {
4751             break;
4752         }
4753
4754         recirc_for_mpls(a, ctx);
4755
4756         if (ctx->exit) {
4757             /* Check if need to store the remaining actions for later
4758              * execution. */
4759             if (ctx->freezing) {
4760                 freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len),
4761                                       ctx);
4762             }
4763             break;
4764         }
4765
4766         switch (a->type) {
4767         case OFPACT_OUTPUT:
4768             xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
4769                                 ofpact_get_OUTPUT(a)->max_len, true);
4770             break;
4771
4772         case OFPACT_GROUP:
4773             if (xlate_group_action(ctx, ofpact_get_GROUP(a)->group_id)) {
4774                 /* Group could not be found. */
4775                 return;
4776             }
4777             break;
4778
4779         case OFPACT_CONTROLLER:
4780             controller = ofpact_get_CONTROLLER(a);
4781             if (controller->pause) {
4782                 ctx->pause = controller;
4783                 ctx->xout->slow |= SLOW_CONTROLLER;
4784                 ctx_trigger_freeze(ctx);
4785                 a = ofpact_next(a);
4786             } else {
4787                 execute_controller_action(ctx, controller->max_len,
4788                                           controller->reason,
4789                                           controller->controller_id,
4790                                           controller->userdata,
4791                                           controller->userdata_len);
4792             }
4793             break;
4794
4795         case OFPACT_ENQUEUE:
4796             memset(&wc->masks.skb_priority, 0xff,
4797                    sizeof wc->masks.skb_priority);
4798             xlate_enqueue_action(ctx, ofpact_get_ENQUEUE(a));
4799             break;
4800
4801         case OFPACT_SET_VLAN_VID:
4802             wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
4803             if (flow->vlan_tci & htons(VLAN_CFI) ||
4804                 ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
4805                 flow->vlan_tci &= ~htons(VLAN_VID_MASK);
4806                 flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
4807                                    | htons(VLAN_CFI));
4808             }
4809             break;
4810
4811         case OFPACT_SET_VLAN_PCP:
4812             wc->masks.vlan_tci |= htons(VLAN_PCP_MASK | VLAN_CFI);
4813             if (flow->vlan_tci & htons(VLAN_CFI) ||
4814                 ofpact_get_SET_VLAN_PCP(a)->push_vlan_if_needed) {
4815                 flow->vlan_tci &= ~htons(VLAN_PCP_MASK);
4816                 flow->vlan_tci |= htons((ofpact_get_SET_VLAN_PCP(a)->vlan_pcp
4817                                          << VLAN_PCP_SHIFT) | VLAN_CFI);
4818             }
4819             break;
4820
4821         case OFPACT_STRIP_VLAN:
4822             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4823             flow->vlan_tci = htons(0);
4824             break;
4825
4826         case OFPACT_PUSH_VLAN:
4827             /* XXX 802.1AD(QinQ) */
4828             memset(&wc->masks.vlan_tci, 0xff, sizeof wc->masks.vlan_tci);
4829             flow->vlan_tci = htons(VLAN_CFI);
4830             break;
4831
4832         case OFPACT_SET_ETH_SRC:
4833             WC_MASK_FIELD(wc, dl_src);
4834             flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
4835             break;
4836
4837         case OFPACT_SET_ETH_DST:
4838             WC_MASK_FIELD(wc, dl_dst);
4839             flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
4840             break;
4841
4842         case OFPACT_SET_IPV4_SRC:
4843             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4844                 memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
4845                 flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
4846             }
4847             break;
4848
4849         case OFPACT_SET_IPV4_DST:
4850             if (flow->dl_type == htons(ETH_TYPE_IP)) {
4851                 memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
4852                 flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
4853             }
4854             break;
4855
4856         case OFPACT_SET_IP_DSCP:
4857             if (is_ip_any(flow)) {
4858                 wc->masks.nw_tos |= IP_DSCP_MASK;
4859                 flow->nw_tos &= ~IP_DSCP_MASK;
4860                 flow->nw_tos |= ofpact_get_SET_IP_DSCP(a)->dscp;
4861             }
4862             break;
4863
4864         case OFPACT_SET_IP_ECN:
4865             if (is_ip_any(flow)) {
4866                 wc->masks.nw_tos |= IP_ECN_MASK;
4867                 flow->nw_tos &= ~IP_ECN_MASK;
4868                 flow->nw_tos |= ofpact_get_SET_IP_ECN(a)->ecn;
4869             }
4870             break;
4871
4872         case OFPACT_SET_IP_TTL:
4873             if (is_ip_any(flow)) {
4874                 wc->masks.nw_ttl = 0xff;
4875                 flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
4876             }
4877             break;
4878
4879         case OFPACT_SET_L4_SRC_PORT:
4880             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4881                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4882                 memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
4883                 flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
4884             }
4885             break;
4886
4887         case OFPACT_SET_L4_DST_PORT:
4888             if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
4889                 memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
4890                 memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
4891                 flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
4892             }
4893             break;
4894
4895         case OFPACT_RESUBMIT:
4896             /* Freezing complicates resubmit.  Some action in the flow
4897              * entry found by resubmit might trigger freezing.  If that
4898              * happens, then we do not want to execute the resubmit again after
4899              * during thawing, so we want to skip back to the head of the loop
4900              * to avoid that, only adding any actions that follow the resubmit
4901              * to the frozen actions.
4902              */
4903             xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
4904             continue;
4905
4906         case OFPACT_SET_TUNNEL:
4907             flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
4908             break;
4909
4910         case OFPACT_SET_QUEUE:
4911             memset(&wc->masks.skb_priority, 0xff,
4912                    sizeof wc->masks.skb_priority);
4913             xlate_set_queue_action(ctx, ofpact_get_SET_QUEUE(a)->queue_id);
4914             break;
4915
4916         case OFPACT_POP_QUEUE:
4917             memset(&wc->masks.skb_priority, 0xff,
4918                    sizeof wc->masks.skb_priority);
4919             flow->skb_priority = ctx->orig_skb_priority;
4920             break;
4921
4922         case OFPACT_REG_MOVE:
4923             nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
4924             break;
4925
4926         case OFPACT_SET_FIELD:
4927             set_field = ofpact_get_SET_FIELD(a);
4928             mf = set_field->field;
4929
4930             /* Set field action only ever overwrites packet's outermost
4931              * applicable header fields.  Do nothing if no header exists. */
4932             if (mf->id == MFF_VLAN_VID) {
4933                 wc->masks.vlan_tci |= htons(VLAN_CFI);
4934                 if (!(flow->vlan_tci & htons(VLAN_CFI))) {
4935                     break;
4936                 }
4937             } else if ((mf->id == MFF_MPLS_LABEL || mf->id == MFF_MPLS_TC)
4938                        /* 'dl_type' is already unwildcarded. */
4939                        && !eth_type_mpls(flow->dl_type)) {
4940                 break;
4941             }
4942             /* A flow may wildcard nw_frag.  Do nothing if setting a transport
4943              * header field on a packet that does not have them. */
4944             mf_mask_field_and_prereqs__(mf, &set_field->mask, wc);
4945             if (mf_are_prereqs_ok(mf, flow)) {
4946                 mf_set_flow_value_masked(mf, &set_field->value,
4947                                          &set_field->mask, flow);
4948             }
4949             break;
4950
4951         case OFPACT_STACK_PUSH:
4952             nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
4953                                    &ctx->stack);
4954             break;
4955
4956         case OFPACT_STACK_POP:
4957             nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
4958                                   &ctx->stack);
4959             break;
4960
4961         case OFPACT_PUSH_MPLS:
4962             compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
4963             break;
4964
4965         case OFPACT_POP_MPLS:
4966             compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
4967             break;
4968
4969         case OFPACT_SET_MPLS_LABEL:
4970             compose_set_mpls_label_action(
4971                 ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
4972             break;
4973
4974         case OFPACT_SET_MPLS_TC:
4975             compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
4976             break;
4977
4978         case OFPACT_SET_MPLS_TTL:
4979             compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
4980             break;
4981
4982         case OFPACT_DEC_MPLS_TTL:
4983             if (compose_dec_mpls_ttl_action(ctx)) {
4984                 return;
4985             }
4986             break;
4987
4988         case OFPACT_DEC_TTL:
4989             wc->masks.nw_ttl = 0xff;
4990             if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
4991                 return;
4992             }
4993             break;
4994
4995         case OFPACT_NOTE:
4996             /* Nothing to do. */
4997             break;
4998
4999         case OFPACT_MULTIPATH:
5000             multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
5001             break;
5002
5003         case OFPACT_BUNDLE:
5004             xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
5005             break;
5006
5007         case OFPACT_OUTPUT_REG:
5008             xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
5009             break;
5010
5011         case OFPACT_OUTPUT_TRUNC:
5012             xlate_output_trunc_action(ctx, ofpact_get_OUTPUT_TRUNC(a)->port,
5013                                 ofpact_get_OUTPUT_TRUNC(a)->max_len);
5014             break;
5015
5016         case OFPACT_LEARN:
5017             xlate_learn_action(ctx, ofpact_get_LEARN(a));
5018             break;
5019
5020         case OFPACT_CONJUNCTION: {
5021             /* A flow with a "conjunction" action represents part of a special
5022              * kind of "set membership match".  Such a flow should not actually
5023              * get executed, but it could via, say, a "packet-out", even though
5024              * that wouldn't be useful.  Log it to help debugging. */
5025             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5026             VLOG_INFO_RL(&rl, "executing no-op conjunction action");
5027             break;
5028         }
5029
5030         case OFPACT_EXIT:
5031             ctx->exit = true;
5032             break;
5033
5034         case OFPACT_UNROLL_XLATE: {
5035             struct ofpact_unroll_xlate *unroll = ofpact_get_UNROLL_XLATE(a);
5036
5037             /* Restore translation context data that was stored earlier. */
5038             ctx->table_id = unroll->rule_table_id;
5039             ctx->rule_cookie = unroll->rule_cookie;
5040             break;
5041         }
5042         case OFPACT_FIN_TIMEOUT:
5043             memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
5044             xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
5045             break;
5046
5047         case OFPACT_CLEAR_ACTIONS:
5048             ofpbuf_clear(&ctx->action_set);
5049             ctx->xin->flow.actset_output = OFPP_UNSET;
5050             ctx->action_set_has_group = false;
5051             break;
5052
5053         case OFPACT_WRITE_ACTIONS:
5054             xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a));
5055             break;
5056
5057         case OFPACT_WRITE_METADATA:
5058             metadata = ofpact_get_WRITE_METADATA(a);
5059             flow->metadata &= ~metadata->mask;
5060             flow->metadata |= metadata->metadata & metadata->mask;
5061             break;
5062
5063         case OFPACT_METER:
5064             /* Not implemented yet. */
5065             break;
5066
5067         case OFPACT_GOTO_TABLE: {
5068             struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
5069
5070             ovs_assert(ctx->table_id < ogt->table_id);
5071
5072             xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
5073                                ogt->table_id, true, true);
5074             break;
5075         }
5076
5077         case OFPACT_SAMPLE:
5078             xlate_sample_action(ctx, ofpact_get_SAMPLE(a));
5079             break;
5080
5081         case OFPACT_CT:
5082             compose_conntrack_action(ctx, ofpact_get_CT(a));
5083             break;
5084
5085         case OFPACT_NAT:
5086             /* This will be processed by compose_conntrack_action(). */
5087             ctx->ct_nat_action = ofpact_get_NAT(a);
5088             break;
5089
5090         case OFPACT_DEBUG_RECIRC:
5091             ctx_trigger_freeze(ctx);
5092             a = ofpact_next(a);
5093             break;
5094         }
5095
5096         /* Check if need to store this and the remaining actions for later
5097          * execution. */
5098         if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) {
5099             freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx);
5100             break;
5101         }
5102     }
5103 }
5104
5105 void
5106 xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto,
5107               const struct flow *flow, ofp_port_t in_port,
5108               struct rule_dpif *rule, uint16_t tcp_flags,
5109               const struct dp_packet *packet, struct flow_wildcards *wc,
5110               struct ofpbuf *odp_actions)
5111 {
5112     xin->ofproto = ofproto;
5113     xin->flow = *flow;
5114     xin->flow.in_port.ofp_port = in_port;
5115     xin->flow.actset_output = OFPP_UNSET;
5116     xin->packet = packet;
5117     xin->may_learn = packet != NULL;
5118     xin->rule = rule;
5119     xin->xcache = NULL;
5120     xin->ofpacts = NULL;
5121     xin->ofpacts_len = 0;
5122     xin->tcp_flags = tcp_flags;
5123     xin->resubmit_hook = NULL;
5124     xin->report_hook = NULL;
5125     xin->resubmit_stats = NULL;
5126     xin->indentation = 0;
5127     xin->depth = 0;
5128     xin->resubmits = 0;
5129     xin->wc = wc;
5130     xin->odp_actions = odp_actions;
5131
5132     /* Do recirc lookup. */
5133     xin->frozen_state = NULL;
5134     if (flow->recirc_id) {
5135         const struct recirc_id_node *node
5136             = recirc_id_node_find(flow->recirc_id);
5137         if (node) {
5138             xin->frozen_state = &node->state;
5139         }
5140     }
5141 }
5142
5143 void
5144 xlate_out_uninit(struct xlate_out *xout)
5145 {
5146     if (xout) {
5147         recirc_refs_unref(&xout->recircs);
5148     }
5149 }
5150
5151 /* Translates the 'ofpacts_len' bytes of "struct ofpact"s starting at 'ofpacts'
5152  * into datapath actions, using 'ctx', and discards the datapath actions. */
5153 void
5154 xlate_actions_for_side_effects(struct xlate_in *xin)
5155 {
5156     struct xlate_out xout;
5157     enum xlate_error error;
5158
5159     error = xlate_actions(xin, &xout);
5160     if (error) {
5161         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5162
5163         VLOG_WARN_RL(&rl, "xlate_actions failed (%s)!", xlate_strerror(error));
5164     }
5165
5166     xlate_out_uninit(&xout);
5167 }
5168 \f
5169 static struct skb_priority_to_dscp *
5170 get_skb_priority(const struct xport *xport, uint32_t skb_priority)
5171 {
5172     struct skb_priority_to_dscp *pdscp;
5173     uint32_t hash;
5174
5175     hash = hash_int(skb_priority, 0);
5176     HMAP_FOR_EACH_IN_BUCKET (pdscp, hmap_node, hash, &xport->skb_priorities) {
5177         if (pdscp->skb_priority == skb_priority) {
5178             return pdscp;
5179         }
5180     }
5181     return NULL;
5182 }
5183
5184 static bool
5185 dscp_from_skb_priority(const struct xport *xport, uint32_t skb_priority,
5186                        uint8_t *dscp)
5187 {
5188     struct skb_priority_to_dscp *pdscp = get_skb_priority(xport, skb_priority);
5189     *dscp = pdscp ? pdscp->dscp : 0;
5190     return pdscp != NULL;
5191 }
5192
5193 static size_t
5194 count_skb_priorities(const struct xport *xport)
5195 {
5196     return hmap_count(&xport->skb_priorities);
5197 }
5198
5199 static void
5200 clear_skb_priorities(struct xport *xport)
5201 {
5202     struct skb_priority_to_dscp *pdscp;
5203
5204     HMAP_FOR_EACH_POP (pdscp, hmap_node, &xport->skb_priorities) {
5205         free(pdscp);
5206     }
5207 }
5208
5209 static bool
5210 actions_output_to_local_port(const struct xlate_ctx *ctx)
5211 {
5212     odp_port_t local_odp_port = ofp_port_to_odp_port(ctx->xbridge, OFPP_LOCAL);
5213     const struct nlattr *a;
5214     unsigned int left;
5215
5216     NL_ATTR_FOR_EACH_UNSAFE (a, left, ctx->odp_actions->data,
5217                              ctx->odp_actions->size) {
5218         if (nl_attr_type(a) == OVS_ACTION_ATTR_OUTPUT
5219             && nl_attr_get_odp_port(a) == local_odp_port) {
5220             return true;
5221         }
5222     }
5223     return false;
5224 }
5225
5226 #if defined(__linux__)
5227 /* Returns the maximum number of packets that the Linux kernel is willing to
5228  * queue up internally to certain kinds of software-implemented ports, or the
5229  * default (and rarely modified) value if it cannot be determined. */
5230 static int
5231 netdev_max_backlog(void)
5232 {
5233     static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
5234     static int max_backlog = 1000; /* The normal default value. */
5235
5236     if (ovsthread_once_start(&once)) {
5237         static const char filename[] = "/proc/sys/net/core/netdev_max_backlog";
5238         FILE *stream;
5239         int n;
5240
5241         stream = fopen(filename, "r");
5242         if (!stream) {
5243             VLOG_INFO("%s: open failed (%s)", filename, ovs_strerror(errno));
5244         } else {
5245             if (fscanf(stream, "%d", &n) != 1) {
5246                 VLOG_WARN("%s: read error", filename);
5247             } else if (n <= 100) {
5248                 VLOG_WARN("%s: unexpectedly small value %d", filename, n);
5249             } else {
5250                 max_backlog = n;
5251             }
5252             fclose(stream);
5253         }
5254         ovsthread_once_done(&once);
5255
5256         VLOG_DBG("%s: using %d max_backlog", filename, max_backlog);
5257     }
5258
5259     return max_backlog;
5260 }
5261
5262 /* Counts and returns the number of OVS_ACTION_ATTR_OUTPUT actions in
5263  * 'odp_actions'. */
5264 static int
5265 count_output_actions(const struct ofpbuf *odp_actions)
5266 {
5267     const struct nlattr *a;
5268     size_t left;
5269     int n = 0;
5270
5271     NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
5272         if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
5273             n++;
5274         }
5275     }
5276     return n;
5277 }
5278 #endif /* defined(__linux__) */
5279
5280 /* Returns true if 'odp_actions' contains more output actions than the datapath
5281  * can reliably handle in one go.  On Linux, this is the value of the
5282  * net.core.netdev_max_backlog sysctl, which limits the maximum number of
5283  * packets that the kernel is willing to queue up for processing while the
5284  * datapath is processing a set of actions. */
5285 static bool
5286 too_many_output_actions(const struct ofpbuf *odp_actions OVS_UNUSED)
5287 {
5288 #ifdef __linux__
5289     return (odp_actions->size / NL_A_U32_SIZE > netdev_max_backlog()
5290             && count_output_actions(odp_actions) > netdev_max_backlog());
5291 #else
5292     /* OSes other than Linux might have similar limits, but we don't know how
5293      * to determine them.*/
5294     return false;
5295 #endif
5296 }
5297
5298 static void
5299 xlate_wc_init(struct xlate_ctx *ctx)
5300 {
5301     flow_wildcards_init_catchall(ctx->wc);
5302
5303     /* Some fields we consider to always be examined. */
5304     WC_MASK_FIELD(ctx->wc, in_port);
5305     WC_MASK_FIELD(ctx->wc, dl_type);
5306     if (is_ip_any(&ctx->xin->flow)) {
5307         WC_MASK_FIELD_MASK(ctx->wc, nw_frag, FLOW_NW_FRAG_MASK);
5308     }
5309
5310     if (ctx->xbridge->support.odp.recirc) {
5311         /* Always exactly match recirc_id when datapath supports
5312          * recirculation.  */
5313         WC_MASK_FIELD(ctx->wc, recirc_id);
5314     }
5315
5316     if (ctx->xbridge->netflow) {
5317         netflow_mask_wc(&ctx->xin->flow, ctx->wc);
5318     }
5319
5320     tnl_wc_init(&ctx->xin->flow, ctx->wc);
5321 }
5322
5323 static void
5324 xlate_wc_finish(struct xlate_ctx *ctx)
5325 {
5326     /* Clear the metadata and register wildcard masks, because we won't
5327      * use non-header fields as part of the cache. */
5328     flow_wildcards_clear_non_packet_fields(ctx->wc);
5329
5330     /* ICMPv4 and ICMPv6 have 8-bit "type" and "code" fields.  struct flow
5331      * uses the low 8 bits of the 16-bit tp_src and tp_dst members to
5332      * represent these fields.  The datapath interface, on the other hand,
5333      * represents them with just 8 bits each.  This means that if the high
5334      * 8 bits of the masks for these fields somehow become set, then they
5335      * will get chopped off by a round trip through the datapath, and
5336      * revalidation will spot that as an inconsistency and delete the flow.
5337      * Avoid the problem here by making sure that only the low 8 bits of
5338      * either field can be unwildcarded for ICMP.
5339      */
5340     if (is_icmpv4(&ctx->xin->flow, NULL) || is_icmpv6(&ctx->xin->flow, NULL)) {
5341         ctx->wc->masks.tp_src &= htons(UINT8_MAX);
5342         ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
5343     }
5344     /* VLAN_TCI CFI bit must be matched if any of the TCI is matched. */
5345     if (ctx->wc->masks.vlan_tci) {
5346         ctx->wc->masks.vlan_tci |= htons(VLAN_CFI);
5347     }
5348 }
5349
5350 /* Translates the flow, actions, or rule in 'xin' into datapath actions in
5351  * 'xout'.
5352  * The caller must take responsibility for eventually freeing 'xout', with
5353  * xlate_out_uninit().
5354  * Returns 'XLATE_OK' if translation was successful.  In case of an error an
5355  * empty set of actions will be returned in 'xin->odp_actions' (if non-NULL),
5356  * so that most callers may ignore the return value and transparently install a
5357  * drop flow when the translation fails. */
5358 enum xlate_error
5359 xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
5360 {
5361     *xout = (struct xlate_out) {
5362         .slow = 0,
5363         .recircs = RECIRC_REFS_EMPTY_INITIALIZER,
5364     };
5365
5366     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5367     struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto);
5368     if (!xbridge) {
5369         return XLATE_BRIDGE_NOT_FOUND;
5370     }
5371
5372     struct flow *flow = &xin->flow;
5373
5374     union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
5375     uint64_t action_set_stub[1024 / 8];
5376     uint64_t frozen_actions_stub[1024 / 8];
5377     uint64_t actions_stub[256 / 8];
5378     struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
5379     struct xlate_ctx ctx = {
5380         .xin = xin,
5381         .xout = xout,
5382         .base_flow = *flow,
5383         .orig_tunnel_ipv6_dst = flow_tnl_dst(&flow->tunnel),
5384         .xbridge = xbridge,
5385         .stack = OFPBUF_STUB_INITIALIZER(stack_stub),
5386         .rule = xin->rule,
5387         .wc = (xin->wc
5388                ? xin->wc
5389                : &(struct flow_wildcards) { .masks = { .dl_type = 0 } }),
5390         .odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
5391
5392         .indentation = xin->indentation,
5393         .depth = xin->depth,
5394         .resubmits = xin->resubmits,
5395         .in_group = false,
5396         .in_action_set = false,
5397
5398         .table_id = 0,
5399         .rule_cookie = OVS_BE64_MAX,
5400         .orig_skb_priority = flow->skb_priority,
5401         .sflow_n_outputs = 0,
5402         .sflow_odp_port = 0,
5403         .nf_output_iface = NF_OUT_DROP,
5404         .exit = false,
5405         .error = XLATE_OK,
5406         .mirrors = 0,
5407
5408         .freezing = false,
5409         .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
5410         .pause = NULL,
5411
5412         .was_mpls = false,
5413         .conntracked = false,
5414
5415         .ct_nat_action = NULL,
5416
5417         .action_set_has_group = false,
5418         .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub),
5419     };
5420
5421     /* 'base_flow' reflects the packet as it came in, but we need it to reflect
5422      * the packet as the datapath will treat it for output actions. Our
5423      * datapath doesn't retain tunneling information without us re-setting
5424      * it, so clear the tunnel data.
5425      */
5426
5427     memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
5428
5429     ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
5430     xlate_wc_init(&ctx);
5431
5432     COVERAGE_INC(xlate_actions);
5433
5434     if (xin->frozen_state) {
5435         const struct frozen_state *state = xin->frozen_state;
5436
5437         xlate_report(&ctx, "Thawing frozen state:");
5438
5439         if (xin->ofpacts_len > 0 || ctx.rule) {
5440             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5441             const char *conflict = xin->ofpacts_len ? "actions" : "rule";
5442
5443             VLOG_WARN_RL(&rl, "Recirculation conflict (%s)!", conflict);
5444             xlate_report(&ctx, "- Recirculation conflict (%s)!", conflict);
5445             ctx.error = XLATE_RECIRCULATION_CONFLICT;
5446             goto exit;
5447         }
5448
5449         /* Set the bridge for post-recirculation processing if needed. */
5450         if (!uuid_equals(ofproto_dpif_get_uuid(ctx.xbridge->ofproto),
5451                          &state->ofproto_uuid)) {
5452             struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5453             const struct xbridge *new_bridge
5454                 = xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid);
5455
5456             if (OVS_UNLIKELY(!new_bridge)) {
5457                 /* Drop the packet if the bridge cannot be found. */
5458                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5459                 VLOG_WARN_RL(&rl, "Frozen bridge no longer exists.");
5460                 xlate_report(&ctx, "- Frozen bridge no longer exists.");
5461                 ctx.error = XLATE_BRIDGE_NOT_FOUND;
5462                 goto exit;
5463             }
5464             ctx.xbridge = new_bridge;
5465         }
5466
5467         /* Set the thawed table id.  Note: A table lookup is done only if there
5468          * are no frozen actions. */
5469         ctx.table_id = state->table_id;
5470         xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id);
5471
5472         if (!state->conntracked) {
5473             clear_conntrack(flow);
5474         }
5475
5476         /* Restore pipeline metadata. May change flow's in_port and other
5477          * metadata to the values that existed when freezing was triggered. */
5478         frozen_metadata_to_flow(&state->metadata, flow);
5479
5480         /* Restore stack, if any. */
5481         if (state->stack) {
5482             ofpbuf_put(&ctx.stack, state->stack,
5483                        state->n_stack * sizeof *state->stack);
5484         }
5485
5486         /* Restore mirror state. */
5487         ctx.mirrors = state->mirrors;
5488
5489         /* Restore action set, if any. */
5490         if (state->action_set_len) {
5491             xlate_report_actions(&ctx, "- Restoring action set",
5492                                  state->action_set, state->action_set_len);
5493
5494             flow->actset_output = OFPP_UNSET;
5495             xlate_write_actions__(&ctx, state->action_set,
5496                                   state->action_set_len);
5497         }
5498
5499         /* Restore frozen actions.  If there are no actions, processing will
5500          * start with a lookup in the table set above. */
5501         xin->ofpacts = state->ofpacts;
5502         xin->ofpacts_len = state->ofpacts_len;
5503         if (state->ofpacts_len) {
5504             xlate_report_actions(&ctx, "- Restoring actions",
5505                                  xin->ofpacts, xin->ofpacts_len);
5506         }
5507     } else if (OVS_UNLIKELY(flow->recirc_id)) {
5508         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
5509
5510         VLOG_WARN_RL(&rl, "Recirculation context not found for ID %"PRIx32,
5511                      flow->recirc_id);
5512         ctx.error = XLATE_NO_RECIRCULATION_CONTEXT;
5513         goto exit;
5514     }
5515     /* The bridge is now known so obtain its table version. */
5516     ctx.tables_version = ofproto_dpif_get_tables_version(ctx.xbridge->ofproto);
5517
5518     if (!xin->ofpacts && !ctx.rule) {
5519         ctx.rule = rule_dpif_lookup_from_table(
5520             ctx.xbridge->ofproto, ctx.tables_version, flow, ctx.wc,
5521             ctx.xin->resubmit_stats, &ctx.table_id,
5522             flow->in_port.ofp_port, true, true);
5523         if (ctx.xin->resubmit_stats) {
5524             rule_dpif_credit_stats(ctx.rule, ctx.xin->resubmit_stats);
5525         }
5526         if (ctx.xin->xcache) {
5527             struct xc_entry *entry;
5528
5529             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_RULE);
5530             entry->u.rule = ctx.rule;
5531             rule_dpif_ref(ctx.rule);
5532         }
5533
5534         if (OVS_UNLIKELY(ctx.xin->resubmit_hook)) {
5535             ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0);
5536         }
5537     }
5538
5539     /* Get the proximate input port of the packet.  (If xin->frozen_state,
5540      * flow->in_port is the ultimate input port of the packet.) */
5541     struct xport *in_port = get_ofp_port(xbridge,
5542                                          ctx.base_flow.in_port.ofp_port);
5543
5544     /* Tunnel stats only for not-thawed packets. */
5545     if (!xin->frozen_state && in_port && in_port->is_tunnel) {
5546         if (ctx.xin->resubmit_stats) {
5547             netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
5548             if (in_port->bfd) {
5549                 bfd_account_rx(in_port->bfd, ctx.xin->resubmit_stats);
5550             }
5551         }
5552         if (ctx.xin->xcache) {
5553             struct xc_entry *entry;
5554
5555             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETDEV);
5556             entry->u.dev.rx = netdev_ref(in_port->netdev);
5557             entry->u.dev.bfd = bfd_ref(in_port->bfd);
5558         }
5559     }
5560
5561     if (!xin->frozen_state && process_special(&ctx, in_port)) {
5562         /* process_special() did all the processing for this packet.
5563          *
5564          * We do not perform special processing on thawed packets, since that
5565          * was done before they were frozen and should not be redone. */
5566     } else if (in_port && in_port->xbundle
5567                && xbundle_mirror_out(xbridge, in_port->xbundle)) {
5568         if (ctx.xin->packet != NULL) {
5569             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
5570             VLOG_WARN_RL(&rl, "bridge %s: dropping packet received on port "
5571                          "%s, which is reserved exclusively for mirroring",
5572                          ctx.xbridge->name, in_port->xbundle->name);
5573         }
5574     } else {
5575         /* Sampling is done on initial reception; don't redo after thawing. */
5576         unsigned int user_cookie_offset = 0;
5577         if (!xin->frozen_state) {
5578             user_cookie_offset = compose_sflow_action(&ctx);
5579             compose_ipfix_action(&ctx, ODPP_NONE);
5580         }
5581         size_t sample_actions_len = ctx.odp_actions->size;
5582
5583         if (tnl_process_ecn(flow)
5584             && (!in_port || may_receive(in_port, &ctx))) {
5585             const struct ofpact *ofpacts;
5586             size_t ofpacts_len;
5587
5588             if (xin->ofpacts) {
5589                 ofpacts = xin->ofpacts;
5590                 ofpacts_len = xin->ofpacts_len;
5591             } else if (ctx.rule) {
5592                 const struct rule_actions *actions
5593                     = rule_dpif_get_actions(ctx.rule);
5594                 ofpacts = actions->ofpacts;
5595                 ofpacts_len = actions->ofpacts_len;
5596                 ctx.rule_cookie = rule_dpif_get_flow_cookie(ctx.rule);
5597             } else {
5598                 OVS_NOT_REACHED();
5599             }
5600
5601             mirror_ingress_packet(&ctx);
5602             do_xlate_actions(ofpacts, ofpacts_len, &ctx);
5603             if (ctx.error) {
5604                 goto exit;
5605             }
5606
5607             /* We've let OFPP_NORMAL and the learning action look at the
5608              * packet, so cancel all actions and freezing if forwarding is
5609              * disabled. */
5610             if (in_port && (!xport_stp_forward_state(in_port) ||
5611                             !xport_rstp_forward_state(in_port))) {
5612                 ctx.odp_actions->size = sample_actions_len;
5613                 ctx_cancel_freeze(&ctx);
5614                 ofpbuf_clear(&ctx.action_set);
5615             }
5616
5617             if (!ctx.freezing) {
5618                 xlate_action_set(&ctx);
5619             }
5620             if (ctx.freezing) {
5621                 finish_freezing(&ctx);
5622             }
5623         }
5624
5625         /* Output only fully processed packets. */
5626         if (!ctx.freezing
5627             && xbridge->has_in_band
5628             && in_band_must_output_to_local_port(flow)
5629             && !actions_output_to_local_port(&ctx)) {
5630             compose_output_action(&ctx, OFPP_LOCAL, NULL);
5631         }
5632
5633         if (user_cookie_offset) {
5634             fix_sflow_action(&ctx, user_cookie_offset);
5635         }
5636     }
5637
5638     if (nl_attr_oversized(ctx.odp_actions->size)) {
5639         /* These datapath actions are too big for a Netlink attribute, so we
5640          * can't hand them to the kernel directly.  dpif_execute() can execute
5641          * them one by one with help, so just mark the result as SLOW_ACTION to
5642          * prevent the flow from being installed. */
5643         COVERAGE_INC(xlate_actions_oversize);
5644         ctx.xout->slow |= SLOW_ACTION;
5645     } else if (too_many_output_actions(ctx.odp_actions)) {
5646         COVERAGE_INC(xlate_actions_too_many_output);
5647         ctx.xout->slow |= SLOW_ACTION;
5648     }
5649
5650     /* Do netflow only for packets on initial reception, that are not sent to
5651      * the controller.  We consider packets sent to the controller to be part
5652      * of the control plane rather than the data plane. */
5653     if (!xin->frozen_state
5654         && xbridge->netflow
5655         && !(xout->slow & SLOW_CONTROLLER)) {
5656         if (ctx.xin->resubmit_stats) {
5657             netflow_flow_update(xbridge->netflow, flow,
5658                                 ctx.nf_output_iface,
5659                                 ctx.xin->resubmit_stats);
5660         }
5661         if (ctx.xin->xcache) {
5662             struct xc_entry *entry;
5663
5664             entry = xlate_cache_add_entry(ctx.xin->xcache, XC_NETFLOW);
5665             entry->u.nf.netflow = netflow_ref(xbridge->netflow);
5666             entry->u.nf.flow = xmemdup(flow, sizeof *flow);
5667             entry->u.nf.iface = ctx.nf_output_iface;
5668         }
5669     }
5670
5671     xlate_wc_finish(&ctx);
5672
5673 exit:
5674     ofpbuf_uninit(&ctx.stack);
5675     ofpbuf_uninit(&ctx.action_set);
5676     ofpbuf_uninit(&ctx.frozen_actions);
5677     ofpbuf_uninit(&scratch_actions);
5678
5679     /* Make sure we return a "drop flow" in case of an error. */
5680     if (ctx.error) {
5681         xout->slow = 0;
5682         if (xin->odp_actions) {
5683             ofpbuf_clear(xin->odp_actions);
5684         }
5685     }
5686     return ctx.error;
5687 }
5688
5689 enum ofperr
5690 xlate_resume(struct ofproto_dpif *ofproto,
5691              const struct ofputil_packet_in_private *pin,
5692              struct ofpbuf *odp_actions,
5693              enum slow_path_reason *slow)
5694 {
5695     struct dp_packet packet;
5696     dp_packet_use_const(&packet, pin->public.packet,
5697                         pin->public.packet_len);
5698
5699     struct flow flow;
5700     flow_extract(&packet, &flow);
5701
5702     struct xlate_in xin;
5703     xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags),
5704                   &packet, NULL, odp_actions);
5705
5706     struct ofpact_note noop;
5707     ofpact_init_NOTE(&noop);
5708     noop.length = 0;
5709
5710     bool any_actions = pin->actions_len > 0;
5711     struct frozen_state state = {
5712         .table_id = 0,     /* Not the table where NXAST_PAUSE was executed. */
5713         .ofproto_uuid = pin->bridge,
5714         .stack = pin->stack,
5715         .n_stack = pin->n_stack,
5716         .mirrors = pin->mirrors,
5717         .conntracked = pin->conntracked,
5718
5719         /* When there are no actions, xlate_actions() will search the flow
5720          * table.  We don't want it to do that (we want it to resume), so
5721          * supply a no-op action if there aren't any.
5722          *
5723          * (We can't necessarily avoid translating actions entirely if there
5724          * aren't any actions, because there might be some finishing-up to do
5725          * at the end of the pipeline, and we don't check for those
5726          * conditions.) */
5727         .ofpacts = any_actions ? pin->actions : &noop.ofpact,
5728         .ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
5729
5730         .action_set = pin->action_set,
5731         .action_set_len = pin->action_set_len,
5732     };
5733     frozen_metadata_from_flow(&state.metadata,
5734                               &pin->public.flow_metadata.flow);
5735     xin.frozen_state = &state;
5736
5737     struct xlate_out xout;
5738     enum xlate_error error = xlate_actions(&xin, &xout);
5739     *slow = xout.slow;
5740     xlate_out_uninit(&xout);
5741
5742     /* xlate_actions() can generate a number of errors, but only
5743      * XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be
5744      * sure to report over OpenFlow.  The others could come up in packet-outs
5745      * or regular flow translation and I don't think that it's going to be too
5746      * useful to report them to the controller. */
5747     return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
5748 }
5749
5750 /* Sends 'packet' out 'ofport'. If 'port' is a tunnel and that tunnel type
5751  * supports a notion of an OAM flag, sets it if 'oam' is true.
5752  * May modify 'packet'.
5753  * Returns 0 if successful, otherwise a positive errno value. */
5754 int
5755 xlate_send_packet(const struct ofport_dpif *ofport, bool oam,
5756                   struct dp_packet *packet)
5757 {
5758     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5759     struct xport *xport;
5760     uint64_t ofpacts_stub[1024 / 8];
5761     struct ofpbuf ofpacts;
5762     struct flow flow;
5763
5764     ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
5765     /* Use OFPP_NONE as the in_port to avoid special packet processing. */
5766     flow_extract(packet, &flow);
5767     flow.in_port.ofp_port = OFPP_NONE;
5768
5769     xport = xport_lookup(xcfg, ofport);
5770     if (!xport) {
5771         return EINVAL;
5772     }
5773
5774     if (oam) {
5775         struct ofpact_set_field *sf = ofpact_put_SET_FIELD(&ofpacts);
5776
5777         sf->field = mf_from_id(MFF_TUN_FLAGS);
5778         sf->value.be16 = htons(NX_TUN_FLAG_OAM);
5779         sf->mask.be16 = htons(NX_TUN_FLAG_OAM);
5780     }
5781
5782     ofpact_put_OUTPUT(&ofpacts)->port = xport->ofp_port;
5783
5784     return ofproto_dpif_execute_actions(xport->xbridge->ofproto, &flow, NULL,
5785                                         ofpacts.data, ofpacts.size, packet);
5786 }
5787
5788 struct xlate_cache *
5789 xlate_cache_new(void)
5790 {
5791     struct xlate_cache *xcache = xmalloc(sizeof *xcache);
5792
5793     ofpbuf_init(&xcache->entries, 512);
5794     return xcache;
5795 }
5796
5797 static struct xc_entry *
5798 xlate_cache_add_entry(struct xlate_cache *xcache, enum xc_type type)
5799 {
5800     struct xc_entry *entry;
5801
5802     entry = ofpbuf_put_zeros(&xcache->entries, sizeof *entry);
5803     entry->type = type;
5804
5805     return entry;
5806 }
5807
5808 static void
5809 xlate_cache_netdev(struct xc_entry *entry, const struct dpif_flow_stats *stats)
5810 {
5811     if (entry->u.dev.tx) {
5812         netdev_vport_inc_tx(entry->u.dev.tx, stats);
5813     }
5814     if (entry->u.dev.rx) {
5815         netdev_vport_inc_rx(entry->u.dev.rx, stats);
5816     }
5817     if (entry->u.dev.bfd) {
5818         bfd_account_rx(entry->u.dev.bfd, stats);
5819     }
5820 }
5821
5822 static void
5823 xlate_cache_normal(struct ofproto_dpif *ofproto, struct flow *flow, int vlan)
5824 {
5825     struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
5826     struct xbridge *xbridge;
5827     struct xbundle *xbundle;
5828     struct flow_wildcards wc;
5829
5830     xbridge = xbridge_lookup(xcfg, ofproto);
5831     if (!xbridge) {
5832         return;
5833     }
5834
5835     xbundle = lookup_input_bundle(xbridge, flow->in_port.ofp_port, false,
5836                                   NULL);
5837     if (!xbundle) {
5838         return;
5839     }
5840
5841     update_learning_table(xbridge, flow, &wc, vlan, xbundle);
5842 }
5843
5844 /* Push stats and perform side effects of flow translation. */
5845 void
5846 xlate_push_stats(struct xlate_cache *xcache,
5847                  const struct dpif_flow_stats *stats)
5848 {
5849     struct xc_entry *entry;
5850     struct ofpbuf entries = xcache->entries;
5851     struct eth_addr dmac;
5852
5853     if (!stats->n_packets) {
5854         return;
5855     }
5856
5857     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5858         switch (entry->type) {
5859         case XC_RULE:
5860             rule_dpif_credit_stats(entry->u.rule, stats);
5861             break;
5862         case XC_BOND:
5863             bond_account(entry->u.bond.bond, entry->u.bond.flow,
5864                          entry->u.bond.vid, stats->n_bytes);
5865             break;
5866         case XC_NETDEV:
5867             xlate_cache_netdev(entry, stats);
5868             break;
5869         case XC_NETFLOW:
5870             netflow_flow_update(entry->u.nf.netflow, entry->u.nf.flow,
5871                                 entry->u.nf.iface, stats);
5872             break;
5873         case XC_MIRROR:
5874             mirror_update_stats(entry->u.mirror.mbridge,
5875                                 entry->u.mirror.mirrors,
5876                                 stats->n_packets, stats->n_bytes);
5877             break;
5878         case XC_LEARN:
5879             ofproto_dpif_flow_mod(entry->u.learn.ofproto, entry->u.learn.fm);
5880             break;
5881         case XC_NORMAL:
5882             xlate_cache_normal(entry->u.normal.ofproto, entry->u.normal.flow,
5883                                entry->u.normal.vlan);
5884             break;
5885         case XC_FIN_TIMEOUT:
5886             xlate_fin_timeout__(entry->u.fin.rule, stats->tcp_flags,
5887                                 entry->u.fin.idle, entry->u.fin.hard);
5888             break;
5889         case XC_GROUP:
5890             group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket,
5891                                     stats);
5892             break;
5893         case XC_TNL_NEIGH:
5894             /* Lookup neighbor to avoid timeout. */
5895             tnl_neigh_lookup(entry->u.tnl_neigh_cache.br_name,
5896                              &entry->u.tnl_neigh_cache.d_ipv6, &dmac);
5897             break;
5898         default:
5899             OVS_NOT_REACHED();
5900         }
5901     }
5902 }
5903
5904 static void
5905 xlate_dev_unref(struct xc_entry *entry)
5906 {
5907     if (entry->u.dev.tx) {
5908         netdev_close(entry->u.dev.tx);
5909     }
5910     if (entry->u.dev.rx) {
5911         netdev_close(entry->u.dev.rx);
5912     }
5913     if (entry->u.dev.bfd) {
5914         bfd_unref(entry->u.dev.bfd);
5915     }
5916 }
5917
5918 static void
5919 xlate_cache_clear_netflow(struct netflow *netflow, struct flow *flow)
5920 {
5921     netflow_flow_clear(netflow, flow);
5922     netflow_unref(netflow);
5923     free(flow);
5924 }
5925
5926 void
5927 xlate_cache_clear(struct xlate_cache *xcache)
5928 {
5929     struct xc_entry *entry;
5930     struct ofpbuf entries;
5931
5932     if (!xcache) {
5933         return;
5934     }
5935
5936     XC_ENTRY_FOR_EACH (entry, entries, xcache) {
5937         switch (entry->type) {
5938         case XC_RULE:
5939             rule_dpif_unref(entry->u.rule);
5940             break;
5941         case XC_BOND:
5942             free(entry->u.bond.flow);
5943             bond_unref(entry->u.bond.bond);
5944             break;
5945         case XC_NETDEV:
5946             xlate_dev_unref(entry);
5947             break;
5948         case XC_NETFLOW:
5949             xlate_cache_clear_netflow(entry->u.nf.netflow, entry->u.nf.flow);
5950             break;
5951         case XC_MIRROR:
5952             mbridge_unref(entry->u.mirror.mbridge);
5953             break;
5954         case XC_LEARN:
5955             free(entry->u.learn.fm);
5956             ofpbuf_delete(entry->u.learn.ofpacts);
5957             break;
5958         case XC_NORMAL:
5959             free(entry->u.normal.flow);
5960             break;
5961         case XC_FIN_TIMEOUT:
5962             /* 'u.fin.rule' is always already held as a XC_RULE, which
5963              * has already released it's reference above. */
5964             break;
5965         case XC_GROUP:
5966             group_dpif_unref(entry->u.group.group);
5967             break;
5968         case XC_TNL_NEIGH:
5969             break;
5970         default:
5971             OVS_NOT_REACHED();
5972         }
5973     }
5974
5975     ofpbuf_clear(&xcache->entries);
5976 }
5977
5978 void
5979 xlate_cache_delete(struct xlate_cache *xcache)
5980 {
5981     xlate_cache_clear(xcache);
5982     ofpbuf_uninit(&xcache->entries);
5983     free(xcache);
5984 }