BUILD_ASSERT_DECL(FLOW_NW_FRAG_ANY == NX_IP_FRAG_ANY);
BUILD_ASSERT_DECL(FLOW_NW_FRAG_LATER == NX_IP_FRAG_LATER);
-/* Some flags are exposed through OpenFlow while others are used only
- * internally. */
-
-/* Public flags */
-#define FLOW_TNL_F_OAM (1 << 0)
-
-#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)
BUILD_ASSERT_DECL(FLOW_TNL_F_OAM == NX_TUN_FLAG_OAM);
-/* Private flags */
-#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
-#define FLOW_TNL_F_CSUM (1 << 2)
-#define FLOW_TNL_F_KEY (1 << 3)
-
-#define FLOW_TNL_F_MASK ((1 << 4) - 1)
-
-/* Purely internal to OVS userspace. These flags should never be exposed to
- * the outside world and so aren't included in the flags mask. */
-
-/* Tunnel information is in userspace datapath format. */
-#define FLOW_TNL_F_UDPIF (1 << 4)
-
const char *flow_tun_flag_to_string(uint32_t flags);
/* Maximum number of supported MPLS labels. */
{
md->recirc_id = flow->recirc_id;
md->dp_hash = flow->dp_hash;
- md->tunnel = flow->tunnel;
+ flow_tnl_copy__(&md->tunnel, &flow->tunnel);
md->skb_priority = flow->skb_priority;
md->pkt_mark = flow->pkt_mark;
md->in_port = flow->in_port;
/* Tunnel information used in flow key and metadata. */
struct flow_tnl {
- ovs_be64 tun_id;
ovs_be32 ip_dst;
ovs_be32 ip_src;
+ ovs_be64 tun_id;
uint16_t flags;
uint8_t ip_tos;
uint8_t ip_ttl;
struct tun_metadata metadata;
};
+/* Some flags are exposed through OpenFlow while others are used only
+ * internally. */
+
+/* Public flags */
+#define FLOW_TNL_F_OAM (1 << 0)
+
+#define FLOW_TNL_PUB_F_MASK ((1 << 1) - 1)
+
+/* Private flags */
+#define FLOW_TNL_F_DONT_FRAGMENT (1 << 1)
+#define FLOW_TNL_F_CSUM (1 << 2)
+#define FLOW_TNL_F_KEY (1 << 3)
+
+#define FLOW_TNL_F_MASK ((1 << 4) - 1)
+
+/* Purely internal to OVS userspace. These flags should never be exposed to
+ * the outside world and so aren't included in the flags mask. */
+
+/* Tunnel information is in userspace datapath format. */
+#define FLOW_TNL_F_UDPIF (1 << 4)
+
+/* Returns an offset to 'src' covering all the meaningful fields in 'src'. */
+static inline size_t
+flow_tnl_size(const struct flow_tnl *src)
+{
+ if (!src->ip_dst) {
+ /* Covers ip_dst only. */
+ return offsetof(struct flow_tnl, ip_src);
+ }
+ if (src->flags & FLOW_TNL_F_UDPIF) {
+ /* Datapath format, cover all options we have. */
+ return offsetof(struct flow_tnl, metadata.opts)
+ + src->metadata.present.len;
+ }
+ if (!src->metadata.present.map) {
+ /* No TLVs, opts is irrelevant. */
+ return offsetof(struct flow_tnl, metadata.opts);
+ }
+ /* Have decoded TLVs, opts is relevant. */
+ return sizeof *src;
+}
+
+/* Copy flow_tnl, but avoid copying unused portions of tun_metadata. Unused
+ * data in 'dst' is NOT cleared, so this must not be used in cases where the
+ * uninitialized portion may be hashed over. */
+static inline void
+flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src)
+{
+ memcpy(dst, src, flow_tnl_size(src));
+}
+
+static inline bool
+flow_tnl_equal(const struct flow_tnl *a, const struct flow_tnl *b)
+{
+ size_t a_size = flow_tnl_size(a);
+
+ return a_size == flow_tnl_size(b) && !memcmp(a, b, a_size);
+}
+
/* Unfortunately, a "struct flow" sometimes has to handle OpenFlow port
* numbers and other times datapath (dpif) port numbers. This union allows
* access to both. */
uint32_t skb_priority; /* Packet priority for QoS. */
uint32_t pkt_mark; /* Packet mark. */
union flow_in_port in_port; /* Input port. */
- struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
+ struct flow_tnl tunnel; /* Encapsulating tunnel parameters. Note that
+ * if 'ip_dst' == 0, the rest of the fields may
+ * be uninitialized. */
};
static inline void
hash = hash_pointer(state->ofproto, 0);
hash = hash_int(state->table_id, hash);
- hash = hash_words64((const uint64_t *) &state->metadata,
- sizeof state->metadata / sizeof(uint64_t),
+ if (state->metadata.tunnel->ip_dst) {
+ /* We may leave remainder bytes unhashed, but that is unlikely as
+ * the tunnel is not in the datapath format. */
+ hash = hash_words64((const uint64_t *) state->metadata.tunnel,
+ flow_tnl_size(state->metadata.tunnel)
+ / sizeof(uint64_t), hash);
+ }
+ hash = hash_words64((const uint64_t *) &state->metadata.metadata,
+ (sizeof state->metadata - sizeof state->metadata.tunnel)
+ / sizeof(uint64_t),
hash);
if (state->stack && state->stack->size != 0) {
hash = hash_words64((const uint64_t *) state->stack->data,
{
return (a->table_id == b->table_id
&& a->ofproto == b->ofproto
- && !memcmp(&a->metadata, &b->metadata, sizeof a->metadata)
+ && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
+ && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
+ sizeof a->metadata - sizeof a->metadata.tunnel)
&& (((!a->stack || !a->stack->size) &&
(!b->stack || !b->stack->size))
|| (a->stack && b->stack && ofpbuf_equal(a->stack, b->stack)))
}
static void
-recirc_state_clone(struct recirc_state *new, const struct recirc_state *old)
+recirc_state_clone(struct recirc_state *new, const struct recirc_state *old,
+ struct flow_tnl *tunnel)
{
*new = *old;
+ flow_tnl_copy__(tunnel, old->metadata.tunnel);
+ new->metadata.tunnel = tunnel;
+
if (new->stack) {
new->stack = new->stack->size ? ofpbuf_clone(new->stack) : NULL;
}
ovs_assert(state->action_set_len <= state->ofpacts_len);
struct recirc_id_node *node = xzalloc(sizeof *node);
+
node->hash = hash;
ovs_refcount_init(&node->refcount);
- recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state);
+ recirc_state_clone(CONST_CAST(struct recirc_state *, &node->state), state,
+ &node->state_metadata_tunnel);
ovs_mutex_lock(&mutex);
for (;;) {
uint32_t
recirc_alloc_id(struct ofproto_dpif *ofproto)
{
+ struct flow_tnl tunnel;
+ tunnel.ip_dst = htonl(0);
struct recirc_state state = {
.table_id = TBL_INTERNAL,
.ofproto = ofproto,
- .metadata = { .in_port = OFPP_NONE },
+ .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
};
return recirc_alloc_id__(&state, recirc_metadata_hash(&state))->id;
}
* =============
*
* Recirculation is a technique to allow a frame to re-enter the datapath
- * packet processing path for one or multiple times to achieve more flexible
- * packet processing, such modifying header fields after MPLS POP action and
- * selecting bond a slave port for bond ports.
+ * packet processing path to achieve more flexible packet processing, such as
+ * modifying header fields after MPLS POP action and selecting a slave port for
+ * bond ports.
*
* Data path and user space interface
* -----------------------------------
*
* Recirculation uses two uint32_t fields, recirc_id and dp_hash, and a RECIRC
- * action. The value recirc_id is used to select the next packet processing
- * steps among multiple instances of recirculation. When a packet initially
- * enters the data path it is assigned with recirc_id 0, which indicates no
- * recirculation. Recirc_ids are managed by the user space, opaque to the
- * data path.
+ * action. recirc_id is used to select the next packet processing steps among
+ * multiple instances of recirculation. When a packet initially enters the
+ * datapath it is assigned with recirc_id 0, which indicates no recirculation.
+ * Recirc_ids are managed by the user space, opaque to the datapath.
*
- * On the other hand, dp_hash can only be computed by the data path, opaque to
- * the user space. In fact, user space may not able to recompute the hash
- * value. The dp_hash value should be wildcarded for a newly received
- * packet. HASH action specifies whether the hash is computed, and if
- * computed, how many fields are to be included in the hash computation. The
- * computed hash value is stored into the dp_hash field prior to recirculation.
+ * On the other hand, dp_hash can only be computed by the datapath, opaque to
+ * the user space, as the datapath is free to choose the hashing algorithm
+ * without informing user space about it. The dp_hash value should be
+ * wildcarded for newly received packets. HASH action specifies whether the
+ * hash is computed, and if computed, how many fields are to be included in the
+ * hash computation. The computed hash value is stored into the dp_hash field
+ * prior to recirculation.
*
* The RECIRC action sets the recirc_id field and then reprocesses the packet
- * as if it was received on the same input port. RECIRC action works like a
- * function call; actions listed behind the RECIRC action will be executed
- * after its execution. RECIRC action can be nested, data path implementation
- * limits the number of recirculation executed to prevent unreasonable nesting
- * depth or infinite loop.
+ * as if it was received again on the same input port. RECIRC action works
+ * like a function call; actions listed after the RECIRC action will be
+ * executed after recirculation. RECIRC action can be nested, but datapath
+ * implementation limits the number of nested recirculations to prevent
+ * unreasonable nesting depth or infinite loop.
*
* User space recirculation context
* ---------------------------------
*
- * Recirculation is hidden from the OpenFlow controllers. Action translation
- * code deduces when recirculation is necessary and issues a data path
- * recirculation action. All OpenFlow actions to be performed after
+ * Recirculation is usually hidden from the OpenFlow controllers. Action
+ * translation code deduces when recirculation is necessary and issues a
+ * datapath recirculation action. All OpenFlow actions to be performed after
* recirculation are derived from the OpenFlow pipeline and are stored with the
* recirculation ID. When the OpenFlow tables are changed in a way affecting
* the recirculation flows, new recirculation ID with new metadata and actions
* Recirculation ID pool
* ----------------------
*
- * Recirculation ID needs to be unique for all data paths. Recirculation ID
- * pool keeps track recirculation ids and stores OpenFlow pipeline translation
- * context so that flow processing may continue after recirculation.
+ * Recirculation ID needs to be unique for all datapaths. Recirculation ID
+ * pool keeps track of recirculation ids and stores OpenFlow pipeline
+ * translation context so that flow processing may continue after
+ * recirculation.
*
* A Recirculation ID can be any uint32_t value, except for that the value 0 is
* reserved for 'no recirculation' case.
struct recirc_metadata {
/* Metadata in struct flow. */
- struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
+ const struct flow_tnl *tunnel; /* Encapsulating tunnel parameters. */
ovs_be64 metadata; /* OpenFlow Metadata. */
uint64_t regs[FLOW_N_XREGS]; /* Registers. */
ofp_port_t in_port; /* Incoming port. */
const struct flow *flow)
{
memset(md, 0, sizeof *md);
- md->tunnel = flow->tunnel;
+ md->tunnel = &flow->tunnel;
md->metadata = flow->metadata;
memcpy(md->regs, flow->regs, sizeof md->regs);
md->in_port = flow->in_port.ofp_port;
recirc_metadata_to_flow(const struct recirc_metadata *md,
struct flow *flow)
{
- flow->tunnel = md->tunnel;
+ if (md->tunnel && md->tunnel->ip_dst) {
+ flow->tunnel = *md->tunnel;
+ } else {
+ memset(&flow->tunnel, 0, sizeof flow->tunnel);
+ }
flow->metadata = md->metadata;
memcpy(flow->regs, md->regs, sizeof flow->regs);
flow->in_port.ofp_port = md->in_port;
* This state should not be modified after inserting a node in the pool,
* hence the 'const' to emphasize that. */
const struct recirc_state state;
+
+ /* Storage for tunnel metadata. */
+ struct flow_tnl state_metadata_tunnel;
};
void recirc_init(void);