-/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
bool exit; /* No further actions should be processed. */
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
- /* These are used for non-bond recirculation. The recirculation IDs are
- * stored in xout and must be associated with a datapath flow (ukey),
- * otherwise they will be freed when the xout is uninitialized.
+ /* Freezing Translation
+ * ====================
*
+ * At some point during translation, the code may recognize the need to halt
+ * and checkpoint the translation in a way that it can be restarted again
+ * later. We call the checkpointing process "freezing" and the restarting
+ * process "thawing".
*
- * Steps in Recirculation Translation
- * ==================================
+ * The use cases for freezing are:
*
- * At some point during translation, the code recognizes the need for
- * recirculation. For example, recirculation is necessary when, after
- * popping the last MPLS label, an action or a match tries to examine or
- * modify a field that has been newly revealed following the MPLS label.
+ * - "Recirculation", where the translation process discovers that it
+ * doesn't have enough information to complete translation without
+ * actually executing the actions that have already been translated,
+ * which provides the additionally needed information. In these
+ * situations, translation freezes and assigns the frozen
+ * data a unique "recirculation ID", which it associates with the data
+ * in a table in userspace (see ofproto-dpif-rid.h). It also adds an
+ * OVS_ACTION_ATTR_RECIRC action specifying that ID to the datapath
+ * actions. When a packet hits that action, the datapath looks its
+ * flow up again using the ID. If there's a miss, it comes back to
+ * userspace, which finds the recirculation table entry for the ID,
+ * thaws the associated frozen data, and continues translation from
+ * that point given the additional information that is now known.
*
- * The simplest part of the work to be done is to commit existing changes to
- * the packet, which produces datapath actions corresponding to the changes,
- * and after this, add an OVS_ACTION_ATTR_RECIRC datapath action.
+ * The archetypal example is MPLS. As MPLS is implemented in
+ * OpenFlow, the protocol that follows the last MPLS label becomes
+ * known only when that label is popped by an OpenFlow action. That
+ * means that Open vSwitch can't extract the headers beyond the MPLS
+ * labels until the pop action is executed. Thus, at that point
+ * translation uses the recirculation process to extract the headers
+ * beyond the MPLS labels.
*
- * The main problem here is preserving state. When the datapath executes
- * OVS_ACTION_ATTR_RECIRC, it will upcall to userspace to get a translation
- * for the post-recirculation actions. At this point userspace has to
- * resume the translation where it left off, which means that it has to
- * execute the following:
+ * (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for
+ * output to bonds. OVS pre-populates all the datapath flows for bond
+ * output in the datapath, though, which means that the elaborate
+ * process of coming back to userspace for a second round of
+ * translation isn't needed, and so bonds don't follow the above
+ * process.)
*
- * - The action that prompted recirculation, and any actions following
- * it within the same flow.
+ * - "Continuation". A continuation is a way for an OpenFlow controller
+ * to interpose on a packet's traversal of the OpenFlow tables. When
+ * the translation process encounters a "controller" action with the
+ * "pause" flag, it freezes translation, serializes the frozen data,
+ * and sends it to an OpenFlow controller. The controller then
+ * examines and possibly modifies the frozen data and eventually sends
+ * it back to the switch, which thaws it and continues translation.
*
- * - If the action that prompted recirculation was invoked within a
- * NXAST_RESUBMIT, then any actions following the resubmit. These
- * "resubmit"s can be nested, so this has to go all the way up the
- * control stack.
+ * The main problem of freezing translation is preserving state, so that
+ * when the translation is thawed later it resumes from where it left off,
+ * without disruption. In particular, actions must be preserved as follows:
+ *
+ * - If we're freezing because an action needed more information, the
+ * action that prompted it.
+ *
+ * - Any actions remaining to be translated within the current flow.
+ *
+ * - If translation was frozen within a NXAST_RESUBMIT, then any actions
+ * following the resubmit action. Resubmit actions can be nested, so
+ * this has to go all the way up the control stack.
*
* - The OpenFlow 1.1+ action set.
*
*
* - Metadata fields (input port, registers, OF1.1+ metadata, ...).
*
- * - Action set, stack
+ * - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions.
*
* - The table ID and cookie of the flow being translated at each level
- * of the control stack (since OFPAT_CONTROLLER actions send these to
- * the controller).
+ * of the control stack, because these can become visible through
+ * OFPAT_CONTROLLER actions (and other ways).
*
* Translation allows for the control of this state preservation via these
- * members. When a need for recirculation is identified, the translation
- * process:
+ * members. When a need to freeze translation is identified, the
+ * translation process:
*
- * 1. Sets 'recirc_action_offset' to the current size of 'action_set'. The
- * action set is part of what needs to be preserved, so this allows the
- * action set and the additional state to share the 'action_set' buffer.
- * Later steps can tell that setup for recirculation is in progress from
- * the nonnegative value of 'recirc_action_offset'.
+ * 1. Sets 'freezing' to true.
*
* 2. Sets 'exit' to true to tell later steps that we're exiting from the
* translation process.
*
- * 3. Adds an OFPACT_UNROLL_XLATE action to 'action_set'. This action
- * holds the current table ID and cookie so that they can be restored
- * during a post-recirculation upcall translation.
+ * 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points
+ * frozen_actions.header to the action to make it easy to find it later.
+ * This action holds the current table ID and cookie so that they can be
+ * restored during a post-recirculation upcall translation.
*
* 4. Adds the action that prompted recirculation and any actions following
- * it within the same flow to 'action_set', so that they can be executed
- * during a post-recirculation upcall translation.
+ * it within the same flow to 'frozen_actions', so that they can be
+ * executed during a post-recirculation upcall translation.
*
* 5. Returns.
*
* 6. The action that prompted recirculation might be nested in a stack of
* nested "resubmit"s that have actions remaining. Each of these notices
- * that we're exiting (from 'exit') and that recirculation setup is in
- * progress (from 'recirc_action_offset') and responds by adding more
- * OFPACT_UNROLL_XLATE actions to 'action_set', as necessary, and any
- * actions that were yet unprocessed.
+ * that we're exiting and freezing and responds by adding more
+ * OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary,
+ * followed by any actions that were yet unprocessed.
*
- * The caller stores all the state produced by this process associated with
- * the recirculation ID. For post-recirculation upcall translation, the
- * caller passes it back in for the new translation to execute. The
- * process yielded a set of ofpacts that can be translated directly, so it
- * is not much of a special case at that point.
+ * If we're freezing because of recirculation, the caller generates a
+ * recirculation ID and associates all the state produced by this process
+ * with it. For post-recirculation upcall translation, the caller passes it
+ * back in for the new translation to execute. The process yielded a set of
+ * ofpacts that can be translated directly, so it is not much of a special
+ * case at that point.
*/
- int recirc_action_offset; /* Offset in 'action_set' to actions to be
- * executed after recirculation, or -1. */
- int last_unroll_offset; /* Offset in 'action_set' to the latest unroll
- * action, or -1. */
+ bool freezing;
+ struct ofpbuf frozen_actions;
+ const struct ofpact_controller *pause;
/* True if a packet was but is no longer MPLS (due to an MPLS pop action).
* This is a trigger for recirculation in cases where translating an action
/* True if conntrack has been performed on this packet during processing
* on the current bridge. This is used to determine whether conntrack
- * state from the datapath should be honored after recirculation. */
+ * state from the datapath should be honored after thawing. */
bool conntracked;
/* Pointer to an embedded NAT action in a conntrack action, or NULL. */
static void xlate_commit_actions(struct xlate_ctx *ctx);
static void
-ctx_trigger_recirculation(struct xlate_ctx *ctx)
+ctx_trigger_freeze(struct xlate_ctx *ctx)
{
ctx->exit = true;
- ctx->recirc_action_offset = ctx->action_set.size;
+ ctx->freezing = true;
}
static bool
-ctx_first_recirculation_action(const struct xlate_ctx *ctx)
+ctx_first_frozen_action(const struct xlate_ctx *ctx)
{
- return ctx->recirc_action_offset == ctx->action_set.size;
+ return !ctx->frozen_actions.size;
}
-static inline bool
-exit_recirculates(const struct xlate_ctx *ctx)
+static void
+ctx_cancel_freeze(struct xlate_ctx *ctx)
{
- /* When recirculating the 'recirc_action_offset' has a non-negative value.
- */
- return ctx->recirc_action_offset >= 0;
+ if (ctx->freezing) {
+ ctx->freezing = false;
+ ofpbuf_clear(&ctx->frozen_actions);
+ ctx->frozen_actions.header = NULL;
+ }
}
-static void compose_recirculate_action(struct xlate_ctx *ctx);
+static void finish_freezing(struct xlate_ctx *ctx);
/* A controller may use OFPP_NONE as the ingress port to indicate that
* it did not arrive on a "real" port. 'ofpp_none_bundle' exists for
} u;
};
-#define XC_ENTRY_FOR_EACH(entry, entries, xcache) \
- entries = xcache->entries; \
- for (entry = ofpbuf_try_pull(&entries, sizeof *entry); \
- entry; \
- entry = ofpbuf_try_pull(&entries, sizeof *entry))
+#define XC_ENTRY_FOR_EACH(ENTRY, ENTRIES, XCACHE) \
+ ENTRIES = XCACHE->entries; \
+ for (ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY); \
+ ENTRY; \
+ ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY))
struct xlate_cache {
struct ofpbuf entries;
static struct xbridge *xbridge_lookup(struct xlate_cfg *,
const struct ofproto_dpif *);
+static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
+ const struct uuid *);
static struct xbundle *xbundle_lookup(struct xlate_cfg *,
const struct ofbundle *);
static struct xport *xport_lookup(struct xlate_cfg *,
return NULL;
}
+static struct xbridge *
+xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid)
+{
+ struct xbridge *xbridge;
+
+ HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) {
+ if (uuid_equals(ofproto_dpif_get_uuid(xbridge->ofproto), uuid)) {
+ return xbridge;
+ }
+ }
+ return NULL;
+}
+
static struct xbundle *
xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle)
{
return NULL;
}
+/* Mirrors the packet represented by 'ctx' to appropriate mirror destinations,
+ * given the packet is ingressing or egressing on 'xbundle', which has ingress
+ * or egress (as appropriate) mirrors 'mirrors'. */
static void
mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle,
mirror_mask_t mirrors)
{
+ /* Figure out what VLAN the packet is in (because mirrors can select
+ * packets on basis of VLAN). */
bool warn = ctx->xin->packet != NULL;
uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci);
if (!input_vid_is_valid(vid, xbundle, warn)) {
return;
}
- /* Record these mirrors so that we don't mirror to them again. */
- ctx->mirrors |= mirrors;
-
if (ctx->xin->resubmit_stats) {
mirror_update_stats(xbridge->mbridge, mirrors,
ctx->xin->resubmit_stats->n_packets,
entry->u.mirror.mirrors = mirrors;
}
+ /* 'mirrors' is a bit-mask of candidates for mirroring. Iterate as long as
+ * some candidates remain. */
while (mirrors) {
const unsigned long *vlans;
mirror_mask_t dup_mirrors;
struct ofbundle *out;
int out_vlan;
+ /* Get the details of the mirror represented by the rightmost 1-bit. */
bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
&vlans, &dup_mirrors, &out, &out_vlan);
ovs_assert(has_mirror);
+ /* If this mirror selects on the basis of VLAN, and it does not select
+ * 'vlan', then discard this mirror and go on to the next one. */
if (vlans) {
ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK);
}
-
if (vlans && !bitmap_is_set(vlans, vlan)) {
mirrors = zero_rightmost_1bit(mirrors);
continue;
}
- mirrors &= ~dup_mirrors;
+ /* Record the mirror, and the mirrors that output to the same
+ * destination, so that we don't mirror to them again. This must be
+ * done now to ensure that output_normal(), below, doesn't recursively
+ * output to the same mirrors. */
ctx->mirrors |= dup_mirrors;
+
+ /* Send the packet to the mirror. */
if (out) {
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
struct xbundle *out_xbundle = xbundle_lookup(xcfg, out);
}
}
}
+
+ /* output_normal() could have recursively output (to different
+ * mirrors), so make sure that we don't send duplicates. */
+ mirrors &= ~ctx->mirrors;
}
}
if (!process_special(ctx, peer) && may_receive(peer, ctx)) {
if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) {
xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true);
- if (ctx->action_set.size) {
- /* Translate action set only if not dropping the packet and
- * not recirculating. */
- if (!exit_recirculates(ctx)) {
- xlate_action_set(ctx);
- }
+ if (!ctx->freezing) {
+ xlate_action_set(ctx);
}
- /* Check if need to recirculate. */
- if (exit_recirculates(ctx)) {
- compose_recirculate_action(ctx);
+ if (ctx->freezing) {
+ finish_freezing(ctx);
}
} else {
/* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and
ctx->base_flow = old_base_flow;
ctx->odp_actions->size = old_size;
- /* Undo changes that may have been done for recirculation. */
- if (exit_recirculates(ctx)) {
- ctx->action_set.size = ctx->recirc_action_offset;
- ctx->recirc_action_offset = -1;
- ctx->last_unroll_offset = -1;
- }
+ /* Undo changes that may have been done for freezing. */
+ ctx_cancel_freeze(ctx);
}
}
/* The fact that the peer bridge exits (for any reason) does not mean
* that the original bridge should exit. Specifically, if the peer
- * bridge recirculates (which typically modifies the packet), the
- * original bridge must continue processing with the original, not the
- * recirculated packet! */
+ * bridge freezes translation, the original bridge must continue
+ * processing with the original, not the frozen packet! */
ctx->exit = false;
/* Peer bridge errors do not propagate back. */
{
/* Check if we need to recirculate before matching in a table. */
if (ctx->was_mpls) {
- ctx_trigger_recirculation(ctx);
+ ctx_trigger_freeze(ctx);
return;
}
if (xlate_resubmit_resource_check(ctx)) {
xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket)
{
uint64_t action_list_stub[1024 / 8];
- struct ofpbuf action_list, action_set;
+ struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub);
+ struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
+ bucket->ofpacts_len);
struct flow old_flow = ctx->xin->flow;
bool old_was_mpls = ctx->was_mpls;
- ofpbuf_use_const(&action_set, bucket->ofpacts, bucket->ofpacts_len);
- ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub);
-
ofpacts_execute_action_set(&action_list, &action_set);
ctx->recurse++;
do_xlate_actions(action_list.data, action_list.size, ctx);
ctx->recurse--;
- ofpbuf_uninit(&action_set);
ofpbuf_uninit(&action_list);
- /* Check if need to recirculate. */
- if (exit_recirculates(ctx)) {
- compose_recirculate_action(ctx);
+ /* Check if we need to freeze. */
+ if (ctx->freezing) {
+ finish_freezing(ctx);
}
/* Roll back flow to previous state.
/* The fact that the group bucket exits (for any reason) does not mean that
* the translation after the group action should exit. Specifically, if
- * the group bucket recirculates (which typically modifies the packet), the
- * actions after the group action must continue processing with the
- * original, not the recirculated packet! */
+ * the group bucket freezes translation, the actions after the group action
+ * must continue processing with the original, not the frozen packet! */
ctx->exit = false;
}
static void
execute_controller_action(struct xlate_ctx *ctx, int len,
enum ofp_packet_in_reason reason,
- uint16_t controller_id)
+ uint16_t controller_id,
+ const uint8_t *userdata, size_t userdata_len)
{
- struct ofproto_packet_in *pin;
struct dp_packet *packet;
ctx->xout->slow |= SLOW_CONTROLLER;
odp_execute_actions(NULL, &packet, 1, false,
ctx->odp_actions->data, ctx->odp_actions->size, NULL);
- pin = xmalloc(sizeof *pin);
- pin->up.packet_len = dp_packet_size(packet);
- pin->up.packet = dp_packet_steal_data(packet);
- pin->up.reason = reason;
- pin->up.table_id = ctx->table_id;
- pin->up.cookie = ctx->rule_cookie;
-
- flow_get_metadata(&ctx->xin->flow, &pin->up.flow_metadata);
+ /* A packet sent by an action in a table-miss rule is considered an
+ * explicit table miss. OpenFlow before 1.3 doesn't have that concept so
+ * it will get translated back to OFPR_ACTION for those versions. */
+ if (reason == OFPR_ACTION
+ && ctx->rule && rule_dpif_is_table_miss(ctx->rule)) {
+ reason = OFPR_EXPLICIT_MISS;
+ }
+
+ size_t packet_len = dp_packet_size(packet);
+
+ struct ofproto_async_msg *am = xmalloc(sizeof *am);
+ *am = (struct ofproto_async_msg) {
+ .controller_id = controller_id,
+ .oam = OAM_PACKET_IN,
+ .pin = {
+ .up = {
+ .public = {
+ .packet = dp_packet_steal_data(packet),
+ .packet_len = packet_len,
+ .reason = reason,
+ .table_id = ctx->table_id,
+ .cookie = ctx->rule_cookie,
+ .userdata = (userdata_len
+ ? xmemdup(userdata, userdata_len)
+ : NULL),
+ .userdata_len = userdata_len,
+ }
+ },
+ .max_len = len,
+ },
+ };
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
- pin->controller_id = controller_id;
- pin->send_len = len;
- /* If a rule is a table-miss rule then this is
- * a table-miss handled by a table-miss rule.
- *
- * Else, if rule is internal and has a controller action,
- * the later being implied by the rule being processed here,
- * then this is a table-miss handled without a table-miss rule.
- *
- * Otherwise this is not a table-miss. */
- pin->miss_type = OFPROTO_PACKET_IN_NO_MISS;
- if (ctx->rule) {
- if (rule_dpif_is_table_miss(ctx->rule)) {
- pin->miss_type = OFPROTO_PACKET_IN_MISS_FLOW;
- } else if (rule_dpif_is_internal(ctx->rule)) {
- pin->miss_type = OFPROTO_PACKET_IN_MISS_WITHOUT_FLOW;
- }
- }
- ofproto_dpif_send_packet_in(ctx->xbridge->ofproto, pin);
+ ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
dp_packet_delete(packet);
}
static void
-compose_recirculate_action__(struct xlate_ctx *ctx, uint8_t table)
-{
- struct recirc_metadata md;
- uint32_t id;
-
- recirc_metadata_from_flow(&md, &ctx->xin->flow);
+emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
+{
+ struct ofproto_async_msg *am = xmalloc(sizeof *am);
+ *am = (struct ofproto_async_msg) {
+ .controller_id = ctx->pause->controller_id,
+ .oam = OAM_PACKET_IN,
+ .pin = {
+ .up = {
+ .public = {
+ .userdata = xmemdup(ctx->pause->userdata,
+ ctx->pause->userdata_len),
+ .userdata_len = ctx->pause->userdata_len,
+ .packet = xmemdup(dp_packet_data(ctx->xin->packet),
+ dp_packet_size(ctx->xin->packet)),
+ .packet_len = dp_packet_size(ctx->xin->packet),
+ },
+ .bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
+ .stack = xmemdup(state->stack,
+ state->n_stack * sizeof *state->stack),
+ .n_stack = state->n_stack,
+ .mirrors = state->mirrors,
+ .conntracked = state->conntracked,
+ .actions = xmemdup(state->ofpacts, state->ofpacts_len),
+ .actions_len = state->ofpacts_len,
+ .action_set = xmemdup(state->action_set,
+ state->action_set_len),
+ .action_set_len = state->action_set_len,
+ },
+ .max_len = UINT16_MAX,
+ },
+ };
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
+ ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
+}
- ovs_assert(ctx->recirc_action_offset >= 0);
+static void
+finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
+{
+ ovs_assert(ctx->freezing);
- struct recirc_state state = {
+ struct frozen_state state = {
.table_id = table,
- .ofproto = ctx->xbridge->ofproto,
- .metadata = md,
- .stack = &ctx->stack,
+ .ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
+ .stack = ctx->stack.data,
+ .n_stack = ctx->stack.size / sizeof(union mf_subvalue),
.mirrors = ctx->mirrors,
.conntracked = ctx->conntracked,
- .action_set_len = ctx->recirc_action_offset,
- .ofpacts_len = ctx->action_set.size,
- .ofpacts = ctx->action_set.data,
+ .ofpacts = ctx->frozen_actions.data,
+ .ofpacts_len = ctx->frozen_actions.size,
+ .action_set = ctx->action_set.data,
+ .action_set_len = ctx->action_set.size,
};
+ frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
- /* Allocate a unique recirc id for the given metadata state in the
- * flow. An existing id, with a new reference to the corresponding
- * recirculation context, will be returned if possible.
- * The life-cycle of this recirc id is managed by associating it
- * with the udpif key ('ukey') created for each new datapath flow. */
- id = recirc_alloc_id_ctx(&state);
- if (!id) {
- XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
- ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
- return;
- }
- recirc_refs_add(&ctx->xout->recircs, id);
+ if (ctx->pause) {
+ if (ctx->xin->packet) {
+ emit_continuation(ctx, &state);
+ }
+ } else {
+ /* Allocate a unique recirc id for the given metadata state in the
+ * flow. An existing id, with a new reference to the corresponding
+ * recirculation context, will be returned if possible.
+ * The life-cycle of this recirc id is managed by associating it
+ * with the udpif key ('ukey') created for each new datapath flow. */
+ uint32_t id = recirc_alloc_id_ctx(&state);
+ if (!id) {
+ XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
+ ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
+ return;
+ }
+ recirc_refs_add(&ctx->xout->recircs, id);
- nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ }
- /* Undo changes done by recirculation. */
- ctx->action_set.size = ctx->recirc_action_offset;
- ctx->recirc_action_offset = -1;
- ctx->last_unroll_offset = -1;
+ /* Undo changes done by freezing. */
+ ctx_cancel_freeze(ctx);
}
-/* Called only when ctx->recirc_action_offset is set. */
+/* Called only when we're freezing. */
static void
-compose_recirculate_action(struct xlate_ctx *ctx)
+finish_freezing(struct xlate_ctx *ctx)
{
xlate_commit_actions(ctx);
- compose_recirculate_action__(ctx, 0);
+ finish_freezing__(ctx, 0);
}
/* Fork the pipeline here. The current packet will continue processing the
static void
compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
{
- ctx->recirc_action_offset = ctx->action_set.size;
- compose_recirculate_action__(ctx, table);
+ ctx->freezing = true;
+ finish_freezing__(ctx, table);
}
static void
for (i = 0; i < ids->n_controllers; i++) {
execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
- ids->cnt_ids[i]);
+ ids->cnt_ids[i], NULL, 0);
}
/* Stop processing for current table. */
set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
return false;
} else {
- execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);
+ execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
+ NULL, 0);
}
}
(ctx->in_group ? OFPR_GROUP
: ctx->in_action_set ? OFPR_ACTION_SET
: OFPR_ACTION),
- 0);
+ 0, NULL, 0);
break;
case OFPP_NONE:
break;
}
static void
-xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a)
+xlate_write_actions__(struct xlate_ctx *ctx,
+ const struct ofpact *ofpacts, size_t ofpacts_len)
{
- const struct ofpact_nest *on = ofpact_get_WRITE_ACTIONS(a);
- size_t on_len = ofpact_nest_get_action_len(on);
- const struct ofpact *inner;
-
/* Maintain actset_output depending on the contents of the action set:
*
* - OFPP_UNSET, if there is no "output" action.
* - OFPP_UNSET, if there is a "group" action.
*/
if (!ctx->action_set_has_group) {
- OFPACT_FOR_EACH (inner, on->actions, on_len) {
- if (inner->type == OFPACT_OUTPUT) {
- ctx->xin->flow.actset_output = ofpact_get_OUTPUT(inner)->port;
- } else if (inner->type == OFPACT_GROUP) {
+ const struct ofpact *a;
+ OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
+ if (a->type == OFPACT_OUTPUT) {
+ ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port;
+ } else if (a->type == OFPACT_GROUP) {
ctx->xin->flow.actset_output = OFPP_UNSET;
ctx->action_set_has_group = true;
break;
}
}
- ofpbuf_put(&ctx->action_set, on->actions, on_len);
+ ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len);
+}
+
+static void
+xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a)
+{
+ xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a));
}
static void
}
static void
-recirc_put_unroll_xlate(struct xlate_ctx *ctx)
+freeze_put_unroll_xlate(struct xlate_ctx *ctx)
{
- struct ofpact_unroll_xlate *unroll;
-
- unroll = ctx->last_unroll_offset < 0
- ? NULL
- : ALIGNED_CAST(struct ofpact_unroll_xlate *,
- (char *)ctx->action_set.data + ctx->last_unroll_offset);
+ struct ofpact_unroll_xlate *unroll = ctx->frozen_actions.header;
/* Restore the table_id and rule cookie for a potential PACKET
* IN if needed. */
if (!unroll ||
(ctx->table_id != unroll->rule_table_id
|| ctx->rule_cookie != unroll->rule_cookie)) {
-
- ctx->last_unroll_offset = ctx->action_set.size;
- unroll = ofpact_put_UNROLL_XLATE(&ctx->action_set);
+ unroll = ofpact_put_UNROLL_XLATE(&ctx->frozen_actions);
unroll->rule_table_id = ctx->table_id;
unroll->rule_cookie = ctx->rule_cookie;
+ ctx->frozen_actions.header = unroll;
}
}
-/* Copy remaining actions to the action_set to be executed after recirculation.
- * UNROLL_XLATE action is inserted, if not already done so, before actions that
- * may generate PACKET_INs from the current table and without matching another
- * rule. */
+/* Copy actions 'a' through 'end' to ctx->frozen_actions, which will be
+ * executed after thawing. Inserts an UNROLL_XLATE action, if none is already
+ * present, before any action that may depend on the current table ID or flow
+ * cookie. */
static void
-recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
+freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end,
struct xlate_ctx *ctx)
{
- const struct ofpact *a;
-
- OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
+ for (; a < end; a = ofpact_next(a)) {
switch (a->type) {
- /* May generate PACKET INs. */
case OFPACT_OUTPUT_REG:
case OFPACT_GROUP:
case OFPACT_OUTPUT:
case OFPACT_CONTROLLER:
case OFPACT_DEC_MPLS_TTL:
case OFPACT_DEC_TTL:
- recirc_put_unroll_xlate(ctx);
+ /* These actions may generate asynchronous messages, which include
+ * table ID and flow cookie information. */
+ freeze_put_unroll_xlate(ctx);
+ break;
+
+ case OFPACT_RESUBMIT:
+ if (ofpact_get_RESUBMIT(a)->table_id == 0xff) {
+ /* This resubmit action is relative to the current table, so we
+ * need to track what table that is. */
+ freeze_put_unroll_xlate(ctx);
+ }
break;
- /* These may not generate PACKET INs. */
case OFPACT_SET_TUNNEL:
case OFPACT_REG_MOVE:
case OFPACT_SET_FIELD:
case OFPACT_STACK_POP:
case OFPACT_LEARN:
case OFPACT_WRITE_METADATA:
- case OFPACT_RESUBMIT: /* May indirectly generate PACKET INs, */
- case OFPACT_GOTO_TABLE: /* but from a different table and rule. */
+ case OFPACT_GOTO_TABLE:
case OFPACT_ENQUEUE:
case OFPACT_SET_VLAN_VID:
case OFPACT_SET_VLAN_PCP:
case OFPACT_DEBUG_RECIRC:
case OFPACT_CT:
case OFPACT_NAT:
+ /* These may not generate PACKET INs. */
break;
- /* These need not be copied for restoration. */
case OFPACT_NOTE:
case OFPACT_CONJUNCTION:
+ /* These need not be copied for restoration. */
continue;
}
/* Copy the action over. */
- ofpbuf_put(&ctx->action_set, a, OFPACT_ALIGN(a->len));
+ ofpbuf_put(&ctx->frozen_actions, a, OFPACT_ALIGN(a->len));
}
}
#define CHECK_MPLS_RECIRCULATION() \
if (ctx->was_mpls) { \
- ctx_trigger_recirculation(ctx); \
+ ctx_trigger_freeze(ctx); \
break; \
}
#define CHECK_MPLS_RECIRCULATION_IF(COND) \
if (ctx->exit) {
/* Check if need to store the remaining actions for later
* execution. */
- if (exit_recirculates(ctx)) {
- recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len -
- ((uint8_t *)a -
- (uint8_t *)ofpacts)),
+ if (ctx->freezing) {
+ freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len),
ctx);
}
break;
case OFPACT_CONTROLLER:
controller = ofpact_get_CONTROLLER(a);
- execute_controller_action(ctx, controller->max_len,
- controller->reason,
- controller->controller_id);
+ if (controller->pause) {
+ ctx->pause = controller;
+ ctx->xout->slow |= SLOW_CONTROLLER;
+ ctx_trigger_freeze(ctx);
+ a = ofpact_next(a);
+ } else {
+ execute_controller_action(ctx, controller->max_len,
+ controller->reason,
+ controller->controller_id,
+ controller->userdata,
+ controller->userdata_len);
+ }
break;
case OFPACT_ENQUEUE:
break;
case OFPACT_RESUBMIT:
+ /* Freezing complicates resubmit. There are two cases:
+ *
+ * - If mpls_pop has been executed, then the flow table lookup
+ * as part of resubmit might depend on fields that can only
+ * be obtained via recirculation, so the resubmit itself
+ * triggers recirculation and we need to make sure that the
+ * resubmit is executed again after recirculation.
+ * Therefore, in this case we trigger recirculation and let
+ * the code following this "switch" append the resubmit to
+ * the post-recirculation actions.
+ *
+ * - Otherwise, some action in the flow entry found by resubmit
+ * might trigger freezing. If that happens, then we do not
+ * want to execute the resubmit again during thawing, so we
+ * want to skip back to the head of the loop to avoid that,
+ * only adding any actions that follow the resubmit to the
+ * frozen actions.
+ */
+ if (ctx->was_mpls) {
+ ctx_trigger_freeze(ctx);
+ break;
+ }
xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
- break;
+ continue;
case OFPACT_SET_TUNNEL:
flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id);
break;
case OFPACT_WRITE_ACTIONS:
- xlate_write_actions(ctx, a);
+ xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a));
break;
case OFPACT_WRITE_METADATA:
case OFPACT_GOTO_TABLE: {
struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a);
- /* Allow ctx->table_id == TBL_INTERNAL, which will be greater
- * than ogt->table_id. This is to allow goto_table actions that
- * triggered recirculation: ctx->table_id will be TBL_INTERNAL
- * after recirculation. */
- ovs_assert(ctx->table_id == TBL_INTERNAL
- || ctx->table_id < ogt->table_id);
+ ovs_assert(ctx->table_id < ogt->table_id);
+
xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port,
ogt->table_id, true, true);
break;
break;
case OFPACT_DEBUG_RECIRC:
- ctx_trigger_recirculation(ctx);
+ ctx_trigger_freeze(ctx);
a = ofpact_next(a);
break;
}
/* Check if need to store this and the remaining actions for later
* execution. */
- if (!ctx->error && ctx->exit && ctx_first_recirculation_action(ctx)) {
- recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len -
- ((uint8_t *)a -
- (uint8_t *)ofpacts)),
- ctx);
+ if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) {
+ freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx);
break;
}
}
xin->odp_actions = odp_actions;
/* Do recirc lookup. */
- xin->recirc = flow->recirc_id
- ? recirc_id_node_find(flow->recirc_id)
- : NULL;
+ xin->frozen_state = NULL;
+ if (flow->recirc_id) {
+ const struct recirc_id_node *node
+ = recirc_id_node_find(flow->recirc_id);
+ if (node) {
+ xin->frozen_state = &node->state;
+ }
+ }
}
void
union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
uint64_t action_set_stub[1024 / 8];
+ uint64_t frozen_actions_stub[1024 / 8];
struct flow_wildcards scratch_wc;
uint64_t actions_stub[256 / 8];
struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
.error = XLATE_OK,
.mirrors = 0,
- .recirc_action_offset = -1,
- .last_unroll_offset = -1,
+ .freezing = false,
+ .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
+ .pause = NULL,
.was_mpls = false,
.conntracked = false,
COVERAGE_INC(xlate_actions);
- if (xin->recirc) {
- const struct recirc_state *state = &xin->recirc->state;
+ if (xin->frozen_state) {
+ const struct frozen_state *state = xin->frozen_state;
- xlate_report(&ctx, "Restoring state post-recirculation:");
+ xlate_report(&ctx, "Thawing frozen state:");
if (xin->ofpacts_len > 0 || ctx.rule) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
}
/* Set the bridge for post-recirculation processing if needed. */
- if (ctx.xbridge->ofproto != state->ofproto) {
+ if (!uuid_equals(ofproto_dpif_get_uuid(ctx.xbridge->ofproto),
+ &state->ofproto_uuid)) {
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
const struct xbridge *new_bridge
- = xbridge_lookup(xcfg, state->ofproto);
+ = xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid);
if (OVS_UNLIKELY(!new_bridge)) {
/* Drop the packet if the bridge cannot be found. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
- VLOG_WARN_RL(&rl, "Recirculation bridge no longer exists.");
- xlate_report(&ctx, "- Recirculation bridge no longer exists.");
+ VLOG_WARN_RL(&rl, "Frozen bridge no longer exists.");
+ xlate_report(&ctx, "- Frozen bridge no longer exists.");
ctx.error = XLATE_BRIDGE_NOT_FOUND;
goto exit;
}
ctx.xbridge = new_bridge;
}
- /* Set the post-recirculation table id. Note: A table lookup is done
- * only if there are no post-recirculation actions. */
+ /* Set the thawed table id. Note: A table lookup is done only if there
+ * are no frozen actions. */
ctx.table_id = state->table_id;
xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id);
}
/* Restore pipeline metadata. May change flow's in_port and other
- * metadata to the values that existed when recirculation was
- * triggered. */
- recirc_metadata_to_flow(&state->metadata, flow);
+ * metadata to the values that existed when freezing was triggered. */
+ frozen_metadata_to_flow(&state->metadata, flow);
/* Restore stack, if any. */
if (state->stack) {
- ofpbuf_put(&ctx.stack, state->stack->data, state->stack->size);
+ ofpbuf_put(&ctx.stack, state->stack,
+ state->n_stack * sizeof *state->stack);
}
/* Restore mirror state. */
/* Restore action set, if any. */
if (state->action_set_len) {
- const struct ofpact *a;
-
xlate_report_actions(&ctx, "- Restoring action set",
- state->ofpacts, state->action_set_len);
+ state->action_set, state->action_set_len);
- ofpbuf_put(&ctx.action_set, state->ofpacts, state->action_set_len);
-
- OFPACT_FOR_EACH(a, state->ofpacts, state->action_set_len) {
- if (a->type == OFPACT_GROUP) {
- ctx.action_set_has_group = true;
- break;
- }
- }
+ flow->actset_output = OFPP_UNSET;
+ xlate_write_actions__(&ctx, state->action_set,
+ state->action_set_len);
}
- /* Restore recirculation actions. If there are no actions, processing
- * will start with a lookup in the table set above. */
- if (state->ofpacts_len > state->action_set_len) {
- xin->ofpacts_len = state->ofpacts_len - state->action_set_len;
- xin->ofpacts = state->ofpacts +
- state->action_set_len / sizeof *state->ofpacts;
-
+ /* Restore frozen actions. If there are no actions, processing will
+ * start with a lookup in the table set above. */
+ xin->ofpacts = state->ofpacts;
+ xin->ofpacts_len = state->ofpacts_len;
+ if (state->ofpacts_len) {
xlate_report_actions(&ctx, "- Restoring actions",
xin->ofpacts, xin->ofpacts_len);
}
}
}
- /* Get the proximate input port of the packet. (If xin->recirc,
+ /* Get the proximate input port of the packet. (If xin->frozen_state,
* flow->in_port is the ultimate input port of the packet.) */
struct xport *in_port = get_ofp_port(xbridge,
ctx.base_flow.in_port.ofp_port);
- /* Tunnel stats only for non-recirculated packets. */
- if (!xin->recirc && in_port && in_port->is_tunnel) {
+ /* Tunnel stats only for not-thawed packets. */
+ if (!xin->frozen_state && in_port && in_port->is_tunnel) {
if (ctx.xin->resubmit_stats) {
netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats);
if (in_port->bfd) {
}
}
- if (!xin->recirc && process_special(&ctx, in_port)) {
+ if (!xin->frozen_state && process_special(&ctx, in_port)) {
/* process_special() did all the processing for this packet.
*
- * We do not perform special processing on recirculated packets, as
- * recirculated packets are not really received by the bridge.*/
+ * We do not perform special processing on thawed packets, since that
+ * was done before they were frozen and should not be redone. */
} else if (in_port && in_port->xbundle
&& xbundle_mirror_out(xbridge, in_port->xbundle)) {
if (ctx.xin->packet != NULL) {
ctx.xbridge->name, in_port->xbundle->name);
}
} else {
- /* Sampling is done only for packets really received by the bridge. */
+ /* Sampling is done on initial reception; don't redo after thawing. */
unsigned int user_cookie_offset = 0;
- if (!xin->recirc) {
+ if (!xin->frozen_state) {
user_cookie_offset = compose_sflow_action(&ctx);
compose_ipfix_action(&ctx, ODPP_NONE);
}
}
/* We've let OFPP_NORMAL and the learning action look at the
- * packet, so drop it now if forwarding is disabled. */
+ * packet, so cancel all actions and freezing if forwarding is
+ * disabled. */
if (in_port && (!xport_stp_forward_state(in_port) ||
!xport_rstp_forward_state(in_port))) {
- /* Drop all actions added by do_xlate_actions() above. */
ctx.odp_actions->size = sample_actions_len;
+ ctx_cancel_freeze(&ctx);
+ ofpbuf_clear(&ctx.action_set);
+ }
- /* Undo changes that may have been done for recirculation. */
- if (exit_recirculates(&ctx)) {
- ctx.action_set.size = ctx.recirc_action_offset;
- ctx.recirc_action_offset = -1;
- ctx.last_unroll_offset = -1;
- }
- } else if (ctx.action_set.size) {
- /* Translate action set only if not dropping the packet and
- * not recirculating. */
- if (!exit_recirculates(&ctx)) {
- xlate_action_set(&ctx);
- }
+ if (!ctx.freezing) {
+ xlate_action_set(&ctx);
}
- /* Check if need to recirculate. */
- if (exit_recirculates(&ctx)) {
- compose_recirculate_action(&ctx);
+ if (ctx.freezing) {
+ finish_freezing(&ctx);
}
}
/* Output only fully processed packets. */
- if (!exit_recirculates(&ctx)
+ if (!ctx.freezing
&& xbridge->has_in_band
&& in_band_must_output_to_local_port(flow)
&& !actions_output_to_local_port(&ctx)) {
ctx.xout->slow |= SLOW_ACTION;
}
- /* Do netflow only for packets really received by the bridge and not sent
- * to the controller. We consider packets sent to the controller to be
- * part of the control plane rather than the data plane. */
- if (!xin->recirc && xbridge->netflow && !(xout->slow & SLOW_CONTROLLER)) {
+ /* Do netflow only for packets on initial reception, that are not sent to
+ * the controller. We consider packets sent to the controller to be part
+ * of the control plane rather than the data plane. */
+ if (!xin->frozen_state
+ && xbridge->netflow
+ && !(xout->slow & SLOW_CONTROLLER)) {
if (ctx.xin->resubmit_stats) {
netflow_flow_update(xbridge->netflow, flow,
ctx.nf_output_iface,
exit:
ofpbuf_uninit(&ctx.stack);
ofpbuf_uninit(&ctx.action_set);
+ ofpbuf_uninit(&ctx.frozen_actions);
ofpbuf_uninit(&scratch_actions);
/* Make sure we return a "drop flow" in case of an error. */
return ctx.error;
}
+enum ofperr
+xlate_resume(struct ofproto_dpif *ofproto,
+ const struct ofputil_packet_in_private *pin,
+ struct ofpbuf *odp_actions,
+ enum slow_path_reason *slow)
+{ /* Resumes translation of a packet from the frozen pipeline state carried
+ * in 'pin'. The fields of 'pin' are repackaged into a local
+ * 'struct frozen_state', which is handed to xlate_actions() through
+ * 'xin.frozen_state'; 'odp_actions' is the buffer given to
+ * xlate_in_init() for the translation.
+ *
+ * On return, '*slow' holds the 'slow' member that xlate_actions() left in
+ * its xlate_out. Returns OFPERR_NXR_STALE if translation failed with
+ * XLATE_BRIDGE_NOT_FOUND, otherwise 0; every other translation error is
+ * deliberately not reported to the caller (see the comment above the
+ * return statement). */
+ struct dp_packet packet; /* Set up from pin's packet data and length. */
+ dp_packet_use_const(&packet, pin->public.packet,
+ pin->public.packet_len);
+
+ struct flow flow; /* Populated from 'packet' by flow_extract(). */
+ flow_extract(&packet, &flow);
+
+ struct xlate_in xin;
+ xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags),
+ &packet, NULL, odp_actions);
+
+ struct ofpact_note noop; /* Placeholder used when 'pin' has no actions. */
+ ofpact_init_NOTE(&noop);
+ noop.length = 0; /* NOTE(review): presumably "no note bytes" - confirm. */
+
+ bool any_actions = pin->actions_len > 0;
+ struct frozen_state state = {
+ .table_id = 0, /* Not the table where NXAST_PAUSE was executed. */
+ .ofproto_uuid = pin->bridge,
+ .stack = pin->stack,
+ .n_stack = pin->n_stack,
+ .mirrors = pin->mirrors,
+ .conntracked = pin->conntracked,
+
+ /* When there are no actions, xlate_actions() will search the flow
+ * table. We don't want it to do that (we want it to resume), so
+ * supply a no-op action if there aren't any.
+ *
+ * (We can't necessarily avoid translating actions entirely if there
+ * aren't any actions, because there might be some finishing-up to do
+ * at the end of the pipeline, and we don't check for those
+ * conditions.) */
+ .ofpacts = any_actions ? pin->actions : &noop.ofpact,
+ .ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
+
+ .action_set = pin->action_set,
+ .action_set_len = pin->action_set_len,
+ };
+ frozen_metadata_from_flow(&state.metadata,
+ &pin->public.flow_metadata.flow); /* Metadata from the packet-in. */
+ xin.frozen_state = &state; /* NOTE(review): 'state' is a local; assumes
+ * xlate_actions() does not keep this pointer
+ * after it returns - confirm. */
+
+ struct xlate_out xout;
+ enum xlate_error error = xlate_actions(&xin, &xout);
+ *slow = xout.slow; /* Copied out before 'xout' is uninitialized below. */
+ xlate_out_uninit(&xout);
+
+ /* xlate_actions() can generate a number of errors, but only
+ * XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be
+ * sure to report over OpenFlow. The others could come up in packet-outs
+ * or regular flow translation and I don't think that it's going to be too
+ * useful to report them to the controller. */
+ return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
+}
+
/* Sends 'packet' out 'ofport'.
* May modify 'packet'.
* Returns 0 if successful, otherwise a positive errno value. */