* translation isn't needed, and so bonds don't follow the above
* process.)
*
+ * - "Continuation". A continuation is a way for an OpenFlow controller
+ * to interpose on a packet's traversal of the OpenFlow tables. When
+ * the translation process encounters a "controller" action with the
+ * "pause" flag, it freezes translation, serializes the frozen data,
+ * and sends it to an OpenFlow controller. The controller then
+ * examines and possibly modifies the frozen data and eventually sends
+ * it back to the switch, which thaws it and continues translation.
*
* The main problem of freezing translation is preserving state, so that
* when the translation is thawed later it resumes from where it left off,
*/
bool freezing;
struct ofpbuf frozen_actions;
-
- /* True if a packet was but is no longer MPLS (due to an MPLS pop action).
- * This is a trigger for recirculation in cases where translating an action
- * or looking up a flow requires access to the fields of the packet after
- * the MPLS label stack that was originally present. */
- bool was_mpls;
+ const struct ofpact_controller *pause;
/* True if conntrack has been performed on this packet during processing
* on the current bridge. This is used to determine whether conntrack
}
}
-static void compose_recirculate_action(struct xlate_ctx *ctx);
+static void finish_freezing(struct xlate_ctx *ctx);
/* A controller may use OFPP_NONE as the ingress port to indicate that
* it did not arrive on a "real" port. 'ofpp_none_bundle' exists for
if (is_igmp(flow)) {
if (mcast_snooping_is_membership(flow->tp_src) ||
mcast_snooping_is_query(flow->tp_src)) {
- if (ctx->xin->may_learn) {
+ if (ctx->xin->may_learn && ctx->xin->packet) {
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
in_xbundle, ctx->xin->packet);
}
return;
} else if (is_mld(flow)) {
ctx->xout->slow |= SLOW_ACTION;
- if (ctx->xin->may_learn) {
+ if (ctx->xin->may_learn && ctx->xin->packet) {
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
in_xbundle, ctx->xin->packet);
}
const struct xport *peer = xport->peer;
struct flow old_flow = ctx->xin->flow;
bool old_conntrack = ctx->conntracked;
- bool old_was_mpls = ctx->was_mpls;
cls_version_t old_version = ctx->tables_version;
struct ofpbuf old_stack = ctx->stack;
union mf_subvalue new_stack[1024 / sizeof(union mf_subvalue)];
xlate_action_set(ctx);
}
if (ctx->freezing) {
- compose_recirculate_action(ctx);
+ finish_freezing(ctx);
}
} else {
/* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and
/* Restore calling bridge's lookup version. */
ctx->tables_version = old_version;
- /* The peer bridge popping MPLS should have no effect on the original
- * bridge. */
- ctx->was_mpls = old_was_mpls;
-
/* The peer bridge's conntrack execution should have no effect on the
* original bridge. */
ctx->conntracked = old_conntrack;
xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id,
bool may_packet_in, bool honor_table_miss)
{
- /* Check if we need to recirculate before matching in a table. */
- if (ctx->was_mpls) {
- ctx_trigger_freeze(ctx);
- return;
- }
if (xlate_resubmit_resource_check(ctx)) {
uint8_t old_table_id = ctx->table_id;
struct rule_dpif *rule;
struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts,
bucket->ofpacts_len);
struct flow old_flow = ctx->xin->flow;
- bool old_was_mpls = ctx->was_mpls;
ofpacts_execute_action_set(&action_list, &action_set);
ctx->recurse++;
ofpbuf_uninit(&action_list);
- /* Check if need to recirculate. */
+ /* Check if need to freeze. */
if (ctx->freezing) {
- compose_recirculate_action(ctx);
+ finish_freezing(ctx);
}
/* Roll back flow to previous state.
* group buckets. */
ctx->xin->flow = old_flow;
- /* The group bucket popping MPLS should have no effect after bucket
- * execution. */
- ctx->was_mpls = old_was_mpls;
-
/* The fact that the group bucket exits (for any reason) does not mean that
* the translation after the group action should exit. Specifically, if
* the group bucket freezes translation, the actions after the group action
static void
execute_controller_action(struct xlate_ctx *ctx, int len,
enum ofp_packet_in_reason reason,
- uint16_t controller_id)
+ uint16_t controller_id,
+ const uint8_t *userdata, size_t userdata_len)
{
struct dp_packet *packet;
.oam = OAM_PACKET_IN,
.pin = {
.up = {
- .packet = dp_packet_steal_data(packet),
- .len = packet_len,
- .reason = reason,
- .table_id = ctx->table_id,
- .cookie = ctx->rule_cookie,
+ .public = {
+ .packet = dp_packet_steal_data(packet),
+ .packet_len = packet_len,
+ .reason = reason,
+ .table_id = ctx->table_id,
+ .cookie = ctx->rule_cookie,
+ .userdata = (userdata_len
+ ? xmemdup(userdata, userdata_len)
+ : NULL),
+ .userdata_len = userdata_len,
+ }
},
.max_len = len,
},
};
- flow_get_metadata(&ctx->xin->flow, &am->pin.up.flow_metadata);
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
dp_packet_delete(packet);
}
+/* Sends the frozen translation state in 'state', along with a copy of the
+ * original packet and the pausing "controller" action's userdata taken from
+ * ctx->pause, to the controller as an asynchronous packet-in message, so
+ * that the controller can later send the state back and translation can
+ * resume where it left off.  (NOTE(review): callers must ensure
+ * ctx->xin->packet is nonnull -- this function dereferences it.) */
static void
-compose_recirculate_action__(struct xlate_ctx *ctx, uint8_t table)
+emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
{
- struct frozen_metadata md;
- uint32_t id;
-
- frozen_metadata_from_flow(&md, &ctx->xin->flow);
+ struct ofproto_async_msg *am = xmalloc(sizeof *am);
+ *am = (struct ofproto_async_msg) {
+ .controller_id = ctx->pause->controller_id,
+ .oam = OAM_PACKET_IN,
+ .pin = {
+ .up = {
+ .public = {
+ .userdata = xmemdup(ctx->pause->userdata,
+ ctx->pause->userdata_len),
+ .userdata_len = ctx->pause->userdata_len,
+ .packet = xmemdup(dp_packet_data(ctx->xin->packet),
+ dp_packet_size(ctx->xin->packet)),
+ .packet_len = dp_packet_size(ctx->xin->packet),
+ },
+ .bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
+ .stack = xmemdup(state->stack,
+ state->n_stack * sizeof *state->stack),
+ .n_stack = state->n_stack,
+ .mirrors = state->mirrors,
+ .conntracked = state->conntracked,
+ .actions = xmemdup(state->ofpacts, state->ofpacts_len),
+ .actions_len = state->ofpacts_len,
+ .action_set = xmemdup(state->action_set,
+ state->action_set_len),
+ .action_set_len = state->action_set_len,
+ },
+ .max_len = UINT16_MAX,
+ },
+ };
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
+ ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
+}
+static void
+finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
+{
ovs_assert(ctx->freezing);
struct frozen_state state = {
.table_id = table,
.ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
- .metadata = md,
.stack = ctx->stack.data,
.n_stack = ctx->stack.size / sizeof(union mf_subvalue),
.mirrors = ctx->mirrors,
.action_set = ctx->action_set.data,
.action_set_len = ctx->action_set.size,
};
+ frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
- /* Allocate a unique recirc id for the given metadata state in the
- * flow. An existing id, with a new reference to the corresponding
- * recirculation context, will be returned if possible.
- * The life-cycle of this recirc id is managed by associating it
- * with the udpif key ('ukey') created for each new datapath flow. */
- id = recirc_alloc_id_ctx(&state);
- if (!id) {
- XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
- ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
- return;
- }
- recirc_refs_add(&ctx->xout->recircs, id);
+ if (ctx->pause) {
+ if (ctx->xin->packet) {
+ emit_continuation(ctx, &state);
+ }
+ } else {
+ /* Allocate a unique recirc id for the given metadata state in the
+ * flow. An existing id, with a new reference to the corresponding
+ * recirculation context, will be returned if possible.
+ * The life-cycle of this recirc id is managed by associating it
+ * with the udpif key ('ukey') created for each new datapath flow. */
+ uint32_t id = recirc_alloc_id_ctx(&state);
+ if (!id) {
+ XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
+ ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
+ return;
+ }
+ recirc_refs_add(&ctx->xout->recircs, id);
- nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ }
/* Undo changes done by freezing. */
ctx_cancel_freeze(ctx);
/* Called only when we're freezing. */
static void
-compose_recirculate_action(struct xlate_ctx *ctx)
+finish_freezing(struct xlate_ctx *ctx)
{
xlate_commit_actions(ctx);
- compose_recirculate_action__(ctx, 0);
+ /* The '0' becomes the frozen state's table_id in finish_freezing__(). */
+ finish_freezing__(ctx, 0);
}
/* Fork the pipeline here. The current packet will continue processing the
compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
{
ctx->freezing = true;
- compose_recirculate_action__(ctx, table);
+ finish_freezing__(ctx, table);
}
static void
int n = flow_count_mpls_labels(flow, ctx->wc);
if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
- if (ctx->xbridge->support.odp.recirc) {
- ctx->was_mpls = true;
+ if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
+ ctx_trigger_freeze(ctx);
}
} else if (n >= FLOW_MAX_MPLS_LABELS) {
if (ctx->xin->packet != NULL) {
for (i = 0; i < ids->n_controllers; i++) {
execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
- ids->cnt_ids[i]);
+ ids->cnt_ids[i], NULL, 0);
}
/* Stop processing for current table. */
set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
return false;
} else {
- execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);
+ execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
+ NULL, 0);
}
}
(ctx->in_group ? OFPR_GROUP
: ctx->in_action_set ? OFPR_ACTION_SET
: OFPR_ACTION),
- 0);
+ 0, NULL, 0);
break;
case OFPP_NONE:
break;
}
}
-#define CHECK_MPLS_RECIRCULATION() \
- if (ctx->was_mpls) { \
- ctx_trigger_freeze(ctx); \
- break; \
- }
-#define CHECK_MPLS_RECIRCULATION_IF(COND) \
- if (COND) { \
- CHECK_MPLS_RECIRCULATION(); \
- }
-
static void
put_ct_mark(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
case OFPACT_CONTROLLER:
controller = ofpact_get_CONTROLLER(a);
- execute_controller_action(ctx, controller->max_len,
- controller->reason,
- controller->controller_id);
+ if (controller->pause) {
+ ctx->pause = controller;
+ ctx->xout->slow |= SLOW_CONTROLLER;
+ ctx_trigger_freeze(ctx);
+ a = ofpact_next(a);
+ } else {
+ execute_controller_action(ctx, controller->max_len,
+ controller->reason,
+ controller->controller_id,
+ controller->userdata,
+ controller->userdata_len);
+ }
break;
case OFPACT_ENQUEUE:
break;
case OFPACT_SET_IPV4_SRC:
- CHECK_MPLS_RECIRCULATION();
if (flow->dl_type == htons(ETH_TYPE_IP)) {
memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
break;
case OFPACT_SET_IPV4_DST:
- CHECK_MPLS_RECIRCULATION();
if (flow->dl_type == htons(ETH_TYPE_IP)) {
memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
break;
case OFPACT_SET_IP_DSCP:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_tos |= IP_DSCP_MASK;
flow->nw_tos &= ~IP_DSCP_MASK;
break;
case OFPACT_SET_IP_ECN:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_tos |= IP_ECN_MASK;
flow->nw_tos &= ~IP_ECN_MASK;
break;
case OFPACT_SET_IP_TTL:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_ttl = 0xff;
flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
break;
case OFPACT_SET_L4_SRC_PORT:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
break;
case OFPACT_SET_L4_DST_PORT:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
break;
case OFPACT_RESUBMIT:
- /* Freezing complicates resubmit. There are two cases:
- *
- * - If mpls_pop has been executed, then the flow table lookup
- * as part of resubmit might depend on fields that can only
- * be obtained via recirculation, so the resubmit itself
- * triggers recirculation and we need to make sure that the
- * resubmit is executed again after recirculation.
- * Therefore, in this case we trigger recirculation and let
- * the code following this "switch" append the resubmit to
- * the post-recirculation actions.
- *
- * - Otherwise, some action in the flow entry found by resubmit
- * might trigger freezing. If that happens, then we do not
- * want to execute the resubmit again during thawing, so we
- * want to skip back to the head of the loop to avoid that,
- * only adding any actions that follow the resubmit to the
- * frozen actions.
+ /* Freezing complicates resubmit. Some action in the flow
+ * entry found by resubmit might trigger freezing. If that
+ * happens, then we do not want to execute the resubmit again
+ * during thawing, so we want to skip back to the head of the loop
+ * to avoid that, only adding any actions that follow the resubmit
+ * to the frozen actions.
*/
- if (ctx->was_mpls) {
- ctx_trigger_freeze(ctx);
- break;
- }
xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
continue;
break;
case OFPACT_REG_MOVE:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->dst.field) ||
- mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->src.field));
nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
break;
case OFPACT_SET_FIELD:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field));
set_field = ofpact_get_SET_FIELD(a);
mf = set_field->field;
break;
case OFPACT_STACK_PUSH:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_STACK_PUSH(a)->subfield.field));
nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
&ctx->stack);
break;
case OFPACT_STACK_POP:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_STACK_POP(a)->subfield.field));
nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
&ctx->stack);
break;
case OFPACT_PUSH_MPLS:
- /* Recirculate if it is an IP packet with a zero ttl. This may
- * indicate that the packet was previously MPLS and an MPLS pop
- * action converted it to IP. In this case recirculating should
- * reveal the IP TTL which is used as the basis for a new MPLS
- * LSE. */
- CHECK_MPLS_RECIRCULATION_IF(
- !flow_count_mpls_labels(flow, wc)
- && flow->nw_ttl == 0
- && is_ip_any(flow));
compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
break;
case OFPACT_POP_MPLS:
- CHECK_MPLS_RECIRCULATION();
compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
break;
case OFPACT_SET_MPLS_LABEL:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_label_action(
ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
break;
case OFPACT_SET_MPLS_TC:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
break;
case OFPACT_SET_MPLS_TTL:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
break;
case OFPACT_DEC_MPLS_TTL:
- CHECK_MPLS_RECIRCULATION();
if (compose_dec_mpls_ttl_action(ctx)) {
return;
}
break;
case OFPACT_DEC_TTL:
- CHECK_MPLS_RECIRCULATION();
wc->masks.nw_ttl = 0xff;
if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
return;
break;
case OFPACT_MULTIPATH:
- CHECK_MPLS_RECIRCULATION();
multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
break;
case OFPACT_BUNDLE:
- CHECK_MPLS_RECIRCULATION();
xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
break;
break;
case OFPACT_LEARN:
- CHECK_MPLS_RECIRCULATION();
xlate_learn_action(ctx, ofpact_get_LEARN(a));
break;
break;
}
case OFPACT_FIN_TIMEOUT:
- CHECK_MPLS_RECIRCULATION();
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
break;
break;
case OFPACT_CT:
- CHECK_MPLS_RECIRCULATION();
compose_conntrack_action(ctx, ofpact_get_CT(a));
break;
.freezing = false,
.frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
+ .pause = NULL,
- .was_mpls = false,
.conntracked = false,
.ct_nat_action = NULL,
xlate_action_set(&ctx);
}
if (ctx.freezing) {
- compose_recirculate_action(&ctx);
+ finish_freezing(&ctx);
}
}
return ctx.error;
}
+/* Resumes a paused "continuation": reconstructs the frozen translation state
+ * from 'pin' (the data a controller sent back for this bridge), re-runs
+ * translation of the remaining actions into datapath actions in
+ * 'odp_actions', and stores any slow-path reason in '*slow'.
+ *
+ * Returns OFPERR_NXR_STALE if translation fails because the bridge named in
+ * 'pin' no longer exists, otherwise 0 (other translation errors are
+ * deliberately not reported to the controller; see the comment at the
+ * bottom). */
+enum ofperr
+xlate_resume(struct ofproto_dpif *ofproto,
+ const struct ofputil_packet_in_private *pin,
+ struct ofpbuf *odp_actions,
+ enum slow_path_reason *slow)
+{
+ struct dp_packet packet;
+ dp_packet_use_const(&packet, pin->public.packet,
+ pin->public.packet_len);
+
+ struct flow flow;
+ flow_extract(&packet, &flow);
+
+ struct xlate_in xin;
+ xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags),
+ &packet, NULL, odp_actions);
+
+ struct ofpact_note noop;
+ ofpact_init_NOTE(&noop);
+ noop.length = 0;
+
+ bool any_actions = pin->actions_len > 0;
+ struct frozen_state state = {
+ .table_id = 0, /* Not the table where NXAST_PAUSE was executed. */
+ .ofproto_uuid = pin->bridge,
+ .stack = pin->stack,
+ .n_stack = pin->n_stack,
+ .mirrors = pin->mirrors,
+ .conntracked = pin->conntracked,
+
+ /* When there are no actions, xlate_actions() will search the flow
+ * table. We don't want it to do that (we want it to resume), so
+ * supply a no-op action if there aren't any.
+ *
+ * (We can't necessarily avoid translating actions entirely if there
+ * aren't any actions, because there might be some finishing-up to do
+ * at the end of the pipeline, and we don't check for those
+ * conditions.) */
+ .ofpacts = any_actions ? pin->actions : &noop.ofpact,
+ .ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
+
+ .action_set = pin->action_set,
+ .action_set_len = pin->action_set_len,
+ };
+ frozen_metadata_from_flow(&state.metadata,
+ &pin->public.flow_metadata.flow);
+ xin.frozen_state = &state;
+
+ struct xlate_out xout;
+ enum xlate_error error = xlate_actions(&xin, &xout);
+ *slow = xout.slow;
+ xlate_out_uninit(&xout);
+
+ /* xlate_actions() can generate a number of errors, but only
+ * XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be
+ * sure to report over OpenFlow. The others could come up in packet-outs
+ * or regular flow translation and I don't think that it's going to be too
+ * useful to report them to the controller. */
+ return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
+}
+
/* Sends 'packet' out 'ofport'.
* May modify 'packet'.
* Returns 0 if successful, otherwise a positive errno value. */