#include <sys/socket.h>
#include <netinet/in.h>
-#include "tnl-neigh-cache.h"
#include "bfd.h"
#include "bitmap.h"
#include "bond.h"
#include "coverage.h"
#include "dp-packet.h"
#include "dpif.h"
-#include "dynamic-string.h"
#include "in-band.h"
#include "lacp.h"
#include "learn.h"
-#include "list.h"
-#include "ovs-lldp.h"
#include "mac-learning.h"
#include "mcast-snooping.h"
-#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-execute.h"
-#include "ofp-actions.h"
#include "ofproto/ofproto-dpif-ipfix.h"
#include "ofproto/ofproto-dpif-mirror.h"
#include "ofproto/ofproto-dpif-monitor.h"
#include "ofproto/ofproto-dpif-sflow.h"
#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"
-#include "packets.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/meta-flow.h"
+#include "openvswitch/list.h"
+#include "openvswitch/ofp-actions.h"
+#include "openvswitch/vlog.h"
+#include "ovs-lldp.h"
#include "ovs-router.h"
+#include "packets.h"
+#include "tnl-neigh-cache.h"
#include "tnl-ports.h"
#include "tunnel.h"
-#include "openvswitch/vlog.h"
+#include "util.h"
COVERAGE_DEFINE(xlate_actions);
COVERAGE_DEFINE(xlate_actions_oversize);
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
/* Maximum depth of flow table recursion (due to resubmit actions) in a
- * flow translation. */
-#define MAX_RESUBMIT_RECURSION 64
-#define MAX_INTERNAL_RESUBMITS 1 /* Max resbmits allowed using rules in
- internal table. */
+ * flow translation.
+ *
+ * The goal of limiting the depth of resubmits is to ensure that flow
+ * translation eventually terminates. Only resubmits to the same table or an
+ * earlier table count against the maximum depth. This is because resubmits to
+ * strictly monotonically increasing table IDs will eventually terminate, since
+ * any OpenFlow switch has a finite number of tables. OpenFlow tables are most
+ * commonly traversed in numerically increasing order, so this limit has little
+ * effect on conventionally designed OpenFlow pipelines.
+ *
+ * Outputs to patch ports and to groups also count against the depth limit. */
+#define MAX_DEPTH 64
/* Maximum number of resubmit actions in a flow translation, whether they are
* recursive or not. */
-#define MAX_RESUBMITS (MAX_RESUBMIT_RECURSION * MAX_RESUBMIT_RECURSION)
+#define MAX_RESUBMITS (MAX_DEPTH * MAX_DEPTH)
struct xbridge {
struct hmap_node hmap_node; /* Node in global 'xbridges' map. */
/* Flow translation populates this with wildcards relevant in translation.
* When 'xin->wc' is nonnull, this is the same pointer. When 'xin->wc' is
- * null, this is a pointer to uninitialized scratch memory. This allows
- * code to blindly write to 'ctx->wc' without worrying about whether the
- * caller really wants wildcards. */
+ * null, this is a pointer to a temporary buffer. */
struct flow_wildcards *wc;
/* Output buffer for datapath actions. When 'xin->odp_actions' is nonnull,
* wants actions. */
struct ofpbuf *odp_actions;
- /* Resubmit statistics, via xlate_table_action(). */
- int recurse; /* Current resubmit nesting depth. */
+ /* Statistics maintained by xlate_table_action().
+ *
+ * 'indentation' is the nesting level for resubmits. It is used to indent
+ * the output of resubmit_hook (e.g. for the "ofproto/trace" feature).
+ *
+ * The other statistics limit the amount of work that a single flow
+ * translation can perform. The goal of the first of these, 'depth', is
+ * primarily to prevent translation from performing an infinite amount of
+ * work. It counts the current depth of nested "resubmit"s (and a few
+ * other activities); when a resubmit returns, it decreases. Resubmits to
+ * tables in strictly monotonically increasing order don't contribute to
+ * 'depth' because they cannot cause a flow translation to take an infinite
+ * amount of time (because the number of tables is finite). Translation
+ * aborts when 'depth' exceeds MAX_DEPTH.
+ *
+ * 'resubmits', on the other hand, prevents flow translation from
+ * performing an extraordinarily large while still finite amount of work.
+ * It counts the total number of resubmits (and a few other activities)
+ * that have been executed. Returning from a resubmit does not affect this
+ * counter. Thus, this limits the amount of work that a particular
+ * translation can perform. Translation aborts when 'resubmits' exceeds
+ * MAX_RESUBMITS (which is much larger than MAX_DEPTH).
+ */
+ int indentation; /* Indentation level for resubmit_hook. */
+ int depth; /* Current resubmit nesting depth. */
int resubmits; /* Total number of resubmits. */
bool in_group; /* Currently translating ofgroup, if true. */
bool in_action_set; /* Currently translating action_set, if true. */
ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
bool exit; /* No further actions should be processed. */
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
+    int mirror_snaplen;          /* Max size of a mirror packet in bytes. */
/* Freezing Translation
* ====================
* translation isn't needed, and so bonds don't follow the above
* process.)
*
+ * - "Continuation". A continuation is a way for an OpenFlow controller
+ * to interpose on a packet's traversal of the OpenFlow tables. When
+ * the translation process encounters a "controller" action with the
+ * "pause" flag, it freezes translation, serializes the frozen data,
+ * and sends it to an OpenFlow controller. The controller then
+ * examines and possibly modifies the frozen data and eventually sends
+ * it back to the switch, which thaws it and continues translation.
*
* The main problem of freezing translation is preserving state, so that
* when the translation is thawed later it resumes from where it left off,
*/
bool freezing;
struct ofpbuf frozen_actions;
+ const struct ofpact_controller *pause;
/* True if a packet was but is no longer MPLS (due to an MPLS pop action).
* This is a trigger for recirculation in cases where translating an action
}
}
-static void compose_recirculate_action(struct xlate_ctx *ctx);
+static void finish_freezing(struct xlate_ctx *ctx);
/* A controller may use OFPP_NONE as the ingress port to indicate that
* it did not arrive on a "real" port. 'ofpp_none_bundle' exists for
va_list args;
va_start(args, format);
- ctx->xin->report_hook(ctx->xin, ctx->recurse, format, args);
+ ctx->xin->report_hook(ctx->xin, ctx->indentation, format, args);
va_end(args);
}
}
static void
xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
{
- list_init(&xbridge->xbundles);
+ ovs_list_init(&xbridge->xbundles);
hmap_init(&xbridge->xports);
hmap_insert(&xcfg->xbridges, &xbridge->hmap_node,
hash_pointer(xbridge->ofproto, 0));
static void
xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
{
- list_init(&xbundle->xports);
- list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
+ ovs_list_init(&xbundle->xports);
+ ovs_list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
hmap_insert(&xcfg->xbundles, &xbundle->hmap_node,
hash_pointer(xbundle->ofbundle, 0));
}
if (xbundle) {
new_xport->xbundle = xbundle;
- list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
+ ovs_list_insert(&new_xport->xbundle->xports, &new_xport->bundle_node);
}
HMAP_FOR_EACH (pdscp, hmap_node, &xport->skb_priorities) {
* This needs to be called after editing the xlate configuration.
*
* Functions that edit the new xlate configuration are
- * xlate_<ofport/bundle/ofport>_set and xlate_<ofport/bundle/ofport>_remove.
+ * xlate_<ofproto/bundle/ofport>_set and xlate_<ofproto/bundle/ofport>_remove.
*
* A sample workflow:
*
}
hmap_remove(&xcfg->xbundles, &xbundle->hmap_node);
- list_remove(&xbundle->list_node);
+ ovs_list_remove(&xbundle->list_node);
bond_unref(xbundle->bond);
lacp_unref(xbundle->lacp);
free(xbundle->name);
}
if (xport->xbundle) {
- list_remove(&xport->bundle_node);
+ ovs_list_remove(&xport->bundle_node);
}
xport->xbundle = xbundle_lookup(new_xcfg, ofbundle);
if (xport->xbundle) {
- list_insert(&xport->xbundle->xports, &xport->bundle_node);
+ ovs_list_insert(&xport->xbundle->xports, &xport->bundle_node);
}
clear_skb_priorities(xport);
}
if (xport->xbundle) {
- list_remove(&xport->bundle_node);
+ ovs_list_remove(&xport->bundle_node);
}
clear_skb_priorities(xport);
bucket = group_first_live_bucket(ctx, group, depth);
group_dpif_unref(group);
- return bucket == NULL;
+ return bucket != NULL;
}
return false;
{
struct ofputil_bucket *best_bucket = NULL;
uint32_t best_score = 0;
- int i = 0;
struct ofputil_bucket *bucket;
const struct ovs_list *buckets;
group_dpif_get_buckets(group, &buckets);
LIST_FOR_EACH (bucket, list_node, buckets) {
if (bucket_is_alive(ctx, bucket, 0)) {
- uint32_t score = (hash_int(i, basis) & 0xffff) * bucket->weight;
+ uint32_t score =
+ (hash_int(bucket->bucket_id, basis) & 0xffff) * bucket->weight;
if (score >= best_score) {
best_bucket = bucket;
best_score = score;
}
}
- i++;
}
return best_bucket;
mirror_mask_t dup_mirrors;
struct ofbundle *out;
int out_vlan;
+ int snaplen;
/* Get the details of the mirror represented by the rightmost 1-bit. */
bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors),
- &vlans, &dup_mirrors, &out, &out_vlan);
+ &vlans, &dup_mirrors,
+ &out, &snaplen, &out_vlan);
ovs_assert(has_mirror);
+
/* If this mirror selects on the basis of VLAN, and it does not select
* 'vlan', then discard this mirror and go on to the next one. */
if (vlans) {
* done now to ensure that output_normal(), below, doesn't recursively
* output to the same mirrors. */
ctx->mirrors |= dup_mirrors;
+ ctx->mirror_snaplen = snaplen;
/* Send the packet to the mirror. */
if (out) {
/* output_normal() could have recursively output (to different
* mirrors), so make sure that we don't send duplicates. */
mirrors &= ~ctx->mirrors;
+ ctx->mirror_snaplen = 0;
}
}
bool use_recirc = false;
vid = output_vlan_to_vid(out_xbundle, vlan);
- if (list_is_empty(&out_xbundle->xports)) {
+ if (ovs_list_is_empty(&out_xbundle->xports)) {
/* Partially configured bundle with no slaves. Drop the packet. */
return;
} else if (!out_xbundle->bond) {
- xport = CONTAINER_OF(list_front(&out_xbundle->xports), struct xport,
+ xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport,
bundle_node);
} else {
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
ctx->nf_output_iface = NF_OUT_FLOOD;
}
+/* Returns true if 'flow' is an IPv4 packet whose destination lies in the
+ * link-local multicast range (224.0.0.x, per ip_is_local_multicast()) or an
+ * IPv6 packet destined to the all-hosts multicast address; returns false
+ * otherwise, including for non-IP packets.
+ *
+ * Side effect: for IPv4/IPv6 flows, unwildcards the destination address in
+ * 'wc', since the result depends on it. */
+static bool
+is_ip_local_multicast(const struct flow *flow, struct flow_wildcards *wc)
+{
+    if (flow->dl_type == htons(ETH_TYPE_IP)) {
+        memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
+        return ip_is_local_multicast(flow->nw_dst);
+    } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) {
+        memset(&wc->masks.ipv6_dst, 0xff, sizeof wc->masks.ipv6_dst);
+        return ipv6_is_all_hosts(&flow->ipv6_dst);
+    } else {
+        return false;
+    }
+}
+
static void
xlate_normal(struct xlate_ctx *ctx)
{
struct mcast_snooping *ms = ctx->xbridge->ms;
struct mcast_group *grp = NULL;
- if (is_igmp(flow)) {
+ if (is_igmp(flow, wc)) {
+ memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
if (mcast_snooping_is_membership(flow->tp_src) ||
mcast_snooping_is_query(flow->tp_src)) {
- if (ctx->xin->may_learn) {
+ if (ctx->xin->may_learn && ctx->xin->packet) {
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
in_xbundle, ctx->xin->packet);
}
xlate_normal_flood(ctx, in_xbundle, vlan);
}
return;
- } else if (is_mld(flow)) {
+ } else if (is_mld(flow, wc)) {
ctx->xout->slow |= SLOW_ACTION;
- if (ctx->xin->may_learn) {
+ if (ctx->xin->may_learn && ctx->xin->packet) {
update_mcast_snooping_table(ctx->xbridge, flow, vlan,
in_xbundle, ctx->xin->packet);
}
- if (is_mld_report(flow)) {
+ if (is_mld_report(flow, wc)) {
ovs_rwlock_rdlock(&ms->rwlock);
xlate_normal_mcast_send_mrouters(ctx, ms, in_xbundle, vlan);
xlate_normal_mcast_send_rports(ctx, ms, in_xbundle, vlan);
xlate_normal_flood(ctx, in_xbundle, vlan);
}
} else {
- if ((flow->dl_type == htons(ETH_TYPE_IP)
- && ip_is_local_multicast(flow->nw_dst))
- || (flow->dl_type == htons(ETH_TYPE_IPV6)
- && ipv6_is_all_hosts(&flow->ipv6_dst))) {
+ if (is_ip_local_multicast(flow, wc)) {
/* RFC4541: section 2.1.2, item 2: Packets with a dst IP
* address in the 224.0.0.x range which are not IGMP must
* be forwarded on all ports */
* 'cookie' (of length 'cookie_size' bytes) is passed back in the callback for
* each sampled packet. 'tunnel_out_port', if not ODPP_NONE, is added as the
* OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute. If 'include_actions', an
- * OVS_USERSPACE_ATTR_ACTIONS attribute is added.
+ * OVS_USERSPACE_ATTR_ACTIONS attribute is added. If 'emit_set_tunnel',
+ * sample(sampling_port=1) would translate into datapath sample action
+ * set(tunnel(...)), sample(...) and it is used for sampling egress tunnel
+ * information.
*/
static size_t
compose_sample_action(struct xlate_ctx *ctx,
true);
}
-/* If IPFIX is enabled, this appends a "sample" action to implement IPFIX to
- * 'ctx->odp_actions'. */
+/* If flow IPFIX is enabled, make sure IPFIX flow sample action
+ * at egress point of tunnel port is just in front of corresponding
+ * output action. If bridge IPFIX is enabled, this appends an IPFIX
+ * sample action to 'ctx->odp_actions'. */
static void
compose_ipfix_action(struct xlate_ctx *ctx, odp_port_t output_odp_port)
{
return;
}
- /* For output case, output_odp_port is valid*/
+ /* For output case, output_odp_port is valid. */
if (output_odp_port != ODPP_NONE) {
if (!dpif_ipfix_get_bridge_exporter_output_sampling(ipfix)) {
return;
static int
tnl_route_lookup_flow(const struct flow *oflow,
- struct in6_addr *ip, struct xport **out_port)
+ struct in6_addr *ip, struct in6_addr *src,
+ struct xport **out_port)
{
char out_dev[IFNAMSIZ];
struct xbridge *xbridge;
struct in6_addr dst;
dst = flow_tnl_dst(&oflow->tunnel);
- if (!ovs_router_lookup(&dst, out_dev, &gw)) {
+ if (!ovs_router_lookup(&dst, out_dev, src, &gw)) {
return -ENOENT;
}
return ofproto_dpif_execute_actions__(xbridge->ofproto, &flow, NULL,
&output.ofpact, sizeof output,
- ctx->recurse, ctx->resubmits, packet);
+ ctx->indentation, ctx->depth,
+ ctx->resubmits, packet);
}
static void
build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
const struct flow *flow, odp_port_t tunnel_odp_port)
{
+ struct netdev_tnl_build_header_params tnl_params;
struct ovs_action_push_tnl tnl_push_data;
struct xport *out_dev = NULL;
ovs_be32 s_ip = 0, d_ip = 0;
char buf_sip6[INET6_ADDRSTRLEN];
char buf_dip6[INET6_ADDRSTRLEN];
- err = tnl_route_lookup_flow(flow, &d_ip6, &out_dev);
+ err = tnl_route_lookup_flow(flow, &d_ip6, &s_ip6, &out_dev);
if (err) {
xlate_report(ctx, "native tunnel routing failed");
return err;
d_ip = in6_addr_get_mapped_ipv4(&d_ip6);
if (d_ip) {
- err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
- if (err) {
- xlate_report(ctx, "tunnel output device lacks IPv4 address");
- return err;
- }
- in6_addr_set_mapped_ipv4(&s_ip6, s_ip);
- } else {
- err = netdev_get_in6(out_dev->netdev, &s_ip6);
- if (err) {
- xlate_report(ctx, "tunnel output device lacks IPv6 address");
- return err;
- }
+ s_ip = in6_addr_get_mapped_ipv4(&s_ip6);
}
err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac);
ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6),
ETH_ADDR_ARGS(dmac), buf_dip6);
- err = tnl_port_build_header(xport->ofport, flow,
- dmac, smac, &s_ip6, &tnl_push_data);
+ netdev_init_tnl_build_header_params(&tnl_params, flow, &s_ip6, dmac, smac);
+ err = tnl_port_build_header(xport->ofport, &tnl_push_data, &tnl_params);
if (err) {
return err;
}
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 35);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 36);
memset(&flow_tnl, 0, sizeof flow_tnl);
if (!xport) {
} else if (xport->config & OFPUTIL_PC_NO_FWD) {
xlate_report(ctx, "OFPPC_NO_FWD set, skipping output");
return;
+ } else if (ctx->mirror_snaplen != 0 && xport->odp_port == ODPP_NONE) {
+ xlate_report(ctx, "Mirror truncate to ODPP_NONE, skipping output");
+ return;
} else if (check_stp) {
if (is_stp(&ctx->base_flow)) {
if (!xport_stp_should_forward_bpdu(xport) &&
xlate_action_set(ctx);
}
if (ctx->freezing) {
- compose_recirculate_action(ctx);
+ finish_freezing(ctx);
}
} else {
/* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and
} else {
odp_port = xport->odp_port;
out_port = odp_port;
- if (ofproto_has_vlan_splinters(ctx->xbridge->ofproto)) {
- ofp_port_t vlandev_port;
-
- wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
- vlandev_port = vsp_realdev_to_vlandev(ctx->xbridge->ofproto,
- ofp_port, flow->vlan_tci);
- if (vlandev_port != ofp_port) {
- out_port = ofp_port_to_odp_port(ctx->xbridge, vlandev_port);
- flow->vlan_tci = htons(0);
- }
- }
}
if (out_port != ODPP_NONE) {
/* Tunnel push-pop action is not compatible with
* IPFIX action. */
compose_ipfix_action(ctx, out_port);
+
+ /* Handle truncation of the mirrored packet. */
+ if (ctx->mirror_snaplen > 0 &&
+ ctx->mirror_snaplen < UINT16_MAX) {
+ struct ovs_action_trunc *trunc;
+
+ trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
+ OVS_ACTION_ATTR_TRUNC,
+ sizeof *trunc);
+ trunc->max_len = ctx->mirror_snaplen;
+ if (!ctx->xbridge->support.trunc) {
+ ctx->xout->slow |= SLOW_ACTION;
+ }
+ }
+
nl_msg_put_odp_port(ctx->odp_actions,
OVS_ACTION_ATTR_OUTPUT,
out_port);
- }
- }
+ }
+ }
}
ctx->sflow_odp_port = odp_port;
}
static void
-xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule)
+xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule, bool deepens)
{
struct rule_dpif *old_rule = ctx->rule;
ovs_be64 old_cookie = ctx->rule_cookie;
}
ctx->resubmits++;
- ctx->recurse++;
+
+ ctx->indentation++;
+ ctx->depth += deepens;
ctx->rule = rule;
ctx->rule_cookie = rule_dpif_get_flow_cookie(rule);
actions = rule_dpif_get_actions(rule);
do_xlate_actions(actions->ofpacts, actions->ofpacts_len, ctx);
ctx->rule_cookie = old_cookie;
ctx->rule = old_rule;
- ctx->recurse--;
+ ctx->depth -= deepens;
+ ctx->indentation--;
}
static bool
xlate_resubmit_resource_check(struct xlate_ctx *ctx)
{
- if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) {
- XLATE_REPORT_ERROR(ctx, "resubmit actions recursed over %d times",
- MAX_RESUBMIT_RECURSION);
+ if (ctx->depth >= MAX_DEPTH) {
+ XLATE_REPORT_ERROR(ctx, "over max translation depth %d", MAX_DEPTH);
ctx->error = XLATE_RECURSION_TOO_DEEP;
- } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) {
+ } else if (ctx->resubmits >= MAX_RESUBMITS) {
XLATE_REPORT_ERROR(ctx, "over %d resubmit actions", MAX_RESUBMITS);
ctx->error = XLATE_TOO_MANY_RESUBMITS;
} else if (ctx->odp_actions->size > UINT16_MAX) {
rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
ctx->tables_version,
- &ctx->xin->flow, ctx->xin->wc,
+ &ctx->xin->flow, ctx->wc,
ctx->xin->resubmit_stats,
&ctx->table_id, in_port,
may_packet_in, honor_table_miss);
if (OVS_UNLIKELY(ctx->xin->resubmit_hook)) {
- ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse + 1);
+ ctx->xin->resubmit_hook(ctx->xin, rule, ctx->indentation + 1);
}
if (rule) {
entry->u.rule = rule;
rule_dpif_ref(rule);
}
- xlate_recursively(ctx, rule);
+ xlate_recursively(ctx, rule, table_id <= old_table_id);
}
ctx->table_id = old_table_id;
bool old_was_mpls = ctx->was_mpls;
ofpacts_execute_action_set(&action_list, &action_set);
- ctx->recurse++;
+ ctx->indentation++;
+ ctx->depth++;
do_xlate_actions(action_list.data, action_list.size, ctx);
- ctx->recurse--;
+ ctx->depth--;
+ ctx->indentation--;
ofpbuf_uninit(&action_list);
- /* Check if need to recirculate. */
+ /* Check if need to freeze. */
if (ctx->freezing) {
- compose_recirculate_action(ctx);
+ finish_freezing(ctx);
}
/* Roll back flow to previous state.
{
const char *selection_method = group_dpif_get_selection_method(group);
+ /* Select groups may access flow keys beyond L2 in order to
+ * select a bucket. Recirculate as appropriate to make this possible.
+ */
+ if (ctx->was_mpls) {
+ ctx_trigger_freeze(ctx);
+ }
+
if (selection_method[0] == '\0') {
xlate_default_select_group(ctx, group);
} else if (!strcasecmp("hash", selection_method)) {
static void
execute_controller_action(struct xlate_ctx *ctx, int len,
enum ofp_packet_in_reason reason,
- uint16_t controller_id)
+ uint16_t controller_id,
+ const uint8_t *userdata, size_t userdata_len)
{
+ struct dp_packet_batch batch;
struct dp_packet *packet;
ctx->xout->slow |= SLOW_CONTROLLER;
}
packet = dp_packet_clone(ctx->xin->packet);
-
- odp_execute_actions(NULL, &packet, 1, false,
+ packet_batch_init_packet(&batch, packet);
+ odp_execute_actions(NULL, &batch, false,
ctx->odp_actions->data, ctx->odp_actions->size, NULL);
/* A packet sent by an action in a table-miss rule is considered an
.oam = OAM_PACKET_IN,
.pin = {
.up = {
- .packet = dp_packet_steal_data(packet),
- .len = packet_len,
- .reason = reason,
- .table_id = ctx->table_id,
- .cookie = ctx->rule_cookie,
+ .public = {
+ .packet = dp_packet_steal_data(packet),
+ .packet_len = packet_len,
+ .reason = reason,
+ .table_id = ctx->table_id,
+ .cookie = ctx->rule_cookie,
+ .userdata = (userdata_len
+ ? xmemdup(userdata, userdata_len)
+ : NULL),
+ .userdata_len = userdata_len,
+ }
},
.max_len = len,
},
};
- flow_get_metadata(&ctx->xin->flow, &am->pin.up.flow_metadata);
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
dp_packet_delete(packet);
}
static void
-compose_recirculate_action__(struct xlate_ctx *ctx, uint8_t table)
+emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state)
{
- struct frozen_metadata md;
- uint32_t id;
-
- frozen_metadata_from_flow(&md, &ctx->xin->flow);
+ struct ofproto_async_msg *am = xmalloc(sizeof *am);
+ *am = (struct ofproto_async_msg) {
+ .controller_id = ctx->pause->controller_id,
+ .oam = OAM_PACKET_IN,
+ .pin = {
+ .up = {
+ .public = {
+ .userdata = xmemdup(ctx->pause->userdata,
+ ctx->pause->userdata_len),
+ .userdata_len = ctx->pause->userdata_len,
+ .packet = xmemdup(dp_packet_data(ctx->xin->packet),
+ dp_packet_size(ctx->xin->packet)),
+ .packet_len = dp_packet_size(ctx->xin->packet),
+ .reason = ctx->pause->reason,
+ },
+ .bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
+ .stack = xmemdup(state->stack,
+ state->n_stack * sizeof *state->stack),
+ .n_stack = state->n_stack,
+ .mirrors = state->mirrors,
+ .conntracked = state->conntracked,
+ .actions = xmemdup(state->ofpacts, state->ofpacts_len),
+ .actions_len = state->ofpacts_len,
+ .action_set = xmemdup(state->action_set,
+ state->action_set_len),
+ .action_set_len = state->action_set_len,
+ },
+ .max_len = UINT16_MAX,
+ },
+ };
+ flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata);
+ ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am);
+}
+static void
+finish_freezing__(struct xlate_ctx *ctx, uint8_t table)
+{
ovs_assert(ctx->freezing);
struct frozen_state state = {
.table_id = table,
.ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto),
- .metadata = md,
.stack = ctx->stack.data,
.n_stack = ctx->stack.size / sizeof(union mf_subvalue),
.mirrors = ctx->mirrors,
.action_set = ctx->action_set.data,
.action_set_len = ctx->action_set.size,
};
+ frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow);
- /* Allocate a unique recirc id for the given metadata state in the
- * flow. An existing id, with a new reference to the corresponding
- * recirculation context, will be returned if possible.
- * The life-cycle of this recirc id is managed by associating it
- * with the udpif key ('ukey') created for each new datapath flow. */
- id = recirc_alloc_id_ctx(&state);
- if (!id) {
- XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
- ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
- return;
- }
- recirc_refs_add(&ctx->xout->recircs, id);
+ if (ctx->pause) {
+ if (ctx->xin->packet) {
+ emit_continuation(ctx, &state);
+ }
+ } else {
+ /* Allocate a unique recirc id for the given metadata state in the
+ * flow. An existing id, with a new reference to the corresponding
+ * recirculation context, will be returned if possible.
+ * The life-cycle of this recirc id is managed by associating it
+ * with the udpif key ('ukey') created for each new datapath flow. */
+ uint32_t id = recirc_alloc_id_ctx(&state);
+ if (!id) {
+ XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id");
+ ctx->error = XLATE_NO_RECIRCULATION_CONTEXT;
+ return;
+ }
+ recirc_refs_add(&ctx->xout->recircs, id);
- nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id);
+ }
/* Undo changes done by freezing. */
ctx_cancel_freeze(ctx);
/* Called only when we're freezing. */
static void
-compose_recirculate_action(struct xlate_ctx *ctx)
+finish_freezing(struct xlate_ctx *ctx)
{
xlate_commit_actions(ctx);
- compose_recirculate_action__(ctx, 0);
+ finish_freezing__(ctx, 0);
}
/* Fork the pipeline here. The current packet will continue processing the
compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table)
{
ctx->freezing = true;
- compose_recirculate_action__(ctx, table);
+ finish_freezing__(ctx, table);
}
static void
int n = flow_count_mpls_labels(flow, ctx->wc);
if (flow_pop_mpls(flow, n, eth_type, ctx->wc)) {
- if (ctx->xbridge->support.odp.recirc) {
+ if (!eth_type_mpls(eth_type) && ctx->xbridge->support.odp.recirc) {
ctx->was_mpls = true;
}
} else if (n >= FLOW_MAX_MPLS_LABELS) {
for (i = 0; i < ids->n_controllers; i++) {
execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL,
- ids->cnt_ids[i]);
+ ids->cnt_ids[i], NULL, 0);
}
/* Stop processing for current table. */
set_mpls_lse_ttl(&flow->mpls_lse[0], ttl);
return false;
} else {
- execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0);
+ execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0,
+ NULL, 0);
}
}
(ctx->in_group ? OFPR_GROUP
: ctx->in_action_set ? OFPR_ACTION_SET
: OFPR_ACTION),
- 0);
+ 0, NULL, 0);
break;
case OFPP_NONE:
break;
}
}
+/* Translates an "output with truncation" action: emits an
+ * OVS_ACTION_ATTR_TRUNC datapath action capping the packet at 'max_len'
+ * bytes, then translates an ordinary output to 'port'.
+ *
+ * Reserved OpenFlow ports (OFPP_TABLE, OFPP_NORMAL, etc.) are not supported
+ * and are reported and skipped.  Ports without a datapath port (ODPP_NONE,
+ * e.g. patch ports) are rejected; see the comment below.  If the datapath
+ * lacks truncate support, the flow is marked SLOW_ACTION so it is handled
+ * in userspace instead. */
+static void
+xlate_output_trunc_action(struct xlate_ctx *ctx,
+                          ofp_port_t port, uint32_t max_len)
+{
+    bool support_trunc = ctx->xbridge->support.trunc;
+    struct ovs_action_trunc *trunc;
+    char name[OFP_MAX_PORT_NAME_LEN];
+
+    switch (port) {
+    case OFPP_TABLE:
+    case OFPP_NORMAL:
+    case OFPP_FLOOD:
+    case OFPP_ALL:
+    case OFPP_CONTROLLER:
+    case OFPP_NONE:
+        ofputil_port_to_string(port, name, sizeof name);
+        xlate_report(ctx, "output_trunc does not support port: %s", name);
+        break;
+    case OFPP_LOCAL:
+    case OFPP_IN_PORT:
+    default:
+        if (port != ctx->xin->flow.in_port.ofp_port) {
+            const struct xport *xport = get_ofp_port(ctx->xbridge, port);
+
+            if (xport == NULL || xport->odp_port == ODPP_NONE) {
+                /* Truncation is applied by the output action that follows
+                 * it, so if the output port is a patch port (which has no
+                 * datapath port), the behavior would be unpredictable.  For
+                 * simplicity, disallow this case. */
+                ofputil_port_to_string(port, name, sizeof name);
+                XLATE_REPORT_ERROR(ctx, "bridge %s: "
+                         "output_trunc does not support port: %s",
+                         ctx->xbridge->name, name);
+                break;
+            }
+
+            trunc = nl_msg_put_unspec_uninit(ctx->odp_actions,
+                                OVS_ACTION_ATTR_TRUNC,
+                                sizeof *trunc);
+            trunc->max_len = max_len;
+            xlate_output_action(ctx, port, max_len, false);
+            if (!support_trunc) {
+                /* Datapath cannot truncate; punt this flow to userspace. */
+                ctx->xout->slow |= SLOW_ACTION;
+            }
+        } else {
+            xlate_report(ctx, "skipping output to input port");
+        }
+        break;
+    }
+}
+
static void
xlate_enqueue_action(struct xlate_ctx *ctx,
const struct ofpact_enqueue *enqueue)
xlate_sample_action(struct xlate_ctx *ctx,
const struct ofpact_sample *os)
{
+ odp_port_t output_odp_port = ODPP_NONE;
+ odp_port_t tunnel_out_port = ODPP_NONE;
+ struct dpif_ipfix *ipfix = ctx->xbridge->ipfix;
+ bool emit_set_tunnel = false;
+
+ if (!ipfix || ctx->xin->flow.in_port.ofp_port == OFPP_NONE) {
+ return;
+ }
+
/* Scale the probability from 16-bit to 32-bit while representing
* the same percentage. */
uint32_t probability = (os->probability << 16) | os->probability;
return;
}
- xlate_commit_actions(ctx);
+    /* If the sampling port in the flow sample action equals the ingress
+     * port, this sample action is an input-port action. */
+ if (os->sampling_port != OFPP_NONE &&
+ os->sampling_port != ctx->xin->flow.in_port.ofp_port) {
+ output_odp_port = ofp_port_to_odp_port(ctx->xbridge,
+ os->sampling_port);
+ if (output_odp_port == ODPP_NONE) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_WARN_RL(&rl, "can't use unknown port %d in flow sample "
+ "action", os->sampling_port);
+ return;
+ }
+
+ if (dpif_ipfix_get_flow_exporter_tunnel_sampling(ipfix,
+ os->collector_set_id)
+ && dpif_ipfix_get_tunnel_port(ipfix, output_odp_port)) {
+ tunnel_out_port = output_odp_port;
+ emit_set_tunnel = true;
+ }
+ }
+
+ xlate_commit_actions(ctx);
+ /* If 'emit_set_tunnel', sample(sampling_port=1) would translate
+ * into datapath sample action set(tunnel(...)), sample(...) and
+ * it is used for sampling egress tunnel information. */
+ if (emit_set_tunnel) {
+ const struct xport *xport = get_ofp_port(ctx->xbridge,
+ os->sampling_port);
+
+ if (xport && xport->is_tunnel) {
+ struct flow *flow = &ctx->xin->flow;
+ tnl_port_send(xport->ofport, flow, ctx->wc);
+ if (!ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+ struct flow_tnl flow_tnl = flow->tunnel;
+
+ commit_odp_tunnel_action(flow, &ctx->base_flow,
+ ctx->odp_actions);
+ flow->tunnel = flow_tnl;
+ }
+ } else {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_WARN_RL(&rl, "sampling_port:%d should be a tunnel port.",
+ os->sampling_port);
+ }
+ }
union user_action_cookie cookie = {
.flow_sample = {
.collector_set_id = os->collector_set_id,
.obs_domain_id = os->obs_domain_id,
.obs_point_id = os->obs_point_id,
+ .output_odp_port = output_odp_port,
}
};
compose_sample_action(ctx, probability, &cookie, sizeof cookie.flow_sample,
- ODPP_NONE, false);
+ tunnel_out_port, false);
}
static bool
for (; a < end; a = ofpact_next(a)) {
switch (a->type) {
case OFPACT_OUTPUT_REG:
+ case OFPACT_OUTPUT_TRUNC:
case OFPACT_GROUP:
case OFPACT_OUTPUT:
case OFPACT_CONTROLLER:
}
}
-#define CHECK_MPLS_RECIRCULATION() \
- if (ctx->was_mpls) { \
- ctx_trigger_freeze(ctx); \
- break; \
- }
-#define CHECK_MPLS_RECIRCULATION_IF(COND) \
- if (COND) { \
- CHECK_MPLS_RECIRCULATION(); \
- }
-
static void
-put_ct_mark(const struct flow *flow, struct flow *base_flow,
- struct ofpbuf *odp_actions, struct flow_wildcards *wc)
+put_ct_mark(const struct flow *flow, struct ofpbuf *odp_actions,
+ struct flow_wildcards *wc)
{
- struct {
- uint32_t key;
- uint32_t mask;
- } odp_attr;
-
- odp_attr.key = flow->ct_mark;
- odp_attr.mask = wc->masks.ct_mark;
+ if (wc->masks.ct_mark) {
+ struct {
+ uint32_t key;
+ uint32_t mask;
+ } *odp_ct_mark;
- if (odp_attr.mask && odp_attr.key != base_flow->ct_mark) {
- nl_msg_put_unspec(odp_actions, OVS_CT_ATTR_MARK, &odp_attr,
- sizeof(odp_attr));
+ odp_ct_mark = nl_msg_put_unspec_uninit(odp_actions, OVS_CT_ATTR_MARK,
+ sizeof(*odp_ct_mark));
+ odp_ct_mark->key = flow->ct_mark & wc->masks.ct_mark;
+ odp_ct_mark->mask = wc->masks.ct_mark;
}
}
static void
-put_ct_label(const struct flow *flow, struct flow *base_flow,
- struct ofpbuf *odp_actions, struct flow_wildcards *wc)
+put_ct_label(const struct flow *flow, struct ofpbuf *odp_actions,
+ struct flow_wildcards *wc)
{
- if (!ovs_u128_is_zero(&wc->masks.ct_label)
- && !ovs_u128_equals(&flow->ct_label, &base_flow->ct_label)) {
+ if (!ovs_u128_is_zero(wc->masks.ct_label)) {
struct {
ovs_u128 key;
ovs_u128 mask;
odp_ct_label = nl_msg_put_unspec_uninit(odp_actions,
OVS_CT_ATTR_LABELS,
sizeof(*odp_ct_label));
- odp_ct_label->key = flow->ct_label;
+ odp_ct_label->key = ovs_u128_and(flow->ct_label, wc->masks.ct_label);
odp_ct_label->mask = wc->masks.ct_label;
}
}
compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc)
{
ovs_u128 old_ct_label = ctx->base_flow.ct_label;
+ ovs_u128 old_ct_label_mask = ctx->wc->masks.ct_label;
uint32_t old_ct_mark = ctx->base_flow.ct_mark;
+ uint32_t old_ct_mark_mask = ctx->wc->masks.ct_mark;
size_t ct_offset;
uint16_t zone;
/* Process nested actions first, to populate the key. */
ctx->ct_nat_action = NULL;
+ ctx->wc->masks.ct_mark = 0;
+ ctx->wc->masks.ct_label.u64.hi = ctx->wc->masks.ct_label.u64.lo = 0;
do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx);
if (ofc->zone_src.field) {
nl_msg_put_flag(ctx->odp_actions, OVS_CT_ATTR_COMMIT);
}
nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone);
- put_ct_mark(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc);
- put_ct_label(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc);
+ put_ct_mark(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
+ put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
put_ct_helper(ctx->odp_actions, ofc);
put_ct_nat(ctx);
ctx->ct_nat_action = NULL;
/* Restore the original ct fields in the key. These should only be exposed
* after recirculation to another table. */
ctx->base_flow.ct_mark = old_ct_mark;
+ ctx->wc->masks.ct_mark = old_ct_mark_mask;
ctx->base_flow.ct_label = old_ct_label;
+ ctx->wc->masks.ct_label = old_ct_label_mask;
if (ofc->recirc_table == NX_CT_RECIRC_NONE) {
/* If we do not recirculate as part of this action, hide the results of
}
}
+/* If the packet being translated by 'ctx' was previously MPLS
+ * ('ctx->was_mpls') and action 'a' might depend on packet fields that are
+ * only revealed after recirculation, triggers a freeze via
+ * ctx_trigger_freeze().  Does nothing when translation is already exiting.
+ *
+ * Output actions, set actions that touch only L2 fields, and group actions
+ * are special-cased to avoid the overhead of recirculation in common
+ * use-cases; every other action type conservatively recirculates. */
+static void
+recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
+{
+    /* No need to recirculate if already exiting. */
+    if (ctx->exit) {
+        return;
+    }
+
+    /* Do not consider recirculating unless the packet was previously MPLS. */
+    if (!ctx->was_mpls) {
+        return;
+    }
+
+    /* Special case these actions, only recirculating if necessary.
+     * This avoids the overhead of recirculation in common use-cases.
+     */
+    switch (a->type) {
+
+    /* Output actions do not require recirculation. */
+    case OFPACT_OUTPUT:
+    case OFPACT_OUTPUT_TRUNC:
+    case OFPACT_ENQUEUE:
+    case OFPACT_OUTPUT_REG:
+    /* Set actions that don't touch L3+ fields do not require recirculation. */
+    case OFPACT_SET_VLAN_VID:
+    case OFPACT_SET_VLAN_PCP:
+    case OFPACT_SET_ETH_SRC:
+    case OFPACT_SET_ETH_DST:
+    case OFPACT_SET_TUNNEL:
+    case OFPACT_SET_QUEUE:
+    /* If actions of a group require recirculation that can be detected
+     * when translating them. */
+    case OFPACT_GROUP:
+        return;
+
+    /* Set-field actions that don't touch L3+ fields don't require
+     * recirculation. */
+    case OFPACT_SET_FIELD:
+        if (mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field)) {
+            break;
+        }
+        return;
+
+    /* For simplicity, recirculate in all other cases. */
+    case OFPACT_CONTROLLER:
+    case OFPACT_BUNDLE:
+    case OFPACT_STRIP_VLAN:
+    case OFPACT_PUSH_VLAN:
+    case OFPACT_SET_IPV4_SRC:
+    case OFPACT_SET_IPV4_DST:
+    case OFPACT_SET_IP_DSCP:
+    case OFPACT_SET_IP_ECN:
+    case OFPACT_SET_IP_TTL:
+    case OFPACT_SET_L4_SRC_PORT:
+    case OFPACT_SET_L4_DST_PORT:
+    case OFPACT_REG_MOVE:
+    case OFPACT_STACK_PUSH:
+    case OFPACT_STACK_POP:
+    case OFPACT_DEC_TTL:
+    case OFPACT_SET_MPLS_LABEL:
+    case OFPACT_SET_MPLS_TC:
+    case OFPACT_SET_MPLS_TTL:
+    case OFPACT_DEC_MPLS_TTL:
+    case OFPACT_PUSH_MPLS:
+    case OFPACT_POP_MPLS:
+    case OFPACT_POP_QUEUE:
+    case OFPACT_FIN_TIMEOUT:
+    case OFPACT_RESUBMIT:
+    case OFPACT_LEARN:
+    case OFPACT_CONJUNCTION:
+    case OFPACT_MULTIPATH:
+    case OFPACT_NOTE:
+    case OFPACT_EXIT:
+    case OFPACT_SAMPLE:
+    case OFPACT_UNROLL_XLATE:
+    case OFPACT_CT:
+    case OFPACT_NAT:
+    case OFPACT_DEBUG_RECIRC:
+    case OFPACT_METER:
+    case OFPACT_CLEAR_ACTIONS:
+    case OFPACT_WRITE_ACTIONS:
+    case OFPACT_WRITE_METADATA:
+    case OFPACT_GOTO_TABLE:
+    default:
+        break;
+    }
+
+    /* Recirculate */
+    ctx_trigger_freeze(ctx);
+}
+
static void
do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
struct xlate_ctx *ctx)
break;
}
+ recirc_for_mpls(a, ctx);
+
if (ctx->exit) {
/* Check if need to store the remaining actions for later
* execution. */
case OFPACT_CONTROLLER:
controller = ofpact_get_CONTROLLER(a);
- execute_controller_action(ctx, controller->max_len,
- controller->reason,
- controller->controller_id);
+ if (controller->pause) {
+ ctx->pause = controller;
+ ctx->xout->slow |= SLOW_CONTROLLER;
+ ctx_trigger_freeze(ctx);
+ a = ofpact_next(a);
+ } else {
+ execute_controller_action(ctx, controller->max_len,
+ controller->reason,
+ controller->controller_id,
+ controller->userdata,
+ controller->userdata_len);
+ }
break;
case OFPACT_ENQUEUE:
break;
case OFPACT_SET_IPV4_SRC:
- CHECK_MPLS_RECIRCULATION();
if (flow->dl_type == htons(ETH_TYPE_IP)) {
memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
break;
case OFPACT_SET_IPV4_DST:
- CHECK_MPLS_RECIRCULATION();
if (flow->dl_type == htons(ETH_TYPE_IP)) {
memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
break;
case OFPACT_SET_IP_DSCP:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_tos |= IP_DSCP_MASK;
flow->nw_tos &= ~IP_DSCP_MASK;
break;
case OFPACT_SET_IP_ECN:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_tos |= IP_ECN_MASK;
flow->nw_tos &= ~IP_ECN_MASK;
break;
case OFPACT_SET_IP_TTL:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow)) {
wc->masks.nw_ttl = 0xff;
flow->nw_ttl = ofpact_get_SET_IP_TTL(a)->ttl;
break;
case OFPACT_SET_L4_SRC_PORT:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
break;
case OFPACT_SET_L4_DST_PORT:
- CHECK_MPLS_RECIRCULATION();
if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
break;
case OFPACT_RESUBMIT:
- /* Freezing complicates resubmit. There are two cases:
- *
- * - If mpls_pop has been executed, then the flow table lookup
- * as part of resubmit might depend on fields that can only
- * be obtained via recirculation, so the resubmit itself
- * triggers recirculation and we need to make sure that the
- * resubmit is executed again after recirculation.
- * Therefore, in this case we trigger recirculation and let
- * the code following this "switch" append the resubmit to
- * the post-recirculation actions.
- *
- * - Otherwise, some action in the flow entry found by resubmit
- * might trigger freezing. If that happens, then we do not
- * want to execute the resubmit again during thawing, so we
- * want to skip back to the head of the loop to avoid that,
- * only adding any actions that follow the resubmit to the
- * frozen actions.
+ /* Freezing complicates resubmit. Some action in the flow
+ * entry found by resubmit might trigger freezing. If that
+             * happens, then we do not want to execute the resubmit again
+             * during thawing, so we want to skip back to the head of the loop
+ * to avoid that, only adding any actions that follow the resubmit
+ * to the frozen actions.
*/
- if (ctx->was_mpls) {
- ctx_trigger_freeze(ctx);
- break;
- }
xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a));
continue;
break;
case OFPACT_REG_MOVE:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->dst.field) ||
- mf_is_l3_or_higher(ofpact_get_REG_MOVE(a)->src.field));
nxm_execute_reg_move(ofpact_get_REG_MOVE(a), flow, wc);
break;
case OFPACT_SET_FIELD:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_SET_FIELD(a)->field));
set_field = ofpact_get_SET_FIELD(a);
mf = set_field->field;
}
/* A flow may wildcard nw_frag. Do nothing if setting a transport
* header field on a packet that does not have them. */
- mf_mask_field_and_prereqs(mf, wc);
+ mf_mask_field_and_prereqs__(mf, &set_field->mask, wc);
if (mf_are_prereqs_ok(mf, flow)) {
mf_set_flow_value_masked(mf, &set_field->value,
&set_field->mask, flow);
break;
case OFPACT_STACK_PUSH:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_STACK_PUSH(a)->subfield.field));
nxm_execute_stack_push(ofpact_get_STACK_PUSH(a), flow, wc,
&ctx->stack);
break;
case OFPACT_STACK_POP:
- CHECK_MPLS_RECIRCULATION_IF(
- mf_is_l3_or_higher(ofpact_get_STACK_POP(a)->subfield.field));
nxm_execute_stack_pop(ofpact_get_STACK_POP(a), flow, wc,
&ctx->stack);
break;
case OFPACT_PUSH_MPLS:
- /* Recirculate if it is an IP packet with a zero ttl. This may
- * indicate that the packet was previously MPLS and an MPLS pop
- * action converted it to IP. In this case recirculating should
- * reveal the IP TTL which is used as the basis for a new MPLS
- * LSE. */
- CHECK_MPLS_RECIRCULATION_IF(
- !flow_count_mpls_labels(flow, wc)
- && flow->nw_ttl == 0
- && is_ip_any(flow));
compose_mpls_push_action(ctx, ofpact_get_PUSH_MPLS(a));
break;
case OFPACT_POP_MPLS:
- CHECK_MPLS_RECIRCULATION();
compose_mpls_pop_action(ctx, ofpact_get_POP_MPLS(a)->ethertype);
break;
case OFPACT_SET_MPLS_LABEL:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_label_action(
ctx, ofpact_get_SET_MPLS_LABEL(a)->label);
break;
case OFPACT_SET_MPLS_TC:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_tc_action(ctx, ofpact_get_SET_MPLS_TC(a)->tc);
break;
case OFPACT_SET_MPLS_TTL:
- CHECK_MPLS_RECIRCULATION();
compose_set_mpls_ttl_action(ctx, ofpact_get_SET_MPLS_TTL(a)->ttl);
break;
case OFPACT_DEC_MPLS_TTL:
- CHECK_MPLS_RECIRCULATION();
if (compose_dec_mpls_ttl_action(ctx)) {
return;
}
break;
case OFPACT_DEC_TTL:
- CHECK_MPLS_RECIRCULATION();
wc->masks.nw_ttl = 0xff;
if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
return;
break;
case OFPACT_MULTIPATH:
- CHECK_MPLS_RECIRCULATION();
multipath_execute(ofpact_get_MULTIPATH(a), flow, wc);
break;
case OFPACT_BUNDLE:
- CHECK_MPLS_RECIRCULATION();
xlate_bundle_action(ctx, ofpact_get_BUNDLE(a));
break;
xlate_output_reg_action(ctx, ofpact_get_OUTPUT_REG(a));
break;
+ case OFPACT_OUTPUT_TRUNC:
+ xlate_output_trunc_action(ctx, ofpact_get_OUTPUT_TRUNC(a)->port,
+ ofpact_get_OUTPUT_TRUNC(a)->max_len);
+ break;
+
case OFPACT_LEARN:
- CHECK_MPLS_RECIRCULATION();
xlate_learn_action(ctx, ofpact_get_LEARN(a));
break;
break;
}
case OFPACT_FIN_TIMEOUT:
- CHECK_MPLS_RECIRCULATION();
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
break;
break;
case OFPACT_CT:
- CHECK_MPLS_RECIRCULATION();
compose_conntrack_action(ctx, ofpact_get_CT(a));
break;
xin->resubmit_hook = NULL;
xin->report_hook = NULL;
xin->resubmit_stats = NULL;
- xin->recurse = 0;
+ xin->indentation = 0;
+ xin->depth = 0;
xin->resubmits = 0;
xin->wc = wc;
xin->odp_actions = odp_actions;
+/* Removes every skb_priority_to_dscp node from 'xport''s skb_priorities
+ * hmap and frees it, leaving the map empty. */
static void
clear_skb_priorities(struct xport *xport)
{
-    struct skb_priority_to_dscp *pdscp, *next;
+    struct skb_priority_to_dscp *pdscp;
-    HMAP_FOR_EACH_SAFE (pdscp, next, hmap_node, &xport->skb_priorities) {
-        hmap_remove(&xport->skb_priorities, &pdscp->hmap_node);
+    HMAP_FOR_EACH_POP (pdscp, hmap_node, &xport->skb_priorities) {
        free(pdscp);
    }
}
* Avoid the problem here by making sure that only the low 8 bits of
* either field can be unwildcarded for ICMP.
*/
- if (is_icmpv4(&ctx->xin->flow) || is_icmpv6(&ctx->xin->flow)) {
+ if (is_icmpv4(&ctx->xin->flow, NULL) || is_icmpv6(&ctx->xin->flow, NULL)) {
ctx->wc->masks.tp_src &= htons(UINT8_MAX);
ctx->wc->masks.tp_dst &= htons(UINT8_MAX);
}
union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
uint64_t action_set_stub[1024 / 8];
uint64_t frozen_actions_stub[1024 / 8];
- struct flow_wildcards scratch_wc;
uint64_t actions_stub[256 / 8];
struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
struct xlate_ctx ctx = {
.xbridge = xbridge,
.stack = OFPBUF_STUB_INITIALIZER(stack_stub),
.rule = xin->rule,
- .wc = xin->wc ? xin->wc : &scratch_wc,
+ .wc = (xin->wc
+ ? xin->wc
+ : &(struct flow_wildcards) { .masks = { .dl_type = 0 } }),
.odp_actions = xin->odp_actions ? xin->odp_actions : &scratch_actions,
- .recurse = xin->recurse,
+ .indentation = xin->indentation,
+ .depth = xin->depth,
.resubmits = xin->resubmits,
.in_group = false,
.in_action_set = false,
.freezing = false,
.frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub),
+ .pause = NULL,
.was_mpls = false,
.conntracked = false,
};
/* 'base_flow' reflects the packet as it came in, but we need it to reflect
- * the packet as the datapath will treat it for output actions:
- *
- * - Our datapath doesn't retain tunneling information without us
- * re-setting it, so clear the tunnel data.
- *
- * - For VLAN splinters, a higher layer may pretend that the packet
- * came in on 'flow->in_port.ofp_port' with 'flow->vlan_tci'
- * attached, because that's how we want to treat it from an OpenFlow
- * perspective. But from the datapath's perspective it actually came
- * in on a VLAN device without any VLAN attached. So here we put the
- * datapath's view of the VLAN information in 'base_flow' to ensure
- * correct treatment.
+ * the packet as the datapath will treat it for output actions. Our
+ * datapath doesn't retain tunneling information without us re-setting
+ * it, so clear the tunnel data.
*/
+
memset(&ctx.base_flow.tunnel, 0, sizeof ctx.base_flow.tunnel);
- if (flow->in_port.ofp_port
- != vsp_realdev_to_vlandev(xbridge->ofproto,
- flow->in_port.ofp_port,
- flow->vlan_tci)) {
- ctx.base_flow.vlan_tci = 0;
- }
ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
- if (xin->wc) {
- xlate_wc_init(&ctx);
- }
+ xlate_wc_init(&ctx);
COVERAGE_INC(xlate_actions);
if (!xin->ofpacts && !ctx.rule) {
ctx.rule = rule_dpif_lookup_from_table(
- ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
+ ctx.xbridge->ofproto, ctx.tables_version, flow, ctx.wc,
ctx.xin->resubmit_stats, &ctx.table_id,
flow->in_port.ofp_port, true, true);
if (ctx.xin->resubmit_stats) {
xlate_action_set(&ctx);
}
if (ctx.freezing) {
- compose_recirculate_action(&ctx);
+ finish_freezing(&ctx);
}
}
}
}
- if (xin->wc) {
- xlate_wc_finish(&ctx);
- }
+ xlate_wc_finish(&ctx);
exit:
ofpbuf_uninit(&ctx.stack);
return ctx.error;
}
-/* Sends 'packet' out 'ofport'.
+/* Resumes a translation that was interrupted by a "pause" controller action,
+ * using the frozen pipeline state carried in 'pin'.  Re-extracts the flow
+ * from the packet embedded in 'pin', restores the frozen state (stack,
+ * mirrors, conntrack flag, remaining actions and action set), and translates
+ * the remaining actions, appending datapath actions to 'odp_actions'.
+ * '*slow' receives any slow-path reason produced by the translation.
+ *
+ * Returns 0 on success, or OFPERR_NXR_STALE if the bridge identified by
+ * 'pin' no longer exists. */
+enum ofperr
+xlate_resume(struct ofproto_dpif *ofproto,
+             const struct ofputil_packet_in_private *pin,
+             struct ofpbuf *odp_actions,
+             enum slow_path_reason *slow)
+{
+    struct dp_packet packet;
+    dp_packet_use_const(&packet, pin->public.packet,
+                        pin->public.packet_len);
+
+    struct flow flow;
+    flow_extract(&packet, &flow);
+
+    struct xlate_in xin;
+    xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags),
+                  &packet, NULL, odp_actions);
+
+    struct ofpact_note noop;
+    ofpact_init_NOTE(&noop);
+    noop.length = 0;
+
+    bool any_actions = pin->actions_len > 0;
+    struct frozen_state state = {
+        .table_id = 0,     /* Not the table where NXAST_PAUSE was executed. */
+        .ofproto_uuid = pin->bridge,
+        .stack = pin->stack,
+        .n_stack = pin->n_stack,
+        .mirrors = pin->mirrors,
+        .conntracked = pin->conntracked,
+
+        /* When there are no actions, xlate_actions() will search the flow
+         * table.  We don't want it to do that (we want it to resume), so
+         * supply a no-op action if there aren't any.
+         *
+         * (We can't necessarily avoid translating actions entirely if there
+         * aren't any actions, because there might be some finishing-up to do
+         * at the end of the pipeline, and we don't check for those
+         * conditions.) */
+        .ofpacts = any_actions ? pin->actions : &noop.ofpact,
+        .ofpacts_len = any_actions ? pin->actions_len : sizeof noop,
+
+        .action_set = pin->action_set,
+        .action_set_len = pin->action_set_len,
+    };
+    frozen_metadata_from_flow(&state.metadata,
+                              &pin->public.flow_metadata.flow);
+    xin.frozen_state = &state;
+
+    struct xlate_out xout;
+    enum xlate_error error = xlate_actions(&xin, &xout);
+    *slow = xout.slow;
+    xlate_out_uninit(&xout);
+
+    /* xlate_actions() can generate a number of errors, but only
+     * XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be
+     * sure to report over OpenFlow.  The others could come up in packet-outs
+     * or regular flow translation and I don't think that it's going to be too
+     * useful to report them to the controller. */
+    return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0;
+}
+
+/* Sends 'packet' out 'ofport'. If 'port' is a tunnel and that tunnel type
+ * supports a notion of an OAM flag, sets it if 'oam' is true.
* May modify 'packet'.
* Returns 0 if successful, otherwise a positive errno value. */
int
-xlate_send_packet(const struct ofport_dpif *ofport, struct dp_packet *packet)
+xlate_send_packet(const struct ofport_dpif *ofport, bool oam,
+ struct dp_packet *packet)
{
struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp);
struct xport *xport;
- struct ofpact_output output;
+ uint64_t ofpacts_stub[1024 / 8];
+ struct ofpbuf ofpacts;
struct flow flow;
- ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output);
+ ofpbuf_use_stack(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
/* Use OFPP_NONE as the in_port to avoid special packet processing. */
flow_extract(packet, &flow);
flow.in_port.ofp_port = OFPP_NONE;
if (!xport) {
return EINVAL;
}
- output.port = xport->ofp_port;
- output.max_len = 0;
+
+ if (oam) {
+ struct ofpact_set_field *sf = ofpact_put_SET_FIELD(&ofpacts);
+
+ sf->field = mf_from_id(MFF_TUN_FLAGS);
+ sf->value.be16 = htons(NX_TUN_FLAG_OAM);
+ sf->mask.be16 = htons(NX_TUN_FLAG_OAM);
+ }
+
+ ofpact_put_OUTPUT(&ofpacts)->port = xport->ofp_port;
return ofproto_dpif_execute_actions(xport->xbridge->ofproto, &flow, NULL,
- &output.ofpact, sizeof output,
- packet);
+ ofpacts.data, ofpacts.size, packet);
}
struct xlate_cache *