X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif-xlate.c;h=c1e98345f48a7f2eb256add52a035b41c1a7862d;hb=f7c5f6aa47338ec48517bb175336ad63c4ac56a5;hp=205798a0968126fb4ddc62c0525dfe5b97b980a9;hpb=74ff3298c8806bb09d0c7e40a25b889ab7564769;p=cascardo%2Fovs.git diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 205798a09..c1e98345f 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. +/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ #include #include -#include "tnl-arp-cache.h" +#include "tnl-neigh-cache.h" #include "bfd.h" #include "bitmap.h" #include "bond.h" @@ -54,6 +54,7 @@ #include "ofproto/ofproto-dpif-sflow.h" #include "ofproto/ofproto-dpif.h" #include "ofproto/ofproto-provider.h" +#include "packets.h" #include "ovs-router.h" #include "tnl-ports.h" #include "tunnel.h" @@ -171,7 +172,7 @@ struct xlate_ctx { * which might lead to an infinite loop. This could happen easily * if a tunnel is marked as 'ip_remote=flow', and the flow does not * actually set the tun_dst field. */ - ovs_be32 orig_tunnel_ip_dst; + struct in6_addr orig_tunnel_ipv6_dst; /* Stack for the push and pop actions. Each stack element is of type * "union mf_subvalue". */ @@ -209,36 +210,65 @@ struct xlate_ctx { bool exit; /* No further actions should be processed. */ mirror_mask_t mirrors; /* Bitmap of associated mirrors. */ - /* These are used for non-bond recirculation. The recirculation IDs are - * stored in xout and must be associated with a datapath flow (ukey), - * otherwise they will be freed when the xout is uninitialized. 
+ /* Freezing Translation + * ==================== * + * At some point during translation, the code may recognize the need to halt + * and checkpoint the translation in a way that it can be restarted again + * later. We call the checkpointing process "freezing" and the restarting + * process "thawing". * - * Steps in Recirculation Translation - * ================================== + * The use cases for freezing are: * - * At some point during translation, the code recognizes the need for - * recirculation. For example, recirculation is necessary when, after - * popping the last MPLS label, an action or a match tries to examine or - * modify a field that has been newly revealed following the MPLS label. + * - "Recirculation", where the translation process discovers that it + * doesn't have enough information to complete translation without + * actually executing the actions that have already been translated, + * which provides the additionally needed information. In these + * situations, the translation process freezes itself and assigns the frozen + * data a unique "recirculation ID", which it associates with the data + * in a table in userspace (see ofproto-dpif-rid.h). It also adds an + * OVS_ACTION_ATTR_RECIRC action specifying that ID to the datapath + * actions. When a packet hits that action, the datapath looks its + * flow up again using the ID. If there's a miss, it comes back to + * userspace, which finds the recirculation table entry for the ID, + * thaws the associated frozen data, and continues translation from + * that point given the additional information that is now known. * - * The simplest part of the work to be done is to commit existing changes to - * the packet, which produces datapath actions corresponding to the changes, - * and after this, add an OVS_ACTION_ATTR_RECIRC datapath action. + * The archetypal example is MPLS.
As MPLS is implemented in + * OpenFlow, the protocol that follows the last MPLS label becomes + * known only when that label is popped by an OpenFlow action. That + * means that Open vSwitch can't extract the headers beyond the MPLS + * labels until the pop action is executed. Thus, at that point + * translation uses the recirculation process to extract the headers + * beyond the MPLS labels. * - * The main problem here is preserving state. When the datapath executes - * OVS_ACTION_ATTR_RECIRC, it will upcall to userspace to get a translation - * for the post-recirculation actions. At this point userspace has to - * resume the translation where it left off, which means that it has to - * execute the following: + * (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for + * output to bonds. OVS pre-populates all the datapath flows for bond + * output in the datapath, though, which means that the elaborate + * process of coming back to userspace for a second round of + * translation isn't needed, and so bonds don't follow the above + * process.) * - * - The action that prompted recirculation, and any actions following - * it within the same flow. + * - "Continuation". A continuation is a way for an OpenFlow controller + * to interpose on a packet's traversal of the OpenFlow tables. When + * the translation process encounters a "controller" action with the + * "pause" flag, it freezes translation, serializes the frozen data, + * and sends it to an OpenFlow controller. The controller then + * examines and possibly modifies the frozen data and eventually sends + * it back to the switch, which thaws it and continues translation. * - * - If the action that prompted recirculation was invoked within a - * NXAST_RESUBMIT, then any actions following the resubmit. These - * "resubmit"s can be nested, so this has to go all the way up the - * control stack. 
+ * The main problem of freezing translation is preserving state, so that + * when the translation is thawed later it resumes from where it left off, + * without disruption. In particular, actions must be preserved as follows: + * + * - If we're freezing because an action needed more information, the + * action that prompted it. + * + * - Any actions remaining to be translated within the current flow. + * + * - If translation was frozen within a NXAST_RESUBMIT, then any actions + * following the resubmit action. Resubmit actions can be nested, so + * this has to go all the way up the control stack. * * - The OpenFlow 1.1+ action set. * @@ -247,52 +277,48 @@ struct xlate_ctx { * * - Metadata fields (input port, registers, OF1.1+ metadata, ...). * - * - Action set, stack + * - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions. * * - The table ID and cookie of the flow being translated at each level - * of the control stack (since OFPAT_CONTROLLER actions send these to - * the controller). + * of the control stack, because these can become visible through + * OFPAT_CONTROLLER actions (and other ways). * * Translation allows for the control of this state preservation via these - * members. When a need for recirculation is identified, the translation - * process: + * members. When a need to freeze translation is identified, the + * translation process: * - * 1. Sets 'recirc_action_offset' to the current size of 'action_set'. The - * action set is part of what needs to be preserved, so this allows the - * action set and the additional state to share the 'action_set' buffer. - * Later steps can tell that setup for recirculation is in progress from - * the nonnegative value of 'recirc_action_offset'. + * 1. Sets 'freezing' to true. * * 2. Sets 'exit' to true to tell later steps that we're exiting from the * translation process. * - * 3. Adds an OFPACT_UNROLL_XLATE action to 'action_set'. 
This action - * holds the current table ID and cookie so that they can be restored - * during a post-recirculation upcall translation. + * 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points + * frozen_actions.header to the action to make it easy to find it later. + * This action holds the current table ID and cookie so that they can be + * restored during a post-recirculation upcall translation. * * 4. Adds the action that prompted recirculation and any actions following - * it within the same flow to 'action_set', so that they can be executed - * during a post-recirculation upcall translation. + * it within the same flow to 'frozen_actions', so that they can be + * executed during a post-recirculation upcall translation. * * 5. Returns. * * 6. The action that prompted recirculation might be nested in a stack of * nested "resubmit"s that have actions remaining. Each of these notices - * that we're exiting (from 'exit') and that recirculation setup is in - * progress (from 'recirc_action_offset') and responds by adding more - * OFPACT_UNROLL_XLATE actions to 'action_set', as necessary, and any - * actions that were yet unprocessed. + * that we're exiting and freezing and responds by adding more + * OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary, + * followed by any actions that were yet unprocessed. * - * The caller stores all the state produced by this process associated with - * the recirculation ID. For post-recirculation upcall translation, the - * caller passes it back in for the new translation to execute. The - * process yielded a set of ofpacts that can be translated directly, so it - * is not much of a special case at that point. + * If we're freezing because of recirculation, the caller generates a + * recirculation ID and associates all the state produced by this process + * with it. For post-recirculation upcall translation, the caller passes it + * back in for the new translation to execute. 
The process yielded a set of + * ofpacts that can be translated directly, so it is not much of a special + * case at that point. */ - int recirc_action_offset; /* Offset in 'action_set' to actions to be - * executed after recirculation, or -1. */ - int last_unroll_offset; /* Offset in 'action_set' to the latest unroll - * action, or -1. */ + bool freezing; + struct ofpbuf frozen_actions; + const struct ofpact_controller *pause; /* True if a packet was but is no longer MPLS (due to an MPLS pop action). * This is a trigger for recirculation in cases where translating an action @@ -300,6 +326,14 @@ struct xlate_ctx { * the MPLS label stack that was originally present. */ bool was_mpls; + /* True if conntrack has been performed on this packet during processing + * on the current bridge. This is used to determine whether conntrack + * state from the datapath should be honored after thawing. */ + bool conntracked; + + /* Pointer to an embedded NAT action in a conntrack action, or NULL. */ + struct ofpact_nat *ct_nat_action; + /* OpenFlow 1.1+ action set. * * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS. @@ -308,32 +342,60 @@ struct xlate_ctx { * datapath actions. */ bool action_set_has_group; /* Action set contains OFPACT_GROUP? */ struct ofpbuf action_set; /* Action set. */ + + enum xlate_error error; /* Translation failed. 
*/ }; +const char *xlate_strerror(enum xlate_error error) +{ + switch (error) { + case XLATE_OK: + return "OK"; + case XLATE_BRIDGE_NOT_FOUND: + return "Bridge not found"; + case XLATE_RECURSION_TOO_DEEP: + return "Recursion too deep"; + case XLATE_TOO_MANY_RESUBMITS: + return "Too many resubmits"; + case XLATE_STACK_TOO_DEEP: + return "Stack too deep"; + case XLATE_NO_RECIRCULATION_CONTEXT: + return "No recirculation context"; + case XLATE_RECIRCULATION_CONFLICT: + return "Recirculation conflict"; + case XLATE_TOO_MANY_MPLS_LABELS: + return "Too many MPLS labels"; + } + return "Unknown error"; +} + static void xlate_action_set(struct xlate_ctx *ctx); +static void xlate_commit_actions(struct xlate_ctx *ctx); static void -ctx_trigger_recirculation(struct xlate_ctx *ctx) +ctx_trigger_freeze(struct xlate_ctx *ctx) { ctx->exit = true; - ctx->recirc_action_offset = ctx->action_set.size; + ctx->freezing = true; } static bool -ctx_first_recirculation_action(const struct xlate_ctx *ctx) +ctx_first_frozen_action(const struct xlate_ctx *ctx) { - return ctx->recirc_action_offset == ctx->action_set.size; + return !ctx->frozen_actions.size; } -static inline bool -exit_recirculates(const struct xlate_ctx *ctx) +static void +ctx_cancel_freeze(struct xlate_ctx *ctx) { - /* When recirculating the 'recirc_action_offset' has a non-negative value. - */ - return ctx->recirc_action_offset >= 0; + if (ctx->freezing) { + ctx->freezing = false; + ofpbuf_clear(&ctx->frozen_actions); + ctx->frozen_actions.header = NULL; + } } -static void compose_recirculate_action(struct xlate_ctx *ctx); +static void finish_freezing(struct xlate_ctx *ctx); /* A controller may use OFPP_NONE as the ingress port to indicate that * it did not arrive on a "real" port. 
'ofpp_none_bundle' exists for @@ -365,7 +427,7 @@ enum xc_type { XC_NORMAL, XC_FIN_TIMEOUT, XC_GROUP, - XC_TNL_ARP, + XC_TNL_NEIGH, }; /* xlate_cache entries hold enough information to perform the side effects of @@ -417,16 +479,16 @@ struct xc_entry { } group; struct { char br_name[IFNAMSIZ]; - ovs_be32 d_ip; - } tnl_arp_cache; + struct in6_addr d_ipv6; + } tnl_neigh_cache; } u; }; -#define XC_ENTRY_FOR_EACH(entry, entries, xcache) \ - entries = xcache->entries; \ - for (entry = ofpbuf_try_pull(&entries, sizeof *entry); \ - entry; \ - entry = ofpbuf_try_pull(&entries, sizeof *entry)) +#define XC_ENTRY_FOR_EACH(ENTRY, ENTRIES, XCACHE) \ + ENTRIES = XCACHE->entries; \ + for (ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY); \ + ENTRY; \ + ENTRY = ofpbuf_try_pull(&ENTRIES, sizeof *ENTRY)) struct xlate_cache { struct ofpbuf entries; @@ -471,6 +533,8 @@ static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port, static struct xbridge *xbridge_lookup(struct xlate_cfg *, const struct ofproto_dpif *); +static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *, + const struct uuid *); static struct xbundle *xbundle_lookup(struct xlate_cfg *, const struct ofbundle *); static struct xport *xport_lookup(struct xlate_cfg *, @@ -530,6 +594,17 @@ xlate_report(struct xlate_ctx *ctx, const char *format, ...) } } +static struct vlog_rate_limit error_report_rl = VLOG_RATE_LIMIT_INIT(1, 5); + +#define XLATE_REPORT_ERROR(CTX, ...) 
\ + do { \ + if (OVS_UNLIKELY((CTX)->xin->report_hook)) { \ + xlate_report(CTX, __VA_ARGS__); \ + } else { \ + VLOG_ERR_RL(&error_report_rl, __VA_ARGS__); \ + } \ + } while (0) + static inline void xlate_report_actions(struct xlate_ctx *ctx, const char *title, const struct ofpact *ofpacts, size_t ofpacts_len) @@ -1187,6 +1262,19 @@ xbridge_lookup(struct xlate_cfg *xcfg, const struct ofproto_dpif *ofproto) return NULL; } +static struct xbridge * +xbridge_lookup_by_uuid(struct xlate_cfg *xcfg, const struct uuid *uuid) +{ + struct xbridge *xbridge; + + HMAP_FOR_EACH (xbridge, hmap_node, &xcfg->xbridges) { + if (uuid_equals(ofproto_dpif_get_uuid(xbridge->ofproto), uuid)) { + return xbridge; + } + } + return NULL; +} + static struct xbundle * xbundle_lookup(struct xlate_cfg *xcfg, const struct ofbundle *ofbundle) { @@ -1541,10 +1629,15 @@ lookup_input_bundle(const struct xbridge *xbridge, ofp_port_t in_port, return NULL; } +/* Mirrors the packet represented by 'ctx' to appropriate mirror destinations, + * given the packet is ingressing or egressing on 'xbundle', which has ingress + * or egress (as appropriate) mirrors 'mirrors'. */ static void mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, mirror_mask_t mirrors) { + /* Figure out what VLAN the packet is in (because mirrors can select + * packets on basis of VLAN). */ bool warn = ctx->xin->packet != NULL; uint16_t vid = vlan_tci_to_vid(ctx->xin->flow.vlan_tci); if (!input_vid_is_valid(vid, xbundle, warn)) { @@ -1560,9 +1653,6 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, return; } - /* Record these mirrors so that we don't mirror to them again. */ - ctx->mirrors |= mirrors; - if (ctx->xin->resubmit_stats) { mirror_update_stats(xbridge->mbridge, mirrors, ctx->xin->resubmit_stats->n_packets, @@ -1576,27 +1666,36 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, entry->u.mirror.mirrors = mirrors; } + /* 'mirrors' is a bit-mask of candidates for mirroring. 
Iterate as long as + * some candidates remain. */ while (mirrors) { const unsigned long *vlans; mirror_mask_t dup_mirrors; struct ofbundle *out; int out_vlan; + /* Get the details of the mirror represented by the rightmost 1-bit. */ bool has_mirror = mirror_get(xbridge->mbridge, raw_ctz(mirrors), &vlans, &dup_mirrors, &out, &out_vlan); ovs_assert(has_mirror); + /* If this mirror selects on the basis of VLAN, and it does not select + * 'vlan', then discard this mirror and go on to the next one. */ if (vlans) { ctx->wc->masks.vlan_tci |= htons(VLAN_CFI | VLAN_VID_MASK); } - if (vlans && !bitmap_is_set(vlans, vlan)) { mirrors = zero_rightmost_1bit(mirrors); continue; } - mirrors &= ~dup_mirrors; + /* Record the mirror, and the mirrors that output to the same + * destination, so that we don't mirror to them again. This must be + * done now to ensure that output_normal(), below, doesn't recursively + * output to the same mirrors. */ ctx->mirrors |= dup_mirrors; + + /* Send the packet to the mirror. */ if (out) { struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); struct xbundle *out_xbundle = xbundle_lookup(xcfg, out); @@ -1614,6 +1713,10 @@ mirror_packet(struct xlate_ctx *ctx, struct xbundle *xbundle, } } } + + /* output_normal() could have recursively output (to different + * mirrors), so make sure that we don't send duplicates. 
*/ + mirrors &= ~ctx->mirrors; } } @@ -2348,7 +2451,7 @@ xlate_normal(struct xlate_ctx *ctx) if (is_igmp(flow)) { if (mcast_snooping_is_membership(flow->tp_src) || mcast_snooping_is_query(flow->tp_src)) { - if (ctx->xin->may_learn) { + if (ctx->xin->may_learn && ctx->xin->packet) { update_mcast_snooping_table(ctx->xbridge, flow, vlan, in_xbundle, ctx->xin->packet); } @@ -2380,7 +2483,7 @@ xlate_normal(struct xlate_ctx *ctx) return; } else if (is_mld(flow)) { ctx->xout->slow |= SLOW_ACTION; - if (ctx->xin->may_learn) { + if (ctx->xin->may_learn && ctx->xin->packet) { update_mcast_snooping_table(ctx->xbridge, flow, vlan, in_xbundle, ctx->xin->packet); } @@ -2653,21 +2756,24 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport) static int tnl_route_lookup_flow(const struct flow *oflow, - ovs_be32 *ip, struct xport **out_port) + struct in6_addr *ip, struct xport **out_port) { char out_dev[IFNAMSIZ]; struct xbridge *xbridge; struct xlate_cfg *xcfg; - ovs_be32 gw; + struct in6_addr gw; + struct in6_addr dst; - if (!ovs_router_lookup(oflow->tunnel.ip_dst, out_dev, &gw)) { + dst = flow_tnl_dst(&oflow->tunnel); + if (!ovs_router_lookup(&dst, out_dev, &gw)) { return -ENOENT; } - if (gw) { + if (ipv6_addr_is_set(&gw) && + (!IN6_IS_ADDR_V4MAPPED(&gw) || in6_addr_get_mapped_ipv4(&gw))) { *ip = gw; } else { - *ip = oflow->tunnel.ip_dst; + *ip = dst; } xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); @@ -2689,36 +2795,49 @@ tnl_route_lookup_flow(const struct flow *oflow, } static int -xlate_flood_packet(struct xbridge *xbridge, struct dp_packet *packet) +compose_table_xlate(struct xlate_ctx *ctx, const struct xport *out_dev, + struct dp_packet *packet) { + struct xbridge *xbridge = out_dev->xbridge; struct ofpact_output output; struct flow flow; ofpact_init(&output.ofpact, OFPACT_OUTPUT, sizeof output); - /* Use OFPP_NONE as the in_port to avoid special packet processing. 
*/ flow_extract(packet, &flow); - flow.in_port.ofp_port = OFPP_NONE; - output.port = OFPP_FLOOD; + flow.in_port.ofp_port = out_dev->ofp_port; + output.port = OFPP_TABLE; output.max_len = 0; - return ofproto_dpif_execute_actions(xbridge->ofproto, &flow, NULL, - &output.ofpact, sizeof output, - packet); + return ofproto_dpif_execute_actions__(xbridge->ofproto, &flow, NULL, + &output.ofpact, sizeof output, + ctx->recurse, ctx->resubmits, packet); } static void -tnl_send_arp_request(const struct xport *out_dev, +tnl_send_nd_request(struct xlate_ctx *ctx, const struct xport *out_dev, + const struct eth_addr eth_src, + struct in6_addr * ipv6_src, struct in6_addr * ipv6_dst) +{ + struct dp_packet packet; + + dp_packet_init(&packet, 0); + compose_nd(&packet, eth_src, ipv6_src, ipv6_dst); + compose_table_xlate(ctx, out_dev, &packet); + dp_packet_uninit(&packet); +} + +static void +tnl_send_arp_request(struct xlate_ctx *ctx, const struct xport *out_dev, const struct eth_addr eth_src, ovs_be32 ip_src, ovs_be32 ip_dst) { - struct xbridge *xbridge = out_dev->xbridge; struct dp_packet packet; dp_packet_init(&packet, 0); compose_arp(&packet, ARP_OP_REQUEST, eth_src, eth_addr_zero, true, ip_src, ip_dst); - xlate_flood_packet(xbridge, &packet); + compose_table_xlate(ctx, out_dev, &packet); dp_packet_uninit(&packet); } @@ -2728,18 +2847,24 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport, { struct ovs_action_push_tnl tnl_push_data; struct xport *out_dev = NULL; - ovs_be32 s_ip, d_ip = 0; + ovs_be32 s_ip = 0, d_ip = 0; + struct in6_addr s_ip6 = in6addr_any; + struct in6_addr d_ip6 = in6addr_any; struct eth_addr smac; struct eth_addr dmac; int err; + char buf_sip6[INET6_ADDRSTRLEN]; + char buf_dip6[INET6_ADDRSTRLEN]; - err = tnl_route_lookup_flow(flow, &d_ip, &out_dev); + err = tnl_route_lookup_flow(flow, &d_ip6, &out_dev); if (err) { xlate_report(ctx, "native tunnel routing failed"); return err; } - xlate_report(ctx, "tunneling to "IP_FMT" via %s", - 
IP_ARGS(d_ip), netdev_get_name(out_dev->netdev)); + + xlate_report(ctx, "tunneling to %s via %s", + ipv6_string_mapped(buf_dip6, &d_ip6), + netdev_get_name(out_dev->netdev)); /* Use mac addr of bridge port of the peer. */ err = netdev_get_etheraddr(out_dev->netdev, &smac); @@ -2748,35 +2873,51 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport, return err; } - err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL); - if (err) { - xlate_report(ctx, "tunnel output device lacks IPv4 address"); - return err; + d_ip = in6_addr_get_mapped_ipv4(&d_ip6); + if (d_ip) { + err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL); + if (err) { + xlate_report(ctx, "tunnel output device lacks IPv4 address"); + return err; + } + in6_addr_set_mapped_ipv4(&s_ip6, s_ip); + } else { + err = netdev_get_in6(out_dev->netdev, &s_ip6); + if (err) { + xlate_report(ctx, "tunnel output device lacks IPv6 address"); + return err; + } } - err = tnl_arp_lookup(out_dev->xbridge->name, d_ip, &dmac); + err = tnl_neigh_lookup(out_dev->xbridge->name, &d_ip6, &dmac); if (err) { - xlate_report(ctx, "ARP cache miss for "IP_FMT" on bridge %s, " - "sending ARP request", - IP_ARGS(d_ip), out_dev->xbridge->name); - tnl_send_arp_request(out_dev, smac, s_ip, d_ip); + xlate_report(ctx, "neighbor cache miss for %s on bridge %s, " + "sending %s request", + buf_dip6, out_dev->xbridge->name, d_ip ? 
"ARP" : "ND"); + if (d_ip) { + tnl_send_arp_request(ctx, out_dev, smac, s_ip, d_ip); + } else { + tnl_send_nd_request(ctx, out_dev, smac, &s_ip6, &d_ip6); + } return err; } + if (ctx->xin->xcache) { struct xc_entry *entry; - entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_ARP); - ovs_strlcpy(entry->u.tnl_arp_cache.br_name, out_dev->xbridge->name, - sizeof entry->u.tnl_arp_cache.br_name); - entry->u.tnl_arp_cache.d_ip = d_ip; + entry = xlate_cache_add_entry(ctx->xin->xcache, XC_TNL_NEIGH); + ovs_strlcpy(entry->u.tnl_neigh_cache.br_name, out_dev->xbridge->name, + sizeof entry->u.tnl_neigh_cache.br_name); + entry->u.tnl_neigh_cache.d_ipv6 = d_ip6; } - xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" "IP_FMT - " to "ETH_ADDR_FMT" "IP_FMT, - ETH_ADDR_ARGS(smac), IP_ARGS(s_ip), - ETH_ADDR_ARGS(dmac), IP_ARGS(d_ip)); + xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" %s" + " to "ETH_ADDR_FMT" %s", + ETH_ADDR_ARGS(smac), ipv6_string_mapped(buf_sip6, &s_ip6), + ETH_ADDR_ARGS(dmac), buf_dip6); + err = tnl_port_build_header(xport->ofport, flow, - dmac, smac, s_ip, &tnl_push_data); + dmac, smac, &s_ip6, &tnl_push_data); if (err) { return err; } @@ -2786,6 +2927,25 @@ build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport, return 0; } +static void +xlate_commit_actions(struct xlate_ctx *ctx) +{ + bool use_masked = ctx->xbridge->support.masked_set_action; + + ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, + ctx->odp_actions, ctx->wc, + use_masked); +} + +static void +clear_conntrack(struct flow *flow) +{ + flow->ct_state = 0; + flow->ct_zone = 0; + flow->ct_mark = 0; + memset(&flow->ct_label, 0, sizeof flow->ct_label); +} + static void compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, const struct xlate_bond_recirc *xr, bool check_stp) @@ -2803,7 +2963,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a 
patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 33); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 35); memset(&flow_tnl, 0, sizeof flow_tnl); if (!xport) { @@ -2841,6 +3001,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (xport->peer) { const struct xport *peer = xport->peer; struct flow old_flow = ctx->xin->flow; + bool old_conntrack = ctx->conntracked; bool old_was_mpls = ctx->was_mpls; cls_version_t old_version = ctx->tables_version; struct ofpbuf old_stack = ctx->stack; @@ -2856,6 +3017,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, memset(&flow->tunnel, 0, sizeof flow->tunnel); memset(flow->regs, 0, sizeof flow->regs); flow->actset_output = OFPP_UNSET; + ctx->conntracked = false; + clear_conntrack(flow); /* The bridge is now known so obtain its table version. */ ctx->tables_version @@ -2864,16 +3027,11 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (!process_special(ctx, peer) && may_receive(peer, ctx)) { if (xport_stp_forward_state(peer) && xport_rstp_forward_state(peer)) { xlate_table_action(ctx, flow->in_port.ofp_port, 0, true, true); - if (ctx->action_set.size) { - /* Translate action set only if not dropping the packet and - * not recirculating. */ - if (!exit_recirculates(ctx)) { - xlate_action_set(ctx); - } + if (!ctx->freezing) { + xlate_action_set(ctx); } - /* Check if need to recirculate. */ - if (exit_recirculates(ctx)) { - compose_recirculate_action(ctx); + if (ctx->freezing) { + finish_freezing(ctx); } } else { /* Forwarding is disabled by STP and RSTP. Let OFPP_NORMAL and @@ -2887,12 +3045,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, ctx->base_flow = old_base_flow; ctx->odp_actions->size = old_size; - /* Undo changes that may have been done for recirculation. 
*/ - if (exit_recirculates(ctx)) { - ctx->action_set.size = ctx->recirc_action_offset; - ctx->recirc_action_offset = -1; - ctx->last_unroll_offset = -1; - } + /* Undo changes that may have been done for freezing. */ + ctx_cancel_freeze(ctx); } } @@ -2910,13 +3064,19 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, * bridge. */ ctx->was_mpls = old_was_mpls; + /* The peer bridge's conntrack execution should have no effect on the + * original bridge. */ + ctx->conntracked = old_conntrack; + /* The fact that the peer bridge exits (for any reason) does not mean * that the original bridge should exit. Specifically, if the peer - * bridge recirculates (which typically modifies the packet), the - * original bridge must continue processing with the original, not the - * recirculated packet! */ + * bridge freezes translation, the original bridge must continue + * processing with the original, not the frozen packet! */ ctx->exit = false; + /* Peer bridge errors do not propagate back. */ + ctx->error = XLATE_OK; + if (ctx->xin->resubmit_stats) { netdev_vport_inc_tx(xport->netdev, ctx->xin->resubmit_stats); netdev_vport_inc_rx(peer->netdev, ctx->xin->resubmit_stats); @@ -2949,6 +3109,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, } if (xport->is_tunnel) { + struct in6_addr dst; /* Save tunnel metadata so that changes made due to * the Logical (tunnel) Port are not visible for any further * matches, while explicit set actions on tunnel metadata are. 
@@ -2959,7 +3120,8 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, xlate_report(ctx, "Tunneling decided against output"); goto out; /* restore flow_nw_tos */ } - if (flow->tunnel.ip_dst == ctx->orig_tunnel_ip_dst) { + dst = flow_tnl_dst(&flow->tunnel); + if (ipv6_addr_equals(&dst, &ctx->orig_tunnel_ipv6_dst)) { xlate_report(ctx, "Not tunneling to our own address"); goto out; /* restore flow_nw_tos */ } @@ -2998,11 +3160,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, } if (out_port != ODPP_NONE) { - bool use_masked = ctx->xbridge->support.masked_set_action; - - ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, - ctx->odp_actions, - wc, use_masked); + xlate_commit_actions(ctx); if (xr) { struct ovs_action_hash *act_hash; @@ -3098,17 +3256,20 @@ xlate_recursively(struct xlate_ctx *ctx, struct rule_dpif *rule) static bool xlate_resubmit_resource_check(struct xlate_ctx *ctx) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - if (ctx->recurse >= MAX_RESUBMIT_RECURSION + MAX_INTERNAL_RESUBMITS) { - VLOG_ERR_RL(&rl, "resubmit actions recursed over %d times", - MAX_RESUBMIT_RECURSION); + XLATE_REPORT_ERROR(ctx, "resubmit actions recursed over %d times", + MAX_RESUBMIT_RECURSION); + ctx->error = XLATE_RECURSION_TOO_DEEP; } else if (ctx->resubmits >= MAX_RESUBMITS + MAX_INTERNAL_RESUBMITS) { - VLOG_ERR_RL(&rl, "over %d resubmit actions", MAX_RESUBMITS); + XLATE_REPORT_ERROR(ctx, "over %d resubmit actions", MAX_RESUBMITS); + ctx->error = XLATE_TOO_MANY_RESUBMITS; } else if (ctx->odp_actions->size > UINT16_MAX) { - VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of actions"); + XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of actions"); + /* NOT an error, as we'll be slow-pathing the flow in this case? */ + ctx->exit = true; /* XXX: translation still terminated! 
*/ } else if (ctx->stack.size >= 65536) { - VLOG_ERR_RL(&rl, "resubmits yielded over 64 kB of stack"); + XLATE_REPORT_ERROR(ctx, "resubmits yielded over 64 kB of stack"); + ctx->error = XLATE_STACK_TOO_DEEP; } else { return true; } @@ -3122,7 +3283,7 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, { /* Check if we need to recirculate before matching in a table. */ if (ctx->was_mpls) { - ctx_trigger_recirculation(ctx); + ctx_trigger_freeze(ctx); return; } if (xlate_resubmit_resource_check(ctx)) { @@ -3160,8 +3321,6 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t in_port, uint8_t table_id, ctx->table_id = old_table_id; return; } - - ctx->exit = true; } static void @@ -3184,24 +3343,22 @@ static void xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket) { uint64_t action_list_stub[1024 / 8]; - struct ofpbuf action_list, action_set; + struct ofpbuf action_list = OFPBUF_STUB_INITIALIZER(action_list_stub); + struct ofpbuf action_set = ofpbuf_const_initializer(bucket->ofpacts, + bucket->ofpacts_len); struct flow old_flow = ctx->xin->flow; bool old_was_mpls = ctx->was_mpls; - ofpbuf_use_const(&action_set, bucket->ofpacts, bucket->ofpacts_len); - ofpbuf_use_stub(&action_list, action_list_stub, sizeof action_list_stub); - ofpacts_execute_action_set(&action_list, &action_set); ctx->recurse++; do_xlate_actions(action_list.data, action_list.size, ctx); ctx->recurse--; - ofpbuf_uninit(&action_set); ofpbuf_uninit(&action_list); - /* Check if need to recirculate. */ - if (exit_recirculates(ctx)) { - compose_recirculate_action(ctx); + /* Check if need to freeze. */ + if (ctx->freezing) { + finish_freezing(ctx); } /* Roll back flow to previous state. @@ -3226,9 +3383,8 @@ xlate_group_bucket(struct xlate_ctx *ctx, struct ofputil_bucket *bucket) /* The fact that the group bucket exits (for any reason) does not mean that * the translation after the group action should exit. 
Specifically, if - * the group bucket recirculates (which typically modifies the packet), the - * actions after the group action must continue processing with the - * original, not the recirculated packet! */ + * the group bucket freezes translation, the actions after the group action + * must continue processing with the original, not the frozen packet! */ ctx->exit = false; } @@ -3327,6 +3483,11 @@ xlate_hash_fields_select_group(struct xlate_ctx *ctx, struct group_dpif *group) } basis = hash_bytes(&value, mf->n_bytes, basis); + /* For tunnels, hash in whether the field is present. */ + if (mf_is_tun_metadata(mf)) { + basis = hash_boolean(mf_is_set(mf, &ctx->xin->flow), basis); + } + mf_mask_field(mf, &ctx->wc->masks); } } @@ -3449,115 +3610,156 @@ flood_packets(struct xlate_ctx *ctx, bool all) static void execute_controller_action(struct xlate_ctx *ctx, int len, enum ofp_packet_in_reason reason, - uint16_t controller_id) + uint16_t controller_id, + const uint8_t *userdata, size_t userdata_len) { - struct ofproto_packet_in *pin; struct dp_packet *packet; - bool use_masked; ctx->xout->slow |= SLOW_CONTROLLER; + xlate_commit_actions(ctx); if (!ctx->xin->packet) { return; } packet = dp_packet_clone(ctx->xin->packet); - use_masked = ctx->xbridge->support.masked_set_action; - ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - ctx->odp_actions, - ctx->wc, use_masked); - odp_execute_actions(NULL, &packet, 1, false, ctx->odp_actions->data, ctx->odp_actions->size, NULL); - pin = xmalloc(sizeof *pin); - pin->up.packet_len = dp_packet_size(packet); - pin->up.packet = dp_packet_steal_data(packet); - pin->up.reason = reason; - pin->up.table_id = ctx->table_id; - pin->up.cookie = ctx->rule_cookie; - - flow_get_metadata(&ctx->xin->flow, &pin->up.flow_metadata); + /* A packet sent by an action in a table-miss rule is considered an + * explicit table miss. 
OpenFlow before 1.3 doesn't have that concept so + * it will get translated back to OFPR_ACTION for those versions. */ + if (reason == OFPR_ACTION + && ctx->rule && rule_dpif_is_table_miss(ctx->rule)) { + reason = OFPR_EXPLICIT_MISS; + } + + size_t packet_len = dp_packet_size(packet); + + struct ofproto_async_msg *am = xmalloc(sizeof *am); + *am = (struct ofproto_async_msg) { + .controller_id = controller_id, + .oam = OAM_PACKET_IN, + .pin = { + .up = { + .public = { + .packet = dp_packet_steal_data(packet), + .packet_len = packet_len, + .reason = reason, + .table_id = ctx->table_id, + .cookie = ctx->rule_cookie, + .userdata = (userdata_len + ? xmemdup(userdata, userdata_len) + : NULL), + .userdata_len = userdata_len, + } + }, + .max_len = len, + }, + }; + flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata); - pin->controller_id = controller_id; - pin->send_len = len; - /* If a rule is a table-miss rule then this is - * a table-miss handled by a table-miss rule. - * - * Else, if rule is internal and has a controller action, - * the later being implied by the rule being processed here, - * then this is a table-miss handled without a table-miss rule. - * - * Otherwise this is not a table-miss. */ - pin->miss_type = OFPROTO_PACKET_IN_NO_MISS; - if (ctx->rule) { - if (rule_dpif_is_table_miss(ctx->rule)) { - pin->miss_type = OFPROTO_PACKET_IN_MISS_FLOW; - } else if (rule_dpif_is_internal(ctx->rule)) { - pin->miss_type = OFPROTO_PACKET_IN_MISS_WITHOUT_FLOW; - } - } - ofproto_dpif_send_packet_in(ctx->xbridge->ofproto, pin); + ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am); dp_packet_delete(packet); } -/* Called only when ctx->recirc_action_offset is set. 
*/ static void -compose_recirculate_action(struct xlate_ctx *ctx) -{ - struct recirc_metadata md; - bool use_masked; - uint32_t id; - - use_masked = ctx->xbridge->support.masked_set_action; - ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - ctx->odp_actions, - ctx->wc, use_masked); - - recirc_metadata_from_flow(&md, &ctx->xin->flow); +emit_continuation(struct xlate_ctx *ctx, const struct frozen_state *state) +{ + struct ofproto_async_msg *am = xmalloc(sizeof *am); + *am = (struct ofproto_async_msg) { + .controller_id = ctx->pause->controller_id, + .oam = OAM_PACKET_IN, + .pin = { + .up = { + .public = { + .userdata = xmemdup(ctx->pause->userdata, + ctx->pause->userdata_len), + .userdata_len = ctx->pause->userdata_len, + .packet = xmemdup(dp_packet_data(ctx->xin->packet), + dp_packet_size(ctx->xin->packet)), + .packet_len = dp_packet_size(ctx->xin->packet), + }, + .bridge = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto), + .stack = xmemdup(state->stack, + state->n_stack * sizeof *state->stack), + .n_stack = state->n_stack, + .mirrors = state->mirrors, + .conntracked = state->conntracked, + .actions = xmemdup(state->ofpacts, state->ofpacts_len), + .actions_len = state->ofpacts_len, + .action_set = xmemdup(state->action_set, + state->action_set_len), + .action_set_len = state->action_set_len, + }, + .max_len = UINT16_MAX, + }, + }; + flow_get_metadata(&ctx->xin->flow, &am->pin.up.public.flow_metadata); + ofproto_dpif_send_async_msg(ctx->xbridge->ofproto, am); +} - ovs_assert(ctx->recirc_action_offset >= 0); +static void +finish_freezing__(struct xlate_ctx *ctx, uint8_t table) +{ + ovs_assert(ctx->freezing); - struct recirc_state state = { - .table_id = 0, - .ofproto = ctx->xbridge->ofproto, - .metadata = md, - .stack = &ctx->stack, + struct frozen_state state = { + .table_id = table, + .ofproto_uuid = *ofproto_dpif_get_uuid(ctx->xbridge->ofproto), + .stack = ctx->stack.data, + .n_stack = ctx->stack.size / sizeof(union mf_subvalue), .mirrors = 
ctx->mirrors, - .action_set_len = ctx->recirc_action_offset, - .ofpacts_len = ctx->action_set.size, - .ofpacts = ctx->action_set.data, + .conntracked = ctx->conntracked, + .ofpacts = ctx->frozen_actions.data, + .ofpacts_len = ctx->frozen_actions.size, + .action_set = ctx->action_set.data, + .action_set_len = ctx->action_set.size, }; + frozen_metadata_from_flow(&state.metadata, &ctx->xin->flow); - /* Only allocate recirculation ID if we have a packet. */ - if (ctx->xin->packet) { + if (ctx->pause) { + if (ctx->xin->packet) { + emit_continuation(ctx, &state); + } + } else { /* Allocate a unique recirc id for the given metadata state in the - * flow. The life-cycle of this recirc id is managed by associating it + * flow. An existing id, with a new reference to the corresponding + * recirculation context, will be returned if possible. + * The life-cycle of this recirc id is managed by associating it * with the udpif key ('ukey') created for each new datapath flow. */ - id = recirc_alloc_id_ctx(&state); + uint32_t id = recirc_alloc_id_ctx(&state); if (!id) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_ERR_RL(&rl, "Failed to allocate recirculation id"); + XLATE_REPORT_ERROR(ctx, "Failed to allocate recirculation id"); + ctx->error = XLATE_NO_RECIRCULATION_CONTEXT; return; } - xlate_out_add_recirc(ctx->xout, id); - } else { - /* Look up an existing recirc id for the given metadata state in the - * flow. No new reference is taken, as the ID is RCU protected and is - * only required temporarily for verification. - * - * This might fail and return 0. We let zero 'id' to be used in the - * RECIRC action below, which will fail all revalidations as zero is - * not a valid recirculation ID. */ - id = recirc_find_id(&state); + recirc_refs_add(&ctx->xout->recircs, id); + + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id); } - nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_RECIRC, id); + /* Undo changes done by freezing. 
*/ + ctx_cancel_freeze(ctx); +} + +/* Called only when we're freezing. */ +static void +finish_freezing(struct xlate_ctx *ctx) +{ + xlate_commit_actions(ctx); + finish_freezing__(ctx, 0); +} - /* Undo changes done by recirculation. */ - ctx->action_set.size = ctx->recirc_action_offset; - ctx->recirc_action_offset = -1; - ctx->last_unroll_offset = -1; +/* Fork the pipeline here. The current packet will continue processing the + * current action list. A clone of the current packet will recirculate, skip + * the remainder of the current action list and asynchronously resume pipeline + * processing in 'table' with the current metadata and action set. */ +static void +compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table) +{ + ctx->freezing = true; + finish_freezing__(ctx, table); } static void @@ -3570,20 +3772,15 @@ compose_mpls_push_action(struct xlate_ctx *ctx, struct ofpact_push_mpls *mpls) n = flow_count_mpls_labels(flow, ctx->wc); if (!n) { - bool use_masked = ctx->xbridge->support.masked_set_action; - - ctx->xout->slow |= commit_odp_actions(flow, &ctx->base_flow, - ctx->odp_actions, - ctx->wc, use_masked); + xlate_commit_actions(ctx); } else if (n >= FLOW_MAX_MPLS_LABELS) { if (ctx->xin->packet != NULL) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "bridge %s: dropping packet on which an " + XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on which an " "MPLS push action can't be performed as it would " "have more MPLS LSEs than the %d supported.", ctx->xbridge->name, FLOW_MAX_MPLS_LABELS); } - ctx->exit = true; + ctx->error = XLATE_TOO_MANY_MPLS_LABELS; return; } @@ -3602,13 +3799,12 @@ compose_mpls_pop_action(struct xlate_ctx *ctx, ovs_be16 eth_type) } } else if (n >= FLOW_MAX_MPLS_LABELS) { if (ctx->xin->packet != NULL) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - VLOG_WARN_RL(&rl, "bridge %s: dropping packet on which an " + XLATE_REPORT_ERROR(ctx, "bridge %s: dropping packet on 
which an " "MPLS pop action can't be performed as it has " "more MPLS LSEs than the %d supported.", ctx->xbridge->name, FLOW_MAX_MPLS_LABELS); } - ctx->exit = true; + ctx->error = XLATE_TOO_MANY_MPLS_LABELS; ofpbuf_clear(ctx->odp_actions); } } @@ -3631,7 +3827,7 @@ compose_dec_ttl(struct xlate_ctx *ctx, struct ofpact_cnt_ids *ids) for (i = 0; i < ids->n_controllers; i++) { execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, - ids->cnt_ids[i]); + ids->cnt_ids[i], NULL, 0); } /* Stop processing for current table. */ @@ -3680,7 +3876,8 @@ compose_dec_mpls_ttl_action(struct xlate_ctx *ctx) set_mpls_lse_ttl(&flow->mpls_lse[0], ttl); return false; } else { - execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0); + execute_controller_action(ctx, UINT16_MAX, OFPR_INVALID_TTL, 0, + NULL, 0); } } @@ -3718,7 +3915,7 @@ xlate_output_action(struct xlate_ctx *ctx, (ctx->in_group ? OFPR_GROUP : ctx->in_action_set ? OFPR_ACTION_SET : OFPR_ACTION), - 0); + 0, NULL, 0); break; case OFPP_NONE: break; @@ -3916,7 +4113,6 @@ xlate_sample_action(struct xlate_ctx *ctx, /* Scale the probability from 16-bit to 32-bit while representing * the same percentage. 
*/ uint32_t probability = (os->probability << 16) | os->probability; - bool use_masked; if (!ctx->xbridge->support.variable_length_userdata) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); @@ -3927,10 +4123,7 @@ xlate_sample_action(struct xlate_ctx *ctx, return; } - use_masked = ctx->xbridge->support.masked_set_action; - ctx->xout->slow |= commit_odp_actions(&ctx->xin->flow, &ctx->base_flow, - ctx->odp_actions, - ctx->wc, use_masked); + xlate_commit_actions(ctx); union user_action_cookie cookie = { .flow_sample = { @@ -3967,12 +4160,9 @@ may_receive(const struct xport *xport, struct xlate_ctx *ctx) } static void -xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a) +xlate_write_actions__(struct xlate_ctx *ctx, + const struct ofpact *ofpacts, size_t ofpacts_len) { - const struct ofpact_nest *on = ofpact_get_WRITE_ACTIONS(a); - size_t on_len = ofpact_nest_get_action_len(on); - const struct ofpact *inner; - /* Maintain actset_output depending on the contents of the action set: * * - OFPP_UNSET, if there is no "output" action. @@ -3983,10 +4173,11 @@ xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a) * - OFPP_UNSET, if there is a "group" action. 
*/ if (!ctx->action_set_has_group) { - OFPACT_FOR_EACH (inner, on->actions, on_len) { - if (inner->type == OFPACT_OUTPUT) { - ctx->xin->flow.actset_output = ofpact_get_OUTPUT(inner)->port; - } else if (inner->type == OFPACT_GROUP) { + const struct ofpact *a; + OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { + if (a->type == OFPACT_OUTPUT) { + ctx->xin->flow.actset_output = ofpact_get_OUTPUT(a)->port; + } else if (a->type == OFPACT_GROUP) { ctx->xin->flow.actset_output = OFPP_UNSET; ctx->action_set_has_group = true; break; @@ -3994,8 +4185,13 @@ xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact *a) } } - ofpbuf_put(&ctx->action_set, on->actions, on_len); - ofpact_pad(&ctx->action_set); + ofpbuf_put(&ctx->action_set, ofpacts, ofpacts_len); +} + +static void +xlate_write_actions(struct xlate_ctx *ctx, const struct ofpact_nest *a) +{ + xlate_write_actions__(ctx, a->actions, ofpact_nest_get_action_len(a)); } static void @@ -4015,52 +4211,52 @@ xlate_action_set(struct xlate_ctx *ctx) } static void -recirc_put_unroll_xlate(struct xlate_ctx *ctx) +freeze_put_unroll_xlate(struct xlate_ctx *ctx) { - struct ofpact_unroll_xlate *unroll; - - unroll = ctx->last_unroll_offset < 0 - ? NULL - : ALIGNED_CAST(struct ofpact_unroll_xlate *, - (char *)ctx->action_set.data + ctx->last_unroll_offset); + struct ofpact_unroll_xlate *unroll = ctx->frozen_actions.header; /* Restore the table_id and rule cookie for a potential PACKET * IN if needed. */ if (!unroll || (ctx->table_id != unroll->rule_table_id || ctx->rule_cookie != unroll->rule_cookie)) { - - ctx->last_unroll_offset = ctx->action_set.size; - unroll = ofpact_put_UNROLL_XLATE(&ctx->action_set); + unroll = ofpact_put_UNROLL_XLATE(&ctx->frozen_actions); unroll->rule_table_id = ctx->table_id; unroll->rule_cookie = ctx->rule_cookie; + ctx->frozen_actions.header = unroll; } } -/* Copy remaining actions to the action_set to be executed after recirculation. 
- * UNROLL_XLATE action is inserted, if not already done so, before actions that - * may generate PACKET_INs from the current table and without matching another - * rule. */ +/* Copy actions 'a' through 'end' to ctx->frozen_actions, which will be + * executed after thawing. Inserts an UNROLL_XLATE action, if none is already + * present, before any action that may depend on the current table ID or flow + * cookie. */ static void -recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len, +freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end, struct xlate_ctx *ctx) { - const struct ofpact *a; - - OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { + for (; a < end; a = ofpact_next(a)) { switch (a->type) { - /* May generate PACKET INs. */ case OFPACT_OUTPUT_REG: case OFPACT_GROUP: case OFPACT_OUTPUT: case OFPACT_CONTROLLER: case OFPACT_DEC_MPLS_TTL: case OFPACT_DEC_TTL: - recirc_put_unroll_xlate(ctx); + /* These actions may generate asynchronous messages, which include + * table ID and flow cookie information. */ + freeze_put_unroll_xlate(ctx); + break; + + case OFPACT_RESUBMIT: + if (ofpact_get_RESUBMIT(a)->table_id == 0xff) { + /* This resubmit action is relative to the current table, so we + * need to track what table that is.*/ + freeze_put_unroll_xlate(ctx); + } break; - /* These may not generate PACKET INs. */ case OFPACT_SET_TUNNEL: case OFPACT_REG_MOVE: case OFPACT_SET_FIELD: @@ -4068,8 +4264,7 @@ recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_STACK_POP: case OFPACT_LEARN: case OFPACT_WRITE_METADATA: - case OFPACT_RESUBMIT: /* May indirectly generate PACKET INs, */ - case OFPACT_GOTO_TABLE: /* but from a different table and rule. 
*/ + case OFPACT_GOTO_TABLE: case OFPACT_ENQUEUE: case OFPACT_SET_VLAN_VID: case OFPACT_SET_VLAN_PCP: @@ -4101,21 +4296,24 @@ recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_METER: case OFPACT_SAMPLE: case OFPACT_DEBUG_RECIRC: + case OFPACT_CT: + case OFPACT_NAT: + /* These may not generate PACKET INs. */ break; - /* These need not be copied for restoration. */ case OFPACT_NOTE: case OFPACT_CONJUNCTION: + /* These need not be copied for restoration. */ continue; } /* Copy the action over. */ - ofpbuf_put(&ctx->action_set, a, OFPACT_ALIGN(a->len)); + ofpbuf_put(&ctx->frozen_actions, a, OFPACT_ALIGN(a->len)); } } #define CHECK_MPLS_RECIRCULATION() \ if (ctx->was_mpls) { \ - ctx_trigger_recirculation(ctx); \ + ctx_trigger_freeze(ctx); \ break; \ } #define CHECK_MPLS_RECIRCULATION_IF(COND) \ @@ -4123,6 +4321,161 @@ recirc_unroll_actions(const struct ofpact *ofpacts, size_t ofpacts_len, CHECK_MPLS_RECIRCULATION(); \ } +static void +put_ct_mark(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc) +{ + struct { + uint32_t key; + uint32_t mask; + } odp_attr; + + odp_attr.key = flow->ct_mark; + odp_attr.mask = wc->masks.ct_mark; + + if (odp_attr.mask && odp_attr.key != base_flow->ct_mark) { + nl_msg_put_unspec(odp_actions, OVS_CT_ATTR_MARK, &odp_attr, + sizeof(odp_attr)); + } +} + +static void +put_ct_label(const struct flow *flow, struct flow *base_flow, + struct ofpbuf *odp_actions, struct flow_wildcards *wc) +{ + if (!ovs_u128_is_zero(&wc->masks.ct_label) + && !ovs_u128_equals(&flow->ct_label, &base_flow->ct_label)) { + struct { + ovs_u128 key; + ovs_u128 mask; + } *odp_ct_label; + + odp_ct_label = nl_msg_put_unspec_uninit(odp_actions, + OVS_CT_ATTR_LABELS, + sizeof(*odp_ct_label)); + odp_ct_label->key = flow->ct_label; + odp_ct_label->mask = wc->masks.ct_label; + } +} + +static void +put_ct_helper(struct ofpbuf *odp_actions, struct ofpact_conntrack *ofc) +{ + if (ofc->alg) { + if 
(ofc->alg == IPPORT_FTP) { + nl_msg_put_string(odp_actions, OVS_CT_ATTR_HELPER, "ftp"); + } else { + VLOG_WARN("Cannot serialize ct_helper %d\n", ofc->alg); + } + } +} + +static void +put_ct_nat(struct xlate_ctx *ctx) +{ + struct ofpact_nat *ofn = ctx->ct_nat_action; + size_t nat_offset; + + if (!ofn) { + return; + } + + nat_offset = nl_msg_start_nested(ctx->odp_actions, OVS_CT_ATTR_NAT); + if (ofn->flags & NX_NAT_F_SRC || ofn->flags & NX_NAT_F_DST) { + nl_msg_put_flag(ctx->odp_actions, ofn->flags & NX_NAT_F_SRC + ? OVS_NAT_ATTR_SRC : OVS_NAT_ATTR_DST); + if (ofn->flags & NX_NAT_F_PERSISTENT) { + nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PERSISTENT); + } + if (ofn->flags & NX_NAT_F_PROTO_HASH) { + nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_HASH); + } else if (ofn->flags & NX_NAT_F_PROTO_RANDOM) { + nl_msg_put_flag(ctx->odp_actions, OVS_NAT_ATTR_PROTO_RANDOM); + } + if (ofn->range_af == AF_INET) { + nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN, + ofn->range.addr.ipv4.min); + if (ofn->range.addr.ipv4.max && + (ntohl(ofn->range.addr.ipv4.max) + > ntohl(ofn->range.addr.ipv4.min))) { + nl_msg_put_be32(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX, + ofn->range.addr.ipv4.max); + } + } else if (ofn->range_af == AF_INET6) { + nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MIN, + &ofn->range.addr.ipv6.min, + sizeof ofn->range.addr.ipv6.min); + if (!ipv6_mask_is_any(&ofn->range.addr.ipv6.max) && + memcmp(&ofn->range.addr.ipv6.max, &ofn->range.addr.ipv6.min, + sizeof ofn->range.addr.ipv6.max) > 0) { + nl_msg_put_unspec(ctx->odp_actions, OVS_NAT_ATTR_IP_MAX, + &ofn->range.addr.ipv6.max, + sizeof ofn->range.addr.ipv6.max); + } + } + if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) { + nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MIN, + ofn->range.proto.min); + if (ofn->range.proto.max && + ofn->range.proto.max > ofn->range.proto.min) { + nl_msg_put_u16(ctx->odp_actions, OVS_NAT_ATTR_PROTO_MAX, + ofn->range.proto.max); + } + } + } + 
nl_msg_end_nested(ctx->odp_actions, nat_offset); +} + +static void +compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc) +{ + ovs_u128 old_ct_label = ctx->base_flow.ct_label; + uint32_t old_ct_mark = ctx->base_flow.ct_mark; + size_t ct_offset; + uint16_t zone; + + /* Ensure that any prior actions are applied before composing the new + * conntrack action. */ + xlate_commit_actions(ctx); + + /* Process nested actions first, to populate the key. */ + ctx->ct_nat_action = NULL; + do_xlate_actions(ofc->actions, ofpact_ct_get_action_len(ofc), ctx); + + if (ofc->zone_src.field) { + zone = mf_get_subfield(&ofc->zone_src, &ctx->xin->flow); + } else { + zone = ofc->zone_imm; + } + + ct_offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CT); + if (ofc->flags & NX_CT_F_COMMIT) { + nl_msg_put_flag(ctx->odp_actions, OVS_CT_ATTR_COMMIT); + } + nl_msg_put_u16(ctx->odp_actions, OVS_CT_ATTR_ZONE, zone); + put_ct_mark(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc); + put_ct_label(&ctx->xin->flow, &ctx->base_flow, ctx->odp_actions, ctx->wc); + put_ct_helper(ctx->odp_actions, ofc); + put_ct_nat(ctx); + ctx->ct_nat_action = NULL; + nl_msg_end_nested(ctx->odp_actions, ct_offset); + + /* Restore the original ct fields in the key. These should only be exposed + * after recirculation to another table. */ + ctx->base_flow.ct_mark = old_ct_mark; + ctx->base_flow.ct_label = old_ct_label; + + if (ofc->recirc_table == NX_CT_RECIRC_NONE) { + /* If we do not recirculate as part of this action, hide the results of + * connection tracking from subsequent recirculations. */ + ctx->conntracked = false; + } else { + /* Use ct_* fields from datapath during recirculation upcall. 
*/ + ctx->conntracked = true; + compose_recirculate_and_fork(ctx, ofc->recirc_table); + } +} + static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx) @@ -4132,7 +4485,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, const struct ofpact *a; if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) { - tnl_arp_snoop(flow, wc, ctx->xbridge->name); + tnl_neigh_snoop(flow, wc, ctx->xbridge->name); } /* dl_type already in the mask, not set below. */ @@ -4142,13 +4495,15 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, const struct ofpact_set_field *set_field; const struct mf_field *mf; + if (ctx->error) { + break; + } + if (ctx->exit) { /* Check if need to store the remaining actions for later * execution. */ - if (exit_recirculates(ctx)) { - recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len - - ((uint8_t *)a - - (uint8_t *)ofpacts)), + if (ctx->freezing) { + freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx); } break; @@ -4169,9 +4524,18 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_CONTROLLER: controller = ofpact_get_CONTROLLER(a); - execute_controller_action(ctx, controller->max_len, - controller->reason, - controller->controller_id); + if (controller->pause) { + ctx->pause = controller; + ctx->xout->slow |= SLOW_CONTROLLER; + ctx_trigger_freeze(ctx); + a = ofpact_next(a); + } else { + execute_controller_action(ctx, controller->max_len, + controller->reason, + controller->controller_id, + controller->userdata, + controller->userdata_len); + } break; case OFPACT_ENQUEUE: @@ -4282,8 +4646,30 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; case OFPACT_RESUBMIT: + /* Freezing complicates resubmit. 
There are two cases: + * + * - If mpls_pop has been executed, then the flow table lookup + * as part of resubmit might depend on fields that can only + * be obtained via recirculation, so the resubmit itself + * triggers recirculation and we need to make sure that the + * resubmit is executed again after recirculation. + * Therefore, in this case we trigger recirculation and let + * the code following this "switch" append the resubmit to + * the post-recirculation actions. + * + * - Otherwise, some action in the flow entry found by resubmit + * might trigger freezing. If that happens, then we do not + * want to execute the resubmit again during thawing, so we + * want to skip back to the head of the loop to avoid that, + * only adding any actions that follow the resubmit to the + * frozen actions. + */ + if (ctx->was_mpls) { + ctx_trigger_freeze(ctx); + break; + } xlate_ofpact_resubmit(ctx, ofpact_get_RESUBMIT(a)); - break; + continue; case OFPACT_SET_TUNNEL: flow->tunnel.tun_id = htonll(ofpact_get_SET_TUNNEL(a)->tun_id); @@ -4326,7 +4712,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, && !eth_type_mpls(flow->dl_type)) { break; } - /* A flow may wildcard nw_frag. Do nothing if setting a trasport + /* A flow may wildcard nw_frag. Do nothing if setting a transport * header field on a packet that does not have them. */ mf_mask_field_and_prereqs(mf, wc); if (mf_are_prereqs_ok(mf, flow)) { @@ -4456,7 +4842,7 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, break; case OFPACT_WRITE_ACTIONS: - xlate_write_actions(ctx, a); + xlate_write_actions(ctx, ofpact_get_WRITE_ACTIONS(a)); break; case OFPACT_WRITE_METADATA: @@ -4472,12 +4858,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_GOTO_TABLE: { struct ofpact_goto_table *ogt = ofpact_get_GOTO_TABLE(a); - /* Allow ctx->table_id == TBL_INTERNAL, which will be greater - * than ogt->table_id. 
This is to allow goto_table actions that - * triggered recirculation: ctx->table_id will be TBL_INTERNAL - * after recirculation. */ - ovs_assert(ctx->table_id == TBL_INTERNAL - || ctx->table_id < ogt->table_id); + ovs_assert(ctx->table_id < ogt->table_id); + xlate_table_action(ctx, ctx->xin->flow.in_port.ofp_port, ogt->table_id, true, true); break; @@ -4487,19 +4869,26 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, xlate_sample_action(ctx, ofpact_get_SAMPLE(a)); break; + case OFPACT_CT: + CHECK_MPLS_RECIRCULATION(); + compose_conntrack_action(ctx, ofpact_get_CT(a)); + break; + + case OFPACT_NAT: + /* This will be processed by compose_conntrack_action(). */ + ctx->ct_nat_action = ofpact_get_NAT(a); + break; + case OFPACT_DEBUG_RECIRC: - ctx_trigger_recirculation(ctx); + ctx_trigger_freeze(ctx); a = ofpact_next(a); break; } /* Check if need to store this and the remaining actions for later * execution. */ - if (ctx->exit && ctx_first_recirculation_action(ctx)) { - recirc_unroll_actions(a, OFPACT_ALIGN(ofpacts_len - - ((uint8_t *)a - - (uint8_t *)ofpacts)), - ctx); + if (!ctx->error && ctx->exit && ctx_first_frozen_action(ctx)) { + freeze_unroll_actions(a, ofpact_end(ofpacts, ofpacts_len), ctx); break; } } @@ -4526,20 +4915,27 @@ xlate_in_init(struct xlate_in *xin, struct ofproto_dpif *ofproto, xin->resubmit_hook = NULL; xin->report_hook = NULL; xin->resubmit_stats = NULL; + xin->recurse = 0; + xin->resubmits = 0; xin->wc = wc; xin->odp_actions = odp_actions; /* Do recirc lookup. */ - xin->recirc = flow->recirc_id - ? 
recirc_id_node_find(flow->recirc_id) - : NULL; + xin->frozen_state = NULL; + if (flow->recirc_id) { + const struct recirc_id_node *node + = recirc_id_node_find(flow->recirc_id); + if (node) { + xin->frozen_state = &node->state; + } + } } void xlate_out_uninit(struct xlate_out *xout) { if (xout) { - xlate_out_free_recircs(xout); + recirc_refs_unref(&xout->recircs); } } @@ -4549,8 +4945,15 @@ void xlate_actions_for_side_effects(struct xlate_in *xin) { struct xlate_out xout; + enum xlate_error error; + + error = xlate_actions(xin, &xout); + if (error) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + + VLOG_WARN_RL(&rl, "xlate_actions failed (%s)!", xlate_strerror(error)); + } - xlate_actions(xin, &xout); xlate_out_uninit(&xout); } @@ -4629,7 +5032,7 @@ netdev_max_backlog(void) stream = fopen(filename, "r"); if (!stream) { - VLOG_WARN("%s: open failed (%s)", filename, ovs_strerror(errno)); + VLOG_INFO("%s: open failed (%s)", filename, ovs_strerror(errno)); } else { if (fscanf(stream, "%d", &n) != 1) { VLOG_WARN("%s: read error", filename); @@ -4739,26 +5142,30 @@ xlate_wc_finish(struct xlate_ctx *ctx) /* Translates the flow, actions, or rule in 'xin' into datapath actions in * 'xout'. * The caller must take responsibility for eventually freeing 'xout', with - * xlate_out_uninit(). */ -void + * xlate_out_uninit(). + * Returns 'XLATE_OK' if translation was successful. In case of an error an + * empty set of actions will be returned in 'xin->odp_actions' (if non-NULL), + * so that most callers may ignore the return value and transparently install a + * drop flow when the translation fails. 
*/ +enum xlate_error xlate_actions(struct xlate_in *xin, struct xlate_out *xout) { *xout = (struct xlate_out) { .slow = 0, - .fail_open = false, - .n_recircs = 0, + .recircs = RECIRC_REFS_EMPTY_INITIALIZER, }; struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); struct xbridge *xbridge = xbridge_lookup(xcfg, xin->ofproto); if (!xbridge) { - return; + return XLATE_BRIDGE_NOT_FOUND; } struct flow *flow = &xin->flow; union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)]; uint64_t action_set_stub[1024 / 8]; + uint64_t frozen_actions_stub[1024 / 8]; struct flow_wildcards scratch_wc; uint64_t actions_stub[256 / 8]; struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub); @@ -4766,15 +5173,15 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) .xin = xin, .xout = xout, .base_flow = *flow, - .orig_tunnel_ip_dst = flow->tunnel.ip_dst, + .orig_tunnel_ipv6_dst = flow_tnl_dst(&flow->tunnel), .xbridge = xbridge, .stack = OFPBUF_STUB_INITIALIZER(stack_stub), .rule = xin->rule, .wc = xin->wc ? xin->wc : &scratch_wc, .odp_actions = xin->odp_actions ? 
xin->odp_actions : &scratch_actions, - .recurse = 0, - .resubmits = 0, + .recurse = xin->recurse, + .resubmits = xin->resubmits, .in_group = false, .in_action_set = false, @@ -4785,12 +5192,17 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) .sflow_odp_port = 0, .nf_output_iface = NF_OUT_DROP, .exit = false, + .error = XLATE_OK, .mirrors = 0, - .recirc_action_offset = -1, - .last_unroll_offset = -1, + .freezing = false, + .frozen_actions = OFPBUF_STUB_INITIALIZER(frozen_actions_stub), + .pause = NULL, .was_mpls = false, + .conntracked = false, + + .ct_nat_action = NULL, .action_set_has_group = false, .action_set = OFPBUF_STUB_INITIALIZER(action_set_stub), @@ -4825,10 +5237,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) COVERAGE_INC(xlate_actions); - if (xin->recirc) { - const struct recirc_state *state = &xin->recirc->state; + if (xin->frozen_state) { + const struct frozen_state *state = xin->frozen_state; - xlate_report(&ctx, "Restoring state post-recirculation:"); + xlate_report(&ctx, "Thawing frozen state:"); if (xin->ofpacts_len > 0 || ctx.rule) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); @@ -4836,38 +5248,45 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) VLOG_WARN_RL(&rl, "Recirculation conflict (%s)!", conflict); xlate_report(&ctx, "- Recirculation conflict (%s)!", conflict); + ctx.error = XLATE_RECIRCULATION_CONFLICT; goto exit; } /* Set the bridge for post-recirculation processing if needed. */ - if (ctx.xbridge->ofproto != state->ofproto) { + if (!uuid_equals(ofproto_dpif_get_uuid(ctx.xbridge->ofproto), + &state->ofproto_uuid)) { struct xlate_cfg *xcfg = ovsrcu_get(struct xlate_cfg *, &xcfgp); const struct xbridge *new_bridge - = xbridge_lookup(xcfg, state->ofproto); + = xbridge_lookup_by_uuid(xcfg, &state->ofproto_uuid); if (OVS_UNLIKELY(!new_bridge)) { /* Drop the packet if the bridge cannot be found. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); - VLOG_WARN_RL(&rl, "Recirculation bridge no longer exists."); - xlate_report(&ctx, "- Recirculation bridge no longer exists."); + VLOG_WARN_RL(&rl, "Frozen bridge no longer exists."); + xlate_report(&ctx, "- Frozen bridge no longer exists."); + ctx.error = XLATE_BRIDGE_NOT_FOUND; goto exit; } ctx.xbridge = new_bridge; } - /* Set the post-recirculation table id. Note: A table lookup is done - * only if there are no post-recirculation actions. */ + /* Set the thawed table id. Note: A table lookup is done only if there + * are no frozen actions. */ ctx.table_id = state->table_id; xlate_report(&ctx, "- Resuming from table %"PRIu8, ctx.table_id); + if (!state->conntracked) { + clear_conntrack(flow); + } + /* Restore pipeline metadata. May change flow's in_port and other - * metadata to the values that existed when recirculation was - * triggered. */ - recirc_metadata_to_flow(&state->metadata, flow); + * metadata to the values that existed when freezing was triggered. */ + frozen_metadata_to_flow(&state->metadata, flow); /* Restore stack, if any. */ if (state->stack) { - ofpbuf_put(&ctx.stack, state->stack->data, state->stack->size); + ofpbuf_put(&ctx.stack, state->stack, + state->n_stack * sizeof *state->stack); } /* Restore mirror state. */ @@ -4875,28 +5294,19 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) /* Restore action set, if any. 
*/ if (state->action_set_len) { - const struct ofpact *a; - xlate_report_actions(&ctx, "- Restoring action set", - state->ofpacts, state->action_set_len); - - ofpbuf_put(&ctx.action_set, state->ofpacts, state->action_set_len); + state->action_set, state->action_set_len); - OFPACT_FOR_EACH(a, state->ofpacts, state->action_set_len) { - if (a->type == OFPACT_GROUP) { - ctx.action_set_has_group = true; - break; - } - } + flow->actset_output = OFPP_UNSET; + xlate_write_actions__(&ctx, state->action_set, + state->action_set_len); } - /* Restore recirculation actions. If there are no actions, processing - * will start with a lookup in the table set above. */ - if (state->ofpacts_len > state->action_set_len) { - xin->ofpacts_len = state->ofpacts_len - state->action_set_len; - xin->ofpacts = state->ofpacts + - state->action_set_len / sizeof *state->ofpacts; - + /* Restore frozen actions. If there are no actions, processing will + * start with a lookup in the table set above. */ + xin->ofpacts = state->ofpacts; + xin->ofpacts_len = state->ofpacts_len; + if (state->ofpacts_len) { xlate_report_actions(&ctx, "- Restoring actions", xin->ofpacts, xin->ofpacts_len); } @@ -4905,6 +5315,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) VLOG_WARN_RL(&rl, "Recirculation context not found for ID %"PRIx32, flow->recirc_id); + ctx.error = XLATE_NO_RECIRCULATION_CONTEXT; goto exit; } /* The bridge is now known so obtain its table version. */ @@ -4930,15 +5341,14 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) ctx.xin->resubmit_hook(ctx.xin, ctx.rule, 0); } } - xout->fail_open = ctx.rule && rule_dpif_is_fail_open(ctx.rule); - /* Get the proximate input port of the packet. (If xin->recirc, + /* Get the proximate input port of the packet. (If xin->frozen_state, * flow->in_port is the ultimate input port of the packet.) */ struct xport *in_port = get_ofp_port(xbridge, ctx.base_flow.in_port.ofp_port); - /* Tunnel stats only for non-recirculated packets. 
*/ - if (!xin->recirc && in_port && in_port->is_tunnel) { + /* Tunnel stats only for not-thawed packets. */ + if (!xin->frozen_state && in_port && in_port->is_tunnel) { if (ctx.xin->resubmit_stats) { netdev_vport_inc_rx(in_port->netdev, ctx.xin->resubmit_stats); if (in_port->bfd) { @@ -4954,11 +5364,11 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) } } - if (!xin->recirc && process_special(&ctx, in_port)) { + if (!xin->frozen_state && process_special(&ctx, in_port)) { /* process_special() did all the processing for this packet. * - * We do not perform special processing on recirculated packets, as - * recirculated packets are not really received by the bridge.*/ + * We do not perform special processing on thawed packets, since that + * was done before they were frozen and should not be redone. */ } else if (in_port && in_port->xbundle && xbundle_mirror_out(xbridge, in_port->xbundle)) { if (ctx.xin->packet != NULL) { @@ -4968,9 +5378,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) ctx.xbridge->name, in_port->xbundle->name); } } else { - /* Sampling is done only for packets really received by the bridge. */ + /* Sampling is done on initial reception; don't redo after thawing. */ unsigned int user_cookie_offset = 0; - if (!xin->recirc) { + if (!xin->frozen_state) { user_cookie_offset = compose_sflow_action(&ctx); compose_ipfix_action(&ctx, ODPP_NONE); } @@ -4996,35 +5406,30 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) mirror_ingress_packet(&ctx); do_xlate_actions(ofpacts, ofpacts_len, &ctx); + if (ctx.error) { + goto exit; + } /* We've let OFPP_NORMAL and the learning action look at the - * packet, so drop it now if forwarding is disabled. */ + * packet, so cancel all actions and freezing if forwarding is + * disabled. */ if (in_port && (!xport_stp_forward_state(in_port) || !xport_rstp_forward_state(in_port))) { - /* Drop all actions added by do_xlate_actions() above. 
*/ ctx.odp_actions->size = sample_actions_len; + ctx_cancel_freeze(&ctx); + ofpbuf_clear(&ctx.action_set); + } - /* Undo changes that may have been done for recirculation. */ - if (exit_recirculates(&ctx)) { - ctx.action_set.size = ctx.recirc_action_offset; - ctx.recirc_action_offset = -1; - ctx.last_unroll_offset = -1; - } - } else if (ctx.action_set.size) { - /* Translate action set only if not dropping the packet and - * not recirculating. */ - if (!exit_recirculates(&ctx)) { - xlate_action_set(&ctx); - } + if (!ctx.freezing) { + xlate_action_set(&ctx); } - /* Check if need to recirculate. */ - if (exit_recirculates(&ctx)) { - compose_recirculate_action(&ctx); + if (ctx.freezing) { + finish_freezing(&ctx); } } /* Output only fully processed packets. */ - if (!exit_recirculates(&ctx) + if (!ctx.freezing && xbridge->has_in_band && in_band_must_output_to_local_port(flow) && !actions_output_to_local_port(&ctx)) { @@ -5048,10 +5453,12 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) ctx.xout->slow |= SLOW_ACTION; } - /* Do netflow only for packets really received by the bridge and not sent - * to the controller. We consider packets sent to the controller to be - * part of the control plane rather than the data plane. */ - if (!xin->recirc && xbridge->netflow && !(xout->slow & SLOW_CONTROLLER)) { + /* Do netflow only for packets on initial reception, that are not sent to + * the controller. We consider packets sent to the controller to be part + * of the control plane rather than the data plane. */ + if (!xin->frozen_state + && xbridge->netflow + && !(xout->slow & SLOW_CONTROLLER)) { if (ctx.xin->resubmit_stats) { netflow_flow_update(xbridge->netflow, flow, ctx.nf_output_iface, @@ -5074,7 +5481,78 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) exit: ofpbuf_uninit(&ctx.stack); ofpbuf_uninit(&ctx.action_set); + ofpbuf_uninit(&ctx.frozen_actions); ofpbuf_uninit(&scratch_actions); + + /* Make sure we return a "drop flow" in case of an error. 
*/ + if (ctx.error) { + xout->slow = 0; + if (xin->odp_actions) { + ofpbuf_clear(xin->odp_actions); + } + } + return ctx.error; +} + +enum ofperr +xlate_resume(struct ofproto_dpif *ofproto, + const struct ofputil_packet_in_private *pin, + struct ofpbuf *odp_actions, + enum slow_path_reason *slow) +{ + struct dp_packet packet; + dp_packet_use_const(&packet, pin->public.packet, + pin->public.packet_len); + + struct flow flow; + flow_extract(&packet, &flow); + + struct xlate_in xin; + xlate_in_init(&xin, ofproto, &flow, 0, NULL, ntohs(flow.tcp_flags), + &packet, NULL, odp_actions); + + struct ofpact_note noop; + ofpact_init_NOTE(&noop); + noop.length = 0; + + bool any_actions = pin->actions_len > 0; + struct frozen_state state = { + .table_id = 0, /* Not the table where NXAST_PAUSE was executed. */ + .ofproto_uuid = pin->bridge, + .stack = pin->stack, + .n_stack = pin->n_stack, + .mirrors = pin->mirrors, + .conntracked = pin->conntracked, + + /* When there are no actions, xlate_actions() will search the flow + * table. We don't want it to do that (we want it to resume), so + * supply a no-op action if there aren't any. + * + * (We can't necessarily avoid translating actions entirely if there + * aren't any actions, because there might be some finishing-up to do + * at the end of the pipeline, and we don't check for those + * conditions.) */ + .ofpacts = any_actions ? pin->actions : &noop.ofpact, + .ofpacts_len = any_actions ? pin->actions_len : sizeof noop, + + .action_set = pin->action_set, + .action_set_len = pin->action_set_len, + }; + frozen_metadata_from_flow(&state.metadata, + &pin->public.flow_metadata.flow); + xin.frozen_state = &state; + + struct xlate_out xout; + enum xlate_error error = xlate_actions(&xin, &xout); + *slow = xout.slow; + xlate_out_uninit(&xout); + + /* xlate_actions() can generate a number of errors, but only + * XLATE_BRIDGE_NOT_FOUND really stands out to me as one that we should be + * sure to report over OpenFlow. 
The others could come up in packet-outs + * or regular flow translation and I don't think that it's going to be too + * useful to report them to the controller. */ + return error == XLATE_BRIDGE_NOT_FOUND ? OFPERR_NXR_STALE : 0; } /* Sends 'packet' out 'ofport'. @@ -5210,10 +5688,10 @@ xlate_push_stats(struct xlate_cache *xcache, group_dpif_credit_stats(entry->u.group.group, entry->u.group.bucket, stats); break; - case XC_TNL_ARP: - /* Lookup arp to avoid arp timeout. */ - tnl_arp_lookup(entry->u.tnl_arp_cache.br_name, - entry->u.tnl_arp_cache.d_ip, &dmac); + case XC_TNL_NEIGH: + /* Lookup neighbor to avoid timeout. */ + tnl_neigh_lookup(entry->u.tnl_neigh_cache.br_name, + &entry->u.tnl_neigh_cache.d_ipv6, &dmac); break; default: OVS_NOT_REACHED(); @@ -5285,7 +5763,7 @@ xlate_cache_clear(struct xlate_cache *xcache) case XC_GROUP: group_dpif_unref(entry->u.group.group); break; - case XC_TNL_ARP: + case XC_TNL_NEIGH: break; default: OVS_NOT_REACHED();