From: Alex Wang Date: Wed, 17 Dec 2014 02:47:27 +0000 (-0800) Subject: recirculation: Map recirc_id to ofproto_dpif. X-Git-Tag: v2.3.2~52 X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=4ec2ffe905e266ed27c16294668b8162ec5f29dc recirculation: Map recirc_id to ofproto_dpif. After commit 0c7812e5e (recirculation: Do not drop packet when there is no match from internal table.), if flow keys are modified before the recirculation action (e.g. set vlan ID), the miss handling of recirc'ed packets may not reach the intended 'ofproto_dpif' which has rules looking up the 'recirc_id's, causing drops. This commit adds an unittest that captures this bug. Moreover, to solve this bug, this commit checks mapping between 'recirc_id' and the corresponding 'ofproto_dpif', and makes sure that the miss handling of recirc'ed packets are done with the correct 'ofproto_dpif'. Signed-off-by: Alex Wang Acked-by: Andy Zhou Acked-by: Jarno Rajahalme Acked-by: Ethan Jackson --- diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 0fc5443d6..ee353e3b2 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -634,6 +634,8 @@ xlate_receive(const struct dpif_backer *backer, struct ofpbuf *packet, struct dpif_sflow **sflow, struct netflow **netflow, odp_port_t *odp_in_port) { + struct ofproto_dpif *recv_ofproto = NULL; + struct ofproto_dpif *recirc_ofproto = NULL; const struct xport *xport; int error = ENODEV; @@ -655,6 +657,7 @@ xlate_receive(const struct dpif_backer *backer, struct ofpbuf *packet, if (!xport) { goto exit; } + recv_ofproto = xport->xbridge->ofproto; if (vsp_adjust_flow(xport->xbridge->ofproto, flow)) { if (packet) { @@ -667,20 +670,54 @@ xlate_receive(const struct dpif_backer *backer, struct ofpbuf *packet, } error = 0; + /* When recirc_id is set in 'flow', checks whether the ofproto_dpif that + * corresponds to the recirc_id is same as the receiving bridge. If they + * are the same, uses the 'recv_ofproto' and keeps the 'ofp_in_port' as + * assigned. Otherwise, uses the 'recirc_ofproto' that owns recirc_id and + * assigns OFPP_NONE to 'ofp_in_port'. Doing this is in that, the + * recirculated flow must be processced by the ofproto which originates + * the recirculation, and as bridges can only see their own ports, the + * in_port of the 'recv_ofproto' should not be passed to the + * 'recirc_ofproto'. + * + * Admittedly, setting the 'ofp_in_port' to OFPP_NONE limits the + * 'recirc_ofproto' from meaningfully matching on in_port of recirculated + * flow, and should be fixed in the near future. + * + * TODO: Restore the original patch port. + */ + if (flow->recirc_id) { + recirc_ofproto = ofproto_dpif_recirc_get_ofproto(backer, + flow->recirc_id); + /* Returns error if could not find recirculation bridge */ + if (!recirc_ofproto) { + error = ENOENT; + goto exit; + } + + if (recv_ofproto != recirc_ofproto) { + xport = NULL; + flow->in_port.ofp_port = OFPP_NONE; + if (odp_in_port) { + *odp_in_port = ODPP_NONE; + } + } + } + if (ofproto) { - *ofproto = xport->xbridge->ofproto; + *ofproto = xport ? recv_ofproto : recirc_ofproto; } if (ipfix) { - *ipfix = dpif_ipfix_ref(xport->xbridge->ipfix); + *ipfix = xport ? dpif_ipfix_ref(xport->xbridge->ipfix) : NULL; } if (sflow) { - *sflow = dpif_sflow_ref(xport->xbridge->sflow); + *sflow = xport ? dpif_sflow_ref(xport->xbridge->sflow) : NULL; } if (netflow) { - *netflow = netflow_ref(xport->xbridge->netflow); + *netflow = xport ? netflow_ref(xport->xbridge->netflow) : NULL; } exit: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 7204ccf4d..39288c1cf 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -57,6 +57,7 @@ #include "ofproto-dpif-sflow.h" #include "ofproto-dpif-upcall.h" #include "ofproto-dpif-xlate.h" +#include "ovs-rcu.h" #include "poll-loop.h" #include "seq.h" #include "simap.h" @@ -238,6 +239,13 @@ COVERAGE_DEFINE(rev_port_toggled); COVERAGE_DEFINE(rev_flow_table); COVERAGE_DEFINE(rev_mac_learning); +/* Stores mapping between 'recirc_id' and 'ofproto-dpif'. */ +struct dpif_backer_recirc_node { + struct hmap_node hmap_node; + struct ofproto_dpif *ofproto; + uint32_t recirc_id; +}; + /* All datapaths of a given type share a single dpif backer instance. */ struct dpif_backer { char *type; @@ -256,6 +264,8 @@ struct dpif_backer { /* Recirculation. */ struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ + struct hmap recirc_map; /* Map of 'recirc_id's to 'ofproto's. */ + struct ovs_mutex recirc_mutex; /* Protects 'recirc_map'. */ bool enable_recirc; /* True if the datapath supports recirculation */ /* True if the datapath supports variable-length @@ -794,6 +804,26 @@ dealloc(struct ofproto *ofproto_) free(ofproto); } +/* Called when 'ofproto' is destructed. Checks for and clears any + * recirc_id leak. */ +static void +dpif_backer_recirc_clear_ofproto(struct dpif_backer *backer, + struct ofproto_dpif *ofproto) +{ + struct dpif_backer_recirc_node *node; + + ovs_mutex_lock(&backer->recirc_mutex); + HMAP_FOR_EACH (node, hmap_node, &backer->recirc_map) { + if (node->ofproto == ofproto) { + VLOG_ERR("recirc_id %"PRIu32", not freed when ofproto (%s) " + "is destructed", node->recirc_id, ofproto->up.name); + hmap_remove(&backer->recirc_map, &node->hmap_node); + ovsrcu_postpone(free, node); + } + } + ovs_mutex_unlock(&backer->recirc_mutex); +} + static void close_dpif_backer(struct dpif_backer *backer) { @@ -810,6 +840,8 @@ close_dpif_backer(struct dpif_backer *backer) hmap_destroy(&backer->odp_to_ofport_map); shash_find_and_delete(&all_dpif_backers, backer->type); recirc_id_pool_destroy(backer->rid_pool); + hmap_destroy(&backer->recirc_map); + ovs_mutex_destroy(&backer->recirc_mutex); free(backer->type); dpif_close(backer->dpif); free(backer); @@ -921,6 +953,8 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) backer->variable_length_userdata = check_variable_length_userdata(backer); backer->max_mpls_depth = check_max_mpls_depth(backer); backer->rid_pool = recirc_id_pool_create(); + ovs_mutex_init(&backer->recirc_mutex); + hmap_init(&backer->recirc_map); error = dpif_recv_set(backer->dpif, backer->recv_set_enable); if (error) { @@ -1306,6 +1340,8 @@ destruct(struct ofproto *ofproto_) } guarded_list_destroy(&ofproto->pins); + dpif_backer_recirc_clear_ofproto(ofproto->backer, ofproto); + mbridge_unref(ofproto->mbridge); netflow_unref(ofproto->netflow); @@ -4758,20 +4794,59 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) } } +struct ofproto_dpif * +ofproto_dpif_recirc_get_ofproto(const struct dpif_backer *backer, + uint32_t recirc_id) +{ + struct dpif_backer_recirc_node *node; + + ovs_mutex_lock(&backer->recirc_mutex); + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map, recirc_id), + struct dpif_backer_recirc_node, hmap_node); + ovs_mutex_unlock(&backer->recirc_mutex); + + return node ? node->ofproto : NULL; +} + uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto) { struct dpif_backer *backer = ofproto->backer; + uint32_t recirc_id = recirc_id_alloc(backer->rid_pool); + + if (recirc_id) { + struct dpif_backer_recirc_node *node = xmalloc(sizeof *node); + + node->recirc_id = recirc_id; + node->ofproto = ofproto; - return recirc_id_alloc(backer->rid_pool); + ovs_mutex_lock(&backer->recirc_mutex); + hmap_insert(&backer->recirc_map, &node->hmap_node, node->recirc_id); + ovs_mutex_unlock(&backer->recirc_mutex); + } + + return recirc_id; } void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id) { struct dpif_backer *backer = ofproto->backer; - - recirc_id_free(backer->rid_pool, recirc_id); + struct dpif_backer_recirc_node *node; + + ovs_mutex_lock(&backer->recirc_mutex); + node = CONTAINER_OF(hmap_first_with_hash(&backer->recirc_map, recirc_id), + struct dpif_backer_recirc_node, hmap_node); + if (node) { + hmap_remove(&backer->recirc_map, &node->hmap_node); + ovs_mutex_unlock(&backer->recirc_mutex); + recirc_id_free(backer->rid_pool, node->recirc_id); + /* RCU postpone the free, since other threads may be referring + * to 'node' at same time. */ + ovsrcu_postpone(free, node); + } else { + ovs_mutex_unlock(&backer->recirc_mutex); + } } int diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 6f77b1aca..cee472313 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -226,8 +226,20 @@ struct ofport_dpif *odp_port_to_ofport(const struct dpif_backer *, odp_port_t); * Post recirculation data path flows are managed like other data path flows. * They are created on demand. Miss handling, stats collection and revalidation * work the same way as regular flows. + * + * If the bridge which originates the recirculation is different from the bridge + * that receives the post recirculation packet (e.g. when patch port is used), + * the packet will be processed directly by the recirculation bridge with + * in_port set to OFPP_NONE. Admittedly, doing this limits the recirculation + * bridge from matching on in_port of post recirculation packets, and will be + * fixed in the near future. + * + * TODO: Always restore the correct in_port. + * */ +struct ofproto_dpif *ofproto_dpif_recirc_get_ofproto(const struct dpif_backer *ofproto, + uint32_t recirc_id); uint32_t ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto); void ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id); int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 1fcd93773..6fb0dce30 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -195,6 +195,74 @@ AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt]) AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 24]) AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 24]) AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 24]) + +OVS_VSWITCHD_STOP() +AT_CLEANUP + +# Makes sure recirculation does not change the way packet is handled. +AT_SETUP([ofproto-dpif, balance-tcp bonding, different recirc flow ]) +OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 bond_mode=balance-tcp lacp=active \ + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 -- \ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=standalone -- \ + add-bond br1 bond1 p3 p4 bond_mode=balance-tcp lacp=active \ + other-config:lacp-time=fast other-config:bond-rebalance-interval=0 -- \ + set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \ + set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \ + add-port br1 br1- -- set interface br1- type=patch options:peer=br1+ ofport_request=100 -- \ + add-br br-int -- \ + set bridge br-int other-config:hwaddr=aa:77:aa:77:00:00 -- \ + set bridge br-int datapath-type=dummy other-config:datapath-id=1235 \ + fail-mode=secure -- \ + add-port br-int br1+ -- set interface br1+ type=patch options:peer=br1- ofport_request=101 -- \ + add-port br-int p5 -- set interface p5 ofport_request=5 type=dummy +]) +AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK +]) + +# Waits for all ifaces enabled. +OVS_WAIT_UNTIL([test `ovs-appctl bond/show | grep -- "may_enable: true" | wc -l` -ge 4]) + +# The dl_vlan flow should not be ever matched, +# since recirculation should not change the flow handling. +AT_DATA([flows.txt], [dnl +table=0 priority=1 in_port=5 actions=mod_vlan_vid:1,output(101) +table=0 priority=2 in_port=5 dl_vlan=1 actions=drop +]) +AT_CHECK([ovs-ofctl add-flows br-int flows.txt]) + +# Sends a packet to trigger recirculation. +# Should generate recirc_id(0x12d),dp_hash(0xcf/0xff). +AT_CHECK([ovs-appctl netdev-dummy/receive p5 "in_port(5),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no)"]) + +ovs-appctl time/warp 5000 100 + +# Forces revalidators to update all stats. +AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [dnl +megaflows disabled +]) +AT_CHECK([ovs-appctl upcall/enable-megaflows], [0], [dnl +megaflows enabled +]) + +# Checks the flow stats in br1, should only be one flow with non-zero +# 'n_packets' from internal table. +AT_CHECK([ovs-appctl bridge/dump-flows br1 | ofctl_strip | grep -- "n_packets" | grep -- "table_id" | sed -e 's/output:[[0-9]]\+/output/'], [0], [dnl +table_id=254, n_packets=1, n_bytes=64, priority=20,recirc_id=0x12d,dp_hash=0xcf/0xff,actions=output +]) + +# Checks the flow stats in br-int, should be only one match. +AT_CHECK([ovs-ofctl dump-flows br-int | ofctl_strip | sort], [0], [dnl + n_packets=1, n_bytes=60, priority=1,in_port=5 actions=mod_vlan_vid:1,output:101 + priority=2,in_port=5,dl_vlan=1 actions=drop +NXST_FLOW reply: +]) + OVS_VSWITCHD_STOP() AT_CLEANUP