X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif-upcall.c;h=c1919275b668bde54dd33cc4d83ab437aa893cd3;hb=e4d3706cb9240abe9f39d60b835eef0f221d39ac;hp=59010c2d4e96e485a891e175a017628f25cd09c7;hpb=1520ef4f0d51841a64f47136ba9465afce2ea138;p=cascardo%2Fovs.git diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 59010c2d4..c1919275b 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -116,6 +116,20 @@ struct udpif { struct seq *dump_seq; /* Increments each dump iteration. */ atomic_bool enable_ufid; /* If true, skip dumping flow attrs. */ + /* These variables provide a mechanism for the main thread to pause + * all revalidation without having to completely shut the threads down. + * 'pause_latch' is shared between the main thread and the lead + * revalidator thread, so when it is desirable to halt revalidation, the + * main thread will set the latch. 'pause' and 'pause_barrier' are shared + * by revalidator threads. The lead revalidator will set 'pause' when it + * observes the latch has been set, and this will cause all revalidator + * threads to wait on 'pause_barrier' at the beginning of the next + * revalidation round. */ + bool pause; /* Set by leader on 'pause_latch. */ + struct latch pause_latch; /* Set to force revalidators pause. */ + struct ovs_barrier pause_barrier; /* Barrier used to pause all */ + /* revalidators by main thread. */ + /* There are 'N_UMAPS' maps containing 'struct udpif_key' elements. * * During the flow dump phase, revalidators insert into these with a random @@ -150,6 +164,12 @@ enum upcall_type { IPFIX_UPCALL /* Per-bridge sampling. */ }; +enum reval_result { + UKEY_KEEP, + UKEY_DELETE, + UKEY_MODIFY +}; + struct upcall { struct ofproto_dpif *ofproto; /* Parent ofproto. */ const struct recirc_id_node *recirc; /* Recirculation context. */ @@ -163,6 +183,8 @@ struct upcall { unsigned pmd_id; /* Datapath poll mode driver id. */ const struct dp_packet *packet; /* Packet associated with this upcall. */ ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */ + uint16_t mru; /* If !0, Maximum receive unit of + fragmented IP packet */ enum dpif_upcall_type type; /* Datapath type of the upcall. */ const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */ @@ -215,7 +237,6 @@ struct udpif_key { size_t key_len; /* Length of 'key'. */ const struct nlattr *mask; /* Datapath flow mask. */ size_t mask_len; /* Length of 'mask'. */ - struct ofpbuf *actions; /* Datapath flow actions as nlattrs. */ ovs_u128 ufid; /* Unique flow identifier. */ bool ufid_present; /* True if 'ufid' is in datapath. */ uint32_t hash; /* Pre-computed hash for 'key'. */ @@ -228,6 +249,9 @@ struct udpif_key { uint64_t reval_seq OVS_GUARDED; /* Tracks udpif->reval_seq. */ bool flow_exists OVS_GUARDED; /* Ensures flows are only deleted once. */ + /* Datapath flow actions as nlattrs. Protected by RCU. Read with + * ukey_get_actions(), and write with ukey_set_actions(). */ + OVSRCU_TYPE(struct ofpbuf *) actions; struct xlate_cache *xcache OVS_GUARDED; /* Cache for xlate entries that * are affected by this ukey. @@ -237,9 +261,8 @@ struct udpif_key { struct nlattr nla; } keybuf, maskbuf; - /* Recirculation IDs with references held by the ukey. */ - unsigned n_recircs; - uint32_t recircs[]; /* 'n_recircs' id's for which references are held. */ + uint32_t key_recirc_id; /* Non-zero if reference is held by the ukey. */ + struct recirc_refs recircs; /* Action recirc IDs with references held. */ }; /* Datapath operation with optional ukey attached. */ @@ -259,10 +282,13 @@ static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls); static void udpif_stop_threads(struct udpif *); static void udpif_start_threads(struct udpif *, size_t n_handlers, size_t n_revalidators); +static void udpif_pause_revalidators(struct udpif *); +static void udpif_resume_revalidators(struct udpif *); static void *udpif_upcall_handler(void *); static void *udpif_revalidator(void *); static unsigned long udpif_get_n_flows(struct udpif *); static void revalidate(struct revalidator *); +static void revalidator_pause(struct revalidator *); static void revalidator_sweep(struct revalidator *); static void revalidator_purge(struct revalidator *); static void upcall_unixctl_show(struct unixctl_conn *conn, int argc, @@ -287,6 +313,8 @@ static struct udpif_key *ukey_create_from_upcall(struct upcall *, static int ukey_create_from_dpif_flow(const struct udpif *, const struct dpif_flow *, struct udpif_key **); +static void ukey_get_actions(struct udpif_key *, const struct nlattr **actions, + size_t *size); static bool ukey_install_start(struct udpif *, struct udpif_key *ukey); static bool ukey_install_finish(struct udpif_key *ukey, int error); static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey); @@ -302,10 +330,12 @@ static enum upcall_type classify_upcall(enum dpif_upcall_type type, static int upcall_receive(struct upcall *, const struct dpif_backer *, const struct dp_packet *packet, enum dpif_upcall_type, const struct nlattr *userdata, const struct flow *, + const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id); static void upcall_uninit(struct upcall *); static upcall_callback upcall_cb; +static dp_purge_callback dp_purge_cb; static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true); static atomic_bool enable_ufid = ATOMIC_VAR_INIT(true); @@ -346,6 +376,7 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) udpif->reval_seq = seq_create(); udpif->dump_seq = seq_create(); latch_init(&udpif->exit_latch); + latch_init(&udpif->pause_latch); list_push_back(&all_udpifs, &udpif->list_node); atomic_init(&udpif->enable_ufid, false); atomic_init(&udpif->n_flows, 0); @@ -358,6 +389,7 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) } dpif_register_upcall_cb(dpif, upcall_cb, udpif); + dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif); return udpif; } @@ -391,6 +423,7 @@ udpif_destroy(struct udpif *udpif) list_remove(&udpif->list_node); latch_destroy(&udpif->exit_latch); + latch_destroy(&udpif->pause_latch); seq_destroy(udpif->reval_seq); seq_destroy(udpif->dump_seq); ovs_mutex_destroy(&udpif->n_flows_mutex); @@ -430,6 +463,7 @@ udpif_stop_threads(struct udpif *udpif) latch_poll(&udpif->exit_latch); ovs_barrier_destroy(&udpif->reval_barrier); + ovs_barrier_destroy(&udpif->pause_barrier); free(udpif->revalidators); udpif->revalidators = NULL; @@ -469,7 +503,9 @@ udpif_start_threads(struct udpif *udpif, size_t n_handlers, dpif_enable_upcall(udpif->dpif); ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators); + ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1); udpif->reval_exit = false; + udpif->pause = false; udpif->revalidators = xzalloc(udpif->n_revalidators * sizeof *udpif->revalidators); for (i = 0; i < udpif->n_revalidators; i++) { @@ -482,6 +518,29 @@ udpif_start_threads(struct udpif *udpif, size_t n_handlers, } } +/* Pauses all revalidators. Should only be called by the main thread. + * When function returns, all revalidators are paused and will proceed + * only after udpif_resume_revalidators() is called. */ +static void +udpif_pause_revalidators(struct udpif *udpif) +{ + if (ofproto_dpif_backer_enabled(udpif->backer)) { + latch_set(&udpif->pause_latch); + ovs_barrier_block(&udpif->pause_barrier); + } +} + +/* Resumes the pausing of revalidators. Should only be called by the + * main thread. */ +static void +udpif_resume_revalidators(struct udpif *udpif) +{ + if (ofproto_dpif_backer_enabled(udpif->backer)) { + latch_poll(&udpif->pause_latch); + ovs_barrier_block(&udpif->pause_barrier); + } +} + /* Tells 'udpif' how many threads it should use to handle upcalls. * 'n_handlers' and 'n_revalidators' can never be zero. 'udpif''s * datapath handle must have packet reception enabled before starting @@ -663,6 +722,7 @@ recv_upcalls(struct handler *handler) struct dpif_upcall *dupcall = &dupcalls[n_upcalls]; struct upcall *upcall = &upcalls[n_upcalls]; struct flow *flow = &flows[n_upcalls]; + unsigned int mru; int error; ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls], @@ -677,8 +737,14 @@ recv_upcalls(struct handler *handler) goto free_dupcall; } + if (dupcall->mru) { + mru = nl_attr_get_u16(dupcall->mru); + } else { + mru = 0; + } + error = upcall_receive(upcall, udpif->backer, &dupcall->packet, - dupcall->type, dupcall->userdata, flow, + dupcall->type, dupcall->userdata, flow, mru, &dupcall->ufid, PMD_ID_NULL); if (error) { if (error == ENODEV) { @@ -764,6 +830,12 @@ udpif_revalidator(void *arg) udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows); udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2; + /* Only the leader checks the pause latch to prevent a race where + * some threads think it's false and proceed to block on + * reval_barrier and others think it's true and block indefinitely + * on the pause_barrier */ + udpif->pause = latch_is_set(&udpif->pause_latch); + /* Only the leader checks the exit latch to prevent a race where * some threads think it's true and exit and others think it's * false and block indefinitely on the reval_barrier */ @@ -780,6 +852,10 @@ udpif_revalidator(void *arg) /* Wait for the leader to start the flow dump. */ ovs_barrier_block(&udpif->reval_barrier); + if (udpif->pause) { + revalidator_pause(revalidator); + } + if (udpif->reval_exit) { break; } @@ -822,6 +898,7 @@ udpif_revalidator(void *arg) poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500)); seq_wait(udpif->reval_seq, last_reval_seq); latch_wait(&udpif->exit_latch); + latch_wait(&udpif->pause_latch); poll_block(); } } @@ -913,6 +990,7 @@ static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, + const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id) { int error; @@ -942,6 +1020,7 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, upcall->ukey = NULL; upcall->key = NULL; upcall->key_len = 0; + upcall->mru = mru; upcall->out_tun_key = NULL; upcall->actions = NULL; @@ -1018,7 +1097,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall, ofpbuf_use_const(&upcall->put_actions, odp_actions->data, odp_actions->size); } else { - ofpbuf_init(&upcall->put_actions, 0); + /* upcall->put_actions already initialized by upcall_receive(). */ compose_slow_path(udpif, &upcall->xout, upcall->flow, upcall->flow->in_port.odp_port, &upcall->put_actions); @@ -1058,6 +1137,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi const struct nlattr *userdata, struct ofpbuf *actions, struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); struct udpif *udpif = aux; unsigned int flow_limit; struct upcall upcall; @@ -1068,7 +1148,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi atomic_read_relaxed(&udpif->flow_limit, &flow_limit); error = upcall_receive(&upcall, udpif->backer, packet, type, userdata, - flow, ufid, pmd_id); + flow, 0, ufid, pmd_id); if (error) { return error; } @@ -1088,6 +1168,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi } if (udpif_get_n_flows(udpif) >= flow_limit) { + VLOG_WARN_RL(&rl, "upcall_cb failure: datapath flow limit reached"); error = ENOSPC; goto out; } @@ -1095,11 +1176,13 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi /* Prevent miss flow installation if the key has recirculation ID but we * were not able to get a reference on it. */ if (type == DPIF_UC_MISS && upcall.recirc && !upcall.have_recirc_ref) { + VLOG_WARN_RL(&rl, "upcall_cb failure: no reference for recirc flow"); error = ENOSPC; goto out; } if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) { + VLOG_WARN_RL(&rl, "upcall_cb failure: ukey installation fails"); error = ENOSPC; } out: @@ -1127,7 +1210,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, if (upcall->sflow) { union user_action_cookie cookie; const struct nlattr *actions; - int actions_len = 0; + size_t actions_len = 0; struct dpif_sflow_actions sflow_actions; memset(&sflow_actions, 0, sizeof sflow_actions); memset(&cookie, 0, sizeof cookie); @@ -1145,8 +1228,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, /* Lookup actions in userspace cache. */ struct udpif_key *ukey = ukey_lookup(udpif, upcall->ufid); if (ukey) { - actions = ukey->actions->data; - actions_len = ukey->actions->size; + ukey_get_actions(ukey, &actions, &actions_len); dpif_sflow_read_actions(flow, actions, actions_len, &sflow_actions); } @@ -1166,7 +1248,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix); if (upcall->out_tun_key) { - odp_tun_key_from_attr(upcall->out_tun_key, + odp_tun_key_from_attr(upcall->out_tun_key, false, &output_tunnel_key); } dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow, @@ -1210,10 +1292,8 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, unsigned int flow_limit; size_t n_ops, n_opsp, i; bool may_put; - bool megaflow; atomic_read_relaxed(&udpif->flow_limit, &flow_limit); - atomic_read_relaxed(&enable_megaflows, &megaflow); may_put = udpif_get_n_flows(udpif) < flow_limit; @@ -1273,8 +1353,8 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, op->dop.u.flow_put.mask_len = ukey->mask_len; op->dop.u.flow_put.ufid = upcall->ufid; op->dop.u.flow_put.stats = NULL; - op->dop.u.flow_put.actions = ukey->actions->data; - op->dop.u.flow_put.actions_len = ukey->actions->size; + ukey_get_actions(ukey, &op->dop.u.flow_put.actions, + &op->dop.u.flow_put.actions_len); } if (upcall->odp_actions.size) { @@ -1288,6 +1368,7 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, op->dop.u.execute.actions_len = upcall->odp_actions.size; op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0; op->dop.u.execute.probe = false; + op->dop.u.execute.mtu = upcall->mru; } } @@ -1340,18 +1421,34 @@ ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid) return NULL; } +/* Provides safe lockless access of RCU protected 'ukey->actions'. Callers may + * alternatively access the field directly if they take 'ukey->mutex'. */ +static void +ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size) +{ + const struct ofpbuf *buf = ovsrcu_get(struct ofpbuf *, &ukey->actions); + *actions = buf->data; + *size = buf->size; +} + +static void +ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions) +{ + ovsrcu_postpone(ofpbuf_delete, + ovsrcu_get_protected(struct ofpbuf *, &ukey->actions)); + ovsrcu_set(&ukey->actions, ofpbuf_clone(actions)); +} + static struct udpif_key * ukey_create__(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, bool ufid_present, const ovs_u128 *ufid, const unsigned pmd_id, const struct ofpbuf *actions, uint64_t dump_seq, uint64_t reval_seq, long long int used, - const struct recirc_id_node *key_recirc, struct xlate_out *xout) + uint32_t key_recirc_id, struct xlate_out *xout) OVS_NO_THREAD_SAFETY_ANALYSIS { - unsigned n_recircs = (key_recirc ? 1 : 0) + (xout ? xout->n_recircs : 0); - struct udpif_key *ukey = xmalloc(sizeof *ukey + - n_recircs * sizeof *ukey->recircs); + struct udpif_key *ukey = xmalloc(sizeof *ukey); memcpy(&ukey->keybuf, key, key_len); ukey->key = &ukey->keybuf.nla; @@ -1363,7 +1460,9 @@ ukey_create__(const struct nlattr *key, size_t key_len, ukey->ufid = *ufid; ukey->pmd_id = pmd_id; ukey->hash = get_ufid_hash(&ukey->ufid); - ukey->actions = ofpbuf_clone(actions); + + ovsrcu_init(&ukey->actions, NULL); + ukey_set_actions(ukey, actions); ovs_mutex_init(&ukey->mutex); ukey->dump_seq = dump_seq; @@ -1374,17 +1473,13 @@ ukey_create__(const struct nlattr *key, size_t key_len, ukey->stats.used = used; ukey->xcache = NULL; - ukey->n_recircs = n_recircs; - if (key_recirc) { - ukey->recircs[0] = key_recirc->id; + ukey->key_recirc_id = key_recirc_id; + recirc_refs_init(&ukey->recircs); + if (xout) { + /* Take ownership of the action recirc id references. */ + recirc_refs_swap(&ukey->recircs, &xout->recircs); } - if (xout && xout->n_recircs) { - const uint32_t *act_recircs = xlate_out_get_recircs(xout); - memcpy(ukey->recircs + (key_recirc ? 1 : 0), act_recircs, - xout->n_recircs * sizeof *ukey->recircs); - xlate_out_take_recircs(xout); - } return ukey; } @@ -1423,7 +1518,7 @@ ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc) true, upcall->ufid, upcall->pmd_id, &upcall->put_actions, upcall->dump_seq, upcall->reval_seq, 0, - upcall->have_recirc_ref ? upcall->recirc : NULL, + upcall->have_recirc_ref ? upcall->recirc->id : 0, &upcall->xout); } @@ -1456,7 +1551,14 @@ ukey_create_from_dpif_flow(const struct udpif *udpif, /* Check the flow actions for recirculation action. As recirculation * relies on OVS userspace internal state, we need to delete all old - * datapath flows with recirculation upon OVS restart. */ + * datapath flows with either a non-zero recirc_id in the key, or any + * recirculation actions upon OVS restart. */ + NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->key, flow->key_len) { + if (nl_attr_type(a) == OVS_KEY_ATTR_RECIRC_ID + && nl_attr_get_u32(a) != 0) { + return EINVAL; + } + } NL_ATTR_FOR_EACH_UNSAFE (a, left, flow->actions, flow->actions_len) { if (nl_attr_type(a) == OVS_ACTION_ATTR_RECIRC) { return EINVAL; @@ -1469,7 +1571,7 @@ ukey_create_from_dpif_flow(const struct udpif *udpif, *ukey = ukey_create__(flow->key, flow->key_len, flow->mask, flow->mask_len, flow->ufid_present, &flow->ufid, flow->pmd_id, &actions, dump_seq, - reval_seq, flow->stats.used, NULL, NULL); + reval_seq, flow->stats.used, 0, NULL); return 0; } @@ -1612,11 +1714,12 @@ ukey_delete__(struct udpif_key *ukey) OVS_NO_THREAD_SAFETY_ANALYSIS { if (ukey) { - for (int i = 0; i < ukey->n_recircs; i++) { - recirc_free_id(ukey->recircs[i]); + if (ukey->key_recirc_id) { + recirc_free_id(ukey->key_recirc_id); } + recirc_refs_unref(&ukey->recircs); xlate_cache_delete(ukey->xcache); - ofpbuf_delete(ukey->actions); + ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions)); ovs_mutex_destroy(&ukey->mutex); free(ukey); } @@ -1663,33 +1766,49 @@ should_revalidate(const struct udpif *udpif, uint64_t packets, return false; } -static bool +/* Verifies that the datapath actions of 'ukey' are still correct, and pushes + * 'stats' for it. + * + * Returns a recommended action for 'ukey', options include: + * UKEY_DELETE The ukey should be deleted. + * UKEY_KEEP The ukey is fine as is. + * UKEY_MODIFY The ukey's actions should be changed but is otherwise + * fine. Callers should change the actions to those found + * in the caller supplied 'odp_actions' buffer. The + * recirculation references can be found in 'recircs' and + * must be handled by the caller. + * + * If the result is UKEY_MODIFY, then references to all recirc_ids used by the + * new flow will be held within 'recircs' (which may be none). + * + * The caller is responsible for both initializing 'recircs' prior this call, + * and ensuring any references are eventually freed. + */ +static enum reval_result revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, - const struct dpif_flow_stats *stats, uint64_t reval_seq) + const struct dpif_flow_stats *stats, + struct ofpbuf *odp_actions, uint64_t reval_seq, + struct recirc_refs *recircs) OVS_REQUIRES(ukey->mutex) { - uint64_t odp_actions_stub[1024 / 8]; - struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); - struct xlate_out xout, *xoutp; struct netflow *netflow; struct ofproto_dpif *ofproto; struct dpif_flow_stats push; - struct flow flow, dp_mask; - struct flow_wildcards wc; - uint64_t *dp64, *xout64; + struct flow flow; + struct flow_wildcards dp_mask, wc; + enum reval_result result; ofp_port_t ofp_in_port; struct xlate_in xin; long long int last_used; int error; - size_t i; - bool ok; bool need_revalidate; - ok = false; + result = UKEY_DELETE; xoutp = NULL; netflow = NULL; + ofpbuf_clear(odp_actions); need_revalidate = (ukey->reval_seq != reval_seq); last_used = ukey->stats.used; push.used = stats->used; @@ -1703,20 +1822,19 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, if (need_revalidate && last_used && !should_revalidate(udpif, push.n_packets, last_used)) { - ok = false; goto exit; } /* We will push the stats, so update the ukey stats cache. */ ukey->stats = *stats; if (!push.n_packets && !need_revalidate) { - ok = true; + result = UKEY_KEEP; goto exit; } if (ukey->xcache && !need_revalidate) { xlate_push_stats(ukey->xcache, &push); - ok = true; + result = UKEY_KEEP; goto exit; } @@ -1739,7 +1857,7 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, } xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags, - NULL, need_revalidate ? &wc : NULL, &odp_actions); + NULL, need_revalidate ? &wc : NULL, odp_actions); if (push.n_packets) { xin.resubmit_stats = &push; xin.may_learn = true; @@ -1749,50 +1867,52 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, xoutp = &xout; if (!need_revalidate) { - ok = true; + result = UKEY_KEEP; goto exit; } if (xout.slow) { - ofpbuf_clear(&odp_actions); + ofpbuf_clear(odp_actions); compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port, - &odp_actions); + odp_actions); } - if (!ofpbuf_equal(&odp_actions, ukey->actions)) { + if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key, + ukey->key_len, &dp_mask.masks, &flow) + == ODP_FIT_ERROR) { goto exit; } - if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key, - ukey->key_len, &dp_mask, &flow) == ODP_FIT_ERROR) { + /* Do not modify if any bit is wildcarded by the installed datapath flow, + * but not the newly revalidated wildcard mask (wc), i.e., if revalidation + * tells that the datapath flow is now too generic and must be narrowed + * down. Note that we do not know if the datapath has ignored any of the + * wildcarded bits, so we may be overtly conservative here. */ + if (flow_wildcards_has_extra(&dp_mask, &wc)) { goto exit; } - /* Since the kernel is free to ignore wildcarded bits in the mask, we can't - * directly check that the masks are the same. Instead we check that the - * mask in the kernel is more specific i.e. less wildcarded, than what - * we've calculated here. This guarantees we don't catch any packets we - * shouldn't with the megaflow. */ - dp64 = (uint64_t *) &dp_mask; - xout64 = (uint64_t *) &wc.masks; - for (i = 0; i < FLOW_U64S; i++) { - if ((dp64[i] | xout64[i]) != dp64[i]) { - goto exit; - } + if (!ofpbuf_equal(odp_actions, + ovsrcu_get(struct ofpbuf *, &ukey->actions))) { + /* The datapath mask was OK, but the actions seem to have changed. + * Let's modify it in place. */ + result = UKEY_MODIFY; + /* Transfer recirc action ID references to the caller. */ + recirc_refs_swap(recircs, &xoutp->recircs); + goto exit; } - ok = true; + result = UKEY_KEEP; exit: - if (ok) { + if (result != UKEY_DELETE) { ukey->reval_seq = reval_seq; } - if (netflow && !ok) { + if (netflow && result == UKEY_DELETE) { netflow_flow_clear(netflow, &flow); } xlate_out_uninit(xoutp); - ofpbuf_uninit(&odp_actions); - return ok; + return result; } static void @@ -1822,6 +1942,23 @@ delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey) op->dop.u.flow_del.terse = udpif_use_ufid(udpif); } +static void +modify_op_init(struct ukey_op *op, struct udpif_key *ukey) +{ + op->ukey = ukey; + op->dop.type = DPIF_OP_FLOW_PUT; + op->dop.u.flow_put.flags = DPIF_FP_MODIFY; + op->dop.u.flow_put.key = ukey->key; + op->dop.u.flow_put.key_len = ukey->key_len; + op->dop.u.flow_put.mask = ukey->mask; + op->dop.u.flow_put.mask_len = ukey->mask_len; + op->dop.u.flow_put.ufid = &ukey->ufid; + op->dop.u.flow_put.pmd_id = ukey->pmd_id; + op->dop.u.flow_put.stats = NULL; + ukey_get_actions(ukey, &op->dop.u.flow_put.actions, + &op->dop.u.flow_put.actions_len); +} + static void push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) { @@ -1841,6 +1978,16 @@ push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) stats = op->dop.u.flow_del.stats; push = &push_buf; + if (op->dop.type != DPIF_OP_FLOW_DEL) { + /* Only deleted flows need their stats pushed. */ + continue; + } + + if (op->dop.error) { + /* flow_del error, 'stats' is unusable. */ + continue; + } + if (op->ukey) { ovs_mutex_lock(&op->ukey->mutex); push->used = MAX(stats->used, op->ukey->stats.used); @@ -1923,9 +2070,31 @@ log_unexpected_flow(const struct dpif_flow *flow, int error) VLOG_WARN_RL(&rl, "%s", ds_cstr(&ds)); } +static void +reval_op_init(struct ukey_op *op, enum reval_result result, + struct udpif *udpif, struct udpif_key *ukey, + struct recirc_refs *recircs, struct ofpbuf *odp_actions) +{ + if (result == UKEY_DELETE) { + delete_op_init(udpif, op, ukey); + } else if (result == UKEY_MODIFY) { + /* Store the new recircs. */ + recirc_refs_swap(&ukey->recircs, recircs); + /* Release old recircs. */ + recirc_refs_unref(recircs); + /* ukey->key_recirc_id remains, as the key is the same as before. */ + + ukey_set_actions(ukey, odp_actions); + modify_op_init(op, ukey); + } +} + static void revalidate(struct revalidator *revalidator) { + uint64_t odp_actions_stub[1024 / 8]; + struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); + struct udpif *udpif = revalidator->udpif; struct dpif_flow_dump_thread *dump_thread; uint64_t dump_seq, reval_seq; @@ -1973,8 +2142,10 @@ revalidate(struct revalidator *revalidator) for (f = flows; f < &flows[n_dumped]; f++) { long long int used = f->stats.used; + struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER; + enum reval_result result; struct udpif_key *ukey; - bool already_dumped, keep; + bool already_dumped; int error; if (ukey_acquire(udpif, f, &ukey, &error)) { @@ -2008,15 +2179,18 @@ revalidate(struct revalidator *revalidator) used = ukey->created; } if (kill_them_all || (used && used < now - max_idle)) { - keep = false; + result = UKEY_DELETE; } else { - keep = revalidate_ukey(udpif, ukey, &f->stats, reval_seq); + result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions, + reval_seq, &recircs); } ukey->dump_seq = dump_seq; - ukey->flow_exists = keep; + ukey->flow_exists = result != UKEY_DELETE; - if (!keep) { - delete_op_init(udpif, &ops[n_ops++], ukey); + if (result != UKEY_KEEP) { + /* Takes ownership of 'recircs'. */ + reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs, + &odp_actions); } ovs_mutex_unlock(&ukey->mutex); } @@ -2027,23 +2201,18 @@ revalidate(struct revalidator *revalidator) ovsrcu_quiesce(); } dpif_flow_dump_thread_destroy(dump_thread); + ofpbuf_uninit(&odp_actions); } -static bool -handle_missed_revalidation(struct udpif *udpif, uint64_t reval_seq, - struct udpif_key *ukey) +/* Pauses the 'revalidator', can only proceed after main thread + * calls udpif_resume_revalidators(). */ +static void +revalidator_pause(struct revalidator *revalidator) { - struct dpif_flow_stats stats; - bool keep; - - COVERAGE_INC(revalidate_missed_dp_flow); - - memset(&stats, 0, sizeof stats); - ovs_mutex_lock(&ukey->mutex); - keep = revalidate_ukey(udpif, ukey, &stats, reval_seq); - ovs_mutex_unlock(&ukey->mutex); - - return keep; + /* The first block is for sync'ing the pause with main thread. */ + ovs_barrier_block(&revalidator->udpif->pause_barrier); + /* The second block is for pausing until main thread resumes. */ + ovs_barrier_block(&revalidator->udpif->pause_barrier); } static void @@ -2060,6 +2229,9 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) ovs_assert(slice < udpif->n_revalidators); for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) { + uint64_t odp_actions_stub[1024 / 8]; + struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); + struct ukey_op ops[REVALIDATE_MAX_BATCH]; struct udpif_key *ukey; struct umap *umap = &udpif->ukeys[i]; @@ -2067,6 +2239,8 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) { bool flow_exists, seq_mismatch; + struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER; + enum reval_result result; /* Handler threads could be holding a ukey lock while it installs a * new flow, so don't hang around waiting for access to it. */ @@ -2076,21 +2250,31 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) flow_exists = ukey->flow_exists; seq_mismatch = (ukey->dump_seq != dump_seq && ukey->reval_seq != reval_seq); + + if (purge) { + result = UKEY_DELETE; + } else if (!seq_mismatch) { + result = UKEY_KEEP; + } else { + struct dpif_flow_stats stats; + COVERAGE_INC(revalidate_missed_dp_flow); + memset(&stats, 0, sizeof stats); + result = revalidate_ukey(udpif, ukey, &stats, &odp_actions, + reval_seq, &recircs); + } + if (result != UKEY_KEEP) { + /* Takes ownership of 'recircs'. */ + reval_op_init(&ops[n_ops++], result, udpif, ukey, &recircs, + &odp_actions); + } ovs_mutex_unlock(&ukey->mutex); - if (flow_exists - && (purge - || (seq_mismatch - && !handle_missed_revalidation(udpif, reval_seq, - ukey)))) { - struct ukey_op *op = &ops[n_ops++]; + if (n_ops == REVALIDATE_MAX_BATCH) { + push_ukey_ops(udpif, umap, ops, n_ops); + n_ops = 0; + } - delete_op_init(udpif, op, ukey); - if (n_ops == REVALIDATE_MAX_BATCH) { - push_ukey_ops(udpif, umap, ops, n_ops); - n_ops = 0; - } - } else if (!flow_exists) { + if (!flow_exists) { ovs_mutex_lock(&umap->mutex); ukey_delete(umap, ukey); ovs_mutex_unlock(&umap->mutex); @@ -2100,6 +2284,8 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) if (n_ops) { push_ukey_ops(udpif, umap, ops, n_ops); } + + ofpbuf_uninit(&odp_actions); ovsrcu_quiesce(); } } @@ -2115,6 +2301,39 @@ revalidator_purge(struct revalidator *revalidator) { revalidator_sweep__(revalidator, true); } + +/* In reaction to dpif purge, purges all 'ukey's with same 'pmd_id'. */ +static void +dp_purge_cb(void *aux, unsigned pmd_id) +{ + struct udpif *udpif = aux; + size_t i; + + udpif_pause_revalidators(udpif); + for (i = 0; i < N_UMAPS; i++) { + struct ukey_op ops[REVALIDATE_MAX_BATCH]; + struct udpif_key *ukey; + struct umap *umap = &udpif->ukeys[i]; + size_t n_ops = 0; + + CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) { + if (ukey->pmd_id == pmd_id) { + delete_op_init(udpif, &ops[n_ops++], ukey); + if (n_ops == REVALIDATE_MAX_BATCH) { + push_ukey_ops(udpif, umap, ops, n_ops); + n_ops = 0; + } + } + } + + if (n_ops) { + push_ukey_ops(udpif, umap, ops, n_ops); + } + + ovsrcu_quiesce(); + } + udpif_resume_revalidators(udpif); +} static void upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,