X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif-upcall.c;h=245f52e691848535ac27fe7ba0a6e8a5723cd17a;hb=f3abc57bbfc581eb399a388e94db1a8923243fb4;hp=84a761acfb96c2f23a07dfcaa2a9ad32e6b29882;hpb=35303d715b1f0db46e6a27146815061a60385dc6;p=cascardo%2Fovs.git diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 84a761acf..245f52e69 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. +/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,6 +116,20 @@ struct udpif { struct seq *dump_seq; /* Increments each dump iteration. */ atomic_bool enable_ufid; /* If true, skip dumping flow attrs. */ + /* These variables provide a mechanism for the main thread to pause + * all revalidation without having to completely shut the threads down. + * 'pause_latch' is shared between the main thread and the lead + * revalidator thread, so when it is desirable to halt revalidation, the + * main thread will set the latch. 'pause' and 'pause_barrier' are shared + * by revalidator threads. The lead revalidator will set 'pause' when it + * observes the latch has been set, and this will cause all revalidator + * threads to wait on 'pause_barrier' at the beginning of the next + * revalidation round. */ + bool pause; /* Set by leader on 'pause_latch. */ + struct latch pause_latch; /* Set to force revalidators pause. */ + struct ovs_barrier pause_barrier; /* Barrier used to pause all */ + /* revalidators by main thread. */ + /* There are 'N_UMAPS' maps containing 'struct udpif_key' elements. * * During the flow dump phase, revalidators insert into these with a random @@ -150,6 +164,12 @@ enum upcall_type { IPFIX_UPCALL /* Per-bridge sampling. */ }; +enum reval_result { + UKEY_KEEP, + UKEY_DELETE, + UKEY_MODIFY +}; + struct upcall { struct ofproto_dpif *ofproto; /* Parent ofproto. */ const struct recirc_id_node *recirc; /* Recirculation context. */ @@ -163,13 +183,18 @@ struct upcall { unsigned pmd_id; /* Datapath poll mode driver id. */ const struct dp_packet *packet; /* Packet associated with this upcall. */ ofp_port_t in_port; /* OpenFlow in port, or OFPP_NONE. */ + uint16_t mru; /* If !0, Maximum receive unit of + fragmented IP packet */ enum dpif_upcall_type type; /* Datapath type of the upcall. */ const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */ + const struct nlattr *actions; /* Flow actions in DPIF_UC_ACTION Upcalls. */ bool xout_initialized; /* True if 'xout' must be uninitialized. */ struct xlate_out xout; /* Result of xlate_actions(). */ - struct ofpbuf put_actions; /* Actions 'put' in the fastapath. */ + struct ofpbuf odp_actions; /* Datapath actions from xlate_actions(). */ + struct flow_wildcards wc; /* Dependencies that megaflow must match. */ + struct ofpbuf put_actions; /* Actions 'put' in the fastpath. */ struct dpif_ipfix *ipfix; /* IPFIX pointer or NULL. */ struct dpif_sflow *sflow; /* SFlow pointer or NULL. */ @@ -188,6 +213,8 @@ struct upcall { const struct nlattr *key; /* Datapath flow key. */ size_t key_len; /* Datapath flow key length. */ const struct nlattr *out_tun_key; /* Datapath output tunnel key. */ + + uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */ }; /* 'udpif_key's are responsible for tracking the little bit of state udpif @@ -210,7 +237,6 @@ struct udpif_key { size_t key_len; /* Length of 'key'. */ const struct nlattr *mask; /* Datapath flow mask. */ size_t mask_len; /* Length of 'mask'. */ - struct ofpbuf *actions; /* Datapath flow actions as nlattrs. */ ovs_u128 ufid; /* Unique flow identifier. */ bool ufid_present; /* True if 'ufid' is in datapath. */ uint32_t hash; /* Pre-computed hash for 'key'. */ @@ -223,6 +249,9 @@ struct udpif_key { uint64_t reval_seq OVS_GUARDED; /* Tracks udpif->reval_seq. */ bool flow_exists OVS_GUARDED; /* Ensures flows are only deleted once. */ + /* Datapath flow actions as nlattrs. Protected by RCU. Read with + * ukey_get_actions(), and write with ukey_set_actions(). */ + OVSRCU_TYPE(struct ofpbuf *) actions; struct xlate_cache *xcache OVS_GUARDED; /* Cache for xlate entries that * are affected by this ukey. @@ -249,15 +278,18 @@ static struct ovs_list all_udpifs = OVS_LIST_INITIALIZER(&all_udpifs); static size_t recv_upcalls(struct handler *); static int process_upcall(struct udpif *, struct upcall *, - struct ofpbuf *odp_actions); + struct ofpbuf *odp_actions, struct flow_wildcards *); static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls); static void udpif_stop_threads(struct udpif *); static void udpif_start_threads(struct udpif *, size_t n_handlers, size_t n_revalidators); +static void udpif_pause_revalidators(struct udpif *); +static void udpif_resume_revalidators(struct udpif *); static void *udpif_upcall_handler(void *); static void *udpif_revalidator(void *); static unsigned long udpif_get_n_flows(struct udpif *); static void revalidate(struct revalidator *); +static void revalidator_pause(struct revalidator *); static void revalidator_sweep(struct revalidator *); static void revalidator_purge(struct revalidator *); static void upcall_unixctl_show(struct unixctl_conn *conn, int argc, @@ -277,10 +309,13 @@ static void upcall_unixctl_dump_wait(struct unixctl_conn *conn, int argc, static void upcall_unixctl_purge(struct unixctl_conn *conn, int argc, const char *argv[], void *aux); -static struct udpif_key *ukey_create_from_upcall(struct upcall *); +static struct udpif_key *ukey_create_from_upcall(struct upcall *, + struct flow_wildcards *); static int ukey_create_from_dpif_flow(const struct udpif *, const struct dpif_flow *, struct udpif_key **); +static void ukey_get_actions(struct udpif_key *, const struct nlattr **actions, + size_t *size); static bool ukey_install_start(struct udpif *, struct udpif_key *ukey); static bool ukey_install_finish(struct udpif_key *ukey, int error); static bool ukey_install(struct udpif *udpif, struct udpif_key *ukey); @@ -296,20 +331,20 @@ static enum upcall_type classify_upcall(enum dpif_upcall_type type, static int upcall_receive(struct upcall *, const struct dpif_backer *, const struct dp_packet *packet, enum dpif_upcall_type, const struct nlattr *userdata, const struct flow *, + const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id); static void upcall_uninit(struct upcall *); static upcall_callback upcall_cb; +static dp_purge_callback dp_purge_cb; static atomic_bool enable_megaflows = ATOMIC_VAR_INIT(true); static atomic_bool enable_ufid = ATOMIC_VAR_INIT(true); -struct udpif * -udpif_create(struct dpif_backer *backer, struct dpif *dpif) +void +udpif_init(void) { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; - struct udpif *udpif = xzalloc(sizeof *udpif); - if (ovsthread_once_start(&once)) { unixctl_command_register("upcall/show", "", 0, 0, upcall_unixctl_show, NULL); @@ -329,6 +364,12 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) upcall_unixctl_purge, NULL); ovsthread_once_done(&once); } +} + +struct udpif * +udpif_create(struct dpif_backer *backer, struct dpif *dpif) +{ + struct udpif *udpif = xzalloc(sizeof *udpif); udpif->dpif = dpif; udpif->backer = backer; @@ -336,6 +377,7 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) udpif->reval_seq = seq_create(); udpif->dump_seq = seq_create(); latch_init(&udpif->exit_latch); + latch_init(&udpif->pause_latch); list_push_back(&all_udpifs, &udpif->list_node); atomic_init(&udpif->enable_ufid, false); atomic_init(&udpif->n_flows, 0); @@ -348,6 +390,7 @@ udpif_create(struct dpif_backer *backer, struct dpif *dpif) } dpif_register_upcall_cb(dpif, upcall_cb, udpif); + dpif_register_dp_purge_cb(dpif, dp_purge_cb, udpif); return udpif; } @@ -381,6 +424,7 @@ udpif_destroy(struct udpif *udpif) list_remove(&udpif->list_node); latch_destroy(&udpif->exit_latch); + latch_destroy(&udpif->pause_latch); seq_destroy(udpif->reval_seq); seq_destroy(udpif->dump_seq); ovs_mutex_destroy(&udpif->n_flows_mutex); @@ -420,6 +464,7 @@ udpif_stop_threads(struct udpif *udpif) latch_poll(&udpif->exit_latch); ovs_barrier_destroy(&udpif->reval_barrier); + ovs_barrier_destroy(&udpif->pause_barrier); free(udpif->revalidators); udpif->revalidators = NULL; @@ -459,7 +504,9 @@ udpif_start_threads(struct udpif *udpif, size_t n_handlers, dpif_enable_upcall(udpif->dpif); ovs_barrier_init(&udpif->reval_barrier, udpif->n_revalidators); + ovs_barrier_init(&udpif->pause_barrier, udpif->n_revalidators + 1); udpif->reval_exit = false; + udpif->pause = false; udpif->revalidators = xzalloc(udpif->n_revalidators * sizeof *udpif->revalidators); for (i = 0; i < udpif->n_revalidators; i++) { @@ -472,6 +519,29 @@ udpif_start_threads(struct udpif *udpif, size_t n_handlers, } } +/* Pauses all revalidators. Should only be called by the main thread. + * When function returns, all revalidators are paused and will proceed + * only after udpif_resume_revalidators() is called. */ +static void +udpif_pause_revalidators(struct udpif *udpif) +{ + if (ofproto_dpif_backer_enabled(udpif->backer)) { + latch_set(&udpif->pause_latch); + ovs_barrier_block(&udpif->pause_barrier); + } +} + +/* Resumes the pausing of revalidators. Should only be called by the + * main thread. */ +static void +udpif_resume_revalidators(struct udpif *udpif) +{ + if (ofproto_dpif_backer_enabled(udpif->backer)) { + latch_poll(&udpif->pause_latch); + ovs_barrier_block(&udpif->pause_barrier); + } +} + /* Tells 'udpif' how many threads it should use to handle upcalls. * 'n_handlers' and 'n_revalidators' can never be zero. 'udpif''s * datapath handle must have packet reception enabled before starting @@ -653,6 +723,7 @@ recv_upcalls(struct handler *handler) struct dpif_upcall *dupcall = &dupcalls[n_upcalls]; struct upcall *upcall = &upcalls[n_upcalls]; struct flow *flow = &flows[n_upcalls]; + unsigned int mru; int error; ofpbuf_use_stub(recv_buf, recv_stubs[n_upcalls], @@ -667,8 +738,14 @@ recv_upcalls(struct handler *handler) goto free_dupcall; } + if (dupcall->mru) { + mru = nl_attr_get_u16(dupcall->mru); + } else { + mru = 0; + } + error = upcall_receive(upcall, udpif->backer, &dupcall->packet, - dupcall->type, dupcall->userdata, flow, + dupcall->type, dupcall->userdata, flow, mru, &dupcall->ufid, PMD_ID_NULL); if (error) { if (error == ENODEV) { @@ -690,6 +767,7 @@ recv_upcalls(struct handler *handler) upcall->ufid = &dupcall->ufid; upcall->out_tun_key = dupcall->out_tun_key; + upcall->actions = dupcall->actions; if (vsp_adjust_flow(upcall->ofproto, flow, &dupcall->packet)) { upcall->vsp_adjusted = true; @@ -698,7 +776,8 @@ recv_upcalls(struct handler *handler) pkt_metadata_from_flow(&dupcall->packet.md, flow); flow_extract(&dupcall->packet, flow); - error = process_upcall(udpif, upcall, NULL); + error = process_upcall(udpif, upcall, + &upcall->odp_actions, &upcall->wc); if (error) { goto cleanup; } @@ -752,6 +831,12 @@ udpif_revalidator(void *arg) udpif->max_n_flows = MAX(n_flows, udpif->max_n_flows); udpif->avg_n_flows = (udpif->avg_n_flows + n_flows) / 2; + /* Only the leader checks the pause latch to prevent a race where + * some threads think it's false and proceed to block on + * reval_barrier and others think it's true and block indefinitely + * on the pause_barrier */ + udpif->pause = latch_is_set(&udpif->pause_latch); + /* Only the leader checks the exit latch to prevent a race where * some threads think it's true and exit and others think it's * false and block indefinitely on the reval_barrier */ @@ -768,6 +853,10 @@ udpif_revalidator(void *arg) /* Wait for the leader to start the flow dump. */ ovs_barrier_block(&udpif->reval_barrier); + if (udpif->pause) { + revalidator_pause(revalidator); + } + if (udpif->reval_exit) { break; } @@ -810,6 +899,7 @@ udpif_revalidator(void *arg) poll_timer_wait_until(start_time + MIN(ofproto_max_idle, 500)); seq_wait(udpif->reval_seq, last_reval_seq); latch_wait(&udpif->exit_latch); + latch_wait(&udpif->pause_latch); poll_block(); } } @@ -889,8 +979,8 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout, ? ODPP_NONE : odp_in_port; pid = dpif_port_get_pid(udpif->dpif, port, flow_hash_5tuple(flow, 0)); - odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, ODPP_NONE, - buf); + odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, + ODPP_NONE, false, buf); } /* If there is no error, the upcall must be destroyed with upcall_uninit() @@ -901,6 +991,7 @@ static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, + const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id) { int error; @@ -919,6 +1010,8 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, upcall->pmd_id = pmd_id; upcall->type = type; upcall->userdata = userdata; + ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub, + sizeof upcall->odp_actions_stub); ofpbuf_init(&upcall->put_actions, 0); upcall->xout_initialized = false; @@ -928,15 +1021,17 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, upcall->ukey = NULL; upcall->key = NULL; upcall->key_len = 0; + upcall->mru = mru; upcall->out_tun_key = NULL; + upcall->actions = NULL; return 0; } static void upcall_xlate(struct udpif *udpif, struct upcall *upcall, - struct ofpbuf *odp_actions) + struct ofpbuf *odp_actions, struct flow_wildcards *wc) { struct dpif_flow_stats stats; struct xlate_in xin; @@ -947,8 +1042,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall, stats.tcp_flags = ntohs(upcall->flow->tcp_flags); xlate_in_init(&xin, upcall->ofproto, upcall->flow, upcall->in_port, NULL, - stats.tcp_flags, upcall->packet); - xin.odp_actions = odp_actions; + stats.tcp_flags, upcall->packet, wc, odp_actions); if (upcall->type == DPIF_UC_MISS) { xin.resubmit_stats = &stats; @@ -1002,8 +1096,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall, if (!upcall->xout.slow) { ofpbuf_use_const(&upcall->put_actions, - upcall->xout.odp_actions->data, - upcall->xout.odp_actions->size); + odp_actions->data, odp_actions->size); } else { ofpbuf_init(&upcall->put_actions, 0); compose_slow_path(udpif, &upcall->xout, upcall->flow, @@ -1015,7 +1108,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall, * going to create new datapath flows for actual datapath misses, there is * no point in creating a ukey otherwise. */ if (upcall->type == DPIF_UC_MISS) { - upcall->ukey = ukey_create_from_upcall(upcall); + upcall->ukey = ukey_create_from_upcall(upcall, wc); } } @@ -1026,6 +1119,7 @@ upcall_uninit(struct upcall *upcall) if (upcall->xout_initialized) { xlate_out_uninit(&upcall->xout); } + ofpbuf_uninit(&upcall->odp_actions); ofpbuf_uninit(&upcall->put_actions); if (upcall->ukey) { if (!upcall->ukey_persists) { @@ -1044,6 +1138,7 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi const struct nlattr *userdata, struct ofpbuf *actions, struct flow_wildcards *wc, struct ofpbuf *put_actions, void *aux) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); struct udpif *udpif = aux; unsigned int flow_limit; struct upcall upcall; @@ -1054,12 +1149,12 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi atomic_read_relaxed(&udpif->flow_limit, &flow_limit); error = upcall_receive(&upcall, udpif->backer, packet, type, userdata, - flow, ufid, pmd_id); + flow, 0, ufid, pmd_id); if (error) { return error; } - error = process_upcall(udpif, &upcall, actions); + error = process_upcall(udpif, &upcall, actions, wc); if (error) { goto out; } @@ -1069,16 +1164,12 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi upcall.put_actions.size); } - if (OVS_LIKELY(wc)) { - if (megaflow) { - /* XXX: This could be avoided with sufficient API changes. */ - *wc = upcall.xout.wc; - } else { - flow_wildcards_init_for_packet(wc, flow); - } + if (OVS_UNLIKELY(!megaflow)) { + flow_wildcards_init_for_packet(wc, flow); } if (udpif_get_n_flows(udpif) >= flow_limit) { + VLOG_WARN_RL(&rl, "upcall_cb failure: datapath flow limit reached"); error = ENOSPC; goto out; } @@ -1086,11 +1177,13 @@ upcall_cb(const struct dp_packet *packet, const struct flow *flow, ovs_u128 *ufi /* Prevent miss flow installation if the key has recirculation ID but we * were not able to get a reference on it. */ if (type == DPIF_UC_MISS && upcall.recirc && !upcall.have_recirc_ref) { + VLOG_WARN_RL(&rl, "upcall_cb failure: no reference for recirc flow"); error = ENOSPC; goto out; } if (upcall.ukey && !ukey_install(udpif, upcall.ukey)) { + VLOG_WARN_RL(&rl, "upcall_cb failure: ukey installation fails"); error = ENOSPC; } out: @@ -1103,7 +1196,7 @@ out: static int process_upcall(struct udpif *udpif, struct upcall *upcall, - struct ofpbuf *odp_actions) + struct ofpbuf *odp_actions, struct flow_wildcards *wc) { const struct nlattr *userdata = upcall->userdata; const struct dp_packet *packet = upcall->packet; @@ -1111,17 +1204,39 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, switch (classify_upcall(upcall->type, userdata)) { case MISS_UPCALL: - upcall_xlate(udpif, upcall, odp_actions); + upcall_xlate(udpif, upcall, odp_actions, wc); return 0; case SFLOW_UPCALL: if (upcall->sflow) { union user_action_cookie cookie; - + const struct nlattr *actions; + size_t actions_len = 0; + struct dpif_sflow_actions sflow_actions; + memset(&sflow_actions, 0, sizeof sflow_actions); memset(&cookie, 0, sizeof cookie); memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.sflow); + if (upcall->actions) { + /* Actions were passed up from datapath. */ + actions = nl_attr_get(upcall->actions); + actions_len = nl_attr_get_size(upcall->actions); + if (actions && actions_len) { + dpif_sflow_read_actions(flow, actions, actions_len, + &sflow_actions); + } + } + if (actions_len == 0) { + /* Lookup actions in userspace cache. */ + struct udpif_key *ukey = ukey_lookup(udpif, upcall->ufid); + if (ukey) { + ukey_get_actions(ukey, &actions, &actions_len); + dpif_sflow_read_actions(flow, actions, actions_len, + &sflow_actions); + } + } dpif_sflow_received(upcall->sflow, packet, flow, - flow->in_port.odp_port, &cookie); + flow->in_port.odp_port, &cookie, + actions_len > 0 ? &sflow_actions : NULL); } break; @@ -1134,7 +1249,7 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix); if (upcall->out_tun_key) { - odp_tun_key_from_attr(upcall->out_tun_key, + odp_tun_key_from_attr(upcall->out_tun_key, false, &output_tunnel_key); } dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow, @@ -1178,10 +1293,8 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, unsigned int flow_limit; size_t n_ops, n_opsp, i; bool may_put; - bool megaflow; atomic_read_relaxed(&udpif->flow_limit, &flow_limit); - atomic_read_relaxed(&enable_megaflows, &megaflow); may_put = udpif_get_n_flows(udpif) < flow_limit; @@ -1207,7 +1320,7 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, * actions were composed assuming that the packet contained no * VLAN. So, we must remove the VLAN header from the packet before * trying to execute the actions. */ - if (upcall->xout.odp_actions->size) { + if (upcall->odp_actions.size) { eth_pop_vlan(CONST_CAST(struct dp_packet *, upcall->packet)); } @@ -1241,21 +1354,22 @@ handle_upcalls(struct udpif *udpif, struct upcall *upcalls, op->dop.u.flow_put.mask_len = ukey->mask_len; op->dop.u.flow_put.ufid = upcall->ufid; op->dop.u.flow_put.stats = NULL; - op->dop.u.flow_put.actions = ukey->actions->data; - op->dop.u.flow_put.actions_len = ukey->actions->size; + ukey_get_actions(ukey, &op->dop.u.flow_put.actions, + &op->dop.u.flow_put.actions_len); } - if (upcall->xout.odp_actions->size) { + if (upcall->odp_actions.size) { op = &ops[n_ops++]; op->ukey = NULL; op->dop.type = DPIF_OP_EXECUTE; op->dop.u.execute.packet = CONST_CAST(struct dp_packet *, packet); odp_key_to_pkt_metadata(upcall->key, upcall->key_len, &op->dop.u.execute.packet->md); - op->dop.u.execute.actions = upcall->xout.odp_actions->data; - op->dop.u.execute.actions_len = upcall->xout.odp_actions->size; + op->dop.u.execute.actions = upcall->odp_actions.data; + op->dop.u.execute.actions_len = upcall->odp_actions.size; op->dop.u.execute.needs_help = (upcall->xout.slow & SLOW_ACTION) != 0; op->dop.u.execute.probe = false; + op->dop.u.execute.mtu = upcall->mru; } } @@ -1308,6 +1422,24 @@ ukey_lookup(struct udpif *udpif, const ovs_u128 *ufid) return NULL; } +/* Provides safe lockless access of RCU protected 'ukey->actions'. Callers may + * alternatively access the field directly if they take 'ukey->mutex'. */ +static void +ukey_get_actions(struct udpif_key *ukey, const struct nlattr **actions, size_t *size) +{ + const struct ofpbuf *buf = ovsrcu_get(struct ofpbuf *, &ukey->actions); + *actions = buf->data; + *size = buf->size; +} + +static void +ukey_set_actions(struct udpif_key *ukey, const struct ofpbuf *actions) +{ + ovsrcu_postpone(ofpbuf_delete, + ovsrcu_get_protected(struct ofpbuf *, &ukey->actions)); + ovsrcu_set(&ukey->actions, ofpbuf_clone(actions)); +} + static struct udpif_key * ukey_create__(const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, @@ -1331,7 +1463,9 @@ ukey_create__(const struct nlattr *key, size_t key_len, ukey->ufid = *ufid; ukey->pmd_id = pmd_id; ukey->hash = get_ufid_hash(&ukey->ufid); - ukey->actions = ofpbuf_clone(actions); + + ovsrcu_init(&ukey->actions, NULL); + ukey_set_actions(ukey, actions); ovs_mutex_init(&ukey->mutex); ukey->dump_seq = dump_seq; @@ -1357,16 +1491,17 @@ ukey_create__(const struct nlattr *key, size_t key_len, } static struct udpif_key * -ukey_create_from_upcall(struct upcall *upcall) +ukey_create_from_upcall(struct upcall *upcall, struct flow_wildcards *wc) { struct odputil_keybuf keystub, maskstub; struct ofpbuf keybuf, maskbuf; - bool recirc, megaflow; + bool megaflow; struct odp_flow_key_parms odp_parms = { .flow = upcall->flow, - .mask = &upcall->xout.wc.masks, + .mask = &wc->masks, }; + odp_parms.support = ofproto_dpif_get_support(upcall->ofproto)->odp; if (upcall->key_len) { ofpbuf_use_const(&keybuf, upcall->key, upcall->key_len); } else { @@ -1374,17 +1509,13 @@ ukey_create_from_upcall(struct upcall *upcall) * upcall, so convert the upcall's flow here. */ ofpbuf_use_stack(&keybuf, &keystub, sizeof keystub); odp_parms.odp_in_port = upcall->flow->in_port.odp_port; - odp_parms.recirc = true; odp_flow_key_from_flow(&odp_parms, &keybuf); } atomic_read_relaxed(&enable_megaflows, &megaflow); - recirc = ofproto_dpif_get_enable_recirc(upcall->ofproto); ofpbuf_use_stack(&maskbuf, &maskstub, sizeof maskstub); if (megaflow) { odp_parms.odp_in_port = ODPP_NONE; - odp_parms.max_mpls_depth = ofproto_dpif_get_max_mpls_depth(upcall->ofproto); - odp_parms.recirc = recirc; odp_parms.key_buf = &keybuf; odp_flow_key_from_mask(&odp_parms, &maskbuf); @@ -1587,7 +1718,7 @@ ukey_delete__(struct udpif_key *ukey) recirc_free_id(ukey->recircs[i]); } xlate_cache_delete(ukey->xcache); - ofpbuf_delete(ukey->actions); + ofpbuf_delete(ovsrcu_get(struct ofpbuf *, &ukey->actions)); ovs_mutex_destroy(&ukey->mutex); free(ukey); } @@ -1634,31 +1765,39 @@ should_revalidate(const struct udpif *udpif, uint64_t packets, return false; } -static bool +/* Verifies that the datapath actions of 'ukey' are still correct, and pushes + * 'stats' for it. + * + * Returns a recommended action for 'ukey', options include: + * UKEY_DELETE The ukey should be deleted. + * UKEY_KEEP The ukey is fine as is. + * UKEY_MODIFY The ukey's actions should be changed but is otherwise + * fine. Callers should change the actions to those found + * in the caller supplied 'odp_actions' buffer. */ +static enum reval_result revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, - const struct dpif_flow_stats *stats, uint64_t reval_seq) + const struct dpif_flow_stats *stats, + struct ofpbuf *odp_actions, uint64_t reval_seq) OVS_REQUIRES(ukey->mutex) { - uint64_t slow_path_buf[128 / 8]; struct xlate_out xout, *xoutp; struct netflow *netflow; struct ofproto_dpif *ofproto; struct dpif_flow_stats push; - struct ofpbuf xout_actions; - struct flow flow, dp_mask; - uint64_t *dp64, *xout64; + struct flow flow; + struct flow_wildcards dp_mask, wc; + enum reval_result result; ofp_port_t ofp_in_port; struct xlate_in xin; long long int last_used; int error; - size_t i; - bool ok; bool need_revalidate; - ok = false; + result = UKEY_DELETE; xoutp = NULL; netflow = NULL; + ofpbuf_clear(odp_actions); need_revalidate = (ukey->reval_seq != reval_seq); last_used = ukey->stats.used; push.used = stats->used; @@ -1672,20 +1811,19 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, if (need_revalidate && last_used && !should_revalidate(udpif, push.n_packets, last_used)) { - ok = false; goto exit; } /* We will push the stats, so update the ukey stats cache. */ ukey->stats = *stats; if (!push.n_packets && !need_revalidate) { - ok = true; + result = UKEY_KEEP; goto exit; } if (ukey->xcache && !need_revalidate) { xlate_push_stats(ukey->xcache, &push); - ok = true; + result = UKEY_KEEP; goto exit; } @@ -1708,63 +1846,60 @@ revalidate_ukey(struct udpif *udpif, struct udpif_key *ukey, } xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, push.tcp_flags, - NULL); + NULL, need_revalidate ? &wc : NULL, odp_actions); if (push.n_packets) { xin.resubmit_stats = &push; xin.may_learn = true; } xin.xcache = ukey->xcache; - xin.skip_wildcards = !need_revalidate; xlate_actions(&xin, &xout); xoutp = &xout; if (!need_revalidate) { - ok = true; + result = UKEY_KEEP; goto exit; } - if (!xout.slow) { - ofpbuf_use_const(&xout_actions, xout.odp_actions->data, - xout.odp_actions->size); - } else { - ofpbuf_use_stack(&xout_actions, slow_path_buf, sizeof slow_path_buf); + if (xout.slow) { + ofpbuf_clear(odp_actions); compose_slow_path(udpif, &xout, &flow, flow.in_port.odp_port, - &xout_actions); + odp_actions); } - if (!ofpbuf_equal(&xout_actions, ukey->actions)) { + if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key, + ukey->key_len, &dp_mask.masks, &flow) + == ODP_FIT_ERROR) { goto exit; } - if (odp_flow_key_to_mask(ukey->mask, ukey->mask_len, ukey->key, - ukey->key_len, &dp_mask, &flow) == ODP_FIT_ERROR) { + /* Do not modify if any bit is wildcarded by the installed datapath flow, + * but not the newly revalidated wildcard mask (wc), i.e., if revalidation + * tells that the datapath flow is now too generic and must be narrowed + * down. Note that we do not know if the datapath has ignored any of the + * wildcarded bits, so we may be overtly conservative here. */ + if (flow_wildcards_has_extra(&dp_mask, &wc)) { goto exit; } - /* Since the kernel is free to ignore wildcarded bits in the mask, we can't - * directly check that the masks are the same. Instead we check that the - * mask in the kernel is more specific i.e. less wildcarded, than what - * we've calculated here. This guarantees we don't catch any packets we - * shouldn't with the megaflow. */ - dp64 = (uint64_t *) &dp_mask; - xout64 = (uint64_t *) &xout.wc.masks; - for (i = 0; i < FLOW_U64S; i++) { - if ((dp64[i] | xout64[i]) != dp64[i]) { - goto exit; - } + if (!ofpbuf_equal(odp_actions, + ovsrcu_get(struct ofpbuf *, &ukey->actions))) { + /* The datapath mask was OK, but the actions seem to have changed. + * Let's modify it in place. */ + result = UKEY_MODIFY; + goto exit; } - ok = true; + result = UKEY_KEEP; exit: - if (ok) { + if (result != UKEY_DELETE) { ukey->reval_seq = reval_seq; } - if (netflow && !ok) { + if (netflow && result == UKEY_DELETE) { netflow_flow_clear(netflow, &flow); } xlate_out_uninit(xoutp); - return ok; + return result; } static void @@ -1794,6 +1929,23 @@ delete_op_init(struct udpif *udpif, struct ukey_op *op, struct udpif_key *ukey) op->dop.u.flow_del.terse = udpif_use_ufid(udpif); } +static void +modify_op_init(struct ukey_op *op, struct udpif_key *ukey) +{ + op->ukey = ukey; + op->dop.type = DPIF_OP_FLOW_PUT; + op->dop.u.flow_put.flags = DPIF_FP_MODIFY; + op->dop.u.flow_put.key = ukey->key; + op->dop.u.flow_put.key_len = ukey->key_len; + op->dop.u.flow_put.mask = ukey->mask; + op->dop.u.flow_put.mask_len = ukey->mask_len; + op->dop.u.flow_put.ufid = &ukey->ufid; + op->dop.u.flow_put.pmd_id = ukey->pmd_id; + op->dop.u.flow_put.stats = NULL; + ukey_get_actions(ukey, &op->dop.u.flow_put.actions, + &op->dop.u.flow_put.actions_len); +} + static void push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) { @@ -1813,6 +1965,16 @@ push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) stats = op->dop.u.flow_del.stats; push = &push_buf; + if (op->dop.type != DPIF_OP_FLOW_DEL) { + /* Only deleted flows need their stats pushed. */ + continue; + } + + if (op->dop.error) { + /* flow_del error, 'stats' is unusable. */ + continue; + } + if (op->ukey) { ovs_mutex_lock(&op->ukey->mutex); push->used = MAX(stats->used, op->ukey->stats.used); @@ -1856,10 +2018,9 @@ push_ukey_ops__(struct udpif *udpif, struct ukey_op *ops, size_t n_ops) struct xlate_in xin; xlate_in_init(&xin, ofproto, &flow, ofp_in_port, NULL, - push->tcp_flags, NULL); + push->tcp_flags, NULL, NULL, NULL); xin.resubmit_stats = push->n_packets ? push : NULL; xin.may_learn = push->n_packets > 0; - xin.skip_wildcards = true; xlate_actions_for_side_effects(&xin); if (netflow) { @@ -1899,6 +2060,9 @@ log_unexpected_flow(const struct dpif_flow *flow, int error) static void revalidate(struct revalidator *revalidator) { + uint64_t odp_actions_stub[1024 / 8]; + struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); + struct udpif *udpif = revalidator->udpif; struct dpif_flow_dump_thread *dump_thread; uint64_t dump_seq, reval_seq; @@ -1946,8 +2110,9 @@ revalidate(struct revalidator *revalidator) for (f = flows; f < &flows[n_dumped]; f++) { long long int used = f->stats.used; + enum reval_result result; struct udpif_key *ukey; - bool already_dumped, keep; + bool already_dumped; int error; if (ukey_acquire(udpif, f, &ukey, &error)) { @@ -1981,15 +2146,19 @@ revalidate(struct revalidator *revalidator) used = ukey->created; } if (kill_them_all || (used && used < now - max_idle)) { - keep = false; + result = UKEY_DELETE; } else { - keep = revalidate_ukey(udpif, ukey, &f->stats, reval_seq); + result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions, + reval_seq); } ukey->dump_seq = dump_seq; - ukey->flow_exists = keep; + ukey->flow_exists = result != UKEY_DELETE; - if (!keep) { + if (result == UKEY_DELETE) { delete_op_init(udpif, &ops[n_ops++], ukey); + } else if (result == UKEY_MODIFY) { + ukey_set_actions(ukey, &odp_actions); + modify_op_init(&ops[n_ops++], ukey); } ovs_mutex_unlock(&ukey->mutex); } @@ -2000,23 +2169,18 @@ revalidate(struct revalidator *revalidator) ovsrcu_quiesce(); } dpif_flow_dump_thread_destroy(dump_thread); + ofpbuf_uninit(&odp_actions); } -static bool -handle_missed_revalidation(struct udpif *udpif, uint64_t reval_seq, - struct udpif_key *ukey) +/* Pauses the 'revalidator', can only proceed after main thread + * calls udpif_resume_revalidators(). */ +static void +revalidator_pause(struct revalidator *revalidator) { - struct dpif_flow_stats stats; - bool keep; - - COVERAGE_INC(revalidate_missed_dp_flow); - - memset(&stats, 0, sizeof stats); - ovs_mutex_lock(&ukey->mutex); - keep = revalidate_ukey(udpif, ukey, &stats, reval_seq); - ovs_mutex_unlock(&ukey->mutex); - - return keep; + /* The first block is for sync'ing the pause with main thread. */ + ovs_barrier_block(&revalidator->udpif->pause_barrier); + /* The second block is for pausing until main thread resumes. */ + ovs_barrier_block(&revalidator->udpif->pause_barrier); } static void @@ -2033,6 +2197,9 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) ovs_assert(slice < udpif->n_revalidators); for (int i = slice; i < N_UMAPS; i += udpif->n_revalidators) { + uint64_t odp_actions_stub[1024 / 8]; + struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); + struct ukey_op ops[REVALIDATE_MAX_BATCH]; struct udpif_key *ukey; struct umap *umap = &udpif->ukeys[i]; @@ -2040,6 +2207,7 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) { bool flow_exists, seq_mismatch; + enum reval_result result; /* Handler threads could be holding a ukey lock while it installs a * new flow, so don't hang around waiting for access to it. */ @@ -2049,21 +2217,33 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) flow_exists = ukey->flow_exists; seq_mismatch = (ukey->dump_seq != dump_seq && ukey->reval_seq != reval_seq); + + if (purge) { + result = UKEY_DELETE; + } else if (!seq_mismatch) { + result = UKEY_KEEP; + } else { + struct dpif_flow_stats stats; + COVERAGE_INC(revalidate_missed_dp_flow); + memset(&stats, 0, sizeof stats); + result = revalidate_ukey(udpif, ukey, &stats, &odp_actions, + reval_seq); + } ovs_mutex_unlock(&ukey->mutex); - if (flow_exists - && (purge - || (seq_mismatch - && !handle_missed_revalidation(udpif, reval_seq, - ukey)))) { - struct ukey_op *op = &ops[n_ops++]; + if (result == UKEY_DELETE) { + delete_op_init(udpif, &ops[n_ops++], ukey); + } else if (result == UKEY_MODIFY) { + ukey_set_actions(ukey, &odp_actions); + modify_op_init(&ops[n_ops++], ukey); + } + + if (n_ops == REVALIDATE_MAX_BATCH) { + push_ukey_ops(udpif, umap, ops, n_ops); + n_ops = 0; + } - delete_op_init(udpif, op, ukey); - if (n_ops == REVALIDATE_MAX_BATCH) { - push_ukey_ops(udpif, umap, ops, n_ops); - n_ops = 0; - } - } else if (!flow_exists) { + if (!flow_exists) { ovs_mutex_lock(&umap->mutex); ukey_delete(umap, ukey); ovs_mutex_unlock(&umap->mutex); @@ -2073,6 +2253,8 @@ revalidator_sweep__(struct revalidator *revalidator, bool purge) if (n_ops) { push_ukey_ops(udpif, umap, ops, n_ops); } + + ofpbuf_uninit(&odp_actions); ovsrcu_quiesce(); } } @@ -2088,6 +2270,39 @@ revalidator_purge(struct revalidator *revalidator) { revalidator_sweep__(revalidator, true); } + +/* In reaction to dpif purge, purges all 'ukey's with same 'pmd_id'. */ +static void +dp_purge_cb(void *aux, unsigned pmd_id) +{ + struct udpif *udpif = aux; + size_t i; + + udpif_pause_revalidators(udpif); + for (i = 0; i < N_UMAPS; i++) { + struct ukey_op ops[REVALIDATE_MAX_BATCH]; + struct udpif_key *ukey; + struct umap *umap = &udpif->ukeys[i]; + size_t n_ops = 0; + + CMAP_FOR_EACH(ukey, cmap_node, &umap->cmap) { + if (ukey->pmd_id == pmd_id) { + delete_op_init(udpif, &ops[n_ops++], ukey); + if (n_ops == REVALIDATE_MAX_BATCH) { + push_ukey_ops(udpif, umap, ops, n_ops); + n_ops = 0; + } + } + } + + if (n_ops) { + push_ukey_ops(udpif, umap, ops, n_ops); + } + + ovsrcu_quiesce(); + } + udpif_resume_revalidators(udpif); +} static void upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,