X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=adfaeb681356b43ed75cb525f428de39e7935aa6;hb=6c38bdc0c5bcc8b080fc689e436b35830d69eb75;hp=d6a4d8ccb512a573602a252578fb5f30c9b64095;hpb=0c4b9393b6a5792b888d4b580e04cc3fa64ebc12;p=cascardo%2Fovs.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d6a4d8ccb..adfaeb681 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -89,6 +89,15 @@ struct rule_dpif { struct ovs_mutex stats_mutex; struct dpif_flow_stats stats OVS_GUARDED; + /* In non-NULL, will point to a new rule (for which a reference is held) to + * which all the stats updates should be forwarded. This exists only + * transitionally when flows are replaced. + * + * Protected by stats_mutex. If both 'rule->stats_mutex' and + * 'rule->new_rule->stats_mutex' must be held together, acquire them in that + * order, */ + struct rule_dpif *new_rule OVS_GUARDED; + /* If non-zero then the recirculation id that has * been allocated for use with this rule. * The recirculation id and associated internal flow should @@ -255,13 +264,6 @@ COVERAGE_DEFINE(rev_flow_table); COVERAGE_DEFINE(rev_mac_learning); COVERAGE_DEFINE(rev_mcast_snooping); -/* Stores mapping between 'recirc_id' and 'ofproto-dpif'. */ -struct dpif_backer_recirc_node { - struct cmap_node cmap_node; - struct ofproto_dpif *ofproto; - uint32_t recirc_id; -}; - /* All datapaths of a given type share a single dpif backer instance. */ struct dpif_backer { char *type; @@ -278,33 +280,11 @@ struct dpif_backer { bool recv_set_enable; /* Enables or disables receiving packets. */ - /* Recirculation. 
*/ - struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ - struct cmap recirc_map; /* Map of 'recirc_id's to 'ofproto's. */ - struct ovs_mutex recirc_mutex; /* Protects 'recirc_map'. */ - bool enable_recirc; /* True if the datapath supports recirculation */ - - /* True if the datapath supports unique flow identifiers */ - bool enable_ufid; - - /* True if the datapath supports variable-length - * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. - * False if the datapath supports only 8-byte (or shorter) userdata. */ - bool variable_length_userdata; - - /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET - * actions. */ - bool masked_set_action; - - /* Maximum number of MPLS label stack entries that the datapath supports - * in a match */ - size_t max_mpls_depth; - /* Version string of the datapath stored in OVSDB. */ char *dp_version_string; - /* True if the datapath supports tnl_push and pop actions. */ - bool enable_tnl_push_pop; + /* Datapath feature support. */ + struct dpif_backer_support support; struct atomic_count tnl_count; }; @@ -316,12 +296,18 @@ struct ofproto_dpif { struct ofproto up; struct dpif_backer *backer; + /* Unique identifier for this instantiation of this bridge in this running + * process. */ + struct uuid uuid; + + ATOMIC(cls_version_t) tables_version; /* For classifier lookups. */ + uint64_t dump_seq; /* Last read of udpif_dump_seq(). */ /* Special OpenFlow rules. */ struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */ struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */ - struct rule_dpif *drop_frags_rule; /* Used in OFPC_FRAG_DROP mode. */ + struct rule_dpif *drop_frags_rule; /* Used in OFPUTIL_FRAG_DROP mode. */ /* Bridging. */ struct netflow *netflow; @@ -359,9 +345,9 @@ struct ofproto_dpif { uint64_t change_seq; /* Connectivity status changes. */ /* Work queues. */ - struct guarded_list pins; /* Contains "struct ofputil_packet_in"s. 
*/ - struct seq *pins_seq; /* For notifying 'pins' reception. */ - uint64_t pins_seqno; + struct guarded_list ams; /* Contains "struct ofproto_async_msgs"s. */ + struct seq *ams_seq; /* For notifying 'ams' reception. */ + uint64_t ams_seqno; }; /* All existing ofproto_dpif instances, indexed by ->up.name. */ @@ -377,26 +363,18 @@ ofproto_dpif_cast(const struct ofproto *ofproto) return CONTAINER_OF(ofproto, struct ofproto_dpif, up); } -size_t -ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto) -{ - return ofproto->backer->max_mpls_depth; -} - bool -ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto) +ofproto_dpif_get_enable_ufid(const struct dpif_backer *backer) { - return ofproto->backer->enable_recirc; + return backer->support.ufid; } -bool -ofproto_dpif_get_enable_ufid(struct dpif_backer *backer) +struct dpif_backer_support * +ofproto_dpif_get_support(const struct ofproto_dpif *ofproto) { - return backer->enable_ufid; + return &ofproto->backer->support; } -static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto, - ofp_port_t ofp_port); static void ofproto_trace(struct ofproto_dpif *, struct flow *, const struct dp_packet *packet, const struct ofpact[], size_t ofpacts_len, @@ -412,25 +390,33 @@ static struct shash init_ofp_ports = SHASH_INITIALIZER(&init_ofp_ports); * it. */ void ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto, - struct ofputil_flow_mod *fm) + const struct ofputil_flow_mod *fm) { - ofproto_flow_mod(&ofproto->up, fm); + struct ofproto_flow_mod ofm; + + /* Multiple threads may do this for the same 'fm' at the same time. + * Allocate ofproto_flow_mod with execution context from stack. + * + * Note: This copy could be avoided by making ofproto_flow_mod more + * complex, but that may not be desireable, and a learn action is not that + * fast to begin with. */ + ofm.fm = *fm; + ofproto_flow_mod(&ofproto->up, &ofm); } -/* Appends 'pin' to the queue of "packet ins" to be sent to the controller. 
- * Takes ownership of 'pin' and pin->packet. */ +/* Appends 'am' to the queue of asynchronous messages to be sent to the + * controller. Takes ownership of 'am' and any data it points to. */ void -ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto, - struct ofproto_packet_in *pin) +ofproto_dpif_send_async_msg(struct ofproto_dpif *ofproto, + struct ofproto_async_msg *am) { - if (!guarded_list_push_back(&ofproto->pins, &pin->list_node, 1024)) { + if (!guarded_list_push_back(&ofproto->ams, &am->list_node, 1024)) { COVERAGE_INC(packet_in_overflow); - free(CONST_CAST(void *, pin->up.packet)); - free(pin); + ofproto_async_msg_free(am); } /* Wakes up main thread for packet-in I/O. */ - seq_change(ofproto->pins_seq); + seq_change(ofproto->ams_seq); } /* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the @@ -463,6 +449,9 @@ init(const struct shash *iface_hints) shash_add(&init_ofp_ports, node->name, new_hint); } + + ofproto_unixctl_init(); + udpif_init(); } static void @@ -529,6 +518,12 @@ lookup_ofproto_dpif_by_port_name(const char *name) return NULL; } +bool +ofproto_dpif_backer_enabled(struct dpif_backer* backer) +{ + return backer->recv_set_enable; +} + static int type_run(const char *type) { @@ -569,7 +564,7 @@ type_run(const char *type) udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } - dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask); + dpif_poll_threads_set(backer->dpif, pmd_cpu_mask); if (backer->need_revalidate) { struct ofproto_dpif *ofproto; @@ -658,10 +653,7 @@ type_run(const char *type) ofproto->netflow, ofproto->up.forward_bpdu, connmgr_has_in_band(ofproto->up.connmgr), - ofproto->backer->enable_recirc, - ofproto->backer->variable_length_userdata, - ofproto->backer->max_mpls_depth, - ofproto->backer->masked_set_action); + &ofproto->backer->support); HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { xlate_bundle_set(ofproto, bundle, bundle->name, @@ -846,7 +838,7 @@ static int add_internal_flows(struct 
ofproto_dpif *); static struct ofproto * alloc(void) { - struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto); + struct ofproto_dpif *ofproto = xzalloc(sizeof *ofproto); return &ofproto->up; } @@ -857,27 +849,6 @@ dealloc(struct ofproto *ofproto_) free(ofproto); } -/* Called when 'ofproto' is destructed. Checks for and clears any - * recirc_id leak. */ -static void -dpif_backer_recirc_clear_ofproto(struct dpif_backer *backer, - struct ofproto_dpif *ofproto) -{ - struct dpif_backer_recirc_node *node; - - ovs_mutex_lock(&backer->recirc_mutex); - CMAP_FOR_EACH (node, cmap_node, &backer->recirc_map) { - if (node->ofproto == ofproto) { - VLOG_ERR("recirc_id %"PRIu32", not freed when ofproto (%s) " - "is destructed", node->recirc_id, ofproto->up.name); - cmap_remove(&backer->recirc_map, &node->cmap_node, - node->recirc_id); - ovsrcu_postpone(free, node); - } - } - ovs_mutex_unlock(&backer->recirc_mutex); -} - static void close_dpif_backer(struct dpif_backer *backer) { @@ -893,9 +864,6 @@ close_dpif_backer(struct dpif_backer *backer) ovs_rwlock_destroy(&backer->odp_to_ofport_lock); hmap_destroy(&backer->odp_to_ofport_map); shash_find_and_delete(&all_dpif_backers, backer->type); - recirc_id_pool_destroy(backer->rid_pool); - cmap_destroy(&backer->recirc_map); - ovs_mutex_destroy(&backer->recirc_mutex); free(backer->type); free(backer->dp_version_string); dpif_close(backer->dpif); @@ -909,10 +877,7 @@ struct odp_garbage { }; static bool check_variable_length_userdata(struct dpif_backer *backer); -static size_t check_max_mpls_depth(struct dpif_backer *backer); -static bool check_recirc(struct dpif_backer *backer); -static bool check_ufid(struct dpif_backer *backer); -static bool check_masked_set_action(struct dpif_backer *backer); +static void check_support(struct dpif_backer *backer); static int open_dpif_backer(const char *type, struct dpif_backer **backerp) @@ -922,13 +887,15 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) struct dpif_port port; 
struct shash_node *node; struct ovs_list garbage_list; - struct odp_garbage *garbage, *next; + struct odp_garbage *garbage; struct sset names; char *backer_name; const char *name; int error; + recirc_init(); + backer = shash_find_data(&all_dpif_backers, type); if (backer) { backer->refcount++; @@ -998,23 +965,14 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) } dpif_port_dump_done(&port_dump); - LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) { + LIST_FOR_EACH_POP (garbage, list_node, &garbage_list) { dpif_port_del(backer->dpif, garbage->odp_port); - list_remove(&garbage->list_node); free(garbage); } shash_add(&all_dpif_backers, type, backer); - backer->enable_recirc = check_recirc(backer); - backer->max_mpls_depth = check_max_mpls_depth(backer); - backer->masked_set_action = check_masked_set_action(backer); - backer->enable_ufid = check_ufid(backer); - backer->rid_pool = recirc_id_pool_create(); - ovs_mutex_init(&backer->recirc_mutex); - cmap_init(&backer->recirc_map); - - backer->enable_tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif); + check_support(backer); atomic_count_init(&backer->tnl_count, 0); error = dpif_recv_set(backer->dpif, backer->recv_set_enable); @@ -1032,7 +990,8 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) /* This check fails if performed before udpif threads have been set, * as the kernel module checks that the 'pid' in userspace action * is non-zero. 
*/ - backer->variable_length_userdata = check_variable_length_userdata(backer); + backer->support.variable_length_userdata + = check_variable_length_userdata(backer); backer->dp_version_string = dpif_get_dp_version(backer->dpif); return error; @@ -1041,7 +1000,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) bool ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto) { - return ofproto_use_tnl_push_pop && ofproto->backer->enable_tnl_push_pop && + return ofproto_use_tnl_push_pop && ofproto->backer->support.tnl_push_pop && atomic_count_get(&ofproto->backer->tnl_count); } @@ -1059,13 +1018,19 @@ check_recirc(struct dpif_backer *backer) struct odputil_keybuf keybuf; struct ofpbuf key; bool enable_recirc; + struct odp_flow_key_parms odp_parms = { + .flow = &flow, + .support = { + .recirc = true, + }, + }; memset(&flow, 0, sizeof flow); flow.recirc_id = 1; flow.dp_hash = 1; ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &flow, NULL, 0, true); + odp_flow_key_from_flow(&odp_parms, &key); enable_recirc = dpif_probe_feature(backer->dpif, "recirculation", &key, NULL); @@ -1093,12 +1058,15 @@ check_ufid(struct dpif_backer *backer) struct ofpbuf key; ovs_u128 ufid; bool enable_ufid; + struct odp_flow_key_parms odp_parms = { + .flow = &flow, + }; memset(&flow, 0, sizeof flow); flow.dl_type = htons(0x1234); ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &flow, NULL, 0, true); + odp_flow_key_from_flow(&odp_parms, &key); dpif_flow_hash(backer->dpif, key.data, key.size, &ufid); enable_ufid = dpif_probe_feature(backer->dpif, "UFID", &key, &ufid); @@ -1157,6 +1125,7 @@ check_variable_length_userdata(struct dpif_backer *backer) execute.packet = &packet; execute.needs_help = false; execute.probe = true; + execute.mtu = 0; error = dpif_execute(backer->dpif, &execute); @@ -1200,13 +1169,16 @@ check_max_mpls_depth(struct dpif_backer *backer) for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) { struct 
odputil_keybuf keybuf; struct ofpbuf key; + struct odp_flow_key_parms odp_parms = { + .flow = &flow, + }; memset(&flow, 0, sizeof flow); flow.dl_type = htons(ETH_TYPE_MPLS); flow_set_mpls_bos(&flow, n, 1); ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); - odp_flow_key_from_flow(&key, &flow, NULL, 0, false); + odp_flow_key_from_flow(&odp_parms, &key); if (!dpif_probe_feature(backer->dpif, "MPLS", &key, NULL)) { break; } @@ -1252,6 +1224,7 @@ check_masked_set_action(struct dpif_backer *backer) execute.packet = &packet; execute.needs_help = false; execute.probe = true; + execute.mtu = 0; error = dpif_execute(backer->dpif, &execute); @@ -1266,6 +1239,68 @@ check_masked_set_action(struct dpif_backer *backer) return !error; } +#define CHECK_FEATURE__(NAME, SUPPORT, FIELD, VALUE) \ +static bool \ +check_##NAME(struct dpif_backer *backer) \ +{ \ + struct flow flow; \ + struct odputil_keybuf keybuf; \ + struct ofpbuf key; \ + bool enable; \ + struct odp_flow_key_parms odp_parms = { \ + .flow = &flow, \ + .support = { \ + .SUPPORT = true, \ + }, \ + }; \ + \ + memset(&flow, 0, sizeof flow); \ + flow.FIELD = VALUE; \ + \ + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); \ + odp_flow_key_from_flow(&odp_parms, &key); \ + enable = dpif_probe_feature(backer->dpif, #NAME, &key, NULL); \ + \ + if (enable) { \ + VLOG_INFO("%s: Datapath supports "#NAME, dpif_name(backer->dpif)); \ + } else { \ + VLOG_INFO("%s: Datapath does not support "#NAME, \ + dpif_name(backer->dpif)); \ + } \ + \ + return enable; \ +} +#define CHECK_FEATURE(FIELD) CHECK_FEATURE__(FIELD, FIELD, FIELD, 1) + +CHECK_FEATURE(ct_state) +CHECK_FEATURE(ct_zone) +CHECK_FEATURE(ct_mark) +CHECK_FEATURE__(ct_label, ct_label, ct_label.u64.lo, 1) +CHECK_FEATURE__(ct_state_nat, ct_state, ct_state, CS_TRACKED|CS_SRC_NAT) + +#undef CHECK_FEATURE +#undef CHECK_FEATURE__ + +static void +check_support(struct dpif_backer *backer) +{ + /* This feature needs to be tested after udpif threads are set. 
*/ + backer->support.variable_length_userdata = false; + + backer->support.odp.recirc = check_recirc(backer); + backer->support.odp.max_mpls_depth = check_max_mpls_depth(backer); + backer->support.masked_set_action = check_masked_set_action(backer); + backer->support.ufid = check_ufid(backer); + backer->support.tnl_push_pop = dpif_supports_tnl_push_pop(backer->dpif); + + backer->support.odp.ct_state = check_ct_state(backer); + backer->support.odp.ct_zone = check_ct_zone(backer); + backer->support.odp.ct_mark = check_ct_mark(backer); + backer->support.odp.ct_label = check_ct_label(backer); + + backer->support.odp.ct_state_nat = check_ct_state_nat(backer); +} + static int construct(struct ofproto *ofproto_) { @@ -1281,6 +1316,8 @@ construct(struct ofproto *ofproto_) return error; } + uuid_generate(&ofproto->uuid); + atomic_init(&ofproto->tables_version, CLS_MIN_VERSION); ofproto->netflow = NULL; ofproto->sflow = NULL; ofproto->ipfix = NULL; @@ -1296,9 +1333,7 @@ construct(struct ofproto *ofproto_) ovs_mutex_init_adaptive(&ofproto->stats_mutex); ovs_mutex_init(&ofproto->vsp_mutex); - guarded_list_init(&ofproto->pins); - - ofproto_unixctl_init(); + guarded_list_init(&ofproto->ams); hmap_init(&ofproto->vlandev_map); hmap_init(&ofproto->realdev_vid_map); @@ -1308,8 +1343,8 @@ construct(struct ofproto *ofproto_) sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; ofproto->change_seq = 0; - ofproto->pins_seq = seq_create(); - ofproto->pins_seqno = seq_read(ofproto->pins_seq); + ofproto->ams_seq = seq_create(); + ofproto->ams_seqno = seq_read(ofproto->ams_seq); SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { @@ -1365,7 +1400,6 @@ add_internal_flows(struct ofproto_dpif *ofproto) uint64_t ofpacts_stub[128 / 8]; struct ofpbuf ofpacts; struct rule *unused_rulep OVS_UNUSED; - struct ofpact_resubmit *resubmit; struct match match; int error; int id; @@ -1376,8 +1410,7 @@ add_internal_flows(struct ofproto_dpif *ofproto) controller = 
ofpact_put_CONTROLLER(&ofpacts); controller->max_len = UINT16_MAX; controller->controller_id = 0; - controller->reason = OFPR_NO_MATCH; - ofpact_pad(&ofpacts); + controller->reason = OFPR_IMPLICIT_MISS; error = add_internal_miss_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule); @@ -1408,22 +1441,6 @@ add_internal_flows(struct ofproto_dpif *ofproto) match_set_recirc_id(&match, 0); error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts, &unused_rulep); - if (error) { - return error; - } - - /* Continue rule lookups for not-matched recirc rules from table 0. - * - * (priority=1), actions=resubmit(, 0) - */ - resubmit = ofpact_put_RESUBMIT(&ofpacts); - resubmit->in_port = OFPP_IN_PORT; - resubmit->table_id = 0; - - match_init_catchall(&match); - error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts, - &unused_rulep); - return error; } @@ -1431,10 +1448,10 @@ static void destruct(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct ofproto_packet_in *pin, *next_pin; + struct ofproto_async_msg *am; struct rule_dpif *rule; struct oftable *table; - struct ovs_list pins; + struct ovs_list ams; ofproto->backer->need_revalidate = REV_RECONFIGURE; xlate_txn_start(); @@ -1452,16 +1469,15 @@ destruct(struct ofproto *ofproto_) ofproto_rule_delete(&ofproto->up, &rule->up); } } + ofproto_group_delete_all(&ofproto->up); - guarded_list_pop_all(&ofproto->pins, &pins); - LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) { - list_remove(&pin->list_node); - free(CONST_CAST(void *, pin->up.packet)); - free(pin); + guarded_list_pop_all(&ofproto->ams, &ams); + LIST_FOR_EACH_POP (am, list_node, &ams) { + ofproto_async_msg_free(am); } - guarded_list_destroy(&ofproto->pins); + guarded_list_destroy(&ofproto->ams); - dpif_backer_recirc_clear_ofproto(ofproto->backer, ofproto); + recirc_free_ofproto(ofproto, ofproto->up.name); mbridge_unref(ofproto->mbridge); @@ -1482,7 +1498,7 @@ destruct(struct ofproto *ofproto_) 
ovs_mutex_destroy(&ofproto->stats_mutex); ovs_mutex_destroy(&ofproto->vsp_mutex); - seq_destroy(ofproto->pins_seq); + seq_destroy(ofproto->ams_seq); close_dpif_backer(ofproto->backer); } @@ -1501,24 +1517,22 @@ run(struct ofproto *ofproto_) mcast_snooping_mdb_flush(ofproto->ms); } - /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during + /* Always updates the ofproto->ams_seqno to avoid frequent wakeup during * flow restore. Even though nothing is processed during flow restore, - * all queued 'pins' will be handled immediately when flow restore + * all queued 'ams' will be handled immediately when flow restore * completes. */ - ofproto->pins_seqno = seq_read(ofproto->pins_seq); + ofproto->ams_seqno = seq_read(ofproto->ams_seq); /* Do not perform any periodic activity required by 'ofproto' while * waiting for flow restore to complete. */ if (!ofproto_get_flow_restore_wait()) { - struct ofproto_packet_in *pin, *next_pin; - struct ovs_list pins; + struct ofproto_async_msg *am; + struct ovs_list ams; - guarded_list_pop_all(&ofproto->pins, &pins); - LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) { - connmgr_send_packet_in(ofproto->up.connmgr, pin); - list_remove(&pin->list_node); - free(CONST_CAST(void *, pin->up.packet)); - free(pin); + guarded_list_pop_all(&ofproto->ams, &ams); + LIST_FOR_EACH_POP (am, list_node, &ams) { + connmgr_send_async_msg(ofproto->up.connmgr, am); + ofproto_async_msg_free(am); } } @@ -1625,13 +1639,11 @@ wait(struct ofproto *ofproto_) mcast_snooping_wait(ofproto->ms); stp_wait(ofproto); if (ofproto->backer->need_revalidate) { - /* Shouldn't happen, but if it does just go around again. 
*/ - VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()"); poll_immediate_wake(); } seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq); - seq_wait(ofproto->pins_seq, ofproto->pins_seqno); + seq_wait(ofproto->ams_seq, ofproto->ams_seqno); } static void @@ -1678,10 +1690,19 @@ query_tables(struct ofproto *ofproto, } } +static void +set_tables_version(struct ofproto *ofproto_, cls_version_t version) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + atomic_store_relaxed(&ofproto->tables_version, version); +} + + static struct ofport * port_alloc(void) { - struct ofport_dpif *port = xmalloc(sizeof *port); + struct ofport_dpif *port = xzalloc(sizeof *port); return &port->up; } @@ -1699,6 +1720,7 @@ port_construct(struct ofport *port_) struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const struct netdev *netdev = port->up.netdev; char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; + const char *dp_port_name; struct dpif_port dpif_port; int error; @@ -1732,9 +1754,8 @@ port_construct(struct ofport *port_) return 0; } - error = dpif_port_query_by_name(ofproto->backer->dpif, - netdev_vport_get_dpif_port(netdev, namebuf, - sizeof namebuf), + dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); + error = dpif_port_query_by_name(ofproto->backer->dpif, dp_port_name, &dpif_port); if (error) { return error; @@ -1744,8 +1765,14 @@ port_construct(struct ofport *port_) if (netdev_get_tunnel_config(netdev)) { atomic_count_inc(&ofproto->backer->tnl_count); - tnl_port_add(port, port->up.netdev, port->odp_port, - ovs_native_tunneling_is_on(ofproto), namebuf); + error = tnl_port_add(port, port->up.netdev, port->odp_port, + ovs_native_tunneling_is_on(ofproto), dp_port_name); + if (error) { + atomic_count_dec(&ofproto->backer->tnl_count); + dpif_port_destroy(&dpif_port); + return error; + } + port->is_tunnel = true; if (ofproto->ipfix) { dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port); @@ -1775,7 
+1802,7 @@ port_construct(struct ofport *port_) } static void -port_destruct(struct ofport *port_) +port_destruct(struct ofport *port_, bool del) { struct ofport_dpif *port = ofport_dpif_cast(port_); struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); @@ -1790,7 +1817,7 @@ port_destruct(struct ofport *port_) dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf, sizeof namebuf); - if (dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { + if (del && dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { /* The underlying device is still there, so delete it. This * happens when the ofproto is being destroyed, since the caller * assumes that removal of attached ports will happen as part of @@ -1842,6 +1869,7 @@ port_modified(struct ofport *port_) { struct ofport_dpif *port = ofport_dpif_cast(port_); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; + const char *dp_port_name; struct netdev *netdev = port->up.netdev; if (port->bundle && port->bundle->bond) { @@ -1857,15 +1885,16 @@ port_modified(struct ofport *port_) } ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm, - port->lldp, port->up.pp.hw_addr); + port->lldp, &port->up.pp.hw_addr); - netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); + dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); if (port->is_tunnel) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); if (tnl_port_reconfigure(port, netdev, port->odp_port, - ovs_native_tunneling_is_on(ofproto), namebuf)) { + ovs_native_tunneling_is_on(ofproto), + dp_port_name)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } } @@ -1969,14 +1998,12 @@ static int set_cfm(struct ofport *ofport_, const struct cfm_settings *s) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct cfm *old = ofport->cfm; int error = 0; if (s) { if (!ofport->cfm) { - struct ofproto_dpif *ofproto; - - ofproto = 
ofproto_dpif_cast(ofport->up.ofproto); - ofproto->backer->need_revalidate = REV_RECONFIGURE; ofport->cfm = cfm_create(ofport->up.netdev); } @@ -1990,8 +2017,11 @@ set_cfm(struct ofport *ofport_, const struct cfm_settings *s) cfm_unref(ofport->cfm); ofport->cfm = NULL; out: + if (ofport->cfm != old) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm, - ofport->lldp, ofport->up.pp.hw_addr); + ofport->lldp, &ofport->up.pp.hw_addr); return error; } @@ -2033,7 +2063,7 @@ set_bfd(struct ofport *ofport_, const struct smap *cfg) ofproto->backer->need_revalidate = REV_RECONFIGURE; } ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm, - ofport->lldp, ofport->up.pp.hw_addr); + ofport->lldp, &ofport->up.pp.hw_addr); return 0; } @@ -2076,21 +2106,20 @@ set_lldp(struct ofport *ofport_, ofport->lldp = lldp_create(ofport->up.netdev, ofport_->mtu, cfg); } - if (lldp_configure(ofport->lldp)) { - error = 0; - goto out; + if (!lldp_configure(ofport->lldp, cfg)) { + error = EINVAL; } - - error = EINVAL; } - lldp_unref(ofport->lldp); - ofport->lldp = NULL; -out: + if (error) { + lldp_unref(ofport->lldp); + ofport->lldp = NULL; + } + ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm, ofport->lldp, - ofport->up.pp.hw_addr); + &ofport->up.pp.hw_addr); return error; } @@ -2146,7 +2175,7 @@ rstp_send_bpdu_cb(struct dp_packet *pkt, void *ofport_, void *ofproto_) struct ofport_dpif *ofport = ofport_; struct eth_header *eth = dp_packet_l2(pkt); - netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); + netdev_get_etheraddr(ofport->up.netdev, &eth->eth_src); if (eth_addr_is_zero(eth->eth_src)) { VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which " "does not have a configured source MAC address.", @@ -2171,7 +2200,7 @@ send_bpdu_cb(struct dp_packet *pkt, int port_num, void *ofproto_) } else { struct eth_header *eth = dp_packet_l2(pkt); - netdev_get_etheraddr(ofport->up.netdev, 
eth->eth_src); + netdev_get_etheraddr(ofport->up.netdev, &eth->eth_src); if (eth_addr_is_zero(eth->eth_src)) { VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d " "with unknown MAC", ofproto->up.name, port_num); @@ -2317,9 +2346,14 @@ rstp_run(struct ofproto_dpif *ofproto) } if (rstp_shift_root_learned_address(ofproto->rstp)) { - bundle_move(((struct ofport_dpif *)rstp_get_old_root_aux(ofproto->rstp))->bundle, - ((struct ofport_dpif *)rstp_get_new_root_aux(ofproto->rstp))->bundle); - rstp_reset_root_changed(ofproto->rstp); + struct ofport_dpif *old_root_aux = + (struct ofport_dpif *)rstp_get_old_root_aux(ofproto->rstp); + struct ofport_dpif *new_root_aux = + (struct ofport_dpif *)rstp_get_new_root_aux(ofproto->rstp); + if (old_root_aux != NULL && new_root_aux != NULL) { + bundle_move(old_root_aux->bundle, new_root_aux->bundle); + rstp_reset_root_changed(ofproto->rstp); + } } } } @@ -2559,8 +2593,11 @@ set_rstp_port(struct ofport *ofport_, if (!s || !s->enable) { if (rp) { - rstp_port_unref(rp); + rstp_port_set_aux(rp, NULL); + rstp_port_set_state(rp, RSTP_DISABLED); + rstp_port_set_mac_operational(rp, false); ofport->rstp_port = NULL; + rstp_port_unref(rp); update_rstp_port_state(ofport); } return; @@ -2743,7 +2780,7 @@ bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port, { struct ofport_dpif *port; - port = get_ofp_port(bundle->ofproto, ofp_port); + port = ofp_port_to_ofport(bundle->ofproto, ofp_port); if (!port) { return false; } @@ -3005,10 +3042,10 @@ send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); struct ofport_dpif *port = port_; - uint8_t ea[ETH_ADDR_LEN]; + struct eth_addr ea; int error; - error = netdev_get_etheraddr(port->up.netdev, ea); + error = netdev_get_etheraddr(port->up.netdev, &ea); if (!error) { struct dp_packet packet; void *packet_pdu; @@ -3031,43 +3068,41 @@ static void bundle_send_learning_packets(struct ofbundle *bundle) { struct ofproto_dpif *ofproto = 
bundle->ofproto; - struct dp_packet *learning_packet; int error, n_packets, n_errors; struct mac_entry *e; + struct pkt_list { + struct ovs_list list_node; + struct ofport_dpif *port; + struct dp_packet *pkt; + } *pkt_node; struct ovs_list packets; list_init(&packets); ovs_rwlock_rdlock(&ofproto->ml->rwlock); LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) { if (mac_entry_get_port(ofproto->ml, e) != bundle) { - void *port_void; - - learning_packet = bond_compose_learning_packet(bundle->bond, - e->mac, e->vlan, - &port_void); - /* Temporarily use 'frame' as a private pointer (see below). */ - ovs_assert(learning_packet->frame == dp_packet_data(learning_packet)); - learning_packet->frame = port_void; - list_push_back(&packets, &learning_packet->list_node); + pkt_node = xmalloc(sizeof *pkt_node); + pkt_node->pkt = bond_compose_learning_packet(bundle->bond, + e->mac, e->vlan, + (void **)&pkt_node->port); + list_push_back(&packets, &pkt_node->list_node); } } ovs_rwlock_unlock(&ofproto->ml->rwlock); error = n_packets = n_errors = 0; - LIST_FOR_EACH (learning_packet, list_node, &packets) { + LIST_FOR_EACH_POP (pkt_node, list_node, &packets) { int ret; - void *port_void = learning_packet->frame; - /* Restore 'frame'. */ - learning_packet->frame = dp_packet_data(learning_packet); - ret = ofproto_dpif_send_packet(port_void, learning_packet); + ret = ofproto_dpif_send_packet(pkt_node->port, pkt_node->pkt); + dp_packet_delete(pkt_node->pkt); + free(pkt_node); if (ret) { error = ret; n_errors++; } n_packets++; } - dp_packet_list_delete(&packets); if (n_errors) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -3249,8 +3284,8 @@ set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, /* Ports. 
*/ -static struct ofport_dpif * -get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) +struct ofport_dpif * +ofp_port_to_ofport(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port); return ofport ? ofport_dpif_cast(ofport) : NULL; @@ -3444,7 +3479,7 @@ static int port_del(struct ofproto *ofproto_, ofp_port_t ofp_port) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port); + struct ofport_dpif *ofport = ofp_port_to_ofport(ofproto, ofp_port); int error = 0; if (!ofport) { @@ -3665,14 +3700,13 @@ rule_expire(struct rule_dpif *rule) } } -/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'. - * 'flow' must reflect the data in 'packet'. */ int -ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, - const struct flow *flow, - struct rule_dpif *rule, - const struct ofpact *ofpacts, size_t ofpacts_len, - struct dp_packet *packet) +ofproto_dpif_execute_actions__(struct ofproto_dpif *ofproto, + const struct flow *flow, + struct rule_dpif *rule, + const struct ofpact *ofpacts, size_t ofpacts_len, + int recurse, int resubmits, + struct dp_packet *packet) { struct dpif_flow_stats stats; struct xlate_out xout; @@ -3689,20 +3723,28 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, rule_dpif_credit_stats(rule, &stats); } + uint64_t odp_actions_stub[1024 / 8]; + struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub); xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule, - stats.tcp_flags, packet); + stats.tcp_flags, packet, NULL, &odp_actions); xin.ofpacts = ofpacts; xin.ofpacts_len = ofpacts_len; xin.resubmit_stats = &stats; - xlate_actions(&xin, &xout); + xin.recurse = recurse; + xin.resubmits = resubmits; + if (xlate_actions(&xin, &xout) != XLATE_OK) { + error = EINVAL; + goto out; + } - execute.actions = xout.odp_actions->data; - execute.actions_len = 
xout.odp_actions->size; + execute.actions = odp_actions.data; + execute.actions_len = odp_actions.size; pkt_metadata_from_flow(&packet->md, flow); execute.packet = packet; execute.needs_help = (xout.slow & SLOW_ACTION) != 0; execute.probe = false; + execute.mtu = 0; /* Fix up in_port. */ in_port = flow->in_port.ofp_port; @@ -3712,20 +3754,38 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, execute.packet->md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port); error = dpif_execute(ofproto->backer->dpif, &execute); - +out: xlate_out_uninit(&xout); + ofpbuf_uninit(&odp_actions); return error; } +/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'. + * 'flow' must reflect the data in 'packet'. */ +int +ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, + const struct flow *flow, + struct rule_dpif *rule, + const struct ofpact *ofpacts, size_t ofpacts_len, + struct dp_packet *packet) +{ + return ofproto_dpif_execute_actions__(ofproto, flow, rule, ofpacts, + ofpacts_len, 0, 0, packet); +} + void rule_dpif_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats) { ovs_mutex_lock(&rule->stats_mutex); - rule->stats.n_packets += stats->n_packets; - rule->stats.n_bytes += stats->n_bytes; - rule->stats.used = MAX(rule->stats.used, stats->used); + if (OVS_UNLIKELY(rule->new_rule)) { + rule_dpif_credit_stats(rule->new_rule, stats); + } else { + rule->stats.n_packets += stats->n_packets; + rule->stats.n_bytes += stats->n_bytes; + rule->stats.used = MAX(rule->stats.used, stats->used); + } ovs_mutex_unlock(&rule->stats_mutex); } @@ -3756,21 +3816,13 @@ static void rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id) OVS_REQUIRES(rule->up.mutex) { - ovs_assert(!rule->recirc_id); - rule->recirc_id = id; -} - -/* Returns 'rule''s recirculation id. 
*/ -uint32_t -rule_dpif_get_recirc_id(struct rule_dpif *rule) - OVS_REQUIRES(rule->up.mutex) -{ - if (!rule->recirc_id) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - - rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto)); + ovs_assert(!rule->recirc_id || rule->recirc_id == id); + if (rule->recirc_id == id) { + /* Release the new reference to the same id. */ + recirc_free_id(id); + } else { + rule->recirc_id = id; } - return rule->recirc_id; } /* Sets 'rule''s recirculation id. */ @@ -3784,64 +3836,38 @@ rule_set_recirc_id(struct rule *rule_, uint32_t id) ovs_mutex_unlock(&rule->up.mutex); } -/* Lookup 'flow' in table 0 of 'ofproto''s classifier. - * If 'wc' is non-null, sets the fields that were relevant as part of - * the lookup. Returns the table id where a match or miss occurred via - * 'table_id'. This will be zero unless there was a miss and - * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables - * where misses occur, or TBL_INTERNAL if the rule has a non-zero - * recirculation ID, and a match was found in the internal table, or if - * there was no match and one of the special rules (drop_frags_rule, - * miss_rule, or no_packet_in_rule) was returned. - * - * The return value is the found rule, which is valid at least until the next - * RCU quiescent period. If the rule needs to stay around longer, - * a non-zero 'take_ref' must be passed in to cause a reference to be taken - * on it before this returns. 
*/ -struct rule_dpif * -rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, - struct flow_wildcards *wc, bool take_ref, - const struct dpif_flow_stats *stats, uint8_t *table_id) +cls_version_t +ofproto_dpif_get_tables_version(struct ofproto_dpif *ofproto OVS_UNUSED) { - *table_id = rule_dpif_lookup_get_init_table_id(flow); + cls_version_t version; + + atomic_read_relaxed(&ofproto->tables_version, &version); - return rule_dpif_lookup_from_table(ofproto, flow, wc, take_ref, stats, - table_id, flow->in_port.ofp_port, true, - true); + return version; } /* The returned rule (if any) is valid at least until the next RCU quiescent - * period. If the rule needs to stay around longer, a non-zero 'take_ref' - * must be passed in to cause a reference to be taken on it. + * period. If the rule needs to stay around longer, the caller should take + * a reference. * * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. */ static struct rule_dpif * -rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, - struct flow *flow, struct flow_wildcards *wc, - bool take_ref) +rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, cls_version_t version, + uint8_t table_id, struct flow *flow, + struct flow_wildcards *wc) { struct classifier *cls = &ofproto->up.tables[table_id].cls; - const struct cls_rule *cls_rule; - struct rule_dpif *rule; - - do { - cls_rule = classifier_lookup(cls, flow, wc); - - rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); - - /* Try again if the rule was released before we get the reference. */ - } while (rule && take_ref && !rule_dpif_try_ref(rule)); - - return rule; + return rule_dpif_cast(rule_from_cls_rule(classifier_lookup(cls, version, + flow, wc))); } -/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'. - * Returns the rule that was found, which may be one of the special rules - * according to packet miss hadling. 
If 'may_packet_in' is false, returning of - * the miss_rule (which issues packet ins for the controller) is avoided. - * Updates 'wc', if nonnull, to reflect the fields that were used during the - * lookup. +/* Look up 'flow' in 'ofproto''s classifier version 'version', starting from + * table '*table_id'. Returns the rule that was found, which may be one of the + * special rules according to packet miss hadling. If 'may_packet_in' is + * false, returning of the miss_rule (which issues packet ins for the + * controller) is avoided. Updates 'wc', if nonnull, to reflect the fields + * that were used during the lookup. * * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but * if none is found then the table miss configuration for that table is @@ -3853,9 +3879,8 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, * '*table_id'. * * The rule is returned in '*rule', which is valid at least until the next - * RCU quiescent period. If the '*rule' needs to stay around longer, - * a non-zero 'take_ref' must be passed in to cause a reference to be taken - * on it before this returns. + * RCU quiescent period. If the '*rule' needs to stay around longer, the + * caller must take a reference. * * 'in_port' allows the lookup to take place as if the in port had the value * 'in_port'. This is needed for resubmit action support. @@ -3863,8 +3888,9 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, * 'flow' is non-const to allow for temporary modifications during the lookup. * Any changes are restored before returning. 
*/ struct rule_dpif * -rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, - struct flow_wildcards *wc, bool take_ref, +rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, + cls_version_t version, struct flow *flow, + struct flow_wildcards *wc, const struct dpif_flow_stats *stats, uint8_t *table_id, ofp_port_t in_port, bool may_packet_in, bool honor_table_miss) @@ -3878,18 +3904,15 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, /* We always unwildcard nw_frag (for IP), so they * need not be unwildcarded here. */ if (flow->nw_frag & FLOW_NW_FRAG_ANY - && ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) { - if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { + && ofproto->up.frag_handling != OFPUTIL_FRAG_NX_MATCH) { + if (ofproto->up.frag_handling == OFPUTIL_FRAG_NORMAL) { /* We must pretend that transport ports are unavailable. */ flow->tp_src = htons(0); flow->tp_dst = htons(0); } else { - /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM). + /* Must be OFPUTIL_FRAG_DROP (we don't have OFPUTIL_FRAG_REASM). * Use the drop_frags_rule (which cannot disappear). 
*/ rule = ofproto->drop_frags_rule; - if (take_ref) { - rule_dpif_ref(rule); - } if (stats) { struct oftable *tbl = &ofproto->up.tables[*table_id]; unsigned long orig; @@ -3916,7 +3939,7 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, next_id++, next_id += (next_id == TBL_INTERNAL)) { *table_id = next_id; - rule = rule_dpif_lookup_in_table(ofproto, next_id, flow, wc, take_ref); + rule = rule_dpif_lookup_in_table(ofproto, version, next_id, flow, wc); if (stats) { struct oftable *tbl = &ofproto->up.tables[next_id]; unsigned long orig; @@ -3943,7 +3966,7 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, || miss_config == OFPUTIL_TABLE_MISS_CONTROLLER) { struct ofport_dpif *port; - port = get_ofp_port(ofproto, old_in_port); + port = ofp_port_to_ofport(ofproto, old_in_port); if (!port) { VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, old_in_port); @@ -3955,9 +3978,6 @@ rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, struct flow *flow, rule = ofproto->miss_rule; } } - if (take_ref) { - rule_dpif_ref(rule); - } out: /* Restore port numbers, as they may have been modified above. */ flow->tp_src = old_tp_src; @@ -3985,7 +4005,7 @@ static struct rule_dpif *rule_dpif_cast(const struct rule *rule) static struct rule * rule_alloc(void) { - struct rule_dpif *rule = xmalloc(sizeof *rule); + struct rule_dpif *rule = xzalloc(sizeof *rule); return &rule->up; } @@ -3996,27 +4016,140 @@ rule_dealloc(struct rule *rule_) free(rule); } +static enum ofperr +check_mask(struct ofproto_dpif *ofproto, const struct miniflow *flow) +{ + const struct odp_support *support; + uint16_t ct_state, ct_zone; + ovs_u128 ct_label; + uint32_t ct_mark; + + support = &ofproto_dpif_get_support(ofproto)->odp; + ct_state = MINIFLOW_GET_U16(flow, ct_state); + if (support->ct_state && support->ct_zone && support->ct_mark + && support->ct_label && support->ct_state_nat) { + return ct_state & CS_UNSUPPORTED_MASK ? 
OFPERR_OFPBMC_BAD_MASK : 0; + } + + ct_zone = MINIFLOW_GET_U16(flow, ct_zone); + ct_mark = MINIFLOW_GET_U32(flow, ct_mark); + ct_label = MINIFLOW_GET_U128(flow, ct_label); + + if ((ct_state && !support->ct_state) + || (ct_state & CS_UNSUPPORTED_MASK) + || ((ct_state & (CS_SRC_NAT | CS_DST_NAT)) && !support->ct_state_nat) + || (ct_zone && !support->ct_zone) + || (ct_mark && !support->ct_mark) + || (!ovs_u128_is_zero(&ct_label) && !support->ct_label)) { + return OFPERR_OFPBMC_BAD_MASK; + } + + return 0; +} + +static enum ofperr +check_actions(const struct ofproto_dpif *ofproto, + const struct rule_actions *const actions) +{ + const struct ofpact *ofpact; + + OFPACT_FOR_EACH (ofpact, actions->ofpacts, actions->ofpacts_len) { + const struct odp_support *support; + const struct ofpact_conntrack *ct; + const struct ofpact *a; + + if (ofpact->type != OFPACT_CT) { + continue; + } + + ct = CONTAINER_OF(ofpact, struct ofpact_conntrack, ofpact); + support = &ofproto_dpif_get_support(ofproto)->odp; + + if (!support->ct_state) { + return OFPERR_OFPBAC_BAD_TYPE; + } + if ((ct->zone_imm || ct->zone_src.field) && !support->ct_zone) { + return OFPERR_OFPBAC_BAD_ARGUMENT; + } + + OFPACT_FOR_EACH(a, ct->actions, ofpact_ct_get_action_len(ct)) { + const struct mf_field *dst = ofpact_get_mf_dst(a); + + if (a->type == OFPACT_NAT && !support->ct_state_nat) { + /* The backer doesn't seem to support the NAT bits in + * 'ct_state': assume that it doesn't support the NAT + * action. 
*/ + return OFPERR_OFPBAC_BAD_TYPE; + } + if (dst && ((dst->id == MFF_CT_MARK && !support->ct_mark) + || (dst->id == MFF_CT_LABEL && !support->ct_label))) { + return OFPERR_OFPBAC_BAD_SET_ARGUMENT; + } + } + } + + return 0; +} + +static enum ofperr +rule_check(struct rule *rule) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->ofproto); + enum ofperr err; + + err = check_mask(ofproto, &rule->cr.match.mask->masks); + if (err) { + return err; + } + return check_actions(ofproto, rule->actions); +} + static enum ofperr rule_construct(struct rule *rule_) OVS_NO_THREAD_SAFETY_ANALYSIS { struct rule_dpif *rule = rule_dpif_cast(rule_); + int error; + + error = rule_check(rule_); + if (error) { + return error; + } + ovs_mutex_init_adaptive(&rule->stats_mutex); rule->stats.n_packets = 0; rule->stats.n_bytes = 0; rule->stats.used = rule->up.modified; rule->recirc_id = 0; + rule->new_rule = NULL; return 0; } -static enum ofperr -rule_insert(struct rule *rule_) +static void +rule_insert(struct rule *rule_, struct rule *old_rule_, bool forward_stats) OVS_REQUIRES(ofproto_mutex) { struct rule_dpif *rule = rule_dpif_cast(rule_); + + if (old_rule_ && forward_stats) { + struct rule_dpif *old_rule = rule_dpif_cast(old_rule_); + + ovs_assert(!old_rule->new_rule); + + /* Take a reference to the new rule, and refer all stats updates from + * the old rule to the new rule. */ + rule_dpif_ref(rule); + + ovs_mutex_lock(&old_rule->stats_mutex); + ovs_mutex_lock(&rule->stats_mutex); + old_rule->new_rule = rule; /* Forward future stats. */ + rule->stats = old_rule->stats; /* Transfer stats to the new rule. 
*/ + ovs_mutex_unlock(&rule->stats_mutex); + ovs_mutex_unlock(&old_rule->stats_mutex); + } + complete_operation(rule); - return 0; } static void @@ -4029,14 +4162,17 @@ rule_delete(struct rule *rule_) static void rule_destruct(struct rule *rule_) + OVS_NO_THREAD_SAFETY_ANALYSIS { struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_destroy(&rule->stats_mutex); + /* Release reference to the new rule, if any. */ + if (rule->new_rule) { + rule_dpif_unref(rule->new_rule); + } if (rule->recirc_id) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); - - ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id); + recirc_free_id(rule->recirc_id); } } @@ -4047,9 +4183,13 @@ rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes, struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_lock(&rule->stats_mutex); - *packets = rule->stats.n_packets; - *bytes = rule->stats.n_bytes; - *used = rule->stats.used; + if (OVS_UNLIKELY(rule->new_rule)) { + rule_get_stats(&rule->new_rule->up, packets, bytes, used); + } else { + *packets = rule->stats.n_packets; + *bytes = rule->stats.n_bytes; + *used = rule->stats.used; + } ovs_mutex_unlock(&rule->stats_mutex); } @@ -4071,22 +4211,6 @@ rule_execute(struct rule *rule, const struct flow *flow, return 0; } -static void -rule_modify_actions(struct rule *rule_, bool reset_counters) - OVS_REQUIRES(ofproto_mutex) -{ - struct rule_dpif *rule = rule_dpif_cast(rule_); - - if (reset_counters) { - ovs_mutex_lock(&rule->stats_mutex); - rule->stats.n_packets = 0; - rule->stats.n_bytes = 0; - ovs_mutex_unlock(&rule->stats_mutex); - } - - complete_operation(rule); -} - static struct group_dpif *group_dpif_cast(const struct ofgroup *group) { return group ? 
CONTAINER_OF(group, struct group_dpif, up) : NULL; @@ -4150,19 +4274,6 @@ static enum ofperr group_construct(struct ofgroup *group_) { struct group_dpif *group = group_dpif_cast(group_); - const struct ofputil_bucket *bucket; - - /* Prevent group chaining because our locking structure makes it hard to - * implement deadlock-free. (See xlate_group_resource_check().) */ - LIST_FOR_EACH (bucket, list_node, &group->up.buckets) { - const struct ofpact *a; - - OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) { - if (a->type == OFPACT_GROUP) { - return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED; - } - } - } ovs_mutex_init_adaptive(&group->stats_mutex); ovs_mutex_lock(&group->stats_mutex); @@ -4291,10 +4402,10 @@ get_datapath_version(const struct ofproto *ofproto_) static bool set_frag_handling(struct ofproto *ofproto_, - enum ofp_config_flags frag_handling) + enum ofputil_frag_handling frag_handling) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - if (frag_handling != OFPC_FRAG_REASM) { + if (frag_handling != OFPUTIL_FRAG_REASM) { ofproto->backer->need_revalidate = REV_RECONFIGURE; return true; } else { @@ -4487,8 +4598,9 @@ ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn, bundle = b->port; ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, name, sizeof name); - ds_put_format(&ds, "%5s %4d "IP_FMT" %3d\n", - name, grp->vlan, IP_ARGS(grp->ip4), + ds_put_format(&ds, "%5s %4d ", name, grp->vlan); + ipv6_format_mapped(&grp->addr, &ds); + ds_put_format(&ds, " %3d\n", mcast_bundle_age(ofproto->ms, b)); } } @@ -4500,7 +4612,7 @@ ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn, bundle = mrouter->port; ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, name, sizeof name); - ds_put_format(&ds, "%5s %4d querier %3d\n", + ds_put_format(&ds, "%5s %4d querier %3d\n", name, mrouter->vlan, mcast_mrouter_age(ofproto->ms, mrouter)); } @@ -4514,8 +4626,9 @@ struct trace_ctx { struct xlate_in xin; const struct flow 
*key; struct flow flow; - struct flow_wildcards wc; struct ds *result; + struct flow_wildcards wc; + struct ofpbuf odp_actions; }; static void @@ -4582,7 +4695,7 @@ static void trace_format_odp(struct ds *result, int level, const char *title, struct trace_ctx *trace) { - struct ofpbuf *odp_actions = trace->xout.odp_actions; + struct ofpbuf *odp_actions = &trace->odp_actions; ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); @@ -4598,13 +4711,17 @@ trace_format_megaflow(struct ds *result, int level, const char *title, ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); - flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc); match_init(&match, trace->key, &trace->wc); match_format(&match, result, OFP_DEFAULT_PRIORITY); ds_put_char(result, '\n'); } -static void trace_report(struct xlate_in *xin, const char *s, int recurse); +static void trace_report(struct xlate_in *, int recurse, + const char *format, ...) + OVS_PRINTF_FORMAT(3, 4); +static void trace_report_valist(struct xlate_in *, int recurse, + const char *format, va_list args) + OVS_PRINTF_FORMAT(3, 0); static void trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse) @@ -4614,15 +4731,15 @@ trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse) if (!recurse) { if (rule == xin->ofproto->miss_rule) { - trace_report(xin, "No match, flow generates \"packet in\"s.", - recurse); + trace_report(xin, recurse, + "No match, flow generates \"packet in\"s."); } else if (rule == xin->ofproto->no_packet_in_rule) { - trace_report(xin, "No match, packets dropped because " - "OFPPC_NO_PACKET_IN is set on in_port.", recurse); + trace_report(xin, recurse, "No match, packets dropped because " + "OFPPC_NO_PACKET_IN is set on in_port."); } else if (rule == xin->ofproto->drop_frags_rule) { - trace_report(xin, "Packets dropped because they are IP " + trace_report(xin, recurse, "Packets dropped because they are IP " "fragments and 
the fragment handling mode is " - "\"drop\".", recurse); + "\"drop\"."); } } @@ -4637,16 +4754,27 @@ trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse) } static void -trace_report(struct xlate_in *xin, const char *s, int recurse) +trace_report_valist(struct xlate_in *xin, int recurse, + const char *format, va_list args) { struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin); struct ds *result = trace->result; ds_put_char_multiple(result, '\t', recurse); - ds_put_cstr(result, s); + ds_put_format_valist(result, format, args); ds_put_char(result, '\n'); } +static void +trace_report(struct xlate_in *xin, int recurse, const char *format, ...) +{ + va_list args; + + va_start(args, format); + trace_report_valist(xin, recurse, format, args); + va_end(args); +} + /* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following * forms are supported: * @@ -4729,7 +4857,7 @@ parse_flow_and_packet(int argc, const char *argv[], } if (odp_flow_key_to_flow(odp_key.data, odp_key.size, flow) == ODP_FIT_ERROR) { - error = "Failed to parse flow key"; + error = "Failed to parse datapath flow key"; goto exit; } @@ -4746,7 +4874,7 @@ parse_flow_and_packet(int argc, const char *argv[], char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL); if (err) { - m_err = xasprintf("Bad flow syntax: %s", err); + m_err = xasprintf("Bad openflow flow syntax: %s", err); free(err); goto exit; } else { @@ -4877,13 +5005,18 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, goto exit; } if (enforce_consistency) { - retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, - &flow, u16_to_ofp(ofproto->up.max_ports), - 0, 0, usable_protocols); + retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow, + u16_to_ofp(ofproto->up.max_ports), + 0, ofproto->up.n_tables, + usable_protocols); } else { retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow, - u16_to_ofp(ofproto->up.max_ports), 0, 0, - &usable_protocols); 
+ u16_to_ofp(ofproto->up.max_ports), 0, + ofproto->up.n_tables, &usable_protocols); + } + if (!retval) { + retval = ofproto_check_ofpacts(&ofproto->up, ofpacts.data, + ofpacts.size); } if (retval) { @@ -4919,35 +5052,38 @@ ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow, struct ds *ds) { struct trace_ctx trace; + enum xlate_error error; ds_put_format(ds, "Bridge: %s\n", ofproto->up.name); ds_put_cstr(ds, "Flow: "); flow_format(ds, flow); ds_put_char(ds, '\n'); - flow_wildcards_init_catchall(&trace.wc); + ofpbuf_init(&trace.odp_actions, 0); trace.result = ds; trace.key = flow; /* Original flow key, used for megaflow. */ trace.flow = *flow; /* May be modified by actions. */ xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, NULL, - ntohs(flow->tcp_flags), packet); + ntohs(flow->tcp_flags), packet, &trace.wc, + &trace.odp_actions); trace.xin.ofpacts = ofpacts; trace.xin.ofpacts_len = ofpacts_len; trace.xin.resubmit_hook = trace_resubmit; - trace.xin.report_hook = trace_report; - - xlate_actions(&trace.xin, &trace.xout); + trace.xin.report_hook = trace_report_valist; + error = xlate_actions(&trace.xin, &trace.xout); ds_put_char(ds, '\n'); trace_format_flow(ds, 0, "Final flow", &trace); trace_format_megaflow(ds, 0, "Megaflow", &trace); ds_put_cstr(ds, "Datapath actions: "); - format_odp_actions(ds, trace.xout.odp_actions->data, - trace.xout.odp_actions->size); + format_odp_actions(ds, trace.odp_actions.data, trace.odp_actions.size); - if (trace.xout.slow) { + if (error != XLATE_OK) { + ds_put_format(ds, "\nTranslation failed (%s), packet is dropped.\n", + xlate_strerror(error)); + } else if (trace.xout.slow) { enum slow_path_reason slow; ds_put_cstr(ds, "\nThis flow is handled by the userspace " @@ -4965,6 +5101,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow, } xlate_out_uninit(&trace.xout); + ofpbuf_uninit(&trace.odp_actions); } /* Store the current ofprotos in 'ofproto_shash'. 
Returns a sorted list @@ -5192,6 +5329,8 @@ disable_tnl_push_pop(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, ofproto_use_tnl_push_pop = true; unixctl_command_reply(conn, "Tunnel push-pop on"); ofproto_revalidate_all_backers(); + } else { + unixctl_command_reply_error(conn, "Invalid argument"); } } @@ -5476,7 +5615,7 @@ vsp_add(struct ofport_dpif *port, ofp_port_t realdev_ofp_port, int vid) static odp_port_t ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port) { - const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port); + const struct ofport_dpif *ofport = ofp_port_to_ofport(ofproto, ofp_port); return ofport ? ofport->odp_port : ODPP_NONE; } @@ -5511,61 +5650,6 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) } } -struct ofproto_dpif * -ofproto_dpif_recirc_get_ofproto(const struct dpif_backer *backer, - uint32_t recirc_id) -{ - struct dpif_backer_recirc_node *node; - - node = CONTAINER_OF(cmap_find(&backer->recirc_map, recirc_id), - struct dpif_backer_recirc_node, cmap_node); - - return node ? 
node->ofproto : NULL; -} - -uint32_t -ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto) -{ - struct dpif_backer *backer = ofproto->backer; - uint32_t recirc_id = recirc_id_alloc(backer->rid_pool); - - if (recirc_id) { - struct dpif_backer_recirc_node *node = xmalloc(sizeof *node); - - node->recirc_id = recirc_id; - node->ofproto = ofproto; - - ovs_mutex_lock(&backer->recirc_mutex); - cmap_insert(&backer->recirc_map, &node->cmap_node, node->recirc_id); - ovs_mutex_unlock(&backer->recirc_mutex); - } - - return recirc_id; -} - -void -ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id) -{ - struct dpif_backer *backer = ofproto->backer; - struct dpif_backer_recirc_node *node; - - node = CONTAINER_OF(cmap_find(&backer->recirc_map, recirc_id), - struct dpif_backer_recirc_node, cmap_node); - if (node) { - ovs_mutex_lock(&backer->recirc_mutex); - cmap_remove(&backer->recirc_map, &node->cmap_node, node->recirc_id); - ovs_mutex_unlock(&backer->recirc_mutex); - recirc_id_free(backer->rid_pool, node->recirc_id); - - /* 'recirc_id' should never be freed by non-owning 'ofproto'. */ - ovs_assert(node->ofproto == ofproto); - - /* RCU postpone the free, since other threads may be referring - * to 'node' at same time. 
*/ - ovsrcu_postpone(free, node); - } -} - int ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, const struct match *match, int priority, @@ -5573,28 +5657,23 @@ ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, const struct ofpbuf *ofpacts, struct rule **rulep) { - struct ofputil_flow_mod fm; + struct ofproto_flow_mod ofm; struct rule_dpif *rule; int error; - fm.match = *match; - fm.priority = priority; - fm.new_cookie = htonll(0); - fm.cookie = htonll(0); - fm.cookie_mask = htonll(0); - fm.modify_cookie = false; - fm.table_id = TBL_INTERNAL; - fm.command = OFPFC_ADD; - fm.idle_timeout = idle_timeout; - fm.hard_timeout = 0; - fm.importance = 0; - fm.buffer_id = 0; - fm.out_port = 0; - fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; - fm.ofpacts = ofpacts->data; - fm.ofpacts_len = ofpacts->size; - - error = ofproto_flow_mod(&ofproto->up, &fm); + ofm.fm = (struct ofputil_flow_mod) { + .match = *match, + .priority = priority, + .table_id = TBL_INTERNAL, + .command = OFPFC_ADD, + .idle_timeout = idle_timeout, + .flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY, + .ofpacts = ofpacts->data, + .ofpacts_len = ofpacts->size, + .delete_reason = OVS_OFPRR_NONE, + }; + + error = ofproto_flow_mod(&ofproto->up, &ofm); if (error) { VLOG_ERR_RL(&rl, "failed to add internal flow (%s)", ofperr_to_string(error)); @@ -5602,8 +5681,10 @@ ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, return error; } - rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow, - &fm.match.wc, false); + rule = rule_dpif_lookup_in_table(ofproto, + ofproto_dpif_get_tables_version(ofproto), + TBL_INTERNAL, &ofm.fm.match.flow, + &ofm.fm.match.wc); if (rule) { *rulep = &rule->up; } else { @@ -5616,20 +5697,18 @@ int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto, struct match *match, int priority) { - struct ofputil_flow_mod fm; + struct ofproto_flow_mod ofm; int error; - fm.match = *match; - fm.priority = priority; - 
fm.new_cookie = htonll(0); - fm.cookie = htonll(0); - fm.cookie_mask = htonll(0); - fm.modify_cookie = false; - fm.table_id = TBL_INTERNAL; - fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; - fm.command = OFPFC_DELETE_STRICT; - - error = ofproto_flow_mod(&ofproto->up, &fm); + ofm.fm = (struct ofputil_flow_mod) { + .match = *match, + .priority = priority, + .table_id = TBL_INTERNAL, + .flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY, + .command = OFPFC_DELETE_STRICT, + }; + + error = ofproto_flow_mod(&ofproto->up, &ofm); if (error) { VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)", ofperr_to_string(error)); @@ -5639,6 +5718,12 @@ ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto, return 0; } +const struct uuid * +ofproto_dpif_get_uuid(const struct ofproto_dpif *ofproto) +{ + return &ofproto->uuid; +} + const struct ofproto_class ofproto_dpif_class = { init, enumerate_types, @@ -5657,6 +5742,7 @@ const struct ofproto_class ofproto_dpif_class = { type_get_memory_usage, flush, query_tables, + set_tables_version, port_alloc, port_construct, port_destruct, @@ -5683,8 +5769,6 @@ const struct ofproto_class ofproto_dpif_class = { rule_dealloc, rule_get_stats, rule_execute, - NULL, /* rule_premodify_actions */ - rule_modify_actions, set_frag_handling, packet_out, set_netflow,