X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif.c;h=1d964ddc8a89f13717507fcde0033dcf69a79cc7;hb=6bef3c7ca859f208239ca61ec3b25c09a3571553;hp=cf6e2138329b795025c3f899c359569755b0305d;hpb=bc2d31ffe9503b64d7626d53221c70da397dcfeb;p=cascardo%2Fovs.git diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index cf6e21383..1d964ddc8 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,6 +37,7 @@ #include "lacp.h" #include "learn.h" #include "mac-learning.h" +#include "mcast-snooping.h" #include "meta-flow.h" #include "multipath.h" #include "netdev-vport.h" @@ -53,6 +54,7 @@ #include "ofproto-dpif-ipfix.h" #include "ofproto-dpif-mirror.h" #include "ofproto-dpif-monitor.h" +#include "ofproto-dpif-rid.h" #include "ofproto-dpif-sflow.h" #include "ofproto-dpif-upcall.h" #include "ofproto-dpif-xlate.h" @@ -72,11 +74,6 @@ VLOG_DEFINE_THIS_MODULE(ofproto_dpif); COVERAGE_DEFINE(ofproto_dpif_expired); COVERAGE_DEFINE(packet_in_overflow); -/* Number of implemented OpenFlow tables. */ -enum { N_TABLES = 255 }; -enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */ -BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255); - struct flow_miss; struct rule_dpif { @@ -87,11 +84,20 @@ struct rule_dpif { * - Do include packets and bytes from datapath flows which have not * recently been processed by a revalidator. */ struct ovs_mutex stats_mutex; - uint64_t packet_count OVS_GUARDED; /* Number of packets received. */ - uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */ + struct dpif_flow_stats stats OVS_GUARDED; + + /* If non-zero then the recirculation id that has + * been allocated for use with this rule. + * The recirculation id and associated internal flow should + * be freed when the rule is freed */ + uint32_t recirc_id; }; -static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes); +/* RULE_CAST() depends on this. */ +BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0); + +static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes, + long long int *used); static struct rule_dpif *rule_dpif_cast(const struct rule *); static void rule_expire(struct rule_dpif *); @@ -105,7 +111,6 @@ struct group_dpif { struct ovs_mutex stats_mutex; uint64_t packet_count OVS_GUARDED; /* Number of packets received. */ uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */ - struct bucket_counter *bucket_stats OVS_GUARDED; /* Bucket statistics. */ }; struct ofbundle { @@ -140,6 +145,10 @@ static void stp_wait(struct ofproto_dpif *ofproto); static int set_stp_port(struct ofport *, const struct ofproto_port_stp_settings *); +static void rstp_run(struct ofproto_dpif *ofproto); +static void set_rstp_port(struct ofport *, + const struct ofproto_port_rstp_settings *); + struct ofport_dpif { struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */ struct ofport up; @@ -160,6 +169,10 @@ struct ofport_dpif { enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */ long long int stp_state_entered; + /* Rapid Spanning Tree. */ + struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */ + enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. 
*/ + /* Queue to DSCP mapping. */ struct ofproto_port_queue *qdscp; size_t n_qdscp; @@ -208,11 +221,6 @@ static int set_bfd(struct ofport *, const struct smap *); static int set_cfm(struct ofport *, const struct cfm_settings *); static void ofport_update_peer(struct ofport_dpif *); -struct dpif_completion { - struct list list_node; - struct ofoperation *op; -}; - /* Reasons that we might need to revalidate every datapath flow, and * corresponding coverage counters. * @@ -224,17 +232,21 @@ struct dpif_completion { enum revalidate_reason { REV_RECONFIGURE = 1, /* Switch configuration changed. */ REV_STP, /* Spanning tree protocol port status change. */ + REV_RSTP, /* RSTP port status change. */ REV_BOND, /* Bonding changed. */ REV_PORT_TOGGLED, /* Port enabled or disabled by CFM, LACP, ...*/ REV_FLOW_TABLE, /* Flow table changed. */ REV_MAC_LEARNING, /* Mac learning changed. */ + REV_MCAST_SNOOPING, /* Multicast snooping changed. */ }; COVERAGE_DEFINE(rev_reconfigure); COVERAGE_DEFINE(rev_stp); +COVERAGE_DEFINE(rev_rstp); COVERAGE_DEFINE(rev_bond); COVERAGE_DEFINE(rev_port_toggled); COVERAGE_DEFINE(rev_flow_table); COVERAGE_DEFINE(rev_mac_learning); +COVERAGE_DEFINE(rev_mcast_snooping); /* All datapaths of a given type share a single dpif backer instance. */ struct dpif_backer { @@ -252,10 +264,22 @@ struct dpif_backer { bool recv_set_enable; /* Enables or disables receiving packets. */ + /* Recirculation. */ + struct recirc_id_pool *rid_pool; /* Recirculation ID pool. */ + bool enable_recirc; /* True if the datapath supports recirculation */ + /* True if the datapath supports variable-length * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions. * False if the datapath supports only 8-byte (or shorter) userdata. */ bool variable_length_userdata; + + /* True if the datapath supports masked data in OVS_ACTION_ATTR_SET + * actions. */ + bool masked_set_action; + + /* Maximum number of MPLS label stack entries that the datapath supports + * in a match */ + size_t max_mpls_depth; }; /* All existing ofproto_backer instances, indexed by ofproto->up.type. */ @@ -279,6 +303,7 @@ struct ofproto_dpif { struct dpif_ipfix *ipfix; struct hmap bundles; /* Contains "struct ofbundle"s. */ struct mac_learning *ml; + struct mcast_snooping *ms; bool has_bonded_bundles; bool lacp_enabled; struct mbridge *mbridge; @@ -291,6 +316,10 @@ struct ofproto_dpif { struct stp *stp; long long int stp_last_tick; + /* Rapid Spanning Tree. */ + struct rstp *rstp; + long long int rstp_last_tick; + /* VLAN splinters. */ struct ovs_mutex vsp_mutex; struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */ @@ -305,6 +334,8 @@ struct ofproto_dpif { /* Work queues. */ struct guarded_list pins; /* Contains "struct ofputil_packet_in"s. */ + struct seq *pins_seq; /* For notifying 'pins' reception. */ + uint64_t pins_seqno; }; /* All existing ofproto_dpif instances, indexed by ->up.name. 
 */
@@ -319,9 +350,21 @@ ofproto_dpif_cast(const struct ofproto *ofproto)
     return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
 }
 
+size_t
+ofproto_dpif_get_max_mpls_depth(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->max_mpls_depth;
+}
+
+bool
+ofproto_dpif_get_enable_recirc(const struct ofproto_dpif *ofproto)
+{
+    return ofproto->backer->enable_recirc;
+}
+
 static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
                                         ofp_port_t ofp_port);
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
+static void ofproto_trace(struct ofproto_dpif *, struct flow *,
                           const struct ofpbuf *packet,
                           const struct ofpact[], size_t ofpacts_len,
                           struct ds *);
@@ -352,6 +395,21 @@ ofproto_dpif_send_packet_in(struct ofproto_dpif *ofproto,
         free(CONST_CAST(void *, pin->up.packet));
         free(pin);
     }
+
+    /* Wakes up main thread for packet-in I/O. */
+    seq_change(ofproto->pins_seq);
+}
+
+/* The default "table-miss" behaviour for OpenFlow 1.3+ is to drop the
+ * packet rather than to send the packet to the controller.
+ *
+ * This function returns true if a packet_in message for a "table-miss"
+ * should be sent to at least one controller, false otherwise. */
+bool
+ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
+{
+    return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
 }
 
 /* Factory functions. */
@@ -451,6 +509,7 @@ type_run(const char *type)
     }
 
     dpif_run(backer->dpif);
+    udpif_run(backer->udpif);
 
     /* If vswitchd started with other_config:flow_restore_wait set as "true",
      * and the configuration has now changed to "false", enable receiving
@@ -473,6 +532,8 @@ type_run(const char *type)
         udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
     }
 
+    dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);
+
    if (backer->need_revalidate) {
        struct ofproto_dpif *ofproto;
        struct simap_node *node;
@@ -532,12 +593,14 @@ type_run(const char *type)
        simap_destroy(&tmp_backers);
 
        switch (backer->need_revalidate) {
-        case REV_RECONFIGURE:   COVERAGE_INC(rev_reconfigure);   break;
-        case REV_STP:           COVERAGE_INC(rev_stp);           break;
-        case REV_BOND:          COVERAGE_INC(rev_bond);          break;
-        case REV_PORT_TOGGLED:  COVERAGE_INC(rev_port_toggled);  break;
-        case REV_FLOW_TABLE:    COVERAGE_INC(rev_flow_table);    break;
-        case REV_MAC_LEARNING:  COVERAGE_INC(rev_mac_learning);  break;
+        case REV_RECONFIGURE:    COVERAGE_INC(rev_reconfigure);    break;
+        case REV_STP:            COVERAGE_INC(rev_stp);            break;
+        case REV_RSTP:           COVERAGE_INC(rev_rstp);           break;
+        case REV_BOND:           COVERAGE_INC(rev_bond);           break;
+        case REV_PORT_TOGGLED:   COVERAGE_INC(rev_port_toggled);   break;
+        case REV_FLOW_TABLE:     COVERAGE_INC(rev_flow_table);     break;
+        case REV_MAC_LEARNING:   COVERAGE_INC(rev_mac_learning);   break;
+        case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
        }
        backer->need_revalidate = 0;
@@ -549,16 +612,19 @@ type_run(const char *type)
                continue;
            }
 
-            ovs_rwlock_wrlock(&xlate_rwlock);
+            xlate_txn_start();
            xlate_ofproto_set(ofproto, ofproto->up.name,
                              ofproto->backer->dpif, ofproto->miss_rule,
                              ofproto->no_packet_in_rule, ofproto->ml,
-                              ofproto->stp, ofproto->mbridge,
-                              ofproto->sflow, ofproto->ipfix,
+                              ofproto->stp, ofproto->rstp, ofproto->ms,
+                              ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
                              ofproto->netflow, ofproto->up.frag_handling,
                              ofproto->up.forward_bpdu,
                              connmgr_has_in_band(ofproto->up.connmgr),
+                              ofproto->backer->enable_recirc,
+                              ofproto->backer->variable_length_userdata,
+                              ofproto->backer->max_mpls_depth,
+
ofproto->backer->masked_set_action); HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { xlate_bundle_set(ofproto, bundle, bundle->name, @@ -576,11 +642,12 @@ type_run(const char *type) ofport->up.ofp_port, ofport->odp_port, ofport->up.netdev, ofport->cfm, ofport->bfd, ofport->peer, stp_port, - ofport->qdscp, ofport->n_qdscp, - ofport->up.pp.config, ofport->up.pp.state, - ofport->is_tunnel, ofport->may_enable); + ofport->rstp_port, ofport->qdscp, + ofport->n_qdscp, ofport->up.pp.config, + ofport->up.pp.state, ofport->is_tunnel, + ofport->may_enable); } - ovs_rwlock_unlock(&xlate_rwlock); + xlate_txn_commit(); } udpif_revalidate(backer->udpif); @@ -768,9 +835,9 @@ close_dpif_backer(struct dpif_backer *backer) ovs_rwlock_destroy(&backer->odp_to_ofport_lock); hmap_destroy(&backer->odp_to_ofport_map); shash_find_and_delete(&all_dpif_backers, backer->type); + recirc_id_pool_destroy(backer->rid_pool); free(backer->type); dpif_close(backer->dpif); - free(backer); } @@ -781,6 +848,9 @@ struct odp_garbage { }; static bool check_variable_length_userdata(struct dpif_backer *backer); +static size_t check_max_mpls_depth(struct dpif_backer *backer); +static bool check_recirc(struct dpif_backer *backer); +static bool check_masked_set_action(struct dpif_backer *backer); static int open_dpif_backer(const char *type, struct dpif_backer **backerp) @@ -791,6 +861,7 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) struct shash_node *node; struct list garbage_list; struct odp_garbage *garbage, *next; + struct sset names; char *backer_name; const char *name; @@ -873,6 +944,11 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) shash_add(&all_dpif_backers, type, backer); + backer->enable_recirc = check_recirc(backer); + backer->max_mpls_depth = check_max_mpls_depth(backer); + backer->masked_set_action = check_masked_set_action(backer); + backer->rid_pool = recirc_id_pool_create(); + error = dpif_recv_set(backer->dpif, backer->recv_set_enable); if (error) { VLOG_ERR("failed to listen on datapath of type %s: %s", @@ -880,15 +956,74 @@ open_dpif_backer(const char *type, struct dpif_backer **backerp) close_dpif_backer(backer); return error; } - backer->variable_length_userdata = check_variable_length_userdata(backer); if (backer->recv_set_enable) { udpif_set_threads(backer->udpif, n_handlers, n_revalidators); } + /* This check fails if performed before udpif threads have been set, + * as the kernel module checks that the 'pid' in userspace action + * is non-zero. */ + backer->variable_length_userdata = check_variable_length_userdata(backer); + return error; } +/* Tests whether 'backer''s datapath supports recirculation. Only newer + * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys. We need to disable some + * features on older datapaths that don't support this feature. + * + * Returns false if 'backer' definitely does not support recirculation, true if + * it seems to support recirculation or if at least the error we get is + * ambiguous. 
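+ *
+ * The probe's result is cached in 'backer->enable_recirc' and exported
+ * through ofproto_dpif_get_enable_recirc().  A minimal sketch of a
+ * consumer (illustrative only; rule_dpif_lookup() below is the real
+ * user):
+ *
+ *     if (ofproto_dpif_get_enable_recirc(ofproto)) {
+ *         wc->masks.recirc_id = UINT32_MAX;
+ *     }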
 */
+static bool
+check_recirc(struct dpif_backer *backer)
+{
+    struct flow flow;
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    int error;
+    bool enable_recirc = false;
+
+    memset(&flow, 0, sizeof flow);
+    flow.recirc_id = 1;
+    flow.dp_hash = 1;
+
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
+
+    error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE,
+                          ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
+                          0, NULL);
+    if (error && error != EEXIST) {
+        if (error != EINVAL) {
+            VLOG_WARN("%s: Recirculation flow probe failed (%s)",
+                      dpif_name(backer->dpif), ovs_strerror(error));
+        }
+        goto done;
+    }
+
+    error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), ofpbuf_size(&key),
+                          NULL);
+    if (error) {
+        VLOG_WARN("%s: failed to delete recirculation feature probe flow",
+                  dpif_name(backer->dpif));
+    }
+
+    enable_recirc = true;
+
+done:
+    if (enable_recirc) {
+        VLOG_INFO("%s: Datapath supports recirculation",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath does not support recirculation",
+                  dpif_name(backer->dpif));
+    }
+
+    return enable_recirc;
+}
+
 /* Tests whether 'backer''s datapath supports variable-length
  * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
  * to disable some features on older datapaths that don't support this
@@ -917,7 +1052,7 @@ check_variable_length_userdata(struct dpif_backer *backer)
     ofpbuf_init(&actions, 64);
     start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
     nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
-                   dpif_port_get_pid(backer->dpif, ODPP_NONE));
+                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
     nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
     nl_msg_end_nested(&actions, start);
 
@@ -928,8 +1063,8 @@ check_variable_length_userdata(struct dpif_backer *backer)
     /* Execute the actions.  On older datapaths this fails with ERANGE, on
      * newer datapaths it succeeds. */
-    execute.actions = actions.data;
-    execute.actions_len = actions.size;
+    execute.actions = ofpbuf_data(&actions);
+    execute.actions_len = ofpbuf_size(&actions);
     execute.packet = &packet;
     execute.md = PKT_METADATA_INITIALIZER(0);
     execute.needs_help = false;
 
@@ -941,11 +1076,6 @@ check_variable_length_userdata(struct dpif_backer *backer)
     switch (error) {
     case 0:
-        /* Variable-length userdata is supported.
-         *
-         * Purge received packets to avoid processing the nonsense packet we
-         * sent to userspace, then report success. */
-        dpif_recv_purge(backer->dpif);
         return true;
 
     case ERANGE:
@@ -965,6 +1095,104 @@ check_variable_length_userdata(struct dpif_backer *backer)
     }
 }
 
+/* Tests the MPLS label stack depth supported by 'backer''s datapath.
+ *
+ * Returns the number of elements in a struct flow's mpls_lse field
+ * if the datapath supports at least that many entries in an
+ * MPLS label stack.
+ * Otherwise returns the number of MPLS push actions supported by
+ * the datapath.
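+ *
+ * For example (hypothetical numbers): with FLOW_MAX_MPLS_LABELS of 3, a
+ * datapath that rejects the probe flow at depth 2 makes this return 2,
+ * and flow translation then limits itself to two MPLS push actions for
+ * this datapath.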
*/ +static size_t +check_max_mpls_depth(struct dpif_backer *backer) +{ + struct flow flow; + int n; + + for (n = 0; n < FLOW_MAX_MPLS_LABELS; n++) { + struct odputil_keybuf keybuf; + struct ofpbuf key; + int error; + + memset(&flow, 0, sizeof flow); + flow.dl_type = htons(ETH_TYPE_MPLS); + flow_set_mpls_bos(&flow, n, 1); + + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow, NULL, 0, false); + + error = dpif_flow_put(backer->dpif, DPIF_FP_CREATE, + ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, + NULL, 0, NULL); + if (error && error != EEXIST) { + if (error != EINVAL) { + VLOG_WARN("%s: MPLS stack length feature probe failed (%s)", + dpif_name(backer->dpif), ovs_strerror(error)); + } + break; + } + + error = dpif_flow_del(backer->dpif, ofpbuf_data(&key), + ofpbuf_size(&key), NULL); + if (error) { + VLOG_WARN("%s: failed to delete MPLS feature probe flow", + dpif_name(backer->dpif)); + } + } + + VLOG_INFO("%s: MPLS label stack length probed as %d", + dpif_name(backer->dpif), n); + return n; +} + +/* Tests whether 'backer''s datapath supports masked data in + * OVS_ACTION_ATTR_SET actions. We need to disable some features on older + * datapaths that don't support this feature. */ +static bool +check_masked_set_action(struct dpif_backer *backer) +{ + struct eth_header *eth; + struct ofpbuf actions; + struct dpif_execute execute; + struct ofpbuf packet; + int error; + struct ovs_key_ethernet key, mask; + + /* Compose a set action that will cause an EINVAL error on older + * datapaths that don't support masked set actions. + * Avoid using a full mask, as it could be translated to a non-masked + * set action instead. */ + ofpbuf_init(&actions, 64); + memset(&key, 0x53, sizeof key); + memset(&mask, 0x7f, sizeof mask); + commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask, + sizeof key); + + /* Compose a dummy ethernet packet. */ + ofpbuf_init(&packet, ETH_HEADER_LEN); + eth = ofpbuf_put_zeros(&packet, ETH_HEADER_LEN); + eth->eth_type = htons(0x1234); + + /* Execute the actions. On older datapaths this fails with EINVAL, on + * newer datapaths it succeeds. */ + execute.actions = ofpbuf_data(&actions); + execute.actions_len = ofpbuf_size(&actions); + execute.packet = &packet; + execute.md = PKT_METADATA_INITIALIZER(0); + execute.needs_help = false; + + error = dpif_execute(backer->dpif, &execute); + + ofpbuf_uninit(&packet); + ofpbuf_uninit(&actions); + + if (error) { + /* Masked set action is not supported. 
*/ + VLOG_INFO("%s: datapath does not support masked set action feature.", + dpif_name(backer->dpif)); + } + return !error; +} + static int construct(struct ofproto *ofproto_) { @@ -981,13 +1209,15 @@ construct(struct ofproto *ofproto_) ofproto->sflow = NULL; ofproto->ipfix = NULL; ofproto->stp = NULL; + ofproto->rstp = NULL; ofproto->dump_seq = 0; hmap_init(&ofproto->bundles); ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME); + ofproto->ms = NULL; ofproto->mbridge = mbridge_create(); ofproto->has_bonded_bundles = false; ofproto->lacp_enabled = false; - ovs_mutex_init(&ofproto->stats_mutex); + ovs_mutex_init_adaptive(&ofproto->stats_mutex); ovs_mutex_init(&ofproto->vsp_mutex); guarded_list_init(&ofproto->pins); @@ -1002,6 +1232,9 @@ construct(struct ofproto *ofproto_) sset_init(&ofproto->port_poll_set); ofproto->port_poll_errno = 0; ofproto->change_seq = 0; + ofproto->pins_seq = seq_create(); + ofproto->pins_seqno = seq_read(ofproto->pins_seq); + SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) { struct iface_hint *iface_hint = node->data; @@ -1025,50 +1258,28 @@ construct(struct ofproto *ofproto_) ofproto_init_tables(ofproto_, N_TABLES); error = add_internal_flows(ofproto); + ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY; return error; } static int -add_internal_flow(struct ofproto_dpif *ofproto, int id, +add_internal_miss_flow(struct ofproto_dpif *ofproto, int id, const struct ofpbuf *ofpacts, struct rule_dpif **rulep) { - struct ofputil_flow_mod fm; + struct match match; int error; + struct rule *rule; - match_init_catchall(&fm.match); - fm.priority = 0; - match_set_reg(&fm.match, 0, id); - fm.new_cookie = htonll(0); - fm.cookie = htonll(0); - fm.cookie_mask = htonll(0); - fm.modify_cookie = false; - fm.table_id = TBL_INTERNAL; - fm.command = OFPFC_ADD; - fm.idle_timeout = 0; - fm.hard_timeout = 0; - fm.buffer_id = 0; - fm.out_port = 0; - fm.flags = 0; - fm.ofpacts = ofpacts->data; - fm.ofpacts_len = ofpacts->size; - - error = ofproto_flow_mod(&ofproto->up, &fm); - if (error) { - VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)", - id, ofperr_to_string(error)); - return error; - } + match_init_catchall(&match); + match_set_reg(&match, 0, id); - if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL, - rulep)) { - rule_dpif_unref(*rulep); - } else { - OVS_NOT_REACHED(); - } + error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts, + &rule); + *rulep = error ? 
NULL : rule_dpif_cast(rule);
 
-    return 0;
+    return error;
 }
 
 static int
@@ -1077,6 +1288,9 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     struct ofpact_controller *controller;
     uint64_t ofpacts_stub[128 / 8];
     struct ofpbuf ofpacts;
+    struct rule *unused_rulep OVS_UNUSED;
+    struct ofpact_resubmit *resubmit;
+    struct match match;
     int error;
     int id;
@@ -1089,20 +1303,52 @@ add_internal_flows(struct ofproto_dpif *ofproto)
     controller->reason = OFPR_NO_MATCH;
     ofpact_pad(&ofpacts);
 
-    error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->miss_rule);
     if (error) {
         return error;
     }
 
     ofpbuf_clear(&ofpacts);
-    error = add_internal_flow(ofproto, id++, &ofpacts,
-                              &ofproto->no_packet_in_rule);
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->no_packet_in_rule);
+    if (error) {
+        return error;
+    }
+
+    error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+                                   &ofproto->drop_frags_rule);
+    if (error) {
+        return error;
+    }
+
+    /* Continue non-recirculation rule lookups from table 0.
+     *
+     * (priority=2), recirc=0, actions=resubmit(, 0)
+     */
+    resubmit = ofpact_put_RESUBMIT(&ofpacts);
+    resubmit->in_port = OFPP_IN_PORT;
+    resubmit->table_id = 0;
+
+    match_init_catchall(&match);
+    match_set_recirc_id(&match, 0);
+
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
+                                           &unused_rulep);
     if (error) {
         return error;
     }
 
-    error = add_internal_flow(ofproto, id++, &ofpacts,
-                              &ofproto->drop_frags_rule);
+    /* Drop any runaway recirc rule lookups.  The recirc_id has to be
+     * non-zero when this rule is reached.
+     *
+     * (priority=1), *, actions=drop
+     */
+    ofpbuf_clear(&ofpacts);
+    match_init_catchall(&match);
+    error = ofproto_dpif_add_internal_flow(ofproto, &match, 1, 0, &ofpacts,
+                                           &unused_rulep);
+
     return error;
 }
 
@@ -1110,29 +1356,24 @@ static void
 destruct(struct ofproto *ofproto_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
-    struct rule_dpif *rule, *next_rule;
     struct ofproto_packet_in *pin, *next_pin;
+    struct rule_dpif *rule;
     struct oftable *table;
     struct list pins;
 
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_remove_ofproto(ofproto);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
-    /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
-     * use-after-free error. */
-    udpif_revalidate(ofproto->backer->udpif);
+    /* Ensure that the upcall processing threads have no remaining references
+     * to the ofproto or anything in it.
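+     * Otherwise an upcall handler could still be translating packets
+     * against the rules that are deleted just below.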
*/ + udpif_synchronize(ofproto->backer->udpif); hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node); OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) { - struct cls_cursor cursor; - - ovs_rwlock_rdlock(&table->cls.rwlock); - cls_cursor_init(&cursor, &table->cls, NULL); - ovs_rwlock_unlock(&table->cls.rwlock); - CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) { + CLS_FOR_EACH_SAFE (rule, up.cr, &table->cls) { ofproto_rule_delete(&ofproto->up, &rule->up); } } @@ -1149,8 +1390,10 @@ destruct(struct ofproto *ofproto_) netflow_unref(ofproto->netflow); dpif_sflow_unref(ofproto->sflow); + dpif_ipfix_unref(ofproto->ipfix); hmap_destroy(&ofproto->bundles); mac_learning_unref(ofproto->ml); + mcast_snooping_unref(ofproto->ms); hmap_destroy(&ofproto->vlandev_map); hmap_destroy(&ofproto->realdev_vid_map); @@ -1162,6 +1405,8 @@ destruct(struct ofproto *ofproto_) ovs_mutex_destroy(&ofproto->stats_mutex); ovs_mutex_destroy(&ofproto->vsp_mutex); + seq_destroy(ofproto->pins_seq); + close_dpif_backer(ofproto->backer); } @@ -1176,8 +1421,15 @@ run(struct ofproto *ofproto_) ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); + mcast_snooping_mdb_flush(ofproto->ms); } + /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during + * flow restore. Even though nothing is processed during flow restore, + * all queued 'pins' will be handled immediately when flow restore + * completes. */ + ofproto->pins_seqno = seq_read(ofproto->pins_seq); + /* Do not perform any periodic activity required by 'ofproto' while * waiting for flow restore to complete. */ if (!ofproto_get_flow_restore_wait()) { @@ -1222,12 +1474,17 @@ run(struct ofproto *ofproto_) } stp_run(ofproto); + rstp_run(ofproto); ovs_rwlock_wrlock(&ofproto->ml->rwlock); if (mac_learning_run(ofproto->ml)) { ofproto->backer->need_revalidate = REV_MAC_LEARNING; } ovs_rwlock_unlock(&ofproto->ml->rwlock); + if (mcast_snooping_run(ofproto->ms)) { + ofproto->backer->need_revalidate = REV_MCAST_SNOOPING; + } + new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif)); if (ofproto->dump_seq != new_dump_seq) { struct rule *rule, *next_rule; @@ -1289,6 +1546,7 @@ wait(struct ofproto *ofproto_) ovs_rwlock_rdlock(&ofproto->ml->rwlock); mac_learning_wait(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); + mcast_snooping_wait(ofproto->ms); stp_wait(ofproto); if (ofproto->backer->need_revalidate) { /* Shouldn't happen, but if it does just go around again. 
*/ @@ -1297,6 +1555,7 @@ wait(struct ofproto *ofproto_) } seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq); + seq_wait(ofproto->pins_seq, ofproto->pins_seqno); } static void @@ -1311,48 +1570,36 @@ type_get_memory_usage(const char *type, struct simap *usage) } static void -flush(struct ofproto *ofproto OVS_UNUSED) +flush(struct ofproto *ofproto_) { - udpif_flush(); -} + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct dpif_backer *backer = ofproto->backer; -static void -get_features(struct ofproto *ofproto_ OVS_UNUSED, - bool *arp_match_ip, enum ofputil_action_bitmap *actions) -{ - *arp_match_ip = true; - *actions = (OFPUTIL_A_OUTPUT | - OFPUTIL_A_SET_VLAN_VID | - OFPUTIL_A_SET_VLAN_PCP | - OFPUTIL_A_STRIP_VLAN | - OFPUTIL_A_SET_DL_SRC | - OFPUTIL_A_SET_DL_DST | - OFPUTIL_A_SET_NW_SRC | - OFPUTIL_A_SET_NW_DST | - OFPUTIL_A_SET_NW_TOS | - OFPUTIL_A_SET_TP_SRC | - OFPUTIL_A_SET_TP_DST | - OFPUTIL_A_ENQUEUE); + if (backer) { + udpif_flush(backer->udpif); + } } static void -get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots) +query_tables(struct ofproto *ofproto, + struct ofputil_table_features *features, + struct ofputil_table_stats *stats) { - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - struct dpif_dp_stats s; - uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags; - uint64_t n_lookup; + strcpy(features->name, "classifier"); + + if (stats) { + int i; - strcpy(ots->name, "classifier"); + for (i = 0; i < ofproto->n_tables; i++) { + unsigned long missed, matched; - dpif_get_dp_stats(ofproto->backer->dpif, &s); - rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes); - rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes); - rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes); + atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched); + atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed); - n_lookup = s.n_hit + s.n_missed - n_dropped_frags; - ots->lookup_count = htonll(n_lookup); - ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in); + stats[i].matched_count = matched; + stats[i].lookup_count = matched + missed; + } + } } static struct ofport * @@ -1383,9 +1630,11 @@ port_construct(struct ofport *port_) port->bundle = NULL; port->cfm = NULL; port->bfd = NULL; - port->may_enable = true; + port->may_enable = false; port->stp_port = NULL; port->stp_state = STP_DISABLED; + port->rstp_port = NULL; + port->rstp_state = RSTP_DISABLED; port->is_tunnel = false; port->peer = NULL; port->qdscp = NULL; @@ -1419,6 +1668,9 @@ port_construct(struct ofport *port_) if (netdev_get_tunnel_config(netdev)) { tnl_port_add(port, port->up.netdev, port->odp_port); port->is_tunnel = true; + if (ofproto->ipfix) { + dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port); + } } else { /* Sanity-check that a mapping doesn't already exist. This * shouldn't happen for non-tunnel ports. 
 */
@@ -1453,9 +1705,9 @@ port_destruct(struct ofport *port_)
     const char *dp_port_name;
 
     ofproto->backer->need_revalidate = REV_RECONFIGURE;
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_ofport_remove(port);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
     dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
                                               sizeof namebuf);
@@ -1480,12 +1732,20 @@ port_destruct(struct ofport *port_)
         ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
     }
 
+    if (port->is_tunnel && ofproto->ipfix) {
+        dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
+    }
+
     tnl_port_del(port);
     sset_find_and_delete(&ofproto->ports, devname);
     sset_find_and_delete(&ofproto->ghost_ports, devname);
     bundle_remove(port_);
     set_cfm(port_, NULL);
     set_bfd(port_, NULL);
+    if (port->stp_port) {
+        stp_port_disable(port->stp_port);
+    }
+    set_rstp_port(port_, NULL);
     if (ofproto->sflow) {
         dpif_sflow_del_port(ofproto->sflow, port->odp_port);
     }
@@ -1578,9 +1838,11 @@ set_ipfix(
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
     struct dpif_ipfix *di = ofproto->ipfix;
     bool has_options = bridge_exporter_options || flow_exporters_options;
+    bool new_di = false;
 
     if (has_options && !di) {
         di = ofproto->ipfix = dpif_ipfix_create();
+        new_di = true;
     }
 
     if (di) {
@@ -1590,6 +1852,16 @@ set_ipfix(
             di, bridge_exporter_options, flow_exporters_options,
             n_flow_exporters_options);
 
+        /* Add tunnel ports only when a new ipfix is created. */
+        if (new_di == true) {
+            struct ofport_dpif *ofport;
+            HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
+                if (ofport->is_tunnel == true) {
+                    dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
+                }
+            }
+        }
+
         if (!has_options) {
             dpif_ipfix_unref(di);
             ofproto->ipfix = NULL;
@@ -1630,21 +1902,27 @@ out:
 }
 
 static bool
+cfm_status_changed(struct ofport *ofport_)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+    return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
+}
+
+static int
 get_cfm_status(const struct ofport *ofport_,
-               struct ofproto_cfm_status *status)
+               struct cfm_status *status)
 {
     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    int ret = 0;
 
     if (ofport->cfm) {
-        status->faults = cfm_get_fault(ofport->cfm);
-        status->flap_count = cfm_get_flap_count(ofport->cfm);
-        status->remote_opstate = cfm_get_opup(ofport->cfm);
-        status->health = cfm_get_health(ofport->cfm);
-        cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps);
-        return true;
+        cfm_get_status(ofport->cfm, status);
     } else {
-        return false;
+        ret = ENOENT;
     }
+
+    return ret;
 }
 
 static int
@@ -1665,21 +1943,50 @@ set_bfd(struct ofport *ofport_, const struct smap *cfg)
     return 0;
 }
 
+static bool
+bfd_status_changed(struct ofport *ofport_)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+    return ofport->bfd ? bfd_check_status_change(ofport->bfd) : true;
+}
+
 static int
 get_bfd_status(struct ofport *ofport_, struct smap *smap)
 {
     struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    int ret = 0;
 
     if (ofport->bfd) {
         bfd_get_status(ofport->bfd, smap);
-        return 0;
     } else {
-        return ENOENT;
+        ret = ENOENT;
     }
+
+    return ret;
 }
 
 /* Spanning Tree. */
 
+/* Called while rstp_mutex is held.
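+ * Because 'rstp_mutex' is held, this callback cannot safely re-enter
+ * the rstp module; it only stamps the BPDU's source MAC address and
+ * transmits the packet.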
*/ +static void +rstp_send_bpdu_cb(struct ofpbuf *pkt, void *ofport_, void *ofproto_) +{ + struct ofproto_dpif *ofproto = ofproto_; + struct ofport_dpif *ofport = ofport_; + struct eth_header *eth = ofpbuf_l2(pkt); + + netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); + if (eth_addr_is_zero(eth->eth_src)) { + VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which " + "does not have a configured source MAC address.", + ofproto->up.name, ofp_to_u16(ofport->up.ofp_port)); + } else { + ofproto_dpif_send_packet(ofport, pkt); + } + ofpbuf_delete(pkt); +} + static void send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) { @@ -1692,7 +1999,7 @@ send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d", ofproto->up.name, port_num); } else { - struct eth_header *eth = pkt->l2; + struct eth_header *eth = ofpbuf_l2(pkt); netdev_get_etheraddr(ofport->up.netdev, eth->eth_src); if (eth_addr_is_zero(eth->eth_src)) { @@ -1705,134 +2012,271 @@ send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_) ofpbuf_delete(pkt); } -/* Configures STP on 'ofproto_' using the settings defined in 's'. */ -static int -set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s) +/* Configure RSTP on 'ofproto_' using the settings defined in 's'. */ +static void +set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); /* Only revalidate flows if the configuration changed. */ - if (!s != !ofproto->stp) { + if (!s != !ofproto->rstp) { ofproto->backer->need_revalidate = REV_RECONFIGURE; } if (s) { - if (!ofproto->stp) { - ofproto->stp = stp_create(ofproto_->name, s->system_id, - send_bpdu_cb, ofproto); - ofproto->stp_last_tick = time_msec(); + if (!ofproto->rstp) { + ofproto->rstp = rstp_create(ofproto_->name, s->address, + rstp_send_bpdu_cb, ofproto); + ofproto->rstp_last_tick = time_msec(); } - - stp_set_bridge_id(ofproto->stp, s->system_id); - stp_set_bridge_priority(ofproto->stp, s->priority); - stp_set_hello_time(ofproto->stp, s->hello_time); - stp_set_max_age(ofproto->stp, s->max_age); - stp_set_forward_delay(ofproto->stp, s->fwd_delay); - } else { + rstp_set_bridge_address(ofproto->rstp, s->address); + rstp_set_bridge_priority(ofproto->rstp, s->priority); + rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time); + rstp_set_bridge_force_protocol_version(ofproto->rstp, + s->force_protocol_version); + rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age); + rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay); + rstp_set_bridge_transmit_hold_count(ofproto->rstp, + s->transmit_hold_count); + } else { struct ofport *ofport; - HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) { - set_stp_port(ofport, NULL); + set_rstp_port(ofport, NULL); } - - stp_unref(ofproto->stp); - ofproto->stp = NULL; + rstp_unref(ofproto->rstp); + ofproto->rstp = NULL; } - - return 0; } -static int -get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s) +static void +get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); - if (ofproto->stp) { + if (ofproto->rstp) { s->enabled = true; - s->bridge_id = stp_get_bridge_id(ofproto->stp); - s->designated_root = stp_get_designated_root(ofproto->stp); - s->root_path_cost = stp_get_root_path_cost(ofproto->stp); + s->root_id = rstp_get_root_id(ofproto->rstp); + s->bridge_id = 
rstp_get_bridge_id(ofproto->rstp); + s->designated_id = rstp_get_designated_id(ofproto->rstp); + s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp); + s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp); + s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp); } else { s->enabled = false; } - - return 0; } static void -update_stp_port_state(struct ofport_dpif *ofport) +update_rstp_port_state(struct ofport_dpif *ofport) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); - enum stp_state state; + enum rstp_state state; /* Figure out new state. */ - state = ofport->stp_port ? stp_port_get_state(ofport->stp_port) - : STP_DISABLED; + state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port) + : RSTP_DISABLED; /* Update state. */ - if (ofport->stp_state != state) { + if (ofport->rstp_state != state) { enum ofputil_port_state of_state; bool fwd_change; - VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s", - netdev_get_name(ofport->up.netdev), - stp_state_name(ofport->stp_state), - stp_state_name(state)); - if (stp_learn_in_state(ofport->stp_state) - != stp_learn_in_state(state)) { + VLOG_DBG("port %s: RSTP state changed from %s to %s", + netdev_get_name(ofport->up.netdev), + rstp_state_name(ofport->rstp_state), + rstp_state_name(state)); + if (rstp_learn_in_state(ofport->rstp_state) + != rstp_learn_in_state(state)) { /* xxx Learning action flows should also be flushed. */ ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); } - fwd_change = stp_forward_in_state(ofport->stp_state) - != stp_forward_in_state(state); + fwd_change = rstp_forward_in_state(ofport->rstp_state) + != rstp_forward_in_state(state); - ofproto->backer->need_revalidate = REV_STP; - ofport->stp_state = state; - ofport->stp_state_entered = time_msec(); + ofproto->backer->need_revalidate = REV_RSTP; + ofport->rstp_state = state; if (fwd_change && ofport->bundle) { bundle_update(ofport->bundle); } - /* Update the STP state bits in the OpenFlow port description. */ + /* Update the RSTP state bits in the OpenFlow port description. */ of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK; - of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN - : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN - : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD - : state == STP_BLOCKING ? OFPUTIL_PS_STP_BLOCK - : 0); + of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN + : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD + : state == RSTP_DISCARDING ? OFPUTIL_PS_STP_LISTEN + : 0); ofproto_port_set_state(&ofport->up, of_state); } } -/* Configures STP on 'ofport_' using the settings defined in 's'. The - * caller is responsible for assigning STP port numbers and ensuring - * there are no duplicates. */ -static int -set_stp_port(struct ofport *ofport_, - const struct ofproto_port_stp_settings *s) +static void +rstp_run(struct ofproto_dpif *ofproto) { - struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); - struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); - struct stp_port *sp = ofport->stp_port; + if (ofproto->rstp) { + long long int now = time_msec(); + long long int elapsed = now - ofproto->rstp_last_tick; + struct rstp_port *rp; + struct ofport_dpif *ofport; - if (!s || !s->enable) { - if (sp) { + /* Every second, decrease the values of the timers. 
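+         * (The RSTP timers are defined with one-second resolution, so a
+         * coarse once-per-second tick is sufficient here.)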
*/ + if (elapsed >= 1000) { + rstp_tick_timers(ofproto->rstp); + ofproto->rstp_last_tick = now; + } + rp = NULL; + while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) { + update_rstp_port_state(ofport); + } + /* FIXME: This check should be done on-event (i.e., when setting + * p->fdb_flush) and not periodically. + */ + if (rstp_check_and_reset_fdb_flush(ofproto->rstp)) { + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + /* FIXME: RSTP should be able to flush the entries pertaining to a + * single port, not the whole table. + */ + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); + } + } +} + +/* Configures STP on 'ofproto_' using the settings defined in 's'. */ +static int +set_stp(struct ofproto *ofproto_, const struct ofproto_stp_settings *s) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + /* Only revalidate flows if the configuration changed. */ + if (!s != !ofproto->stp) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } + + if (s) { + if (!ofproto->stp) { + ofproto->stp = stp_create(ofproto_->name, s->system_id, + send_bpdu_cb, ofproto); + ofproto->stp_last_tick = time_msec(); + } + + stp_set_bridge_id(ofproto->stp, s->system_id); + stp_set_bridge_priority(ofproto->stp, s->priority); + stp_set_hello_time(ofproto->stp, s->hello_time); + stp_set_max_age(ofproto->stp, s->max_age); + stp_set_forward_delay(ofproto->stp, s->fwd_delay); + } else { + struct ofport *ofport; + + HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) { + set_stp_port(ofport, NULL); + } + + stp_unref(ofproto->stp); + ofproto->stp = NULL; + } + + return 0; +} + +static int +get_stp_status(struct ofproto *ofproto_, struct ofproto_stp_status *s) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + + if (ofproto->stp) { + s->enabled = true; + s->bridge_id = stp_get_bridge_id(ofproto->stp); + s->designated_root = stp_get_designated_root(ofproto->stp); + s->root_path_cost = stp_get_root_path_cost(ofproto->stp); + } else { + s->enabled = false; + } + + return 0; +} + +static void +update_stp_port_state(struct ofport_dpif *ofport) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + enum stp_state state; + + /* Figure out new state. */ + state = ofport->stp_port ? stp_port_get_state(ofport->stp_port) + : STP_DISABLED; + + /* Update state. */ + if (ofport->stp_state != state) { + enum ofputil_port_state of_state; + bool fwd_change; + + VLOG_DBG("port %s: STP state changed from %s to %s", + netdev_get_name(ofport->up.netdev), + stp_state_name(ofport->stp_state), + stp_state_name(state)); + if (stp_learn_in_state(ofport->stp_state) + != stp_learn_in_state(state)) { + /* xxx Learning action flows should also be flushed. */ + ovs_rwlock_wrlock(&ofproto->ml->rwlock); + mac_learning_flush(ofproto->ml); + ovs_rwlock_unlock(&ofproto->ml->rwlock); + mcast_snooping_mdb_flush(ofproto->ms); + } + fwd_change = stp_forward_in_state(ofport->stp_state) + != stp_forward_in_state(state); + + ofproto->backer->need_revalidate = REV_STP; + ofport->stp_state = state; + ofport->stp_state_entered = time_msec(); + + if (fwd_change && ofport->bundle) { + bundle_update(ofport->bundle); + } + + /* Update the STP state bits in the OpenFlow port description. */ + of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK; + of_state |= (state == STP_LISTENING ? OFPUTIL_PS_STP_LISTEN + : state == STP_LEARNING ? OFPUTIL_PS_STP_LEARN + : state == STP_FORWARDING ? OFPUTIL_PS_STP_FORWARD + : state == STP_BLOCKING ? 
OFPUTIL_PS_STP_BLOCK + : 0); + ofproto_port_set_state(&ofport->up, of_state); + } +} + +/* Configures STP on 'ofport_' using the settings defined in 's'. The + * caller is responsible for assigning STP port numbers and ensuring + * there are no duplicates. */ +static int +set_stp_port(struct ofport *ofport_, + const struct ofproto_port_stp_settings *s) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct stp_port *sp = ofport->stp_port; + + if (!s || !s->enable) { + if (sp) { ofport->stp_port = NULL; stp_port_disable(sp); update_stp_port_state(ofport); } return 0; } else if (sp && stp_port_no(sp) != s->port_num - && ofport == stp_port_get_aux(sp)) { + && ofport == stp_port_get_aux(sp)) { /* The port-id changed, so disable the old one if it's not * already in use by another port. */ stp_port_disable(sp); } sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num); + + /* Set name before enabling the port so that debugging messages can print + * the name. */ + stp_port_set_name(sp, netdev_get_name(ofport->up.netdev)); stp_port_enable(sp); stp_port_set_aux(sp, ofport); @@ -1909,6 +2353,7 @@ stp_run(struct ofproto_dpif *ofproto) ovs_rwlock_wrlock(&ofproto->ml->rwlock); mac_learning_flush(ofproto->ml); ovs_rwlock_unlock(&ofproto->ml->rwlock); + mcast_snooping_mdb_flush(ofproto->ms); } } } @@ -1920,6 +2365,57 @@ stp_wait(struct ofproto_dpif *ofproto) poll_timer_wait(1000); } } + +/* Configures RSTP on 'ofport_' using the settings defined in 's'. The + * caller is responsible for assigning RSTP port numbers and ensuring + * there are no duplicates. */ +static void +set_rstp_port(struct ofport *ofport_, + const struct ofproto_port_rstp_settings *s) +{ + struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + struct rstp_port *rp = ofport->rstp_port; + + if (!s || !s->enable) { + if (rp) { + rstp_port_unref(rp); + ofport->rstp_port = NULL; + update_rstp_port_state(ofport); + } + return; + } + + /* Check if need to add a new port. */ + if (!rp) { + rp = ofport->rstp_port = rstp_add_port(ofproto->rstp); + } + + rstp_port_set(rp, s->port_num, s->priority, s->path_cost, + s->admin_edge_port, s->auto_edge, s->mcheck, ofport); + update_rstp_port_state(ofport); + /* Synchronize operational status. 
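+     * An RSTP port only participates while its MAC layer is operational,
+     * so seed the (possibly new) port with the current 'may_enable'
+     * state; port_run() keeps it up to date from then on.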
 */
+    rstp_port_set_mac_operational(rp, ofport->may_enable);
+}
+
+static void
+get_rstp_port_status(struct ofport *ofport_,
+                     struct ofproto_port_rstp_status *s)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+    struct rstp_port *rp = ofport->rstp_port;
+
+    if (!ofproto->rstp || !rp) {
+        s->enabled = false;
+        return;
+    }
+
+    s->enabled = true;
+    rstp_port_get_status(rp, &s->port_id, &s->state, &s->role, &s->tx_count,
+                         &s->rx_count, &s->error_count, &s->uptime);
+}
+
 static int
 set_queues(struct ofport *ofport_,
            const struct ofproto_port_queue *qdscp,
@@ -2083,9 +2579,9 @@ bundle_destroy(struct ofbundle *bundle)
     ofproto = bundle->ofproto;
     mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
 
-    ovs_rwlock_wrlock(&xlate_rwlock);
+    xlate_txn_start();
     xlate_bundle_remove(bundle);
-    ovs_rwlock_unlock(&xlate_rwlock);
+    xlate_txn_commit();
 
     LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
         bundle_del_port(port);
@@ -2260,12 +2756,13 @@ bundle_set(struct ofproto *ofproto_, void *aux,
                 ofproto->backer->need_revalidate = REV_RECONFIGURE;
             }
         } else {
-            bundle->bond = bond_create(s->bond);
+            bundle->bond = bond_create(s->bond, ofproto);
             ofproto->backer->need_revalidate = REV_RECONFIGURE;
         }
 
         LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
-            bond_slave_register(bundle->bond, port, port->up.netdev);
+            bond_slave_register(bundle->bond, port,
+                                port->up.ofp_port, port->up.netdev);
         }
     } else {
         bond_unref(bundle->bond);
@@ -2343,7 +2840,9 @@ bundle_send_learning_packets(struct ofbundle *bundle)
             learning_packet = bond_compose_learning_packet(bundle->bond,
                                                            e->mac, e->vlan,
                                                            &port_void);
-            learning_packet->private_p = port_void;
+            /* Temporarily use 'frame' as a private pointer (see below). */
+            ovs_assert(learning_packet->frame == ofpbuf_data(learning_packet));
+            learning_packet->frame = port_void;
             list_push_back(&packets, &learning_packet->list_node);
         }
     }
@@ -2352,8 +2851,11 @@ bundle_send_learning_packets(struct ofbundle *bundle)
     error = n_packets = n_errors = 0;
     LIST_FOR_EACH (learning_packet, list_node, &packets) {
         int ret;
+        void *port_void = learning_packet->frame;
 
-        ret = ofproto_dpif_send_packet(learning_packet->private_p, learning_packet);
+        /* Restore 'frame'. */
+        learning_packet->frame = ofpbuf_data(learning_packet);
+        ret = ofproto_dpif_send_packet(port_void, learning_packet);
         if (ret) {
             error = ret;
             n_errors++;
@@ -2487,6 +2989,56 @@ set_mac_table_config(struct ofproto *ofproto_, unsigned int idle_time,
     mac_learning_set_max_entries(ofproto->ml, max_entries);
     ovs_rwlock_unlock(&ofproto->ml->rwlock);
 }
+
+/* Configures multicast snooping on 'ofproto_' using the settings
+ * defined in 's'. */
+static int
+set_mcast_snooping(struct ofproto *ofproto_,
+                   const struct ofproto_mcast_snooping_settings *s)
+{
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+
+    /* Only revalidate flows if the configuration changed.
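+     * ('!s != !ofproto->ms' is true exactly when one of the two is null
+     * and the other nonnull, i.e. when snooping is being switched on or
+     * off.)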
*/ + if (!s != !ofproto->ms) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } + + if (s) { + if (!ofproto->ms) { + ofproto->ms = mcast_snooping_create(); + } + + ovs_rwlock_wrlock(&ofproto->ms->rwlock); + mcast_snooping_set_idle_time(ofproto->ms, s->idle_time); + mcast_snooping_set_max_entries(ofproto->ms, s->max_entries); + if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) { + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } + ovs_rwlock_unlock(&ofproto->ms->rwlock); + } else { + mcast_snooping_unref(ofproto->ms); + ofproto->ms = NULL; + } + + return 0; +} + +/* Configures multicast snooping port's flood setting on 'ofproto'. */ +static int +set_mcast_snooping_port(struct ofproto *ofproto_, void *aux, bool flood) +{ + struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); + struct ofbundle *bundle = bundle_lookup(ofproto, aux); + + if (ofproto->ms) { + ovs_rwlock_wrlock(&ofproto->ms->rwlock); + mcast_snooping_set_port_flood(ofproto->ms, bundle->vlan, bundle, + flood); + ovs_rwlock_unlock(&ofproto->ms->rwlock); + } + return 0; +} + /* Ports. */ @@ -2597,7 +3149,12 @@ port_run(struct ofport_dpif *ofport) if (ofport->may_enable != enable) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto); + ofproto->backer->need_revalidate = REV_PORT_TOGGLED; + + if (ofport->rstp_port) { + rstp_port_set_mac_operational(ofport->rstp_port, enable); + } } ofport->may_enable = enable; @@ -2851,24 +3408,37 @@ static void rule_expire(struct rule_dpif *rule) OVS_REQUIRES(ofproto_mutex) { - uint16_t idle_timeout, hard_timeout; + uint16_t hard_timeout, idle_timeout; long long int now = time_msec(); - int reason; - - ovs_assert(!rule->up.pending); + int reason = -1; - /* Has 'rule' expired? */ - ovs_mutex_lock(&rule->up.mutex); hard_timeout = rule->up.hard_timeout; idle_timeout = rule->up.idle_timeout; - if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) { - reason = OFPRR_HARD_TIMEOUT; - } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) { - reason = OFPRR_IDLE_TIMEOUT; - } else { - reason = -1; + + /* Has 'rule' expired? */ + if (hard_timeout) { + long long int modified; + + ovs_mutex_lock(&rule->up.mutex); + modified = rule->up.modified; + ovs_mutex_unlock(&rule->up.mutex); + + if (now > modified + hard_timeout * 1000) { + reason = OFPRR_HARD_TIMEOUT; + } + } + + if (reason < 0 && idle_timeout) { + long long int used; + + ovs_mutex_lock(&rule->stats_mutex); + used = rule->stats.used; + ovs_mutex_unlock(&rule->stats_mutex); + + if (now > used + idle_timeout * 1000) { + reason = OFPRR_IDLE_TIMEOUT; + } } - ovs_mutex_unlock(&rule->up.mutex); if (reason >= 0) { COVERAGE_INC(ofproto_dpif_expired); @@ -2895,28 +3465,30 @@ ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto, ovs_assert((rule != NULL) != (ofpacts != NULL)); dpif_flow_stats_extract(flow, packet, time_msec(), &stats); + if (rule) { rule_dpif_credit_stats(rule, &stats); } - xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet); + xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule, + stats.tcp_flags, packet); xin.ofpacts = ofpacts; xin.ofpacts_len = ofpacts_len; xin.resubmit_stats = &stats; xlate_actions(&xin, &xout); + execute.actions = ofpbuf_data(xout.odp_actions); + execute.actions_len = ofpbuf_size(xout.odp_actions); + execute.packet = packet; + execute.md = pkt_metadata_from_flow(flow); + execute.needs_help = (xout.slow & SLOW_ACTION) != 0; + + /* Fix up in_port. 
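+     * Packets that do not come in on a real OpenFlow port, e.g. packets
+     * sent by the controller, have an in_port of OFPP_NONE; the datapath
+     * expects a concrete port, so treat them as arriving on OFPP_LOCAL.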
*/ in_port = flow->in_port.ofp_port; if (in_port == OFPP_NONE) { in_port = OFPP_LOCAL; } - execute.actions = xout.odp_actions.data; - execute.actions_len = xout.odp_actions.size; - execute.packet = packet; - execute.md.tunnel = flow->tunnel; - execute.md.skb_priority = flow->skb_priority; - execute.md.pkt_mark = flow->pkt_mark; - execute.md.in_port = ofp_port_to_odp_port(ofproto, in_port); - execute.needs_help = (xout.slow & SLOW_ACTION) != 0; + execute.md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port); error = dpif_execute(ofproto->backer->dpif, &execute); @@ -2930,24 +3502,12 @@ rule_dpif_credit_stats(struct rule_dpif *rule, const struct dpif_flow_stats *stats) { ovs_mutex_lock(&rule->stats_mutex); - rule->packet_count += stats->n_packets; - rule->byte_count += stats->n_bytes; - rule->up.used = MAX(rule->up.used, stats->used); + rule->stats.n_packets += stats->n_packets; + rule->stats.n_bytes += stats->n_bytes; + rule->stats.used = MAX(rule->stats.used, stats->used); ovs_mutex_unlock(&rule->stats_mutex); } -bool -rule_dpif_is_fail_open(const struct rule_dpif *rule) -{ - return is_fail_open_rule(&rule->up); -} - -bool -rule_dpif_is_table_miss(const struct rule_dpif *rule) -{ - return rule_is_table_miss(&rule->up); -} - ovs_be64 rule_dpif_get_flow_cookie(const struct rule_dpif *rule) OVS_REQUIRES(rule->up.mutex) @@ -2965,99 +3525,267 @@ rule_dpif_reduce_timeouts(struct rule_dpif *rule, uint16_t idle_timeout, /* Returns 'rule''s actions. The caller owns a reference on the returned * actions and must eventually release it (with rule_actions_unref()) to avoid * a memory leak. */ -struct rule_actions * +const struct rule_actions * rule_dpif_get_actions(const struct rule_dpif *rule) { return rule_get_actions(&rule->up); } -/* Lookup 'flow' in 'ofproto''s classifier. If 'wc' is non-null, sets - * the fields that were relevant as part of the lookup. */ +/* Sets 'rule''s recirculation id. */ +static void +rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id) + OVS_REQUIRES(rule->up.mutex) +{ + ovs_assert(!rule->recirc_id); + rule->recirc_id = id; +} + +/* Returns 'rule''s recirculation id. */ +uint32_t +rule_dpif_get_recirc_id(struct rule_dpif *rule) + OVS_REQUIRES(rule->up.mutex) +{ + if (!rule->recirc_id) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + rule_dpif_set_recirc_id(rule, ofproto_dpif_alloc_recirc_id(ofproto)); + } + return rule->recirc_id; +} + +/* Sets 'rule''s recirculation id. */ void -rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow, - struct flow_wildcards *wc, struct rule_dpif **rule) +rule_set_recirc_id(struct rule *rule_, uint32_t id) { - struct ofport_dpif *port; + struct rule_dpif *rule = rule_dpif_cast(rule_); - if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) { - return; + ovs_mutex_lock(&rule->up.mutex); + rule_dpif_set_recirc_id(rule, id); + ovs_mutex_unlock(&rule->up.mutex); +} + +/* Lookup 'flow' in table 0 of 'ofproto''s classifier. + * If 'wc' is non-null, sets the fields that were relevant as part of + * the lookup. Returns the table_id where a match or miss occurred. + * + * The return value will be zero unless there was a miss and + * OFPTC11_TABLE_MISS_CONTINUE is in effect for the sequence of tables + * where misses occur. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. If the '*rule' needs to stay around longer, + * a non-zero 'take_ref' must be passed in to cause a reference to be taken + * on it before this returns. 
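+ *
+ * A sketch of a typical call (hypothetical caller, for illustration
+ * only):
+ *
+ *     struct rule_dpif *rule;
+ *     uint8_t table_id;
+ *
+ *     table_id = rule_dpif_lookup(ofproto, &flow, &wc, &rule,
+ *                                 false, NULL);
+ *     if (rule) {
+ *         ...use 'rule' before the next RCU quiescent period...
+ *     }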
*/ +uint8_t +rule_dpif_lookup(struct ofproto_dpif *ofproto, struct flow *flow, + struct flow_wildcards *wc, struct rule_dpif **rule, + bool take_ref, const struct dpif_flow_stats *stats) +{ + enum rule_dpif_lookup_verdict verdict; + enum ofputil_port_config config = 0; + uint8_t table_id; + + if (ofproto_dpif_get_enable_recirc(ofproto)) { + /* Always exactly match recirc_id since datapath supports + * recirculation. */ + if (wc) { + wc->masks.recirc_id = UINT32_MAX; + } + + /* Start looking up from internal table for post recirculation flows + * or packets. We can also simply send all, including normal flows + * or packets to the internal table. They will not match any post + * recirculation rules except the 'catch all' rule that resubmit + * them to table 0. + * + * As an optimization, we send normal flows and packets to table 0 + * directly, saving one table lookup. */ + table_id = flow->recirc_id ? TBL_INTERNAL : 0; + } else { + table_id = 0; } - port = get_ofp_port(ofproto, flow->in_port.ofp_port); - if (!port) { - VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, - flow->in_port.ofp_port); + + verdict = rule_dpif_lookup_from_table(ofproto, flow, wc, true, + &table_id, rule, take_ref, stats); + + switch (verdict) { + case RULE_DPIF_LOOKUP_VERDICT_MATCH: + return table_id; + case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER: { + struct ofport_dpif *port; + + port = get_ofp_port(ofproto, flow->in_port.ofp_port); + if (!port) { + VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16, + flow->in_port.ofp_port); + } + config = port ? port->up.pp.config : 0; + break; + } + case RULE_DPIF_LOOKUP_VERDICT_DROP: + config = OFPUTIL_PC_NO_PACKET_IN; + break; + case RULE_DPIF_LOOKUP_VERDICT_DEFAULT: + if (!connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) { + config = OFPUTIL_PC_NO_PACKET_IN; + } + break; + default: + OVS_NOT_REACHED(); } - choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule, - ofproto->no_packet_in_rule, rule); + choose_miss_rule(config, ofproto->miss_rule, + ofproto->no_packet_in_rule, rule, take_ref); + return table_id; } -bool -rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, +/* The returned rule is valid at least until the next RCU quiescent period. + * If the '*rule' needs to stay around longer, a non-zero 'take_ref' must be + * passed in to cause a reference to be taken on it before this returns. */ +static struct rule_dpif * +rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, uint8_t table_id, const struct flow *flow, struct flow_wildcards *wc, - uint8_t table_id, struct rule_dpif **rule) + bool take_ref) { + struct classifier *cls = &ofproto->up.tables[table_id].cls; const struct cls_rule *cls_rule; - struct classifier *cls; - bool frag; - - *rule = NULL; - if (table_id >= N_TABLES) { - return false; - } - - if (wc) { - memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type); - wc->masks.nw_frag |= FLOW_NW_FRAG_MASK; + struct rule_dpif *rule; + struct flow ofpc_normal_flow; + + if (ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) { + /* We always unwildcard dl_type and nw_frag (for IP), so they + * need not be unwildcarded here. */ + + if (flow->nw_frag & FLOW_NW_FRAG_ANY) { + if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { + /* We must pretend that transport ports are unavailable. */ + ofpc_normal_flow = *flow; + ofpc_normal_flow.tp_src = htons(0); + ofpc_normal_flow.tp_dst = htons(0); + flow = &ofpc_normal_flow; + } else { + /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM). 
+ * Use the drop_frags_rule (which cannot disappear). */ + cls_rule = &ofproto->drop_frags_rule->up.cr; + rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); + if (take_ref) { + rule_dpif_ref(rule); + } + return rule; + } + } } - cls = &ofproto->up.tables[table_id].cls; - ovs_rwlock_rdlock(&cls->rwlock); - frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0; - if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) { - /* We must pretend that transport ports are unavailable. */ - struct flow ofpc_normal_flow = *flow; - ofpc_normal_flow.tp_src = htons(0); - ofpc_normal_flow.tp_dst = htons(0); - cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc); - } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) { - cls_rule = &ofproto->drop_frags_rule->up.cr; - /* Frag mask in wc already set above. */ - } else { + do { cls_rule = classifier_lookup(cls, flow, wc); - } - *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); - rule_dpif_ref(*rule); - ovs_rwlock_unlock(&cls->rwlock); + rule = rule_dpif_cast(rule_from_cls_rule(cls_rule)); - return *rule != NULL; -} + /* Try again if the rule was released before we get the reference. */ + } while (rule && take_ref && !rule_dpif_try_ref(rule)); -/* Given a port configuration (specified as zero if there's no port), chooses - * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a - * flow table miss. */ -void -choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule, - struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule) -{ - *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule; - rule_dpif_ref(*rule); + return rule; } -void -rule_dpif_ref(struct rule_dpif *rule) -{ - if (rule) { - ofproto_rule_ref(&rule->up); +/* Look up 'flow' in 'ofproto''s classifier starting from table '*table_id'. + * Stores the rule that was found in '*rule', or NULL if none was found. + * Updates 'wc', if nonnull, to reflect the fields that were used during the + * lookup. + * + * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but + * if none is found then the table miss configuration for that table is + * honored, which can result in additional lookups in other OpenFlow tables. + * In this case the function updates '*table_id' to reflect the final OpenFlow + * table that was searched. + * + * If 'honor_table_miss' is false, then only one table lookup occurs, in + * '*table_id'. + * + * Returns: + * + * - RULE_DPIF_LOOKUP_VERDICT_MATCH if a rule (in '*rule') was found. + * + * - RULE_OFPTC_TABLE_MISS_CONTROLLER if no rule was found and either: + * + 'honor_table_miss' is false + * + a table miss configuration specified that the packet should be + * sent to the controller in this case. + * + * - RULE_DPIF_LOOKUP_VERDICT_DROP if no rule was found, 'honor_table_miss' + * is true and a table miss configuration specified that the packet + * should be dropped in this case. + * + * - RULE_DPIF_LOOKUP_VERDICT_DEFAULT if no rule was found, + * 'honor_table_miss' is true and a table miss configuration has + * not been specified in this case. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. If the '*rule' needs to stay around longer, + * a non-zero 'take_ref' must be passed in to cause a reference to be taken + * on it before this returns. 
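+ *
+ * A sketch of how a caller might dispatch on the verdict (illustrative
+ * only; everything except the function name and the verdict constants
+ * is an assumption):
+ *
+ *     uint8_t table_id = 0;
+ *     struct rule_dpif *rule;
+ *
+ *     switch (rule_dpif_lookup_from_table(ofproto, &flow, &wc, true,
+ *                                         &table_id, &rule, false,
+ *                                         NULL)) {
+ *     case RULE_DPIF_LOOKUP_VERDICT_MATCH:
+ *         ...use '*rule' before the next RCU quiescent period...
+ *         break;
+ *     case RULE_DPIF_LOOKUP_VERDICT_CONTROLLER:
+ *     case RULE_DPIF_LOOKUP_VERDICT_DROP:
+ *     case RULE_DPIF_LOOKUP_VERDICT_DEFAULT:
+ *         ...fall back to a miss rule, cf. choose_miss_rule()...
+ *     }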
*/ +enum rule_dpif_lookup_verdict +rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto, + const struct flow *flow, + struct flow_wildcards *wc, + bool honor_table_miss, + uint8_t *table_id, struct rule_dpif **rule, + bool take_ref, const struct dpif_flow_stats *stats) +{ + uint8_t next_id; + + for (next_id = *table_id; + next_id < ofproto->up.n_tables; + next_id++, next_id += (next_id == TBL_INTERNAL)) + { + *table_id = next_id; + *rule = rule_dpif_lookup_in_table(ofproto, *table_id, flow, wc, + take_ref); + if (stats) { + struct oftable *tbl = &ofproto->up.tables[next_id]; + unsigned long orig; + + atomic_add_relaxed(*rule ? &tbl->n_matched : &tbl->n_missed, + stats->n_packets, &orig); + } + if (*rule) { + return RULE_DPIF_LOOKUP_VERDICT_MATCH; + } else if (!honor_table_miss) { + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; + } else { + switch (ofproto_table_get_miss_config(&ofproto->up, *table_id)) { + case OFPUTIL_TABLE_MISS_CONTINUE: + break; + + case OFPUTIL_TABLE_MISS_CONTROLLER: + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; + + case OFPUTIL_TABLE_MISS_DROP: + return RULE_DPIF_LOOKUP_VERDICT_DROP; + + case OFPUTIL_TABLE_MISS_DEFAULT: + return RULE_DPIF_LOOKUP_VERDICT_DEFAULT; + } + } } + + return RULE_DPIF_LOOKUP_VERDICT_CONTROLLER; } +/* Given a port configuration (specified as zero if there's no port), chooses + * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a + * flow table miss. + * + * The rule is returned in '*rule', which is valid at least until the next + * RCU quiescent period. If the '*rule' needs to stay around longer, + * a reference must be taken on it (rule_dpif_ref()). + */ void -rule_dpif_unref(struct rule_dpif *rule) +choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule, + struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule, + bool take_ref) { - if (rule) { - ofproto_rule_unref(&rule->up); + *rule = config & OFPUTIL_PC_NO_PACKET_IN ? 
no_packet_in_rule : miss_rule; + if (take_ref) { + rule_dpif_ref(*rule); } } @@ -3068,7 +3796,6 @@ complete_operation(struct rule_dpif *rule) struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); ofproto->backer->need_revalidate = REV_FLOW_TABLE; - ofoperation_complete(rule->up.pending, 0); } static struct rule_dpif *rule_dpif_cast(const struct rule *rule) @@ -3092,22 +3819,25 @@ rule_dealloc(struct rule *rule_) static enum ofperr rule_construct(struct rule *rule_) + OVS_NO_THREAD_SAFETY_ANALYSIS { struct rule_dpif *rule = rule_dpif_cast(rule_); - ovs_mutex_init(&rule->stats_mutex); - ovs_mutex_lock(&rule->stats_mutex); - rule->packet_count = 0; - rule->byte_count = 0; - ovs_mutex_unlock(&rule->stats_mutex); + ovs_mutex_init_adaptive(&rule->stats_mutex); + rule->stats.n_packets = 0; + rule->stats.n_bytes = 0; + rule->stats.used = rule->up.modified; + rule->recirc_id = 0; + return 0; } -static void +static enum ofperr rule_insert(struct rule *rule_) OVS_REQUIRES(ofproto_mutex) { struct rule_dpif *rule = rule_dpif_cast(rule_); complete_operation(rule); + return 0; } static void @@ -3122,17 +3852,25 @@ static void rule_destruct(struct rule *rule_) { struct rule_dpif *rule = rule_dpif_cast(rule_); + ovs_mutex_destroy(&rule->stats_mutex); + if (rule->recirc_id) { + struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto); + + ofproto_dpif_free_recirc_id(ofproto, rule->recirc_id); + } } static void -rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes) +rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes, + long long int *used) { struct rule_dpif *rule = rule_dpif_cast(rule_); ovs_mutex_lock(&rule->stats_mutex); - *packets = rule->packet_count; - *bytes = rule->byte_count; + *packets = rule->stats.n_packets; + *bytes = rule->stats.n_bytes; + *used = rule->stats.used; ovs_mutex_unlock(&rule->stats_mutex); } @@ -3162,8 +3900,8 @@ rule_modify_actions(struct rule *rule_, bool reset_counters) if (reset_counters) { ovs_mutex_lock(&rule->stats_mutex); - rule->packet_count = 0; - rule->byte_count = 0; + rule->stats.n_packets = 0; + rule->stats.n_bytes = 0; ovs_mutex_unlock(&rule->stats_mutex); } @@ -3193,58 +3931,80 @@ static void group_construct_stats(struct group_dpif *group) OVS_REQUIRES(group->stats_mutex) { + struct ofputil_bucket *bucket; + const struct list *buckets; + group->packet_count = 0; group->byte_count = 0; - if (!group->bucket_stats) { - group->bucket_stats = xcalloc(group->up.n_buckets, - sizeof *group->bucket_stats); - } else { - memset(group->bucket_stats, 0, group->up.n_buckets * - sizeof *group->bucket_stats); + + group_dpif_get_buckets(group, &buckets); + LIST_FOR_EACH (bucket, list_node, buckets) { + bucket->stats.packet_count = 0; + bucket->stats.byte_count = 0; + } +} + +void +group_dpif_credit_stats(struct group_dpif *group, + struct ofputil_bucket *bucket, + const struct dpif_flow_stats *stats) +{ + ovs_mutex_lock(&group->stats_mutex); + group->packet_count += stats->n_packets; + group->byte_count += stats->n_bytes; + if (bucket) { + bucket->stats.packet_count += stats->n_packets; + bucket->stats.byte_count += stats->n_bytes; + } else { /* Credit to all buckets */ + const struct list *buckets; + + group_dpif_get_buckets(group, &buckets); + LIST_FOR_EACH (bucket, list_node, buckets) { + bucket->stats.packet_count += stats->n_packets; + bucket->stats.byte_count += stats->n_bytes; + } } + ovs_mutex_unlock(&group->stats_mutex); } static enum ofperr group_construct(struct ofgroup *group_) { struct group_dpif *group = 
group_dpif_cast(group_); - ovs_mutex_init(&group->stats_mutex); + const struct ofputil_bucket *bucket; + + /* Prevent group chaining because our locking structure makes it hard to + * implement in a deadlock-free way. (See xlate_group_resource_check().) */ + LIST_FOR_EACH (bucket, list_node, &group->up.buckets) { + const struct ofpact *a; + + OFPACT_FOR_EACH (a, bucket->ofpacts, bucket->ofpacts_len) { + if (a->type == OFPACT_GROUP) { + return OFPERR_OFPGMFC_CHAINING_UNSUPPORTED; + } + } + } + + ovs_mutex_init_adaptive(&group->stats_mutex); ovs_mutex_lock(&group->stats_mutex); group_construct_stats(group); ovs_mutex_unlock(&group->stats_mutex); return 0; } -static void -group_destruct__(struct group_dpif *group) - OVS_REQUIRES(group->stats_mutex) -{ - free(group->bucket_stats); - group->bucket_stats = NULL; -} - static void group_destruct(struct ofgroup *group_) { struct group_dpif *group = group_dpif_cast(group_); - ovs_mutex_lock(&group->stats_mutex); - group_destruct__(group); - ovs_mutex_unlock(&group->stats_mutex); ovs_mutex_destroy(&group->stats_mutex); } static enum ofperr -group_modify(struct ofgroup *group_, struct ofgroup *victim_) +group_modify(struct ofgroup *group_) { - struct group_dpif *group = group_dpif_cast(group_); - struct group_dpif *victim = group_dpif_cast(victim_); + struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto); - ovs_mutex_lock(&group->stats_mutex); - if (victim->up.n_buckets < group->up.n_buckets) { - group_destruct__(group); - } - group_construct_stats(group); - ovs_mutex_unlock(&group->stats_mutex); + ofproto->backer->need_revalidate = REV_FLOW_TABLE; return 0; } @@ -3253,39 +4013,43 @@ static enum ofperr group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs) { struct group_dpif *group = group_dpif_cast(group_); + struct ofputil_bucket *bucket; + const struct list *buckets; + struct bucket_counter *bucket_stats; ovs_mutex_lock(&group->stats_mutex); ogs->packet_count = group->packet_count; ogs->byte_count = group->byte_count; - memcpy(ogs->bucket_stats, group->bucket_stats, - group->up.n_buckets * sizeof *group->bucket_stats); + + group_dpif_get_buckets(group, &buckets); + bucket_stats = ogs->bucket_stats; + LIST_FOR_EACH (bucket, list_node, buckets) { + bucket_stats->packet_count = bucket->stats.packet_count; + bucket_stats->byte_count = bucket->stats.byte_count; + bucket_stats++; + } ovs_mutex_unlock(&group->stats_mutex); return 0; } +/* If the group exists, this function increments the group's reference count. + * + * Make sure to call group_dpif_unref() after no longer needing to maintain + * a reference to the group. */ bool group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id, struct group_dpif **group) - OVS_TRY_RDLOCK(true, (*group)->up.rwlock) { struct ofgroup *ofgroup; bool found; - *group = NULL; found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup); *group = found ? 
group_dpif_cast(ofgroup) : NULL; return found; } -void -group_dpif_release(struct group_dpif *group) - OVS_RELEASES(group->up.rwlock) -{ - ofproto_group_release(&group->up); -} - void group_dpif_get_buckets(const struct group_dpif *group, const struct list **buckets) @@ -3312,7 +4076,7 @@ ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet ovs_mutex_lock(&ofproto->stats_mutex); ofproto->stats.tx_packets++; - ofproto->stats.tx_bytes += packet->size; + ofproto->stats.tx_bytes += ofpbuf_size(packet); ovs_mutex_unlock(&ofproto->stats_mutex); return error; } @@ -3414,6 +4178,36 @@ ofproto_unixctl_fdb_flush(struct unixctl_conn *conn, int argc, unixctl_command_reply(conn, "table successfully flushed"); } +static void +ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc, + const char *argv[], void *aux OVS_UNUSED) +{ + struct ofproto_dpif *ofproto; + + if (argc > 1) { + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply_error(conn, "no such bridge"); + return; + } + + if (!mcast_snooping_enabled(ofproto->ms)) { + unixctl_command_reply_error(conn, "multicast snooping is disabled"); + return; + } + mcast_snooping_mdb_flush(ofproto->ms); + } else { + HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) { + if (!mcast_snooping_enabled(ofproto->ms)) { + continue; + } + mcast_snooping_mdb_flush(ofproto->ms); + } + } + + unixctl_command_reply(conn, "table successfully flushed"); +} + static struct ofport_dpif * ofbundle_get_a_port(const struct ofbundle *bundle) { @@ -3452,17 +4246,74 @@ ofproto_unixctl_fdb_show(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_destroy(&ds); } +static void +ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn, + int argc OVS_UNUSED, + const char *argv[], + void *aux OVS_UNUSED) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + const struct ofproto_dpif *ofproto; + const struct ofbundle *bundle; + const struct mcast_group *grp; + struct mcast_group_bundle *b; + struct mcast_mrouter_bundle *mrouter; + + ofproto = ofproto_dpif_lookup(argv[1]); + if (!ofproto) { + unixctl_command_reply_error(conn, "no such bridge"); + return; + } + + if (!mcast_snooping_enabled(ofproto->ms)) { + unixctl_command_reply_error(conn, "multicast snooping is disabled"); + return; + } + + ds_put_cstr(&ds, " port VLAN GROUP Age\n"); + ovs_rwlock_rdlock(&ofproto->ms->rwlock); + LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) { + LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) { + char name[OFP_MAX_PORT_NAME_LEN]; + + bundle = b->port; + ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, + name, sizeof name); + ds_put_format(&ds, "%5s %4d "IP_FMT" %3d\n", + name, grp->vlan, IP_ARGS(grp->ip4), + mcast_bundle_age(ofproto->ms, b)); + } + } + + /* ports connected to multicast routers */ + LIST_FOR_EACH(mrouter, mrouter_node, &ofproto->ms->mrouter_lru) { + char name[OFP_MAX_PORT_NAME_LEN]; + + bundle = mrouter->port; + ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port, + name, sizeof name); + ds_put_format(&ds, "%5s %4d querier %3d\n", + name, mrouter->vlan, + mcast_mrouter_age(ofproto->ms, mrouter)); + } + ovs_rwlock_unlock(&ofproto->ms->rwlock); + unixctl_command_reply(conn, ds_cstr(&ds)); + ds_destroy(&ds); +} + struct trace_ctx { struct xlate_out xout; struct xlate_in xin; + const struct flow *key; struct flow flow; + struct flow_wildcards wc; struct ds *result; }; static void trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule) { - struct 
rule_actions *actions; + const struct rule_actions *actions; ovs_be64 cookie; ds_put_char_multiple(result, '\t', level); @@ -3486,8 +4337,6 @@ trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule) ds_put_cstr(result, "OpenFlow actions="); ofpacts_format(actions->ofpacts, actions->ofpacts_len, result); ds_put_char(result, '\n'); - - rule_actions_unref(actions); } static void @@ -3496,7 +4345,9 @@ trace_format_flow(struct ds *result, int level, const char *title, { ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); - if (flow_equal(&trace->xin.flow, &trace->flow)) { + /* Do not report unchanged flows for resubmits. */ + if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow)) + || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) { ds_put_cstr(result, "unchanged"); } else { flow_format(result, &trace->xin.flow); @@ -3523,11 +4374,26 @@ static void trace_format_odp(struct ds *result, int level, const char *title, struct trace_ctx *trace) { - struct ofpbuf *odp_actions = &trace->xout.odp_actions; + struct ofpbuf *odp_actions = trace->xout.odp_actions; ds_put_char_multiple(result, '\t', level); ds_put_format(result, "%s: ", title); - format_odp_actions(result, odp_actions->data, odp_actions->size); + format_odp_actions(result, ofpbuf_data(odp_actions), + ofpbuf_size(odp_actions)); + ds_put_char(result, '\n'); +} + +static void +trace_format_megaflow(struct ds *result, int level, const char *title, + struct trace_ctx *trace) +{ + struct match match; + + ds_put_char_multiple(result, '\t', level); + ds_put_format(result, "%s: ", title); + flow_wildcards_or(&trace->wc, &trace->xout.wc, &trace->wc); + match_init(&match, trace->key, &trace->wc); + match_format(&match, result, OFP_DEFAULT_PRIORITY); ds_put_char(result, '\n'); } @@ -3541,6 +4407,7 @@ trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse) trace_format_flow(result, recurse + 1, "Resubmitted flow", trace); trace_format_regs(result, recurse + 1, "Resubmitted regs", trace); trace_format_odp(result, recurse + 1, "Resubmitted odp", trace); + trace_format_megaflow(result, recurse + 1, "Resubmitted megaflow", trace); trace_format_rule(result, recurse + 1, rule); } @@ -3636,11 +4503,21 @@ parse_flow_and_packet(int argc, const char *argv[], goto exit; } - if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, flow, - NULL, ofprotop, NULL, NULL, NULL, NULL)) { + if (odp_flow_key_to_flow(ofpbuf_data(&odp_key), ofpbuf_size(&odp_key), + flow) == ODP_FIT_ERROR) { + error = "Failed to parse flow key"; + goto exit; + } + + *ofprotop = xlate_lookup_ofproto(backer, flow, + &flow->in_port.ofp_port); + if (*ofprotop == NULL) { error = "Invalid datapath flow"; goto exit; } + + vsp_adjust_flow(*ofprotop, flow, NULL); + } else { char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL); @@ -3664,15 +4541,14 @@ parse_flow_and_packet(int argc, const char *argv[], /* Generate a packet, if requested. */ if (packet) { - if (!packet->size) { + if (!ofpbuf_size(packet)) { flow_compose(packet, flow); } else { - union flow_in_port in_port = flow->in_port; + struct pkt_metadata md = pkt_metadata_from_flow(flow); /* Use the metadata from the flow and the packet argument * to reconstruct the flow. */ - flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL, - &in_port, flow); + flow_extract(packet, &md, flow); } } @@ -3737,7 +4613,7 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, ofpbuf_init(&ofpacts, 0); /* Parse actions. 
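 * (For example, a user-supplied action string such as
 * "resubmit(,1),output:2" -- a made-up input -- is converted into a
 * list of ofpacts by the call below.)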
*/ - error = parse_ofpacts(argv[--argc], &ofpacts, &usable_protocols); + error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols); if (error) { unixctl_command_reply_error(conn, error); free(error); @@ -3778,11 +4654,11 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, goto exit; } if (enforce_consistency) { - retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow, - u16_to_ofp(ofproto->up.max_ports), + retval = ofpacts_check_consistency(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), + &flow, u16_to_ofp(ofproto->up.max_ports), 0, 0, usable_protocols); } else { - retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow, + retval = ofpacts_check(ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &flow, u16_to_ofp(ofproto->up.max_ports), 0, 0, &usable_protocols); } @@ -3794,7 +4670,8 @@ ofproto_unixctl_trace_actions(struct unixctl_conn *conn, int argc, goto exit; } - ofproto_trace(ofproto, &flow, packet, ofpacts.data, ofpacts.size, &result); + ofproto_trace(ofproto, &flow, packet, + ofpbuf_data(&ofpacts), ofpbuf_size(&ofpacts), &result); unixctl_command_reply(conn, ds_cstr(&result)); exit: @@ -3813,24 +4690,24 @@ exit: * If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to * trace, otherwise the actions are determined by a flow table lookup. */ static void -ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, +ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow, const struct ofpbuf *packet, const struct ofpact ofpacts[], size_t ofpacts_len, struct ds *ds) { struct rule_dpif *rule; - struct flow_wildcards wc; + struct trace_ctx trace; ds_put_format(ds, "Bridge: %s\n", ofproto->up.name); ds_put_cstr(ds, "Flow: "); flow_format(ds, flow); ds_put_char(ds, '\n'); - flow_wildcards_init_catchall(&wc); + flow_wildcards_init_catchall(&trace.wc); if (ofpacts) { rule = NULL; } else { - rule_dpif_lookup(ofproto, flow, &wc, &rule); + rule_dpif_lookup(ofproto, flow, &trace.wc, &rule, false, NULL); trace_format_rule(ds, 0, rule); if (rule == ofproto->miss_rule) { @@ -3845,14 +4722,11 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, } if (rule || ofpacts) { - struct trace_ctx trace; - struct match match; - uint16_t tcp_flags; - - tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0; trace.result = ds; - trace.flow = *flow; - xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet); + trace.key = flow; /* Original flow key, used for megaflow. */ + trace.flow = *flow; /* May be modified by actions. 
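+ * (The xlate_in_init() call below passes the in_port and the TCP flags
+ * explicitly; the flags now come from the flow itself rather than from
+ * packet_get_tcp_flags().)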
*/ + xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, rule, + ntohs(flow->tcp_flags), packet); if (ofpacts) { trace.xin.ofpacts = ofpacts; trace.xin.ofpacts_len = ofpacts_len; @@ -3861,19 +4735,14 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, trace.xin.report_hook = trace_report; xlate_actions(&trace.xin, &trace.xout); - flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc); ds_put_char(ds, '\n'); trace_format_flow(ds, 0, "Final flow", &trace); - - match_init(&match, &trace.flow, &trace.xout.wc); - ds_put_cstr(ds, "Relevant fields: "); - match_format(&match, ds, OFP_DEFAULT_PRIORITY); - ds_put_char(ds, '\n'); + trace_format_megaflow(ds, 0, "Megaflow", &trace); ds_put_cstr(ds, "Datapath actions: "); - format_odp_actions(ds, trace.xout.odp_actions.data, - trace.xout.odp_actions.size); + format_odp_actions(ds, ofpbuf_data(trace.xout.odp_actions), + ofpbuf_size(trace.xout.odp_actions)); if (trace.xout.slow) { enum slow_path_reason slow; @@ -3894,8 +4763,6 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, xlate_out_uninit(&trace.xout); } - - rule_dpif_unref(rule); } /* Store the current ofprotos in 'ofproto_shash'. Returns a sorted list @@ -4025,39 +4892,25 @@ ofproto_unixctl_dpif_show(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_destroy(&ds); } -static bool -ofproto_dpif_contains_flow(const struct ofproto_dpif *ofproto, - const struct nlattr *key, size_t key_len) -{ - enum odp_key_fitness fitness; - struct ofproto_dpif *ofp; - struct flow flow; - - xlate_receive(ofproto->backer, NULL, key, key_len, &flow, &fitness, &ofp, - NULL, NULL, NULL, NULL); - return ofp == ofproto; -} - static void ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[], void *aux OVS_UNUSED) { - struct ds ds = DS_EMPTY_INITIALIZER; - const struct dpif_flow_stats *stats; const struct ofproto_dpif *ofproto; - struct dpif_flow_dump flow_dump; - const struct nlattr *actions; - const struct nlattr *mask; - const struct nlattr *key; - size_t actions_len; - size_t mask_len; - size_t key_len; + + struct ds ds = DS_EMPTY_INITIALIZER; bool verbosity = false; + struct dpif_port dpif_port; struct dpif_port_dump port_dump; struct hmap portno_names; + struct dpif_flow_dump *flow_dump; + struct dpif_flow_dump_thread *flow_dump_thread; + struct dpif_flow f; + int error; + ofproto = ofproto_dpif_lookup(argv[argc - 1]); if (!ofproto) { unixctl_command_reply_error(conn, "no such bridge"); @@ -4074,23 +4927,28 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, } ds_init(&ds); - dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif); - while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len, - &actions, &actions_len, &stats)) { - if (!ofproto_dpif_contains_flow(ofproto, key, key_len)) { + flow_dump = dpif_flow_dump_create(ofproto->backer->dpif); + flow_dump_thread = dpif_flow_dump_thread_create(flow_dump); + while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) { + struct flow flow; + + if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR + || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) { continue; } - odp_flow_format(key, key_len, mask, mask_len, &portno_names, &ds, - verbosity); + odp_flow_format(f.key, f.key_len, f.mask, f.mask_len, + &portno_names, &ds, verbosity); ds_put_cstr(&ds, ", "); - dpif_flow_stats_format(stats, &ds); + dpif_flow_stats_format(&f.stats, &ds); ds_put_cstr(&ds, ", actions:"); - format_odp_actions(&ds, actions, actions_len); + 
format_odp_actions(&ds, f.actions, f.actions_len); ds_put_char(&ds, '\n'); } + dpif_flow_dump_thread_destroy(flow_dump_thread); + error = dpif_flow_dump_destroy(flow_dump); - if (dpif_flow_dump_done(&flow_dump)) { + if (error) { ds_clear(&ds); ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(error)); unixctl_command_reply_error(conn, ds_cstr(&ds)); @@ -4123,6 +4981,10 @@ ofproto_dpif_unixctl_init(void) ofproto_unixctl_fdb_flush, NULL); unixctl_command_register("fdb/show", "bridge", 1, 1, ofproto_unixctl_fdb_show, NULL); + unixctl_command_register("mdb/flush", "[bridge]", 0, 1, + ofproto_unixctl_mcast_snooping_flush, NULL); + unixctl_command_register("mdb/show", "bridge", 1, 1, + ofproto_unixctl_mcast_snooping_show, NULL); unixctl_command_register("dpif/dump-dps", "", 0, 0, ofproto_unixctl_dpif_dump_dps, NULL); unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show, @@ -4130,6 +4992,14 @@ ofproto_dpif_unixctl_init(void) unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2, ofproto_unixctl_dpif_dump_flows, NULL); } + +/* Returns true if 'table_id' is the table used for internal rules, + * false otherwise. */ +bool +table_is_internal(uint8_t table_id) +{ + return table_id == TBL_INTERNAL; +} /* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.) * @@ -4157,7 +5027,7 @@ set_realdev(struct ofport *ofport_, ofp_port_t realdev_ofp_port, int vid) if (realdev_ofp_port && ofport->bundle) { /* vlandevs are enslaved to their realdevs, so they are not allowed to * themselves be part of a bundle. */ - bundle_set(ofport->up.ofproto, ofport->bundle, NULL); + bundle_set(ofport_->ofproto, ofport->bundle, NULL); } ofport->realdev_ofp_port = realdev_ofp_port; @@ -4180,14 +5050,11 @@ bool ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto) OVS_EXCLUDED(ofproto->vsp_mutex) { - bool ret; - - ovs_mutex_lock(&ofproto->vsp_mutex); - ret = !hmap_is_empty(&ofproto->realdev_vid_map); - ovs_mutex_unlock(&ofproto->vsp_mutex); - return ret; + /* hmap_is_empty is thread safe. */ + return !hmap_is_empty(&ofproto->realdev_vid_map); } + static ofp_port_t vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto, ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci) @@ -4223,6 +5090,10 @@ vsp_realdev_to_vlandev(const struct ofproto_dpif *ofproto, { ofp_port_t ret; + /* hmap_is_empty is thread safe; see if we can return immediately. */ + if (hmap_is_empty(&ofproto->realdev_vid_map)) { + return realdev_ofp_port; + } ovs_mutex_lock(&ofproto->vsp_mutex); ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci); ovs_mutex_unlock(&ofproto->vsp_mutex); @@ -4276,16 +5147,23 @@ vsp_vlandev_to_realdev(const struct ofproto_dpif *ofproto, /* Given 'flow', a flow representing a packet received on 'ofproto', checks * whether 'flow->in_port' represents a Linux VLAN device. If so, changes * 'flow->in_port' to the "real" device backing the VLAN device, sets - * 'flow->vlan_tci' to the VLAN VID, and returns true. Otherwise (which is - * always the case unless VLAN splinters are enabled), returns false without - * making any changes. */ + * 'flow->vlan_tci' to the VLAN VID, and returns true. Optionally pushes the + * appropriate VLAN on 'packet' if provided. Otherwise (which is always the + * case unless VLAN splinters are enabled), returns false without making any + * changes. 
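+ *
+ * A minimal caller sketch (hypothetical; assumes 'flow' was just
+ * extracted from 'packet'):
+ *
+ *     if (vsp_adjust_flow(ofproto, &flow, packet)) {
+ *         ...'flow->in_port' now names the real device and 'packet'
+ *         carries the corresponding VLAN tag...
+ *     }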
*/ bool -vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow) +vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow, + struct ofpbuf *packet) OVS_EXCLUDED(ofproto->vsp_mutex) { ofp_port_t realdev; int vid; + /* hmap_is_empty is thread safe. */ + if (hmap_is_empty(&ofproto->vlandev_map)) { + return false; + } + ovs_mutex_lock(&ofproto->vsp_mutex); realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid); ovs_mutex_unlock(&ofproto->vsp_mutex); @@ -4297,6 +5175,15 @@ vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow) * the VLAN device's VLAN ID. */ flow->in_port.ofp_port = realdev; flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI); + + if (packet) { + /* Make the packet resemble the flow, so that it gets sent to an + * OpenFlow controller properly, so that it looks correct for sFlow, + * and so that flow_extract() will get the correct vlan_tci if it is + * called on 'packet'. */ + eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci); + } + return true; } @@ -4386,6 +5273,94 @@ odp_port_to_ofp_port(const struct ofproto_dpif *ofproto, odp_port_t odp_port) } } +uint32_t +ofproto_dpif_alloc_recirc_id(struct ofproto_dpif *ofproto) +{ + struct dpif_backer *backer = ofproto->backer; + + return recirc_id_alloc(backer->rid_pool); +} + +void +ofproto_dpif_free_recirc_id(struct ofproto_dpif *ofproto, uint32_t recirc_id) +{ + struct dpif_backer *backer = ofproto->backer; + + recirc_id_free(backer->rid_pool, recirc_id); +} + +int +ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto, + const struct match *match, int priority, + uint16_t idle_timeout, + const struct ofpbuf *ofpacts, + struct rule **rulep) +{ + struct ofputil_flow_mod fm; + struct rule_dpif *rule; + int error; + + fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.command = OFPFC_ADD; + fm.idle_timeout = idle_timeout; + fm.hard_timeout = 0; + fm.buffer_id = 0; + fm.out_port = 0; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.ofpacts = ofpbuf_data(ofpacts); + fm.ofpacts_len = ofpbuf_size(ofpacts); + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to add internal flow (%s)", + ofperr_to_string(error)); + *rulep = NULL; + return error; + } + + rule = rule_dpif_lookup_in_table(ofproto, TBL_INTERNAL, &fm.match.flow, + &fm.match.wc, false); + if (rule) { + *rulep = &rule->up; + } else { + OVS_NOT_REACHED(); + } + return 0; +} + +int +ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto, + struct match *match, int priority) +{ + struct ofputil_flow_mod fm; + int error; + + fm.match = *match; + fm.priority = priority; + fm.new_cookie = htonll(0); + fm.cookie = htonll(0); + fm.cookie_mask = htonll(0); + fm.modify_cookie = false; + fm.table_id = TBL_INTERNAL; + fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY; + fm.command = OFPFC_DELETE_STRICT; + + error = ofproto_flow_mod(&ofproto->up, &fm); + if (error) { + VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)", + ofperr_to_string(error)); + return error; + } + + return 0; +} + const struct ofproto_class ofproto_dpif_class = { init, enumerate_types, @@ -4403,8 +5378,7 @@ const struct ofproto_class ofproto_dpif_class = { NULL, /* get_memory_usage. 
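 Not implemented per-bridge; see type_get_memory_usage just below.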
*/ type_get_memory_usage, flush, - get_features, - get_tables, + query_tables, port_alloc, port_construct, port_destruct, @@ -4430,6 +5404,7 @@ const struct ofproto_class ofproto_dpif_class = { rule_dealloc, rule_get_stats, rule_execute, + NULL, /* rule_premodify_actions */ rule_modify_actions, set_frag_handling, packet_out, @@ -4438,14 +5413,20 @@ const struct ofproto_class ofproto_dpif_class = { set_sflow, set_ipfix, set_cfm, + cfm_status_changed, get_cfm_status, set_bfd, + bfd_status_changed, get_bfd_status, set_stp, get_stp_status, set_stp_port, get_stp_port_status, get_stp_port_stats, + set_rstp, + get_rstp_status, + set_rstp_port, + get_rstp_port_status, set_queues, bundle_set, bundle_remove, @@ -4455,6 +5436,8 @@ const struct ofproto_class ofproto_dpif_class = { is_mirror_output_bundle, forward_bpdu_changed, set_mac_table_config, + set_mcast_snooping, + set_mcast_snooping_port, set_realdev, NULL, /* meter_get_features */ NULL, /* meter_set */