/*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "connmgr.h"
#include "coverage.h"
#include "cfm.h"
+#include "ovs-lldp.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "fail-open.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
+#include "mcast-snooping.h"
#include "meta-flow.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "ofp-actions.h"
#include "ofp-parse.h"
#include "ofp-print.h"
-#include "ofproto-dpif-governor.h"
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-mirror.h"
#include "ofproto-dpif-monitor.h"
+#include "ofproto-dpif-rid.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-upcall.h"
#include "ofproto-dpif-xlate.h"
#include "poll-loop.h"
+#include "ovs-rcu.h"
+#include "ovs-router.h"
#include "seq.h"
#include "simap.h"
#include "smap.h"
#include "unaligned.h"
#include "unixctl.h"
#include "vlan-bitmap.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(ofproto_dpif);
COVERAGE_DEFINE(ofproto_dpif_expired);
-COVERAGE_DEFINE(facet_revalidate);
-COVERAGE_DEFINE(facet_unexpected);
-COVERAGE_DEFINE(facet_create);
-COVERAGE_DEFINE(facet_remove);
-COVERAGE_DEFINE(subfacet_create);
-COVERAGE_DEFINE(subfacet_destroy);
-COVERAGE_DEFINE(subfacet_install_fail);
COVERAGE_DEFINE(packet_in_overflow);
-/* Number of implemented OpenFlow tables. */
-enum { N_TABLES = 255 };
-enum { TBL_INTERNAL = N_TABLES - 1 }; /* Used for internal hidden rules. */
-BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
-
struct flow_miss;
-struct facet;
struct rule_dpif {
struct rule up;
/* These statistics:
*
- * - Do include packets and bytes from facets that have been deleted or
- * whose own statistics have been folded into the rule.
- *
- * - Do include packets and bytes sent "by hand" that were accounted to
- * the rule without any facet being involved (this is a rare corner
- * case in rule_execute()).
- *
- * - Do not include packet or bytes that can be obtained from any facet's
- * packet_count or byte_count member or that can be obtained from the
- * datapath by, e.g., dpif_flow_get() for any subfacet.
- */
+ * - Do include packets and bytes from datapath flows which have not
+ * recently been processed by a revalidator. */
struct ovs_mutex stats_mutex;
- uint64_t packet_count OVS_GUARDED; /* Number of packets received. */
- uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */
+ struct dpif_flow_stats stats OVS_GUARDED;
+
+ /* If non-NULL, will point to a new rule (for which a reference is held) to
+ * which all the stats updates should be forwarded. This exists only
+ * transitionally when flows are replaced.
+ *
+ * Protected by stats_mutex. If both 'rule->stats_mutex' and
+ * 'rule->new_rule->stats_mutex' must be held together, acquire them in that
+ * order. */
+ struct rule_dpif *new_rule OVS_GUARDED;
+
+ /* If non-zero then the recirculation id that has
+ * been allocated for use with this rule.
+ * The recirculation id and associated internal flow should
+ * be freed when the rule is freed */
+ uint32_t recirc_id;
};
-static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
+/* RULE_CAST() depends on this. */
+BUILD_ASSERT_DECL(offsetof(struct rule_dpif, up) == 0);
+
+static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes,
+ long long int *used);
static struct rule_dpif *rule_dpif_cast(const struct rule *);
+static void rule_expire(struct rule_dpif *);
struct group_dpif {
struct ofgroup up;
/* These statistics:
*
- * - Do include packets and bytes from facets that have been deleted or
- * whose own statistics have been folded into the rule.
- *
- * - Do include packets and bytes sent "by hand" that were accounted to
- * the rule without any facet being involved (this is a rare corner
- * case in rule_execute()).
- *
- * - Do not include packet or bytes that can be obtained from any facet's
- * packet_count or byte_count member or that can be obtained from the
- * datapath by, e.g., dpif_flow_get() for any subfacet.
- */
+ * - Do include packets and bytes from datapath flows which have not
+ * recently been processed by a revalidator. */
struct ovs_mutex stats_mutex;
uint64_t packet_count OVS_GUARDED; /* Number of packets received. */
uint64_t byte_count OVS_GUARDED; /* Number of bytes received. */
- struct bucket_counter *bucket_stats OVS_GUARDED; /* Bucket statistics. */
};
struct ofbundle {
char *name; /* Identifier for log messages. */
/* Configuration. */
- struct list ports; /* Contains "struct ofport"s. */
+ struct ovs_list ports; /* Contains "struct ofport"s. */
enum port_vlan_mode vlan_mode; /* VLAN mode */
int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */
unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1.
static void bundle_del_port(struct ofport_dpif *);
static void bundle_run(struct ofbundle *);
static void bundle_wait(struct ofbundle *);
+static void bundle_flush_macs(struct ofbundle *, bool);
+static void bundle_move(struct ofbundle *, struct ofbundle *);
static void stp_run(struct ofproto_dpif *ofproto);
static void stp_wait(struct ofproto_dpif *ofproto);
static int set_stp_port(struct ofport *,
const struct ofproto_port_stp_settings *);
-static void compose_slow_path(const struct ofproto_dpif *, const struct flow *,
- enum slow_path_reason,
- uint64_t *stub, size_t stub_size,
- const struct nlattr **actionsp,
- size_t *actions_lenp);
-
-/* A subfacet (see "struct subfacet" below) has three possible installation
- * states:
- *
- * - SF_NOT_INSTALLED: Not installed in the datapath. This will only be the
- * case just after the subfacet is created, just before the subfacet is
- * destroyed, or if the datapath returns an error when we try to install a
- * subfacet.
- *
- * - SF_FAST_PATH: The subfacet's actions are installed in the datapath.
- *
- * - SF_SLOW_PATH: An action that sends every packet for the subfacet through
- * ofproto_dpif is installed in the datapath.
- */
-enum subfacet_path {
- SF_NOT_INSTALLED, /* No datapath flow for this subfacet. */
- SF_FAST_PATH, /* Full actions are installed. */
- SF_SLOW_PATH, /* Send-to-userspace action is installed. */
-};
-
-/* A dpif flow and actions associated with a facet.
- *
- * See also the large comment on struct facet. */
-struct subfacet {
- /* Owners. */
- struct hmap_node hmap_node; /* In struct ofproto_dpif 'subfacets' list. */
- struct list list_node; /* In struct facet's 'facets' list. */
- struct facet *facet; /* Owning facet. */
- struct dpif_backer *backer; /* Owning backer. */
-
- struct nlattr *key;
- int key_len;
-
- long long int used; /* Time last used; time created if not used. */
- long long int created; /* Time created. */
-
- uint64_t dp_packet_count; /* Last known packet count in the datapath. */
- uint64_t dp_byte_count; /* Last known byte count in the datapath. */
-
- enum subfacet_path path; /* Installed in datapath? */
-};
-
-#define SUBFACET_DESTROY_MAX_BATCH 50
-
-static struct subfacet *subfacet_create(struct facet *, struct flow_miss *,
- uint32_t key_hash);
-static struct subfacet *subfacet_find(struct dpif_backer *,
- const struct nlattr *key, size_t key_len,
- uint32_t key_hash);
-static void subfacet_destroy(struct subfacet *);
-static void subfacet_destroy__(struct subfacet *);
-static void subfacet_destroy_batch(struct dpif_backer *,
- struct subfacet **, int n);
-static void subfacet_reset_dp_stats(struct subfacet *,
- struct dpif_flow_stats *);
-static void subfacet_update_stats(struct subfacet *,
- const struct dpif_flow_stats *);
-static int subfacet_install(struct subfacet *,
- const struct ofpbuf *odp_actions,
- struct dpif_flow_stats *);
-static void subfacet_uninstall(struct subfacet *);
-
-/* A unique, non-overlapping instantiation of an OpenFlow flow.
- *
- * A facet associates a "struct flow", which represents the Open vSwitch
- * userspace idea of an exact-match flow, with one or more subfacets.
- * While the facet is created based on an exact-match flow, it is stored
- * within the ofproto based on the wildcards that could be expressed
- * based on the flow table and other configuration. (See the 'wc'
- * description in "struct xlate_out" for more details.)
- *
- * Each subfacet tracks the datapath's idea of the flow equivalent to
- * the facet. When the kernel module (or other dpif implementation) and
- * Open vSwitch userspace agree on the definition of a flow key, there
- * is exactly one subfacet per facet. If the dpif implementation
- * supports more-specific flow matching than userspace, however, a facet
- * can have more than one subfacet. Examples include the dpif
- * implementation not supporting the same wildcards as userspace or some
- * distinction in flow that userspace simply doesn't understand.
- *
- * Flow expiration works in terms of subfacets, so a facet must have at
- * least one subfacet or it will never expire, leaking memory. */
-struct facet {
- /* Owner. */
- struct ofproto_dpif *ofproto;
-
- /* Owned data. */
- struct list subfacets;
- long long int used; /* Time last used; time created if not used. */
-
- /* Key. */
- struct flow flow; /* Flow of the creating subfacet. */
- struct cls_rule cr; /* In 'ofproto_dpif's facets classifier. */
-
- /* These statistics:
- *
- * - Do include packets and bytes sent "by hand", e.g. with
- * dpif_execute().
- *
- * - Do include packets and bytes that were obtained from the datapath
- * when a subfacet's statistics were reset (e.g. dpif_flow_put() with
- * DPIF_FP_ZERO_STATS).
- *
- * - Do not include packets or bytes that can be obtained from the
- * datapath for any existing subfacet.
- */
- uint64_t packet_count; /* Number of packets received. */
- uint64_t byte_count; /* Number of bytes received. */
-
- /* Resubmit statistics. */
- uint64_t prev_packet_count; /* Number of packets from last stats push. */
- uint64_t prev_byte_count; /* Number of bytes from last stats push. */
- long long int prev_used; /* Used time from last stats push. */
-
- /* Accounting. */
- uint16_t tcp_flags; /* TCP flags seen for this 'rule'. */
-
- struct xlate_out xout;
-
- /* Storage for a single subfacet, to reduce malloc() time and space
- * overhead. (A facet always has at least one subfacet and in the common
- * case has exactly one subfacet. However, 'one_subfacet' may not
- * always be valid, since it could have been removed after newer
- * subfacets were pushed onto the 'subfacets' list.) */
- struct subfacet one_subfacet;
-
- long long int learn_rl; /* Rate limiter for facet_learn(). */
-};
-
-static struct facet *facet_create(const struct flow_miss *);
-static void facet_remove(struct facet *);
-static void facet_free(struct facet *);
-
-static struct facet *facet_find(struct ofproto_dpif *, const struct flow *);
-static struct facet *facet_lookup_valid(struct ofproto_dpif *,
- const struct flow *);
-static bool facet_revalidate(struct facet *);
-static bool facet_check_consistency(struct facet *);
-
-static void facet_flush_stats(struct facet *);
-
-static void facet_reset_counters(struct facet *);
-static void flow_push_stats(struct ofproto_dpif *, struct flow *,
- struct dpif_flow_stats *, bool may_learn);
-static void facet_push_stats(struct facet *, bool may_learn);
-static void facet_learn(struct facet *);
-static void push_all_stats(void);
-
-static bool facet_is_controller_flow(struct facet *);
+static void rstp_run(struct ofproto_dpif *ofproto);
+static void set_rstp_port(struct ofport *,
+ const struct ofproto_port_rstp_settings *);
struct ofport_dpif {
struct hmap_node odp_port_node; /* In dpif_backer's "odp_to_ofport_map". */
odp_port_t odp_port;
struct ofbundle *bundle; /* Bundle that contains this port, if any. */
- struct list bundle_node; /* In struct ofbundle's "ports" list. */
+ struct ovs_list bundle_node;/* In struct ofbundle's "ports" list. */
struct cfm *cfm; /* Connectivity Fault Management, if any. */
struct bfd *bfd; /* BFD, if any. */
+ struct lldp *lldp; /* lldp, if any. */
bool may_enable; /* May be enabled in bonds. */
bool is_tunnel; /* This port is a tunnel. */
bool is_layer3; /* This is a layer 3 port. */
enum stp_state stp_state; /* Always STP_DISABLED if STP not in use. */
long long int stp_state_entered;
+ /* Rapid Spanning Tree. */
+ struct rstp_port *rstp_port; /* Rapid Spanning Tree Protocol, if any. */
+ enum rstp_state rstp_state; /* Always RSTP_DISABLED if RSTP not in use. */
+
/* Queue to DSCP mapping. */
struct ofproto_port_queue *qdscp;
size_t n_qdscp;
static void port_run(struct ofport_dpif *);
static int set_bfd(struct ofport *, const struct smap *);
static int set_cfm(struct ofport *, const struct cfm_settings *);
+static int set_lldp(struct ofport *ofport_, const struct smap *cfg);
static void ofport_update_peer(struct ofport_dpif *);
-struct dpif_completion {
- struct list list_node;
- struct ofoperation *op;
-};
-
-/* Reasons that we might need to revalidate every facet, and corresponding
- * coverage counters.
+/* Reasons that we might need to revalidate every datapath flow, and
+ * corresponding coverage counters.
*
* A value of 0 means that there is no need to revalidate.
*
enum revalidate_reason {
REV_RECONFIGURE = 1, /* Switch configuration changed. */
REV_STP, /* Spanning tree protocol port status change. */
+ REV_RSTP, /* RSTP port status change. */
REV_BOND, /* Bonding changed. */
REV_PORT_TOGGLED, /* Port enabled or disabled by CFM, LACP, ...*/
REV_FLOW_TABLE, /* Flow table changed. */
REV_MAC_LEARNING, /* Mac learning changed. */
- REV_INCONSISTENCY /* Facet self-check failed. */
+ REV_MCAST_SNOOPING, /* Multicast snooping changed. */
};
COVERAGE_DEFINE(rev_reconfigure);
COVERAGE_DEFINE(rev_stp);
+COVERAGE_DEFINE(rev_rstp);
COVERAGE_DEFINE(rev_bond);
COVERAGE_DEFINE(rev_port_toggled);
COVERAGE_DEFINE(rev_flow_table);
COVERAGE_DEFINE(rev_mac_learning);
-COVERAGE_DEFINE(rev_inconsistency);
+COVERAGE_DEFINE(rev_mcast_snooping);
/* All datapaths of a given type share a single dpif backer instance. */
struct dpif_backer {
int refcount;
struct dpif *dpif;
struct udpif *udpif;
- struct timer next_expiration;
struct ovs_rwlock odp_to_ofport_lock;
struct hmap odp_to_ofport_map OVS_GUARDED; /* Contains "struct ofport"s. */
struct simap tnl_backers; /* Set of dpif ports backing tunnels. */
- /* Facet revalidation flags applying to facets which use this backer. */
- enum revalidate_reason need_revalidate; /* Revalidate every facet. */
+ enum revalidate_reason need_revalidate; /* Revalidate all flows. */
- struct hmap drop_keys; /* Set of dropped odp keys. */
bool recv_set_enable; /* Enables or disables receiving packets. */
- struct hmap subfacets;
- struct governor *governor;
+ /* Version string of the datapath stored in OVSDB. */
+ char *dp_version_string;
- /* Subfacet statistics.
- *
- * These keep track of the total number of subfacets added and deleted and
- * flow life span. They are useful for computing the flow rates stats
- * exposed via "ovs-appctl dpif/show". The goal is to learn about
- * traffic patterns in ways that we can use later to improve Open vSwitch
- * performance in new situations. */
- unsigned max_n_subfacet; /* Maximum number of flows */
- unsigned avg_n_subfacet; /* Average number of flows. */
+ /* Datapath feature support. */
+ struct dpif_backer_support support;
+ struct atomic_count tnl_count;
};
/* All existing ofproto_backer instances, indexed by ofproto->up.type. */
static struct shash all_dpif_backers = SHASH_INITIALIZER(&all_dpif_backers);
-static void drop_key_clear(struct dpif_backer *);
-
struct ofproto_dpif {
struct hmap_node all_ofproto_dpifs_node; /* In 'all_ofproto_dpifs'. */
struct ofproto up;
struct dpif_backer *backer;
+ ATOMIC(cls_version_t) tables_version; /* For classifier lookups. */
+
+ uint64_t dump_seq; /* Last read of udpif_dump_seq(). */
+
/* Special OpenFlow rules. */
struct rule_dpif *miss_rule; /* Sends flow table misses to controller. */
struct rule_dpif *no_packet_in_rule; /* Drops flow table misses. */
struct dpif_ipfix *ipfix;
struct hmap bundles; /* Contains "struct ofbundle"s. */
struct mac_learning *ml;
+ struct mcast_snooping *ms;
bool has_bonded_bundles;
bool lacp_enabled;
struct mbridge *mbridge;
- /* Facets. */
- struct classifier facets; /* Contains 'struct facet's. */
- long long int consistency_rl;
-
struct ovs_mutex stats_mutex;
struct netdev_stats stats OVS_GUARDED; /* To account packets generated and
* consumed in userspace. */
struct stp *stp;
long long int stp_last_tick;
+ /* Rapid Spanning Tree. */
+ struct rstp *rstp;
+ long long int rstp_last_tick;
+
/* VLAN splinters. */
struct ovs_mutex vsp_mutex;
struct hmap realdev_vid_map OVS_GUARDED; /* (realdev,vid) -> vlandev. */
int port_poll_errno; /* Last errno for port_poll() reply. */
uint64_t change_seq; /* Connectivity status changes. */
- /* Per ofproto's dpif stats. */
- uint64_t n_hit;
- uint64_t n_missed;
-
/* Work queues. */
struct guarded_list pins; /* Contains "struct ofputil_packet_in"s. */
+ struct seq *pins_seq; /* For notifying 'pins' reception. */
+ uint64_t pins_seqno;
};
-/* By default, flows in the datapath are wildcarded (megaflows). They
- * may be disabled with the "ovs-appctl dpif/disable-megaflows" command. */
-static bool enable_megaflows = true;
-
/* All existing ofproto_dpif instances, indexed by ->up.name. */
static struct hmap all_ofproto_dpifs = HMAP_INITIALIZER(&all_ofproto_dpifs);
-static void ofproto_dpif_unixctl_init(void);
+static bool ofproto_use_tnl_push_pop = true;
+static void ofproto_unixctl_init(void);
static inline struct ofproto_dpif *
ofproto_dpif_cast(const struct ofproto *ofproto)
return CONTAINER_OF(ofproto, struct ofproto_dpif, up);
}
-static struct ofport_dpif *get_ofp_port(const struct ofproto_dpif *ofproto,
- ofp_port_t ofp_port);
-static void ofproto_trace(struct ofproto_dpif *, const struct flow *,
- const struct ofpbuf *packet,
- const struct ofpact[], size_t ofpacts_len,
- struct ds *);
+bool
+ofproto_dpif_get_enable_ufid(const struct dpif_backer *backer)
+{
+ return backer->support.ufid;
+}
-/* Upcalls. */
-static void handle_upcalls(struct dpif_backer *);
+struct dpif_backer_support *
+ofproto_dpif_get_support(const struct ofproto_dpif *ofproto)
+{
+ return &ofproto->backer->support;
+}
-/* Flow expiration. */
-static int expire(struct dpif_backer *);
+static void ofproto_trace(struct ofproto_dpif *, struct flow *,
+ const struct dp_packet *packet,
+ const struct ofpact[], size_t ofpacts_len,
+ struct ds *);
/* Global variables. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
* it. */
void
ofproto_dpif_flow_mod(struct ofproto_dpif *ofproto,
- struct ofputil_flow_mod *fm)
+ const struct ofputil_flow_mod *fm)
{
- ofproto_flow_mod(&ofproto->up, fm);
+ struct ofproto_flow_mod ofm;
+
+ /* Multiple threads may do this for the same 'fm' at the same time.
+ * Allocate ofproto_flow_mod with execution context from stack.
+ *
+ * Note: This copy could be avoided by making ofproto_flow_mod more
+ * complex, but that may not be desirable, and a learn action is not that
+ * fast to begin with. */
+ ofm.fm = *fm;
+ ofproto_flow_mod(&ofproto->up, &ofm);
}
/* Appends 'pin' to the queue of "packet ins" to be sent to the controller.
free(CONST_CAST(void *, pin->up.packet));
free(pin);
}
+
+ /* Wakes up main thread for packet-in I/O. */
+ seq_change(ofproto->pins_seq);
+}
+
+/* The default "table-miss" behaviour for OpenFlow1.3+ is to drop the
+ * packet rather than to send the packet to the controller.
+ *
+ * This function returns true to indicate that a packet_in message
+ * for a "table-miss" should be sent to at least one controller.
+ * False otherwise. */
+bool
+ofproto_dpif_wants_packet_in_on_miss(struct ofproto_dpif *ofproto)
+{
+ return connmgr_wants_packet_in_on_miss(ofproto->up.connmgr);
}
\f
/* Factory functions. */
shash_add(&init_ofp_ports, node->name, new_hint);
}
+
+ ofproto_unixctl_init();
+ udpif_init();
}
static void
return NULL;
}
+bool
+ofproto_dpif_backer_enabled(struct dpif_backer* backer)
+{
+ return backer->recv_set_enable;
+}
+
static int
type_run(const char *type)
{
- static long long int push_timer = LLONG_MIN;
struct dpif_backer *backer;
backer = shash_find_data(&all_dpif_backers, type);
return 0;
}
- dpif_run(backer->dpif);
-
- handle_upcalls(backer);
- /* The most natural place to push facet statistics is when they're pulled
- * from the datapath. However, when there are many flows in the datapath,
- * this expensive operation can occur so frequently, that it reduces our
- * ability to quickly set up flows. To reduce the cost, we push statistics
- * here instead. */
- if (time_msec() > push_timer) {
- push_timer = time_msec() + 2000;
- push_all_stats();
+ if (dpif_run(backer->dpif)) {
+ backer->need_revalidate = REV_RECONFIGURE;
}
+ udpif_run(backer->udpif);
+
/* If vswitchd started with other_config:flow_restore_wait set as "true",
* and the configuration has now changed to "false", enable receiving
* packets from the datapath. */
}
if (backer->recv_set_enable) {
- udpif_set_threads(backer->udpif, n_handlers);
+ udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
+ dpif_poll_threads_set(backer->dpif, n_dpdk_rxqs, pmd_cpu_mask);
+
if (backer->need_revalidate) {
struct ofproto_dpif *ofproto;
struct simap_node *node;
iter->odp_port = node ? u32_to_odp(node->data) : ODPP_NONE;
if (tnl_port_reconfigure(iter, iter->up.netdev,
- iter->odp_port)) {
+ iter->odp_port,
+ ovs_native_tunneling_is_on(ofproto), dp_port)) {
backer->need_revalidate = REV_RECONFIGURE;
}
}
simap_destroy(&tmp_backers);
switch (backer->need_revalidate) {
- case REV_RECONFIGURE: COVERAGE_INC(rev_reconfigure); break;
- case REV_STP: COVERAGE_INC(rev_stp); break;
- case REV_BOND: COVERAGE_INC(rev_bond); break;
- case REV_PORT_TOGGLED: COVERAGE_INC(rev_port_toggled); break;
- case REV_FLOW_TABLE: COVERAGE_INC(rev_flow_table); break;
- case REV_MAC_LEARNING: COVERAGE_INC(rev_mac_learning); break;
- case REV_INCONSISTENCY: COVERAGE_INC(rev_inconsistency); break;
+ case REV_RECONFIGURE: COVERAGE_INC(rev_reconfigure); break;
+ case REV_STP: COVERAGE_INC(rev_stp); break;
+ case REV_RSTP: COVERAGE_INC(rev_rstp); break;
+ case REV_BOND: COVERAGE_INC(rev_bond); break;
+ case REV_PORT_TOGGLED: COVERAGE_INC(rev_port_toggled); break;
+ case REV_FLOW_TABLE: COVERAGE_INC(rev_flow_table); break;
+ case REV_MAC_LEARNING: COVERAGE_INC(rev_mac_learning); break;
+ case REV_MCAST_SNOOPING: COVERAGE_INC(rev_mcast_snooping); break;
}
backer->need_revalidate = 0;
- /* Clear the drop_keys in case we should now be accepting some
- * formerly dropped flows. */
- drop_key_clear(backer);
-
HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- struct facet *facet, *next;
struct ofport_dpif *ofport;
- struct cls_cursor cursor;
struct ofbundle *bundle;
if (ofproto->backer != backer) {
continue;
}
- ovs_rwlock_wrlock(&xlate_rwlock);
+ xlate_txn_start();
xlate_ofproto_set(ofproto, ofproto->up.name,
- ofproto->backer->dpif, ofproto->miss_rule,
- ofproto->no_packet_in_rule, ofproto->ml,
- ofproto->stp, ofproto->mbridge,
- ofproto->sflow, ofproto->ipfix,
- ofproto->netflow, ofproto->up.frag_handling,
+ ofproto->backer->dpif, ofproto->ml,
+ ofproto->stp, ofproto->rstp, ofproto->ms,
+ ofproto->mbridge, ofproto->sflow, ofproto->ipfix,
+ ofproto->netflow,
ofproto->up.forward_bpdu,
- connmgr_has_in_band(ofproto->up.connmgr));
+ connmgr_has_in_band(ofproto->up.connmgr),
+ &ofproto->backer->support);
HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
xlate_bundle_set(ofproto, bundle, bundle->name,
: -1;
xlate_ofport_set(ofproto, ofport->bundle, ofport,
ofport->up.ofp_port, ofport->odp_port,
- ofport->up.netdev, ofport->cfm,
- ofport->bfd, ofport->peer, stp_port,
- ofport->qdscp, ofport->n_qdscp,
- ofport->up.pp.config, ofport->up.pp.state,
- ofport->is_tunnel, ofport->may_enable);
- }
- ovs_rwlock_unlock(&xlate_rwlock);
-
- /* Only ofproto-dpif cares about the facet classifier so we just
- * lock cls_cursor_init() to appease the thread safety analysis. */
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- CLS_CURSOR_FOR_EACH_SAFE (facet, next, cr, &cursor) {
- facet_revalidate(facet);
+ ofport->up.netdev, ofport->cfm, ofport->bfd,
+ ofport->lldp, ofport->peer, stp_port,
+ ofport->rstp_port, ofport->qdscp,
+ ofport->n_qdscp, ofport->up.pp.config,
+ ofport->up.pp.state, ofport->is_tunnel,
+ ofport->may_enable);
}
+ xlate_txn_commit();
}
udpif_revalidate(backer->udpif);
}
- if (!backer->recv_set_enable) {
- /* Wake up before a max of 1000ms. */
- timer_set_duration(&backer->next_expiration, 1000);
- } else if (timer_expired(&backer->next_expiration)) {
- int delay = expire(backer);
- timer_set_duration(&backer->next_expiration, delay);
- }
-
process_dpif_port_changes(backer);
- if (backer->governor) {
- size_t n_subfacets;
-
- governor_run(backer->governor);
-
- /* If the governor has shrunk to its minimum size and the number of
- * subfacets has dwindled, then drop the governor entirely.
- *
- * For hysteresis, the number of subfacets to drop the governor is
- * smaller than the number needed to trigger its creation. */
- n_subfacets = hmap_count(&backer->subfacets);
- if (n_subfacets * 4 < flow_eviction_threshold
- && governor_is_idle(backer->governor)) {
- governor_destroy(backer->governor);
- backer->governor = NULL;
- }
- }
-
return 0;
}
return;
}
- if (backer->governor) {
- governor_wait(backer->governor);
- }
-
- timer_wait(&backer->next_expiration);
dpif_wait(backer->dpif);
- udpif_wait(backer->udpif);
}
\f
/* Basic life-cycle. */
static struct ofproto *
alloc(void)
{
- struct ofproto_dpif *ofproto = xmalloc(sizeof *ofproto);
+ struct ofproto_dpif *ofproto = xzalloc(sizeof *ofproto);
return &ofproto->up;
}
return;
}
- drop_key_clear(backer);
- hmap_destroy(&backer->drop_keys);
-
udpif_destroy(backer->udpif);
simap_destroy(&backer->tnl_backers);
hmap_destroy(&backer->odp_to_ofport_map);
shash_find_and_delete(&all_dpif_backers, backer->type);
free(backer->type);
+ free(backer->dp_version_string);
dpif_close(backer->dpif);
-
- ovs_assert(hmap_is_empty(&backer->subfacets));
- hmap_destroy(&backer->subfacets);
- governor_destroy(backer->governor);
-
free(backer);
}
/* Datapath port slated for removal from datapath. */
struct odp_garbage {
- struct list list_node;
+ struct ovs_list list_node;
odp_port_t odp_port;
};
+static bool check_variable_length_userdata(struct dpif_backer *backer);
+static void check_support(struct dpif_backer *backer);
+
static int
open_dpif_backer(const char *type, struct dpif_backer **backerp)
{
struct dpif_port_dump port_dump;
struct dpif_port port;
struct shash_node *node;
- struct list garbage_list;
- struct odp_garbage *garbage, *next;
+ struct ovs_list garbage_list;
+ struct odp_garbage *garbage;
+
struct sset names;
char *backer_name;
const char *name;
int error;
+ recirc_init();
+
backer = shash_find_data(&all_dpif_backers, type);
if (backer) {
backer->refcount++;
backer->udpif = udpif_create(backer, backer->dpif);
backer->type = xstrdup(type);
- backer->governor = NULL;
backer->refcount = 1;
hmap_init(&backer->odp_to_ofport_map);
ovs_rwlock_init(&backer->odp_to_ofport_lock);
- hmap_init(&backer->drop_keys);
- hmap_init(&backer->subfacets);
- timer_set_duration(&backer->next_expiration, 1000);
backer->need_revalidate = 0;
simap_init(&backer->tnl_backers);
backer->recv_set_enable = !ofproto_get_flow_restore_wait();
}
dpif_port_dump_done(&port_dump);
- LIST_FOR_EACH_SAFE (garbage, next, list_node, &garbage_list) {
+ LIST_FOR_EACH_POP (garbage, list_node, &garbage_list) {
dpif_port_del(backer->dpif, garbage->odp_port);
- list_remove(&garbage->list_node);
free(garbage);
}
shash_add(&all_dpif_backers, type, backer);
+ check_support(backer);
+ atomic_count_init(&backer->tnl_count, 0);
+
error = dpif_recv_set(backer->dpif, backer->recv_set_enable);
if (error) {
VLOG_ERR("failed to listen on datapath of type %s: %s",
}
if (backer->recv_set_enable) {
- udpif_set_threads(backer->udpif, n_handlers);
+ udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
}
- backer->max_n_subfacet = 0;
- backer->avg_n_subfacet = 0;
+ /* This check fails if performed before udpif threads have been set,
+ * as the kernel module checks that the 'pid' in userspace action
+ * is non-zero. */
+ backer->support.variable_length_userdata
+ = check_variable_length_userdata(backer);
+ backer->dp_version_string = dpif_get_dp_version(backer->dpif);
return error;
}
+/* Returns true only if all of the following hold, checked in this order:
+ * the global 'ofproto_use_tnl_push_pop' is set, 'ofproto''s backer reports
+ * tnl_push_pop support, and the backer's tunnel count is nonzero. */
+bool
+ovs_native_tunneling_is_on(struct ofproto_dpif *ofproto)
+{
+    struct dpif_backer *backer;
+
+    if (!ofproto_use_tnl_push_pop) {
+        return false;
+    }
+
+    backer = ofproto->backer;
+    if (!backer->support.tnl_push_pop) {
+        return false;
+    }
+
+    return atomic_count_get(&backer->tnl_count) != 0;
+}
+
+/* Probes 'backer''s datapath for recirculation support.  Only newer
+ * datapaths accept OVS_KEY_ATTR_RECIRC_ID in keys, and some features must be
+ * disabled on older datapaths that lack it.
+ *
+ * Returns false if 'backer' definitely does not support recirculation, true
+ * if it seems to support recirculation or if at least the error we get is
+ * ambiguous. */
+static bool
+check_recirc(struct dpif_backer *backer)
+{
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    struct flow flow;
+    struct odp_flow_key_parms odp_parms = {
+        .flow = &flow,
+        .support = {
+            .recirc = true,
+        },
+    };
+    bool supported;
+
+    /* Serialize a flow whose recirculation id and datapath hash are both
+     * nonzero; this key is what gets handed to the probe. */
+    memset(&flow, 0, sizeof flow);
+    flow.recirc_id = 1;
+    flow.dp_hash = 1;
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&odp_parms, &key);
+
+    supported = dpif_probe_feature(backer->dpif, "recirculation", &key, NULL);
+    if (!supported) {
+        VLOG_INFO("%s: Datapath does not support recirculation",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath supports recirculation",
+                  dpif_name(backer->dpif));
+    }
+
+    return supported;
+}
+
+/* Probes whether 'backer''s datapath supports unique flow ids (UFIDs).  We
+ * can skip serializing some flow attributes for datapaths that support this
+ * feature.
+ *
+ * Returns true if the datapath supports UFID for flow operations, false if
+ * it does not. */
+static bool
+check_ufid(struct dpif_backer *backer)
+{
+    struct odputil_keybuf keybuf;
+    struct ofpbuf key;
+    struct flow flow;
+    struct odp_flow_key_parms odp_parms = {
+        .flow = &flow,
+    };
+    ovs_u128 ufid;
+    bool supported;
+
+    /* Serialize an otherwise-empty flow with an arbitrary Ethertype. */
+    memset(&flow, 0, sizeof flow);
+    flow.dl_type = htons(0x1234);
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+    odp_flow_key_from_flow(&odp_parms, &key);
+
+    /* The probe is given both the key and the id computed from that key. */
+    dpif_flow_hash(backer->dpif, key.data, key.size, &ufid);
+    supported = dpif_probe_feature(backer->dpif, "UFID", &key, &ufid);
+
+    if (!supported) {
+        VLOG_INFO("%s: Datapath does not support unique flow ids",
+                  dpif_name(backer->dpif));
+    } else {
+        VLOG_INFO("%s: Datapath supports unique flow ids",
+                  dpif_name(backer->dpif));
+    }
+    return supported;
+}
+
+/* Tests whether 'backer''s datapath supports variable-length
+ * OVS_USERSPACE_ATTR_USERDATA in OVS_ACTION_ATTR_USERSPACE actions.  We need
+ * to disable some features on older datapaths that don't support this
+ * feature.
+ *
+ * The probe executes a userspace action carrying 4 bytes of userdata against
+ * a dummy Ethernet packet and classifies the resulting error code.
+ *
+ * Returns false if 'backer' definitely does not support variable-length
+ * userdata (the probe failed with ERANGE), true if it seems to support them
+ * or if at least the error we get is ambiguous. */
+static bool
+check_variable_length_userdata(struct dpif_backer *backer)
+{
+    struct eth_header *eth;
+    struct ofpbuf actions;
+    struct dpif_execute execute;
+    struct dp_packet packet;
+    size_t start;
+    int error;
+
+    /* Compose a userspace action that will cause an ERANGE error on older
+     * datapaths that don't support variable-length userdata.
+     *
+     * We really test for using userdata longer than 8 bytes, but older
+     * datapaths accepted these, silently truncating the userdata to 8 bytes.
+     * The same older datapaths rejected userdata shorter than 8 bytes, so we
+     * test for that instead as a proxy for longer userdata support. */
+    ofpbuf_init(&actions, 64);
+    start = nl_msg_start_nested(&actions, OVS_ACTION_ATTR_USERSPACE);
+    nl_msg_put_u32(&actions, OVS_USERSPACE_ATTR_PID,
+                   dpif_port_get_pid(backer->dpif, ODPP_NONE, 0));
+    nl_msg_put_unspec_zero(&actions, OVS_USERSPACE_ATTR_USERDATA, 4);
+    nl_msg_end_nested(&actions, start);
+
+    /* Compose a dummy ethernet packet. */
+    dp_packet_init(&packet, ETH_HEADER_LEN);
+    eth = dp_packet_put_zeros(&packet, ETH_HEADER_LEN);
+    eth->eth_type = htons(0x1234);
+
+    /* Execute the actions.  On older datapaths this fails with ERANGE, on
+     * newer datapaths it succeeds. */
+    execute.actions = actions.data;
+    execute.actions_len = actions.size;
+    execute.packet = &packet;
+    execute.needs_help = false;
+    execute.probe = true;
+    execute.mtu = 0;
+
+    error = dpif_execute(backer->dpif, &execute);
+
+    /* The probe buffers are no longer needed once the result is known. */
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&actions);
+
+    switch (error) {
+    case 0:
+        return true;
+
+    case ERANGE:
+        /* Variable-length userdata is not supported. */
+        VLOG_WARN("%s: datapath does not support variable-length userdata "
+                  "feature (needs Linux 3.10+ or kernel module from OVS "
+                  "1.11+). The NXAST_SAMPLE action will be ignored.",
+                  dpif_name(backer->dpif));
+        return false;
+
+    default:
+        /* Something odd happened.  We're not sure whether variable-length
+         * userdata is supported.  Default to "yes". */
+        VLOG_WARN("%s: variable-length userdata feature probe failed (%s)",
+                  dpif_name(backer->dpif), ovs_strerror(error));
+        return true;
+    }
+}
+
+/* Probes how deep an MPLS label stack 'backer''s datapath accepts.
+ *
+ * Probing starts with a single label and adds one label per round, stopping
+ * at the first depth the datapath rejects or once FLOW_MAX_MPLS_LABELS (the
+ * number of elements in a struct flow's mpls_lse field) is reached.
+ *
+ * Returns FLOW_MAX_MPLS_LABELS if the datapath supports at least that many
+ * entries in an MPLS label stack.  Otherwise returns the number of MPLS push
+ * actions supported by the datapath. */
+static size_t
+check_max_mpls_depth(struct dpif_backer *backer)
+{
+    int depth = 0;
+
+    while (depth < FLOW_MAX_MPLS_LABELS) {
+        struct odputil_keybuf keybuf;
+        struct ofpbuf key;
+        struct flow flow;
+        struct odp_flow_key_parms odp_parms = {
+            .flow = &flow,
+        };
+
+        /* Mark bottom-of-stack on label index 'depth' of an otherwise
+         * empty MPLS flow, then ask the datapath whether it takes the key. */
+        memset(&flow, 0, sizeof flow);
+        flow.dl_type = htons(ETH_TYPE_MPLS);
+        flow_set_mpls_bos(&flow, depth, 1);
+
+        ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
+        odp_flow_key_from_flow(&odp_parms, &key);
+        if (!dpif_probe_feature(backer->dpif, "MPLS", &key, NULL)) {
+            break;
+        }
+
+        depth++;
+    }
+
+    VLOG_INFO("%s: MPLS label stack length probed as %d",
+              dpif_name(backer->dpif), depth);
+    return depth;
+}
+
+/* Probes whether 'backer''s datapath accepts masked data in
+ * OVS_ACTION_ATTR_SET actions.  Some features must be disabled on older
+ * datapaths that lack this.
+ *
+ * Returns true if executing the probe action succeeded, false on any
+ * error. */
+static bool
+check_masked_set_action(struct dpif_backer *backer)
+{
+    struct ovs_key_ethernet key, mask;
+    struct dpif_execute execute;
+    struct dp_packet packet;
+    struct eth_header *eth;
+    struct ofpbuf actions;
+    bool supported;
+
+    /* Compose a set action that will cause an EINVAL error on older
+     * datapaths that don't support masked set actions.
+     * Avoid using a full mask, as it could be translated to a non-masked
+     * set action instead. */
+    ofpbuf_init(&actions, 64);
+    memset(&key, 0x53, sizeof key);
+    memset(&mask, 0x7f, sizeof mask);
+    commit_masked_set_action(&actions, OVS_KEY_ATTR_ETHERNET, &key, &mask,
+                             sizeof key);
+
+    /* Compose a dummy ethernet packet. */
+    dp_packet_init(&packet, ETH_HEADER_LEN);
+    eth = dp_packet_put_zeros(&packet, ETH_HEADER_LEN);
+    eth->eth_type = htons(0x1234);
+
+    /* Execute the actions.  On older datapaths this fails with EINVAL, on
+     * newer datapaths it succeeds. */
+    execute.actions = actions.data;
+    execute.actions_len = actions.size;
+    execute.packet = &packet;
+    execute.needs_help = false;
+    execute.probe = true;
+    execute.mtu = 0;
+
+    supported = dpif_execute(backer->dpif, &execute) == 0;
+
+    dp_packet_uninit(&packet);
+    ofpbuf_uninit(&actions);
+
+    if (!supported) {
+        /* Masked set action is not supported. */
+        VLOG_INFO("%s: datapath does not support masked set action feature.",
+                  dpif_name(backer->dpif));
+    }
+    return supported;
+}
+
+/* CHECK_FEATURE__(NAME, FIELD) defines a static function check_NAME() that
+ * probes 'backer''s datapath with a key serialized from an otherwise-zero
+ * flow whose member FIELD is set to 1, with odp support flag NAME enabled.
+ * The generated function logs the outcome and returns the probe result.
+ *
+ * CHECK_FEATURE(FIELD) is the shorthand for when the support flag and the
+ * flow member share one name.  Both macros are undefined again once the
+ * four ct_* probes below have been generated. */
+#define CHECK_FEATURE__(NAME, FIELD)                                        \
+static bool                                                                 \
+check_##NAME(struct dpif_backer *backer)                                    \
+{                                                                           \
+    struct odputil_keybuf keybuf;                                           \
+    struct ofpbuf key;                                                      \
+    struct flow flow;                                                       \
+    struct odp_flow_key_parms odp_parms = {                                 \
+        .flow = &flow,                                                      \
+        .support = {                                                        \
+            .NAME = true,                                                   \
+        },                                                                  \
+    };                                                                      \
+    bool supported;                                                         \
+                                                                            \
+    memset(&flow, 0, sizeof flow);                                          \
+    flow.FIELD = 1;                                                         \
+                                                                            \
+    ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);                         \
+    odp_flow_key_from_flow(&odp_parms, &key);                               \
+    supported = dpif_probe_feature(backer->dpif, #NAME, &key, NULL);        \
+                                                                            \
+    if (!supported) {                                                       \
+        VLOG_INFO("%s: Datapath does not support "#NAME,                    \
+                  dpif_name(backer->dpif));                                 \
+    } else {                                                                \
+        VLOG_INFO("%s: Datapath supports "#NAME, dpif_name(backer->dpif));  \
+    }                                                                       \
+                                                                            \
+    return supported;                                                       \
+}
+#define CHECK_FEATURE(FIELD) CHECK_FEATURE__(FIELD, FIELD)
+
+CHECK_FEATURE(ct_state)
+CHECK_FEATURE(ct_zone)
+CHECK_FEATURE(ct_mark)
+CHECK_FEATURE__(ct_label, ct_label.u64.lo)
+
+#undef CHECK_FEATURE
+#undef CHECK_FEATURE__
+
+/* Runs the datapath feature probes for 'backer' and stores each result in
+ * 'backer->support'.  The probes run in the order written below. */
+static void
+check_support(struct dpif_backer *backer)
+{
+    struct dpif *dpif = backer->dpif;
+
+    /* This feature needs to be tested after udpif threads are set, so only
+     * a placeholder value is recorded here. */
+    backer->support.variable_length_userdata = false;
+
+    /* General datapath capabilities. */
+    backer->support.odp.recirc = check_recirc(backer);
+    backer->support.odp.max_mpls_depth = check_max_mpls_depth(backer);
+    backer->support.masked_set_action = check_masked_set_action(backer);
+    backer->support.ufid = check_ufid(backer);
+    backer->support.tnl_push_pop = dpif_supports_tnl_push_pop(dpif);
+
+    /* The ct_state, ct_zone, ct_mark and ct_label key fields. */
+    backer->support.odp.ct_state = check_ct_state(backer);
+    backer->support.odp.ct_zone = check_ct_zone(backer);
+    backer->support.odp.ct_mark = check_ct_mark(backer);
+    backer->support.odp.ct_label = check_ct_label(backer);
+}
+
static int
construct(struct ofproto *ofproto_)
{
struct shash_node *node, *next;
int error;
+ /* Tunnel module can get used right after the udpif threads are running. */
+ ofproto_tunnel_init();
+
error = open_dpif_backer(ofproto->up.type, &ofproto->backer);
if (error) {
return error;
}
+ atomic_init(&ofproto->tables_version, CLS_MIN_VERSION);
ofproto->netflow = NULL;
ofproto->sflow = NULL;
ofproto->ipfix = NULL;
ofproto->stp = NULL;
+ ofproto->rstp = NULL;
+ ofproto->dump_seq = 0;
hmap_init(&ofproto->bundles);
ofproto->ml = mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME);
+ ofproto->ms = NULL;
ofproto->mbridge = mbridge_create();
ofproto->has_bonded_bundles = false;
ofproto->lacp_enabled = false;
- ovs_mutex_init(&ofproto->stats_mutex);
+ ovs_mutex_init_adaptive(&ofproto->stats_mutex);
ovs_mutex_init(&ofproto->vsp_mutex);
- classifier_init(&ofproto->facets, NULL);
- ofproto->consistency_rl = LLONG_MIN;
-
guarded_list_init(&ofproto->pins);
- ofproto_dpif_unixctl_init();
-
hmap_init(&ofproto->vlandev_map);
hmap_init(&ofproto->realdev_vid_map);
sset_init(&ofproto->port_poll_set);
ofproto->port_poll_errno = 0;
ofproto->change_seq = 0;
+ ofproto->pins_seq = seq_create();
+ ofproto->pins_seqno = seq_read(ofproto->pins_seq);
+
SHASH_FOR_EACH_SAFE (node, next, &init_ofp_ports) {
struct iface_hint *iface_hint = node->data;
ofproto_init_tables(ofproto_, N_TABLES);
error = add_internal_flows(ofproto);
- ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
- ofproto->n_hit = 0;
- ofproto->n_missed = 0;
+ ofproto->up.tables[TBL_INTERNAL].flags = OFTABLE_HIDDEN | OFTABLE_READONLY;
return error;
}
static int
-add_internal_flow(struct ofproto_dpif *ofproto, int id,
+add_internal_miss_flow(struct ofproto_dpif *ofproto, int id,
const struct ofpbuf *ofpacts, struct rule_dpif **rulep)
{
- struct ofputil_flow_mod fm;
+ struct match match;
int error;
+ struct rule *rule;
- match_init_catchall(&fm.match);
- fm.priority = 0;
- match_set_reg(&fm.match, 0, id);
- fm.new_cookie = htonll(0);
- fm.cookie = htonll(0);
- fm.cookie_mask = htonll(0);
- fm.modify_cookie = false;
- fm.table_id = TBL_INTERNAL;
- fm.command = OFPFC_ADD;
- fm.idle_timeout = 0;
- fm.hard_timeout = 0;
- fm.buffer_id = 0;
- fm.out_port = 0;
- fm.flags = 0;
- fm.ofpacts = ofpacts->data;
- fm.ofpacts_len = ofpacts->size;
-
- error = ofproto_flow_mod(&ofproto->up, &fm);
- if (error) {
- VLOG_ERR_RL(&rl, "failed to add internal flow %d (%s)",
- id, ofperr_to_string(error));
- return error;
- }
+ match_init_catchall(&match);
+ match_set_reg(&match, 0, id);
- if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
- rulep)) {
- rule_dpif_unref(*rulep);
- } else {
- OVS_NOT_REACHED();
- }
+ error = ofproto_dpif_add_internal_flow(ofproto, &match, 0, 0, ofpacts,
+ &rule);
+ *rulep = error ? NULL : rule_dpif_cast(rule);
- return 0;
+ return error;
}
static int
struct ofpact_controller *controller;
uint64_t ofpacts_stub[128 / 8];
struct ofpbuf ofpacts;
+ struct rule *unused_rulep OVS_UNUSED;
+ struct match match;
int error;
int id;
controller->reason = OFPR_NO_MATCH;
ofpact_pad(&ofpacts);
- error = add_internal_flow(ofproto, id++, &ofpacts, &ofproto->miss_rule);
+ error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+ &ofproto->miss_rule);
if (error) {
return error;
}
ofpbuf_clear(&ofpacts);
- error = add_internal_flow(ofproto, id++, &ofpacts,
- &ofproto->no_packet_in_rule);
+ error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+ &ofproto->no_packet_in_rule);
+ if (error) {
+ return error;
+ }
+
+ error = add_internal_miss_flow(ofproto, id++, &ofpacts,
+ &ofproto->drop_frags_rule);
if (error) {
return error;
}
- error = add_internal_flow(ofproto, id++, &ofpacts,
- &ofproto->drop_frags_rule);
+ /* Drop any runaway non-recirc rule lookups. Recirc_id has to be
+ * zero when reaching this rule.
+ *
+ * (priority=2), recirc_id=0, actions=drop
+ */
+ ofpbuf_clear(&ofpacts);
+ match_init_catchall(&match);
+ match_set_recirc_id(&match, 0);
+ error = ofproto_dpif_add_internal_flow(ofproto, &match, 2, 0, &ofpacts,
+ &unused_rulep);
return error;
}
destruct(struct ofproto *ofproto_)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- struct rule_dpif *rule, *next_rule;
- struct ofproto_packet_in *pin, *next_pin;
- struct facet *facet, *next_facet;
- struct cls_cursor cursor;
+ struct ofproto_packet_in *pin;
+ struct rule_dpif *rule;
struct oftable *table;
- struct list pins;
-
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- CLS_CURSOR_FOR_EACH_SAFE (facet, next_facet, cr, &cursor) {
- facet_remove(facet);
- }
+ struct ovs_list pins;
ofproto->backer->need_revalidate = REV_RECONFIGURE;
- ovs_rwlock_wrlock(&xlate_rwlock);
+ xlate_txn_start();
xlate_remove_ofproto(ofproto);
- ovs_rwlock_unlock(&xlate_rwlock);
+ xlate_txn_commit();
- /* Discard any flow_miss_batches queued up for 'ofproto', avoiding a
- * use-after-free error. */
- udpif_revalidate(ofproto->backer->udpif);
+ /* Ensure that the upcall processing threads have no remaining references
+ * to the ofproto or anything in it. */
+ udpif_synchronize(ofproto->backer->udpif);
hmap_remove(&all_ofproto_dpifs, &ofproto->all_ofproto_dpifs_node);
OFPROTO_FOR_EACH_TABLE (table, &ofproto->up) {
- struct cls_cursor cursor;
-
- ovs_rwlock_rdlock(&table->cls.rwlock);
- cls_cursor_init(&cursor, &table->cls, NULL);
- ovs_rwlock_unlock(&table->cls.rwlock);
- CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, up.cr, &cursor) {
+ CLS_FOR_EACH (rule, up.cr, &table->cls) {
ofproto_rule_delete(&ofproto->up, &rule->up);
}
}
+ ofproto_group_delete_all(&ofproto->up);
guarded_list_pop_all(&ofproto->pins, &pins);
- LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
- list_remove(&pin->list_node);
+ LIST_FOR_EACH_POP (pin, list_node, &pins) {
free(CONST_CAST(void *, pin->up.packet));
free(pin);
}
guarded_list_destroy(&ofproto->pins);
+ recirc_free_ofproto(ofproto, ofproto->up.name);
+
mbridge_unref(ofproto->mbridge);
netflow_unref(ofproto->netflow);
dpif_sflow_unref(ofproto->sflow);
+ dpif_ipfix_unref(ofproto->ipfix);
hmap_destroy(&ofproto->bundles);
mac_learning_unref(ofproto->ml);
-
- classifier_destroy(&ofproto->facets);
+ mcast_snooping_unref(ofproto->ms);
hmap_destroy(&ofproto->vlandev_map);
hmap_destroy(&ofproto->realdev_vid_map);
ovs_mutex_destroy(&ofproto->stats_mutex);
ovs_mutex_destroy(&ofproto->vsp_mutex);
+ seq_destroy(ofproto->pins_seq);
+
close_dpif_backer(ofproto->backer);
}
run(struct ofproto *ofproto_)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- uint64_t new_seq;
+ uint64_t new_seq, new_dump_seq;
if (mbridge_need_revalidate(ofproto->mbridge)) {
ofproto->backer->need_revalidate = REV_RECONFIGURE;
ovs_rwlock_wrlock(&ofproto->ml->rwlock);
mac_learning_flush(ofproto->ml);
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ mcast_snooping_mdb_flush(ofproto->ms);
}
+ /* Always updates the ofproto->pins_seqno to avoid frequent wakeup during
+ * flow restore. Even though nothing is processed during flow restore,
+ * all queued 'pins' will be handled immediately when flow restore
+ * completes. */
+ ofproto->pins_seqno = seq_read(ofproto->pins_seq);
+
/* Do not perform any periodic activity required by 'ofproto' while
* waiting for flow restore to complete. */
if (!ofproto_get_flow_restore_wait()) {
- struct ofproto_packet_in *pin, *next_pin;
- struct list pins;
+ struct ofproto_packet_in *pin;
+ struct ovs_list pins;
guarded_list_pop_all(&ofproto->pins, &pins);
- LIST_FOR_EACH_SAFE (pin, next_pin, list_node, &pins) {
+ LIST_FOR_EACH_POP (pin, list_node, &pins) {
connmgr_send_packet_in(ofproto->up.connmgr, pin);
- list_remove(&pin->list_node);
free(CONST_CAST(void *, pin->up.packet));
free(pin);
}
}
stp_run(ofproto);
+ rstp_run(ofproto);
ovs_rwlock_wrlock(&ofproto->ml->rwlock);
if (mac_learning_run(ofproto->ml)) {
ofproto->backer->need_revalidate = REV_MAC_LEARNING;
}
ovs_rwlock_unlock(&ofproto->ml->rwlock);
- /* Check the consistency of a random facet, to aid debugging. */
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- if (time_msec() >= ofproto->consistency_rl
- && !classifier_is_empty(&ofproto->facets)
- && !ofproto->backer->need_revalidate) {
- struct cls_subtable *table;
- struct cls_rule *cr;
- struct facet *facet;
+ if (mcast_snooping_run(ofproto->ms)) {
+ ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
+ }
- ofproto->consistency_rl = time_msec() + 250;
+ new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
+ if (ofproto->dump_seq != new_dump_seq) {
+ struct rule *rule, *next_rule;
- table = CONTAINER_OF(hmap_random_node(&ofproto->facets.subtables),
- struct cls_subtable, hmap_node);
- cr = CONTAINER_OF(hmap_random_node(&table->rules), struct cls_rule,
- hmap_node);
- facet = CONTAINER_OF(cr, struct facet, cr);
+ /* We know stats are relatively fresh, so now is a good time to do some
+ * periodic work. */
+ ofproto->dump_seq = new_dump_seq;
- if (!facet_check_consistency(facet)) {
- ofproto->backer->need_revalidate = REV_INCONSISTENCY;
+ /* Expire OpenFlow flows whose idle_timeout or hard_timeout
+ * has passed. */
+ ovs_mutex_lock(&ofproto_mutex);
+ LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
+ &ofproto->up.expirable) {
+ rule_expire(rule_dpif_cast(rule));
}
- }
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
+ ovs_mutex_unlock(&ofproto_mutex);
+
+ /* All outstanding data in existing flows has been accounted, so it's a
+ * good time to do bond rebalancing. */
+ if (ofproto->has_bonded_bundles) {
+ struct ofbundle *bundle;
+ HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
+ if (bundle->bond) {
+ bond_rebalance(bundle->bond);
+ }
+ }
+ }
+ }
return 0;
}
ovs_rwlock_rdlock(&ofproto->ml->rwlock);
mac_learning_wait(ofproto->ml);
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ mcast_snooping_wait(ofproto->ms);
stp_wait(ofproto);
if (ofproto->backer->need_revalidate) {
/* Shouldn't happen, but if it does just go around again. */
VLOG_DBG_RL(&rl, "need revalidate in ofproto_wait_cb()");
poll_immediate_wake();
}
-}
-
-static void
-get_memory_usage(const struct ofproto *ofproto_, struct simap *usage)
-{
- const struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- struct cls_cursor cursor;
- size_t n_subfacets = 0;
- struct facet *facet;
-
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- simap_increase(usage, "facets", classifier_count(&ofproto->facets));
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
- n_subfacets += list_size(&facet->subfacets);
- }
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- simap_increase(usage, "subfacets", n_subfacets);
+ seq_wait(udpif_dump_seq(ofproto->backer->udpif), ofproto->dump_seq);
+ seq_wait(ofproto->pins_seq, ofproto->pins_seqno);
}
static void
flush(struct ofproto *ofproto_)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- struct subfacet *subfacet, *next_subfacet;
- struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
- int n_batch;
-
- n_batch = 0;
- HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
- &ofproto->backer->subfacets) {
- if (subfacet->facet->ofproto != ofproto) {
- continue;
- }
-
- if (subfacet->path != SF_NOT_INSTALLED) {
- batch[n_batch++] = subfacet;
- if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
- subfacet_destroy_batch(ofproto->backer, batch, n_batch);
- n_batch = 0;
- }
- } else {
- subfacet_destroy(subfacet);
- }
- }
+ struct dpif_backer *backer = ofproto->backer;
- if (n_batch > 0) {
- subfacet_destroy_batch(ofproto->backer, batch, n_batch);
+ if (backer) {
+ udpif_flush(backer->udpif);
}
}
static void
-get_features(struct ofproto *ofproto_ OVS_UNUSED,
- bool *arp_match_ip, enum ofputil_action_bitmap *actions)
-{
- *arp_match_ip = true;
- *actions = (OFPUTIL_A_OUTPUT |
- OFPUTIL_A_SET_VLAN_VID |
- OFPUTIL_A_SET_VLAN_PCP |
- OFPUTIL_A_STRIP_VLAN |
- OFPUTIL_A_SET_DL_SRC |
- OFPUTIL_A_SET_DL_DST |
- OFPUTIL_A_SET_NW_SRC |
- OFPUTIL_A_SET_NW_DST |
- OFPUTIL_A_SET_NW_TOS |
- OFPUTIL_A_SET_TP_SRC |
- OFPUTIL_A_SET_TP_DST |
- OFPUTIL_A_ENQUEUE);
+query_tables(struct ofproto *ofproto,
+ struct ofputil_table_features *features,
+ struct ofputil_table_stats *stats)
+{
+ strcpy(features->name, "classifier");
+
+ if (stats) {
+ int i;
+
+ for (i = 0; i < ofproto->n_tables; i++) {
+ unsigned long missed, matched;
+
+ atomic_read_relaxed(&ofproto->tables[i].n_matched, &matched);
+ atomic_read_relaxed(&ofproto->tables[i].n_missed, &missed);
+
+ stats[i].matched_count = matched;
+ stats[i].lookup_count = matched + missed;
+ }
+ }
}
static void
-get_tables(struct ofproto *ofproto_, struct ofp12_table_stats *ots)
+set_tables_version(struct ofproto *ofproto_, cls_version_t version)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- struct dpif_dp_stats s;
- uint64_t n_miss, n_no_pkt_in, n_bytes, n_dropped_frags;
- uint64_t n_lookup;
-
- strcpy(ots->name, "classifier");
- dpif_get_dp_stats(ofproto->backer->dpif, &s);
- rule_get_stats(&ofproto->miss_rule->up, &n_miss, &n_bytes);
- rule_get_stats(&ofproto->no_packet_in_rule->up, &n_no_pkt_in, &n_bytes);
- rule_get_stats(&ofproto->drop_frags_rule->up, &n_dropped_frags, &n_bytes);
-
- n_lookup = s.n_hit + s.n_missed - n_dropped_frags;
- ots->lookup_count = htonll(n_lookup);
- ots->matched_count = htonll(n_lookup - n_miss - n_no_pkt_in);
+ atomic_store_relaxed(&ofproto->tables_version, version);
}
+
static struct ofport *
port_alloc(void)
{
- struct ofport_dpif *port = xmalloc(sizeof *port);
+ struct ofport_dpif *port = xzalloc(sizeof *port);
return &port->up;
}
struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
const struct netdev *netdev = port->up.netdev;
char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+ const char *dp_port_name;
struct dpif_port dpif_port;
int error;
port->bundle = NULL;
port->cfm = NULL;
port->bfd = NULL;
- port->may_enable = true;
+ port->lldp = NULL;
+ port->may_enable = false;
port->stp_port = NULL;
port->stp_state = STP_DISABLED;
+ port->rstp_port = NULL;
+ port->rstp_state = RSTP_DISABLED;
port->is_tunnel = false;
port->peer = NULL;
port->qdscp = NULL;
if (netdev_vport_is_patch(netdev)) {
/* By bailing out here, we don't submit the port to the sFlow module
- * to be considered for counter polling export. This is correct
- * because the patch port represents an interface that sFlow considers
- * to be "internal" to the switch as a whole, and therefore not an
- * candidate for counter polling. */
+ * to be considered for counter polling export. This is correct
+ * because the patch port represents an interface that sFlow considers
+ * to be "internal" to the switch as a whole, and therefore not a
+ * candidate for counter polling. */
port->odp_port = ODPP_NONE;
ofport_update_peer(port);
return 0;
}
- error = dpif_port_query_by_name(ofproto->backer->dpif,
- netdev_vport_get_dpif_port(netdev, namebuf,
- sizeof namebuf),
+ dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
+ error = dpif_port_query_by_name(ofproto->backer->dpif, dp_port_name,
&dpif_port);
if (error) {
return error;
port->odp_port = dpif_port.port_no;
if (netdev_get_tunnel_config(netdev)) {
- tnl_port_add(port, port->up.netdev, port->odp_port);
- port->is_tunnel = true;
- } else {
- /* Sanity-check that a mapping doesn't already exist. This
+ atomic_count_inc(&ofproto->backer->tnl_count);
+ error = tnl_port_add(port, port->up.netdev, port->odp_port,
+ ovs_native_tunneling_is_on(ofproto), dp_port_name);
+ if (error) {
+ atomic_count_dec(&ofproto->backer->tnl_count);
+ dpif_port_destroy(&dpif_port);
+ return error;
+ }
+
+ port->is_tunnel = true;
+ if (ofproto->ipfix) {
+ dpif_ipfix_add_tunnel_port(ofproto->ipfix, port_, port->odp_port);
+ }
+ } else {
+ /* Sanity-check that a mapping doesn't already exist. This
* shouldn't happen for non-tunnel ports. */
if (odp_port_to_ofp_port(ofproto, port->odp_port) != OFPP_NONE) {
VLOG_ERR("port %s already has an OpenFlow port number",
const char *dp_port_name;
ofproto->backer->need_revalidate = REV_RECONFIGURE;
- ovs_rwlock_wrlock(&xlate_rwlock);
+ xlate_txn_start();
xlate_ofport_remove(port);
- ovs_rwlock_unlock(&xlate_rwlock);
+ xlate_txn_commit();
dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf,
sizeof namebuf);
ovs_rwlock_unlock(&ofproto->backer->odp_to_ofport_lock);
}
+ if (port->is_tunnel) {
+ atomic_count_dec(&ofproto->backer->tnl_count);
+ }
+
+ if (port->is_tunnel && ofproto->ipfix) {
+ dpif_ipfix_del_tunnel_port(ofproto->ipfix, port->odp_port);
+ }
+
tnl_port_del(port);
sset_find_and_delete(&ofproto->ports, devname);
sset_find_and_delete(&ofproto->ghost_ports, devname);
bundle_remove(port_);
set_cfm(port_, NULL);
set_bfd(port_, NULL);
+ set_lldp(port_, NULL);
+ if (port->stp_port) {
+ stp_port_disable(port->stp_port);
+ }
+ set_rstp_port(port_, NULL);
if (ofproto->sflow) {
dpif_sflow_del_port(ofproto->sflow, port->odp_port);
}
port_modified(struct ofport *port_)
{
struct ofport_dpif *port = ofport_dpif_cast(port_);
+ char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
+ const char *dp_port_name;
+ struct netdev *netdev = port->up.netdev;
if (port->bundle && port->bundle->bond) {
- bond_slave_set_netdev(port->bundle->bond, port, port->up.netdev);
+ bond_slave_set_netdev(port->bundle->bond, port, netdev);
}
if (port->cfm) {
- cfm_set_netdev(port->cfm, port->up.netdev);
+ cfm_set_netdev(port->cfm, netdev);
}
if (port->bfd) {
- bfd_set_netdev(port->bfd, port->up.netdev);
+ bfd_set_netdev(port->bfd, netdev);
}
ofproto_dpif_monitor_port_update(port, port->bfd, port->cfm,
- port->up.pp.hw_addr);
+ port->lldp, &port->up.pp.hw_addr);
- if (port->is_tunnel && tnl_port_reconfigure(port, port->up.netdev,
- port->odp_port)) {
- ofproto_dpif_cast(port->up.ofproto)->backer->need_revalidate =
- REV_RECONFIGURE;
+ dp_port_name = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf);
+
+ if (port->is_tunnel) {
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto);
+
+ if (tnl_port_reconfigure(port, netdev, port->odp_port,
+ ovs_native_tunneling_is_on(ofproto),
+ dp_port_name)) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
}
ofport_update_peer(port);
struct dpif_sflow *ds = ofproto->sflow;
if (sflow_options) {
+ uint32_t old_probability = ds ? dpif_sflow_get_probability(ds) : 0;
if (!ds) {
struct ofport_dpif *ofport;
HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
dpif_sflow_add_port(ds, &ofport->up, ofport->odp_port);
}
- ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
dpif_sflow_set_options(ds, sflow_options);
+ if (dpif_sflow_get_probability(ds) != old_probability) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
} else {
if (ds) {
dpif_sflow_unref(ds);
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
struct dpif_ipfix *di = ofproto->ipfix;
bool has_options = bridge_exporter_options || flow_exporters_options;
+ bool new_di = false;
if (has_options && !di) {
di = ofproto->ipfix = dpif_ipfix_create();
+ new_di = true;
}
if (di) {
di, bridge_exporter_options, flow_exporters_options,
n_flow_exporters_options);
+ /* Add tunnel ports only when a new ipfix instance has been created. */
+ if (new_di == true) {
+ struct ofport_dpif *ofport;
+ HMAP_FOR_EACH (ofport, up.hmap_node, &ofproto->up.ports) {
+ if (ofport->is_tunnel == true) {
+ dpif_ipfix_add_tunnel_port(di, &ofport->up, ofport->odp_port);
+ }
+ }
+ }
+
if (!has_options) {
dpif_ipfix_unref(di);
ofproto->ipfix = NULL;
set_cfm(struct ofport *ofport_, const struct cfm_settings *s)
{
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ struct cfm *old = ofport->cfm;
int error = 0;
if (s) {
if (!ofport->cfm) {
- struct ofproto_dpif *ofproto;
-
- ofproto = ofproto_dpif_cast(ofport->up.ofproto);
- ofproto->backer->need_revalidate = REV_RECONFIGURE;
ofport->cfm = cfm_create(ofport->up.netdev);
}
cfm_unref(ofport->cfm);
ofport->cfm = NULL;
out:
+ if (ofport->cfm != old) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
- ofport->up.pp.hw_addr);
+ ofport->lldp, &ofport->up.pp.hw_addr);
return error;
}
static bool
+cfm_status_changed(struct ofport *ofport_)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+ return ofport->cfm ? cfm_check_status_change(ofport->cfm) : true;
+}
+
+static int
get_cfm_status(const struct ofport *ofport_,
- struct ofproto_cfm_status *status)
+ struct cfm_status *status)
{
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ int ret = 0;
if (ofport->cfm) {
- status->faults = cfm_get_fault(ofport->cfm);
- status->flap_count = cfm_get_flap_count(ofport->cfm);
- status->remote_opstate = cfm_get_opup(ofport->cfm);
- status->health = cfm_get_health(ofport->cfm);
- cfm_get_remote_mpids(ofport->cfm, &status->rmps, &status->n_rmps);
- return true;
+ cfm_get_status(ofport->cfm, status);
} else {
- return false;
+ ret = ENOENT;
}
+
+ return ret;
}
static int
ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
ofproto_dpif_monitor_port_update(ofport, ofport->bfd, ofport->cfm,
- ofport->up.pp.hw_addr);
+ ofport->lldp, &ofport->up.pp.hw_addr);
return 0;
}
+static bool
+bfd_status_changed(struct ofport *ofport_)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+    /* A port without BFD always reports true; otherwise the answer comes
+     * from bfd_check_status_change(). */
+    if (!ofport->bfd) {
+        return true;
+    }
+    return bfd_check_status_change(ofport->bfd);
+}
+
+/* Stores the BFD status of 'ofport_' into 'smap' by calling bfd_get_status().
+ *
+ * Returns 0 if the port has a BFD instance, otherwise ENOENT (in which case
+ * 'smap' is not touched by this function). */
static int
get_bfd_status(struct ofport *ofport_, struct smap *smap)
{
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ int ret = 0;
if (ofport->bfd) {
bfd_get_status(ofport->bfd, smap);
- return 0;
} else {
- return ENOENT;
+ ret = ENOENT;
+ }
+
+ return ret;
+}
+
+/* Configures LLDP on 'ofport_' from the key-value settings in 'cfg'.
+ *
+ * A nonnull 'cfg' creates the port's LLDP instance if it does not exist yet
+ * (requesting flow revalidation) and then applies 'cfg' to it with
+ * lldp_configure().  A null 'cfg' disables LLDP on the port: any existing
+ * LLDP instance is released and flow revalidation is requested.
+ *
+ * In every case the port monitor is told about the port's current BFD, CFM
+ * and LLDP instances afterwards.
+ *
+ * Returns 0 on success, or EINVAL if lldp_configure() rejects 'cfg', in which
+ * case the LLDP instance is released and the port is left without LLDP. */
+static int
+set_lldp(struct ofport *ofport_,
+         const struct smap *cfg)
+{
+    struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+    struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+    int error = 0;
+
+    if (cfg) {
+        if (!ofport->lldp) {
+            ofproto->backer->need_revalidate = REV_RECONFIGURE;
+            ofport->lldp = lldp_create(ofport->up.netdev, ofport_->mtu, cfg);
+        }
+
+        if (!lldp_configure(ofport->lldp, cfg)) {
+            /* Bad configuration: do not keep a half-configured instance. */
+            lldp_unref(ofport->lldp);
+            ofport->lldp = NULL;
+            error = EINVAL;
+        }
+    } else if (ofport->lldp) {
+        /* LLDP is being turned off on a port where it was running.  Without
+         * this branch the old instance would stay attached to the port
+         * (and to the monitor) after the configuration was removed. */
+        lldp_unref(ofport->lldp);
+        ofport->lldp = NULL;
+        ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
+
+    ofproto_dpif_monitor_port_update(ofport,
+                                     ofport->bfd,
+                                     ofport->cfm,
+                                     ofport->lldp,
+                                     &ofport->up.pp.hw_addr);
+    return error;
+}
+
+/* Returns true if an LLDP instance is attached to 'ofport_', false otherwise.
+ * 'status' is accepted for the interface's sake but is never written. */
+static bool
+get_lldp_status(const struct ofport *ofport_,
+ struct lldp_status *status OVS_UNUSED)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+
+ return ofport->lldp ? true : false;
+}
+
+/* Thin wrapper: passes the settings 's' to aa_configure() and returns its
+ * result.  'ofproto' is not used. */
+static int
+set_aa(struct ofproto *ofproto OVS_UNUSED,
+ const struct aa_settings *s)
+{
+ return aa_configure(s);
+}
+
+/* Thin wrapper: registers the mapping described by 's' under the caller's
+ * handle 'aux' via aa_mapping_register() and returns its result.  'ofproto_'
+ * is not used. */
+static int
+aa_mapping_set(struct ofproto *ofproto_ OVS_UNUSED, void *aux,
+ const struct aa_mapping_settings *s)
+{
+ return aa_mapping_register(aux, s);
+}
+
+/* Thin wrapper: passes the caller's handle 'aux' to aa_mapping_unregister()
+ * and returns its result.  'ofproto' is not used. */
+static int
+aa_mapping_unset(struct ofproto *ofproto OVS_UNUSED, void *aux)
+{
+ return aa_mapping_unregister(aux);
+}
+
+/* Thin wrapper: passes 'list' to aa_get_vlan_queued() and returns its result.
+ * 'ofproto' is not used. */
+static int
+aa_vlan_get_queued(struct ofproto *ofproto OVS_UNUSED, struct ovs_list *list)
+{
+ return aa_get_vlan_queued(list);
+}
+
+/* Thin wrapper: returns the value of aa_get_vlan_queue_size().  'ofproto' is
+ * not used. */
+static unsigned int
+aa_vlan_get_queue_size(struct ofproto *ofproto OVS_UNUSED)
+{
+ return aa_get_vlan_queue_size();
}
+
\f
/* Spanning Tree. */
+/* RSTP callback that transmits the BPDU 'pkt' on the port passed as
+ * 'ofport_' (a struct ofport_dpif *) of the bridge passed as 'ofproto_' (a
+ * struct ofproto_dpif *).
+ *
+ * The port's Ethernet address is first written into the frame's source
+ * address field.  If that address is all-zeros the BPDU is not sent and a
+ * rate-limited warning is logged instead.  'pkt' is always deleted before
+ * returning.
+ *
+ * Called while rstp_mutex is held. */
+static void
+rstp_send_bpdu_cb(struct dp_packet *pkt, void *ofport_, void *ofproto_)
+{
+    struct ofproto_dpif *ofproto = ofproto_;
+    struct ofport_dpif *ofport = ofport_;
+    struct eth_header *eth = dp_packet_l2(pkt);
+
+    netdev_get_etheraddr(ofport->up.netdev, &eth->eth_src);
+    if (eth_addr_is_zero(eth->eth_src)) {
+        VLOG_WARN_RL(&rl, "%s port %d: cannot send RSTP BPDU on a port which "
+                     "does not have a configured source MAC address.",
+                     ofproto->up.name, ofp_to_u16(ofport->up.ofp_port));
+    } else {
+        ofproto_dpif_send_packet(ofport, pkt);
+    }
+    dp_packet_delete(pkt);
+}
+
static void
-send_bpdu_cb(struct ofpbuf *pkt, int port_num, void *ofproto_)
+send_bpdu_cb(struct dp_packet *pkt, int port_num, void *ofproto_)
{
struct ofproto_dpif *ofproto = ofproto_;
struct stp_port *sp = stp_get_port(ofproto->stp, port_num);
VLOG_WARN_RL(&rl, "%s: cannot send BPDU on unknown port %d",
ofproto->up.name, port_num);
} else {
- struct eth_header *eth = pkt->l2;
+ struct eth_header *eth = dp_packet_l2(pkt);
- netdev_get_etheraddr(ofport->up.netdev, eth->eth_src);
+ netdev_get_etheraddr(ofport->up.netdev, &eth->eth_src);
if (eth_addr_is_zero(eth->eth_src)) {
VLOG_WARN_RL(&rl, "%s: cannot send BPDU on port %d "
"with unknown MAC", ofproto->up.name, port_num);
ofproto_dpif_send_packet(ofport, pkt);
}
}
- ofpbuf_delete(pkt);
+ dp_packet_delete(pkt);
+}
+
+/* Configures RSTP on 'ofproto_' using the settings defined in 's'.
+ *
+ * A nonnull 's' creates the bridge's RSTP instance if needed (with
+ * rstp_send_bpdu_cb() as its BPDU transmit callback) and then applies every
+ * bridge-level setting from 's'.  A null 's' disables RSTP: it is first
+ * disabled on each port of the bridge and then the bridge's RSTP instance is
+ * released. */
+static void
+set_rstp(struct ofproto *ofproto_, const struct ofproto_rstp_settings *s)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+
+ /* Only revalidate flows if the configuration changed. */
+ if (!s != !ofproto->rstp) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+
+ if (s) {
+ if (!ofproto->rstp) {
+ ofproto->rstp = rstp_create(ofproto_->name, s->address,
+ rstp_send_bpdu_cb, ofproto);
+ /* Start the one-second timer tick (see rstp_run()) from now. */
+ ofproto->rstp_last_tick = time_msec();
+ }
+ rstp_set_bridge_address(ofproto->rstp, s->address);
+ rstp_set_bridge_priority(ofproto->rstp, s->priority);
+ rstp_set_bridge_ageing_time(ofproto->rstp, s->ageing_time);
+ rstp_set_bridge_force_protocol_version(ofproto->rstp,
+ s->force_protocol_version);
+ rstp_set_bridge_max_age(ofproto->rstp, s->bridge_max_age);
+ rstp_set_bridge_forward_delay(ofproto->rstp, s->bridge_forward_delay);
+ rstp_set_bridge_transmit_hold_count(ofproto->rstp,
+ s->transmit_hold_count);
+ } else {
+ struct ofport *ofport;
+ /* Detach every port from RSTP before dropping the bridge instance. */
+ HMAP_FOR_EACH (ofport, hmap_node, &ofproto->up.ports) {
+ set_rstp_port(ofport, NULL);
+ }
+ rstp_unref(ofproto->rstp);
+ ofproto->rstp = NULL;
+ }
+}
+
+/* Reports the bridge-level RSTP status of 'ofproto_' in '*s'.
+ *
+ * If RSTP is configured, sets 's->enabled' to true and fills in the root,
+ * bridge and designated identifiers, the root path cost, and the designated
+ * and bridge port ids from the RSTP instance.  Otherwise only 's->enabled' is
+ * written (as false); the remaining members are left untouched. */
+static void
+get_rstp_status(struct ofproto *ofproto_, struct ofproto_rstp_status *s)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+
+ if (ofproto->rstp) {
+ s->enabled = true;
+ s->root_id = rstp_get_root_id(ofproto->rstp);
+ s->bridge_id = rstp_get_bridge_id(ofproto->rstp);
+ s->designated_id = rstp_get_designated_id(ofproto->rstp);
+ s->root_path_cost = rstp_get_root_path_cost(ofproto->rstp);
+ s->designated_port_id = rstp_get_designated_port_id(ofproto->rstp);
+ s->bridge_port_id = rstp_get_bridge_port_id(ofproto->rstp);
+ } else {
+ s->enabled = false;
+ }
+}
+
+/* Brings the RSTP state cached in 'ofport' up to date with the state reported
+ * by its RSTP port (RSTP_DISABLED if the port has no RSTP port).
+ *
+ * Does nothing if the state is unchanged.  On a change it logs the
+ * transition, possibly flushes the MACs learned on the port's bundle, requests
+ * flow revalidation with REV_RSTP, stores the new state, updates the bundle if
+ * the ability to forward changed, and rewrites the STP state bits in the
+ * port's OpenFlow port description. */
+static void
+update_rstp_port_state(struct ofport_dpif *ofport)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ enum rstp_state state;
+
+ /* Figure out new state. */
+ state = ofport->rstp_port ? rstp_port_get_state(ofport->rstp_port)
+ : RSTP_DISABLED;
+
+ /* Update state. */
+ if (ofport->rstp_state != state) {
+ enum ofputil_port_state of_state;
+ bool fwd_change;
+
+ VLOG_DBG("port %s: RSTP state changed from %s to %s",
+ netdev_get_name(ofport->up.netdev),
+ rstp_state_name(ofport->rstp_state),
+ rstp_state_name(state));
+
+ if (rstp_learn_in_state(ofport->rstp_state)
+ != rstp_learn_in_state(state)) {
+ /* XXX: Learning action flows should also be flushed. */
+ if (ofport->bundle) {
+ /* Skip the flush only when rstp_shift_root_learned_address()
+ * is true and this port is the old root's aux; rstp_run()
+ * calls bundle_move() for that case. */
+ if (!rstp_shift_root_learned_address(ofproto->rstp)
+ || rstp_get_old_root_aux(ofproto->rstp) != ofport) {
+ bundle_flush_macs(ofport->bundle, false);
+ }
+ }
+ }
+ /* Must be computed before 'ofport->rstp_state' is overwritten below. */
+ fwd_change = rstp_forward_in_state(ofport->rstp_state)
+ != rstp_forward_in_state(state);
+
+ ofproto->backer->need_revalidate = REV_RSTP;
+ ofport->rstp_state = state;
+
+ if (fwd_change && ofport->bundle) {
+ bundle_update(ofport->bundle);
+ }
+
+ /* Update the RSTP state bits in the OpenFlow port description. */
+ of_state = ofport->up.pp.state & ~OFPUTIL_PS_STP_MASK;
+ of_state |= (state == RSTP_LEARNING ? OFPUTIL_PS_STP_LEARN
+ : state == RSTP_FORWARDING ? OFPUTIL_PS_STP_FORWARD
+ : state == RSTP_DISCARDING ? OFPUTIL_PS_STP_LISTEN
+ : 0);
+ ofproto_port_set_state(&ofport->up, of_state);
+ }
+}
+
+/* Periodic RSTP processing for 'ofproto'; a no-op when RSTP is not
+ * configured.
+ *
+ * Ticks the RSTP timers once at least a second has passed since the last
+ * tick, refreshes the cached state of every port that RSTP reports as
+ * changed, flushes learned MACs for ports whose FDB flush flag was set, and,
+ * when rstp_shift_root_learned_address() is true, moves learned MACs from the
+ * old root port's bundle to the new root port's bundle. */
+static void
+rstp_run(struct ofproto_dpif *ofproto)
+{
+ if (ofproto->rstp) {
+ long long int now = time_msec();
+ long long int elapsed = now - ofproto->rstp_last_tick;
+ struct rstp_port *rp;
+ struct ofport_dpif *ofport;
+
+ /* Every second, decrease the values of the timers. */
+ if (elapsed >= 1000) {
+ rstp_tick_timers(ofproto->rstp);
+ ofproto->rstp_last_tick = now;
+ }
+ /* 'rp' is the iteration cursor; NULL starts from the beginning. */
+ rp = NULL;
+ while ((ofport = rstp_get_next_changed_port_aux(ofproto->rstp, &rp))) {
+ update_rstp_port_state(ofport);
+ }
+ rp = NULL;
+ ofport = NULL;
+ /* FIXME: This check should be done on-event (i.e., when setting
+ * p->fdb_flush) and not periodically.
+ */
+ /* NOTE(review): 'ofport->bundle' is dereferenced by the callee without
+ * a NULL check here, whereas update_rstp_port_state() tests it first --
+ * confirm every RSTP port belongs to a bundle. */
+ while ((ofport = rstp_check_and_reset_fdb_flush(ofproto->rstp, &rp))) {
+ if (!rstp_shift_root_learned_address(ofproto->rstp)
+ || rstp_get_old_root_aux(ofproto->rstp) != ofport) {
+ bundle_flush_macs(ofport->bundle, false);
+ }
+ }
+
+ if (rstp_shift_root_learned_address(ofproto->rstp)) {
+ struct ofport_dpif *old_root_aux =
+ (struct ofport_dpif *)rstp_get_old_root_aux(ofproto->rstp);
+ struct ofport_dpif *new_root_aux =
+ (struct ofport_dpif *)rstp_get_new_root_aux(ofproto->rstp);
+ /* Only move addresses (and reset the root-changed indication) once
+ * both the old and the new root port are known. */
+ if (old_root_aux != NULL && new_root_aux != NULL) {
+ bundle_move(old_root_aux->bundle, new_root_aux->bundle);
+ rstp_reset_root_changed(ofproto->rstp);
+ }
+ }
+ }
}
/* Configures STP on 'ofproto_' using the settings defined in 's'. */
enum ofputil_port_state of_state;
bool fwd_change;
- VLOG_DBG_RL(&rl, "port %s: STP state changed from %s to %s",
- netdev_get_name(ofport->up.netdev),
- stp_state_name(ofport->stp_state),
- stp_state_name(state));
+ VLOG_DBG("port %s: STP state changed from %s to %s",
+ netdev_get_name(ofport->up.netdev),
+ stp_state_name(ofport->stp_state),
+ stp_state_name(state));
if (stp_learn_in_state(ofport->stp_state)
!= stp_learn_in_state(state)) {
/* xxx Learning action flows should also be flushed. */
ovs_rwlock_wrlock(&ofproto->ml->rwlock);
mac_learning_flush(ofproto->ml);
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ mcast_snooping_mdb_flush(ofproto->ms);
}
fwd_change = stp_forward_in_state(ofport->stp_state)
!= stp_forward_in_state(state);
}
return 0;
} else if (sp && stp_port_no(sp) != s->port_num
- && ofport == stp_port_get_aux(sp)) {
+ && ofport == stp_port_get_aux(sp)) {
/* The port-id changed, so disable the old one if it's not
* already in use by another port. */
stp_port_disable(sp);
}
sp = ofport->stp_port = stp_get_port(ofproto->stp, s->port_num);
+
+ /* Set name before enabling the port so that debugging messages can print
+ * the name. */
+ stp_port_set_name(sp, netdev_get_name(ofport->up.netdev));
stp_port_enable(sp);
stp_port_set_aux(sp, ofport);
ovs_rwlock_wrlock(&ofproto->ml->rwlock);
mac_learning_flush(ofproto->ml);
ovs_rwlock_unlock(&ofproto->ml->rwlock);
+ mcast_snooping_mdb_flush(ofproto->ms);
}
}
}
poll_timer_wait(1000);
}
}
+
+/* Configures RSTP on 'ofport_' using the settings defined in 's'. The
+ * caller is responsible for assigning RSTP port numbers and ensuring
+ * there are no duplicates.
+ *
+ * If 's' is null or 's->enable' is false, RSTP is disabled on the port: an
+ * existing RSTP port is detached, set to RSTP_DISABLED, marked
+ * non-operational and released, and the cached port state is refreshed.
+ * Otherwise an RSTP port is added to the bridge's RSTP instance if the port
+ * does not have one yet, and the settings from 's' are applied to it. */
+static void
+set_rstp_port(struct ofport *ofport_,
+ const struct ofproto_port_rstp_settings *s)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ struct rstp_port *rp = ofport->rstp_port;
+
+ if (!s || !s->enable) {
+ if (rp) {
+ rstp_port_set_aux(rp, NULL);
+ rstp_port_set_state(rp, RSTP_DISABLED);
+ rstp_port_set_mac_operational(rp, false);
+ /* Clear the port's pointer first so that the state refresh below
+ * sees no RSTP port and records RSTP_DISABLED. */
+ ofport->rstp_port = NULL;
+ rstp_port_unref(rp);
+ update_rstp_port_state(ofport);
+ }
+ return;
+ }
+
+ /* Check if need to add a new port. */
+ if (!rp) {
+ rp = ofport->rstp_port = rstp_add_port(ofproto->rstp);
+ }
+
+ /* The last argument registers 'ofport' as the RSTP port's aux pointer. */
+ rstp_port_set(rp, s->port_num, s->priority, s->path_cost,
+ s->admin_edge_port, s->auto_edge,
+ s->admin_p2p_mac_state, s->admin_port_state, s->mcheck,
+ ofport);
+ update_rstp_port_state(ofport);
+ /* Synchronize operational status. */
+ rstp_port_set_mac_operational(rp, ofport->may_enable);
+}
+
+/* Reports the per-port RSTP status of 'ofport_' in '*s'.
+ *
+ * If the bridge has no RSTP instance or the port has no RSTP port, only
+ * 's->enabled' is written (as false).  Otherwise 's->enabled' is set to true
+ * and the remaining members are filled in by rstp_port_get_status(). */
+static void
+get_rstp_port_status(struct ofport *ofport_,
+ struct ofproto_port_rstp_status *s)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+ struct rstp_port *rp = ofport->rstp_port;
+
+ if (!ofproto->rstp || !rp) {
+ s->enabled = false;
+ return;
+ }
+
+ s->enabled = true;
+ rstp_port_get_status(rp, &s->port_id, &s->state, &s->role,
+ &s->designated_bridge_id, &s->designated_port_id,
+ &s->designated_path_cost, &s->tx_count,
+ &s->rx_count, &s->error_count, &s->uptime);
+}
+
\f
static int
set_queues(struct ofport *ofport_, const struct ofproto_port_queue *qdscp,
ofproto->backer->need_revalidate = REV_RECONFIGURE;
ovs_rwlock_wrlock(&ml->rwlock);
LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
- if (mac->port.p == bundle) {
+ if (mac_entry_get_port(ml, mac) == bundle) {
if (all_ofprotos) {
struct ofproto_dpif *o;
ovs_rwlock_unlock(&ml->rwlock);
}
+/* Reassigns every MAC learning entry whose port is bundle 'old' to bundle
+ * 'new'.  Both bundles must belong to the same ofproto (asserted).
+ *
+ * Requests flow revalidation with REV_RECONFIGURE and holds the MAC learning
+ * table's write lock while the entries are rewritten. */
+static void
+bundle_move(struct ofbundle *old, struct ofbundle *new)
+{
+ struct ofproto_dpif *ofproto = old->ofproto;
+ struct mac_learning *ml = ofproto->ml;
+ struct mac_entry *mac, *next_mac;
+
+ ovs_assert(new->ofproto == old->ofproto);
+
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ ovs_rwlock_wrlock(&ml->rwlock);
+ LIST_FOR_EACH_SAFE (mac, next_mac, lru_node, &ml->lrus) {
+ if (mac_entry_get_port(ml, mac) == old) {
+ mac_entry_set_port(ml, mac, new);
+ }
+ }
+ ovs_rwlock_unlock(&ml->rwlock);
+}
+
static struct ofbundle *
bundle_lookup(const struct ofproto_dpif *ofproto, void *aux)
{
LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
|| port->is_layer3
- || !stp_forward_in_state(port->stp_state)) {
+ || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
+ || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
bundle->floodable = false;
break;
}
{
struct ofport_dpif *port;
- port = get_ofp_port(bundle->ofproto, ofp_port);
+ port = ofp_port_to_ofport(bundle->ofproto, ofp_port);
if (!port) {
return false;
}
list_push_back(&bundle->ports, &port->bundle_node);
if (port->up.pp.config & OFPUTIL_PC_NO_FLOOD
|| port->is_layer3
- || !stp_forward_in_state(port->stp_state)) {
+ || (bundle->ofproto->stp && !stp_forward_in_state(port->stp_state))
+ || (bundle->ofproto->rstp && !rstp_forward_in_state(port->rstp_state))) {
bundle->floodable = false;
}
}
}
ofproto = bundle->ofproto;
- mbridge_unregister_bundle(ofproto->mbridge, bundle->aux);
+ mbridge_unregister_bundle(ofproto->mbridge, bundle);
- ovs_rwlock_wrlock(&xlate_rwlock);
+ xlate_txn_start();
xlate_bundle_remove(bundle);
- ovs_rwlock_unlock(&xlate_rwlock);
+ xlate_txn_commit();
LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
bundle_del_port(port);
ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
} else {
- bundle->bond = bond_create(s->bond);
+ bundle->bond = bond_create(s->bond, ofproto);
ofproto->backer->need_revalidate = REV_RECONFIGURE;
}
LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
- bond_slave_register(bundle->bond, port, port->up.netdev);
+ bond_slave_register(bundle->bond, port,
+ port->up.ofp_port, port->up.netdev);
}
} else {
bond_unref(bundle->bond);
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
struct ofport_dpif *port = port_;
- uint8_t ea[ETH_ADDR_LEN];
+ struct eth_addr ea;
int error;
- error = netdev_get_etheraddr(port->up.netdev, ea);
+ error = netdev_get_etheraddr(port->up.netdev, &ea);
if (!error) {
- struct ofpbuf packet;
+ struct dp_packet packet;
void *packet_pdu;
- ofpbuf_init(&packet, 0);
+ dp_packet_init(&packet, 0);
packet_pdu = eth_compose(&packet, eth_addr_lacp, ea, ETH_TYPE_LACP,
pdu_size);
memcpy(packet_pdu, pdu, pdu_size);
ofproto_dpif_send_packet(port, &packet);
- ofpbuf_uninit(&packet);
+ dp_packet_uninit(&packet);
} else {
VLOG_ERR_RL(&rl, "port %s: cannot obtain Ethernet address of iface "
"%s (%s)", port->bundle->name,
bundle_send_learning_packets(struct ofbundle *bundle)
{
struct ofproto_dpif *ofproto = bundle->ofproto;
- struct ofpbuf *learning_packet;
int error, n_packets, n_errors;
struct mac_entry *e;
- struct list packets;
+ struct pkt_list {
+ struct ovs_list list_node;
+ struct ofport_dpif *port;
+ struct dp_packet *pkt;
+ } *pkt_node;
+ struct ovs_list packets;
list_init(&packets);
ovs_rwlock_rdlock(&ofproto->ml->rwlock);
LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
- if (e->port.p != bundle) {
- void *port_void;
-
- learning_packet = bond_compose_learning_packet(bundle->bond,
- e->mac, e->vlan,
- &port_void);
- learning_packet->private_p = port_void;
- list_push_back(&packets, &learning_packet->list_node);
+ if (mac_entry_get_port(ofproto->ml, e) != bundle) {
+ pkt_node = xmalloc(sizeof *pkt_node);
+ pkt_node->pkt = bond_compose_learning_packet(bundle->bond,
+ e->mac, e->vlan,
+ (void **)&pkt_node->port);
+ list_push_back(&packets, &pkt_node->list_node);
}
}
ovs_rwlock_unlock(&ofproto->ml->rwlock);
error = n_packets = n_errors = 0;
- LIST_FOR_EACH (learning_packet, list_node, &packets) {
+ LIST_FOR_EACH_POP (pkt_node, list_node, &packets) {
int ret;
- ret = ofproto_dpif_send_packet(learning_packet->private_p, learning_packet);
+ ret = ofproto_dpif_send_packet(pkt_node->port, pkt_node->pkt);
+ dp_packet_delete(pkt_node->pkt);
+ free(pkt_node);
if (ret) {
error = ret;
n_errors++;
}
n_packets++;
}
- ofpbuf_list_delete(&packets);
if (n_errors) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
mirror_get_stats__(struct ofproto *ofproto, void *aux,
uint64_t *packets, uint64_t *bytes)
{
- push_all_stats();
return mirror_get_stats(ofproto_dpif_cast(ofproto)->mbridge, aux, packets,
bytes);
}
mac_learning_set_max_entries(ofproto->ml, max_entries);
ovs_rwlock_unlock(&ofproto->ml->rwlock);
}
+
+/* Configures multicast snooping on 'ofproto_' using the settings
+ * defined in 's'.
+ *
+ * A nonnull 's' creates the multicast snooping instance if needed and applies
+ * the idle time, maximum entry count and flood-unregistered settings under
+ * the instance's write lock.  A null 's' releases the instance, disabling
+ * multicast snooping.
+ *
+ * Always returns 0. */
+static int
+set_mcast_snooping(struct ofproto *ofproto_,
+ const struct ofproto_mcast_snooping_settings *s)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+
+ /* Only revalidate flows if the configuration changed. */
+ if (!s != !ofproto->ms) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+
+ if (s) {
+ if (!ofproto->ms) {
+ ofproto->ms = mcast_snooping_create();
+ }
+
+ ovs_rwlock_wrlock(&ofproto->ms->rwlock);
+ mcast_snooping_set_idle_time(ofproto->ms, s->idle_time);
+ mcast_snooping_set_max_entries(ofproto->ms, s->max_entries);
+ /* Revalidation is requested here when the setter returns true. */
+ if (mcast_snooping_set_flood_unreg(ofproto->ms, s->flood_unreg)) {
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+ ovs_rwlock_unlock(&ofproto->ms->rwlock);
+ } else {
+ mcast_snooping_unref(ofproto->ms);
+ ofproto->ms = NULL;
+ }
+
+ return 0;
+}
+
+/* Configures the multicast snooping flood settings, taken from 's', of the
+ * bundle that bundle_lookup() finds for 'aux' on 'ofproto_'.
+ *
+ * Does nothing unless multicast snooping is enabled on the bridge and 's' is
+ * nonnull.  Always returns 0. */
+static int
+set_mcast_snooping_port(struct ofproto *ofproto_, void *aux,
+ const struct ofproto_mcast_snooping_port_settings *s)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
+ struct ofbundle *bundle = bundle_lookup(ofproto, aux);
+
+ /* NOTE(review): 'bundle' is passed on without a NULL check -- confirm
+ * that bundle_lookup() cannot fail for the 'aux' values used here. */
+ if (ofproto->ms && s) {
+ ovs_rwlock_wrlock(&ofproto->ms->rwlock);
+ mcast_snooping_set_port_flood(ofproto->ms, bundle, s->flood);
+ mcast_snooping_set_port_flood_reports(ofproto->ms, bundle,
+ s->flood_reports);
+ ovs_rwlock_unlock(&ofproto->ms->rwlock);
+ }
+ return 0;
+}
+
\f
/* Ports. */
-static struct ofport_dpif *
-get_ofp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
+struct ofport_dpif *
+ofp_port_to_ofport(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
{
struct ofport *ofport = ofproto_get_port(&ofproto->up, ofp_port);
return ofport ? ofport_dpif_cast(ofport) : NULL;
if (ofport->may_enable != enable) {
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
+
ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
+
+ if (ofport->rstp_port) {
+ rstp_port_set_mac_operational(ofport->rstp_port, enable);
+ }
}
ofport->may_enable = enable;
port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
+ struct ofport_dpif *ofport = ofp_port_to_ofport(ofproto, ofp_port);
int error = 0;
if (!ofport) {
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
int error;
- push_all_stats();
-
error = netdev_get_stats(ofport->up.netdev, stats);
if (!error && ofport_->ofp_port == OFPP_LOCAL) {
return error;
}
-struct port_dump_state {
- uint32_t bucket;
+/* Retrieves the LACP statistics of 'ofport_' into '*stats'.
+ *
+ * Returns 0 if the port belongs to a bundle that has LACP and
+ * lacp_get_slave_stats() reports success; returns -1 in every other case. */
+static int
+port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats)
+{
+ struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
+ if (ofport->bundle && ofport->bundle->lacp) {
+ if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) {
+ return 0;
+ }
+ }
+ return -1;
+}
+
+struct port_dump_state {
+ uint32_t bucket;
uint32_t offset;
bool ghost;
: -1);
}
\f
-/* Upcall handling. */
-
-struct flow_miss_op {
- struct dpif_op dpif_op;
-
- uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */
- struct xlate_out xout;
- bool xout_garbage; /* 'xout' needs to be uninitialized? */
-
- struct ofpbuf mask; /* Flow mask for "put" ops. */
- struct odputil_keybuf maskbuf;
-
- /* If this is a "put" op, then a pointer to the subfacet that should
- * be marked as uninstalled if the operation fails. */
- struct subfacet *subfacet;
-};
-
-/* Figures out whether a flow that missed in 'ofproto', whose details are in
- * 'miss' masked by 'wc', is likely to be worth tracking in detail in userspace
- * and (usually) installing a datapath flow. The answer is usually "yes" (a
- * return value of true). However, for short flows the cost of bookkeeping is
- * much higher than the benefits, so when the datapath holds a large number of
- * flows we impose some heuristics to decide which flows are likely to be worth
- * tracking. */
-static bool
-flow_miss_should_make_facet(struct flow_miss *miss)
-{
- struct dpif_backer *backer = miss->ofproto->backer;
- uint32_t hash;
-
- switch (flow_miss_model) {
- case OFPROTO_HANDLE_MISS_AUTO:
- break;
- case OFPROTO_HANDLE_MISS_WITH_FACETS:
- return true;
- case OFPROTO_HANDLE_MISS_WITHOUT_FACETS:
- return false;
- }
-
- if (!backer->governor) {
- size_t n_subfacets;
-
- n_subfacets = hmap_count(&backer->subfacets);
- if (n_subfacets * 2 <= flow_eviction_threshold) {
- return true;
- }
-
- backer->governor = governor_create();
- }
-
- hash = flow_hash_in_wildcards(&miss->flow, &miss->xout.wc, 0);
- return governor_should_install_flow(backer->governor, hash,
- miss->stats.n_packets);
-}
-
-/* Handles 'miss', which matches 'facet'. May add any required datapath
- * operations to 'ops', incrementing '*n_ops' for each new op.
- *
- * All of the packets in 'miss' are considered to have arrived at time
- * 'miss->stats.used'. This is really important only for new facets: if we
- * just called time_msec() here, then the new subfacet or its packets could
- * look (occasionally) as though it was used some time after the facet was
- * used. That can make a one-packet flow look like it has a nonzero duration,
- * which looks odd in e.g. NetFlow statistics. */
-static void
-handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet,
- struct flow_miss_op *ops, size_t *n_ops)
-{
- enum subfacet_path want_path;
- struct subfacet *subfacet;
- uint32_t key_hash;
-
- /* Update facet stats. */
- facet->packet_count += miss->stats.n_packets;
- facet->prev_packet_count += miss->stats.n_packets;
- facet->byte_count += miss->stats.n_bytes;
- facet->prev_byte_count += miss->stats.n_bytes;
-
- /* Look for an existing subfacet. If we find one, update its used time. */
- key_hash = odp_flow_key_hash(miss->key, miss->key_len);
- if (!list_is_empty(&facet->subfacets)) {
- subfacet = subfacet_find(miss->ofproto->backer,
- miss->key, miss->key_len, key_hash);
- if (subfacet) {
- if (subfacet->facet == facet) {
- subfacet->used = MAX(subfacet->used, miss->stats.used);
- } else {
- /* This shouldn't happen. */
- VLOG_ERR_RL(&rl, "subfacet with wrong facet");
- subfacet_destroy(subfacet);
- subfacet = NULL;
- }
- }
- } else {
- subfacet = NULL;
- }
-
- /* Don't install the flow if it's the result of the "userspace"
- * action for an already installed facet. This can occur when a
- * datapath flow with wildcards has a "userspace" action and flows
- * sent to userspace result in a different subfacet, which will then
- * be rejected as overlapping by the datapath. */
- if (miss->upcall_type == DPIF_UC_ACTION
- && !list_is_empty(&facet->subfacets)) {
- return;
- }
-
- /* Create a subfacet, if we don't already have one. */
- if (!subfacet) {
- subfacet = subfacet_create(facet, miss, key_hash);
- }
-
- /* Install the subfacet, if it's not already installed. */
- want_path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
- if (subfacet->path != want_path) {
- struct flow_miss_op *op = &ops[(*n_ops)++];
- struct dpif_flow_put *put = &op->dpif_op.u.flow_put;
-
- subfacet->path = want_path;
-
- ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf);
- if (enable_megaflows) {
- odp_flow_key_from_mask(&op->mask, &facet->xout.wc.masks,
- &miss->flow, UINT32_MAX);
- }
-
- op->xout_garbage = false;
- op->dpif_op.type = DPIF_OP_FLOW_PUT;
- op->subfacet = subfacet;
- put->flags = DPIF_FP_CREATE;
- put->key = miss->key;
- put->key_len = miss->key_len;
- put->mask = op->mask.data;
- put->mask_len = op->mask.size;
-
- if (want_path == SF_FAST_PATH) {
- put->actions = facet->xout.odp_actions.data;
- put->actions_len = facet->xout.odp_actions.size;
- } else {
- compose_slow_path(facet->ofproto, &miss->flow, facet->xout.slow,
- op->slow_stub, sizeof op->slow_stub,
- &put->actions, &put->actions_len);
- }
- put->stats = NULL;
- }
-}
-
-/* Handles flow miss 'miss'. May add any required datapath operations
- * to 'ops', incrementing '*n_ops' for each new op. */
-static void
-handle_flow_miss(struct flow_miss *miss, struct flow_miss_op *ops,
- size_t *n_ops)
-{
- struct facet *facet;
-
- miss->ofproto->n_missed += miss->stats.n_packets;
-
- facet = facet_lookup_valid(miss->ofproto, &miss->flow);
- if (!facet) {
- /* There does not exist a bijection between 'struct flow' and datapath
- * flow keys with fitness ODP_FIT_TO_LITTLE. This breaks a fundamental
- * assumption used throughout the facet and subfacet handling code.
- * Since we have to handle these misses in userspace anyway, we simply
- * skip facet creation, avoiding the problem altogether. */
- if (miss->key_fitness == ODP_FIT_TOO_LITTLE
- || !flow_miss_should_make_facet(miss)) {
- return;
- }
-
- facet = facet_create(miss);
- }
- handle_flow_miss_with_facet(miss, facet, ops, n_ops);
-}
-
-static struct drop_key *
-drop_key_lookup(const struct dpif_backer *backer, const struct nlattr *key,
- size_t key_len)
-{
- struct drop_key *drop_key;
-
- HMAP_FOR_EACH_WITH_HASH (drop_key, hmap_node, hash_bytes(key, key_len, 0),
- &backer->drop_keys) {
- if (drop_key->key_len == key_len
- && !memcmp(drop_key->key, key, key_len)) {
- return drop_key;
- }
- }
- return NULL;
-}
-
-static void
-drop_key_clear(struct dpif_backer *backer)
-{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
- struct drop_key *drop_key, *next;
-
- HMAP_FOR_EACH_SAFE (drop_key, next, hmap_node, &backer->drop_keys) {
- int error;
-
- error = dpif_flow_del(backer->dpif, drop_key->key, drop_key->key_len,
- NULL);
- if (error && !VLOG_DROP_WARN(&rl)) {
- struct ds ds = DS_EMPTY_INITIALIZER;
- odp_flow_key_format(drop_key->key, drop_key->key_len, &ds);
- VLOG_WARN("Failed to delete drop key (%s) (%s)",
- ovs_strerror(error), ds_cstr(&ds));
- ds_destroy(&ds);
- }
-
- hmap_remove(&backer->drop_keys, &drop_key->hmap_node);
- drop_key_destroy(drop_key);
- }
-
- udpif_drop_key_clear(backer->udpif);
-}
-
-static void
-handle_flow_misses(struct dpif_backer *backer, struct flow_miss_batch *fmb)
-{
- struct flow_miss_op flow_miss_ops[FLOW_MISS_MAX_BATCH];
- struct dpif_op *dpif_ops[FLOW_MISS_MAX_BATCH];
- struct flow_miss *miss;
- size_t n_ops, i;
-
- /* Process each element in the to-do list, constructing the set of
- * operations to batch. */
- n_ops = 0;
- HMAP_FOR_EACH (miss, hmap_node, &fmb->misses) {
- handle_flow_miss(miss, flow_miss_ops, &n_ops);
- }
- ovs_assert(n_ops <= ARRAY_SIZE(flow_miss_ops));
-
- /* Execute batch. */
- for (i = 0; i < n_ops; i++) {
- dpif_ops[i] = &flow_miss_ops[i].dpif_op;
- }
- dpif_operate(backer->dpif, dpif_ops, n_ops);
-
- for (i = 0; i < n_ops; i++) {
- if (dpif_ops[i]->error != 0
- && flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT
- && flow_miss_ops[i].subfacet) {
- struct subfacet *subfacet = flow_miss_ops[i].subfacet;
-
- COVERAGE_INC(subfacet_install_fail);
-
- /* Zero-out subfacet counters when installation failed, but
- * datapath reported hits. This should not happen and
- * indicates a bug, since if the datapath flow exists, we
- * should not be attempting to create a new subfacet. A
- * buggy datapath could trigger this, so just zero out the
- * counters and log an error. */
- if (subfacet->dp_packet_count || subfacet->dp_byte_count) {
- VLOG_ERR_RL(&rl, "failed to install subfacet for which "
- "datapath reported hits");
- subfacet->dp_packet_count = subfacet->dp_byte_count = 0;
- }
-
- subfacet->path = SF_NOT_INSTALLED;
- }
- }
-}
-
-static void
-handle_upcalls(struct dpif_backer *backer)
-{
- struct flow_miss_batch *fmb;
- int n_processed;
-
- for (n_processed = 0; n_processed < FLOW_MISS_MAX_BATCH; n_processed++) {
- struct drop_key *drop_key = drop_key_next(backer->udpif);
- if (!drop_key) {
- break;
- }
-
- if (!drop_key_lookup(backer, drop_key->key, drop_key->key_len)) {
- hmap_insert(&backer->drop_keys, &drop_key->hmap_node,
- hash_bytes(drop_key->key, drop_key->key_len, 0));
- dpif_flow_put(backer->dpif, DPIF_FP_CREATE | DPIF_FP_MODIFY,
- drop_key->key, drop_key->key_len,
- NULL, 0, NULL, 0, NULL);
- } else {
- drop_key_destroy(drop_key);
- }
- }
-
- fmb = flow_miss_batch_next(backer->udpif);
- if (fmb) {
- handle_flow_misses(backer, fmb);
- flow_miss_batch_destroy(fmb);
- }
-}
-\f
-/* Flow expiration. */
-
-static int subfacet_max_idle(const struct dpif_backer *);
-static void update_stats(struct dpif_backer *);
-static void rule_expire(struct rule_dpif *) OVS_REQUIRES(ofproto_mutex);
-static void expire_subfacets(struct dpif_backer *, int dp_max_idle);
-
-/* This function is called periodically by run(). Its job is to collect
- * updates for the flows that have been installed into the datapath, most
- * importantly when they last were used, and then use that information to
- * expire flows that have not been used recently.
- *
- * Returns the number of milliseconds after which it should be called again. */
-static int
-expire(struct dpif_backer *backer)
-{
- struct ofproto_dpif *ofproto;
- size_t n_subfacets;
- int max_idle;
-
- /* Periodically clear out the drop keys in an effort to keep them
- * relatively few. */
- drop_key_clear(backer);
-
- /* Update stats for each flow in the backer. */
- update_stats(backer);
-
- n_subfacets = hmap_count(&backer->subfacets);
- backer->avg_n_subfacet += n_subfacets;
- backer->avg_n_subfacet /= 2;
-
- backer->max_n_subfacet = MAX(backer->max_n_subfacet, n_subfacets);
-
- max_idle = subfacet_max_idle(backer);
- expire_subfacets(backer, max_idle);
-
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- struct rule *rule, *next_rule;
-
- if (ofproto->backer != backer) {
- continue;
- }
-
- /* Expire OpenFlow flows whose idle_timeout or hard_timeout
- * has passed. */
- ovs_mutex_lock(&ofproto_mutex);
- LIST_FOR_EACH_SAFE (rule, next_rule, expirable,
- &ofproto->up.expirable) {
- rule_expire(rule_dpif_cast(rule));
- }
- ovs_mutex_unlock(&ofproto_mutex);
-
- /* All outstanding data in existing flows has been accounted, so it's a
- * good time to do bond rebalancing. */
- if (ofproto->has_bonded_bundles) {
- struct ofbundle *bundle;
-
- HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) {
- if (bundle->bond) {
- bond_rebalance(bundle->bond);
- }
- }
- }
- }
-
- return MIN(max_idle, 1000);
-}
-
-/* Updates flow table statistics given that the datapath just reported 'stats'
- * as 'subfacet''s statistics. */
-static void
-update_subfacet_stats(struct subfacet *subfacet,
- const struct dpif_flow_stats *stats)
-{
- struct facet *facet = subfacet->facet;
- struct dpif_flow_stats diff;
-
- diff.tcp_flags = stats->tcp_flags;
- diff.used = stats->used;
-
- if (stats->n_packets >= subfacet->dp_packet_count) {
- diff.n_packets = stats->n_packets - subfacet->dp_packet_count;
- } else {
- VLOG_WARN_RL(&rl, "unexpected packet count from the datapath");
- diff.n_packets = 0;
- }
-
- if (stats->n_bytes >= subfacet->dp_byte_count) {
- diff.n_bytes = stats->n_bytes - subfacet->dp_byte_count;
- } else {
- VLOG_WARN_RL(&rl, "unexpected byte count from datapath");
- diff.n_bytes = 0;
- }
-
- facet->ofproto->n_hit += diff.n_packets;
- subfacet->dp_packet_count = stats->n_packets;
- subfacet->dp_byte_count = stats->n_bytes;
- subfacet_update_stats(subfacet, &diff);
-
- if (diff.n_packets) {
- facet_learn(facet);
- }
-}
-
-/* 'key' with length 'key_len' bytes is a flow in 'dpif' that we know nothing
- * about, or a flow that shouldn't be installed but was anyway. Delete it. */
-static void
-delete_unexpected_flow(struct dpif_backer *backer,
- const struct nlattr *key, size_t key_len)
-{
- if (!VLOG_DROP_WARN(&rl)) {
- struct ds s;
-
- ds_init(&s);
- odp_flow_key_format(key, key_len, &s);
- VLOG_WARN("unexpected flow: %s", ds_cstr(&s));
- ds_destroy(&s);
- }
-
- COVERAGE_INC(facet_unexpected);
- dpif_flow_del(backer->dpif, key, key_len, NULL);
-}
-
-/* Update 'packet_count', 'byte_count', and 'used' members of installed facets.
- *
- * This function also pushes statistics updates to rules which each facet
- * resubmits into. Generally these statistics will be accurate. However, if a
- * facet changes the rule it resubmits into at some time in between
- * update_stats() runs, it is possible that statistics accrued to the
- * old rule will be incorrectly attributed to the new rule. This could be
- * avoided by calling update_stats() whenever rules are created or
- * deleted. However, the performance impact of making so many calls to the
- * datapath do not justify the benefit of having perfectly accurate statistics.
- *
- * In addition, this function maintains per ofproto flow hit counts. The patch
- * port is not treated specially. e.g. A packet ingress from br0 patched into
- * br1 will increase the hit count of br0 by 1, however, does not affect
- * the hit or miss counts of br1.
- */
-static void
-update_stats(struct dpif_backer *backer)
-{
- const struct dpif_flow_stats *stats;
- struct dpif_flow_dump dump;
- const struct nlattr *key, *mask;
- size_t key_len, mask_len;
-
- dpif_flow_dump_start(&dump, backer->dpif);
- while (dpif_flow_dump_next(&dump, &key, &key_len,
- &mask, &mask_len, NULL, NULL, &stats)) {
- struct subfacet *subfacet;
- uint32_t key_hash;
-
- key_hash = odp_flow_key_hash(key, key_len);
- subfacet = subfacet_find(backer, key, key_len, key_hash);
- switch (subfacet ? subfacet->path : SF_NOT_INSTALLED) {
- case SF_FAST_PATH:
- update_subfacet_stats(subfacet, stats);
- break;
-
- case SF_SLOW_PATH:
- /* Stats are updated per-packet. */
- break;
-
- case SF_NOT_INSTALLED:
- default:
- delete_unexpected_flow(backer, key, key_len);
- break;
- }
- }
- dpif_flow_dump_done(&dump);
-}
-
-/* Calculates and returns the number of milliseconds of idle time after which
- * subfacets should expire from the datapath. When a subfacet expires, we fold
- * its statistics into its facet, and when a facet's last subfacet expires, we
- * fold its statistic into its rule. */
-static int
-subfacet_max_idle(const struct dpif_backer *backer)
-{
- /*
- * Idle time histogram.
- *
- * Most of the time a switch has a relatively small number of subfacets.
- * When this is the case we might as well keep statistics for all of them
- * in userspace and to cache them in the kernel datapath for performance as
- * well.
- *
- * As the number of subfacets increases, the memory required to maintain
- * statistics about them in userspace and in the kernel becomes
- * significant. However, with a large number of subfacets it is likely
- * that only a few of them are "heavy hitters" that consume a large amount
- * of bandwidth. At this point, only heavy hitters are worth caching in
- * the kernel and maintaining in userspaces; other subfacets we can
- * discard.
- *
- * The technique used to compute the idle time is to build a histogram with
- * N_BUCKETS buckets whose width is BUCKET_WIDTH msecs each. Each subfacet
- * that is installed in the kernel gets dropped in the appropriate bucket.
- * After the histogram has been built, we compute the cutoff so that only
- * the most-recently-used 1% of subfacets (but at least
- * flow_eviction_threshold flows) are kept cached. At least
- * the most-recently-used bucket of subfacets is kept, so actually an
- * arbitrary number of subfacets can be kept in any given expiration run
- * (though the next run will delete most of those unless they receive
- * additional data).
- *
- * This requires a second pass through the subfacets, in addition to the
- * pass made by update_stats(), because the former function never looks at
- * uninstallable subfacets.
- */
- enum { BUCKET_WIDTH = 100 };
- enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
- int buckets[N_BUCKETS] = { 0 };
- int total, subtotal, bucket;
- struct subfacet *subfacet;
- long long int now;
- int i;
-
- total = hmap_count(&backer->subfacets);
- if (total <= flow_eviction_threshold) {
- return N_BUCKETS * BUCKET_WIDTH;
- }
-
- /* Build histogram. */
- now = time_msec();
- HMAP_FOR_EACH (subfacet, hmap_node, &backer->subfacets) {
- long long int idle = now - subfacet->used;
- int bucket = (idle <= 0 ? 0
- : idle >= BUCKET_WIDTH * N_BUCKETS ? N_BUCKETS - 1
- : (unsigned int) idle / BUCKET_WIDTH);
- buckets[bucket]++;
- }
-
- /* Find the first bucket whose flows should be expired. */
- subtotal = bucket = 0;
- do {
- subtotal += buckets[bucket++];
- } while (bucket < N_BUCKETS &&
- subtotal < MAX(flow_eviction_threshold, total / 100));
-
- if (VLOG_IS_DBG_ENABLED()) {
- struct ds s;
-
- ds_init(&s);
- ds_put_cstr(&s, "keep");
- for (i = 0; i < N_BUCKETS; i++) {
- if (i == bucket) {
- ds_put_cstr(&s, ", drop");
- }
- if (buckets[i]) {
- ds_put_format(&s, " %d:%d", i * BUCKET_WIDTH, buckets[i]);
- }
- }
- VLOG_INFO("%s (msec:count)", ds_cstr(&s));
- ds_destroy(&s);
- }
-
- return bucket * BUCKET_WIDTH;
-}
-
-static void
-expire_subfacets(struct dpif_backer *backer, int dp_max_idle)
-{
- /* Cutoff time for most flows. */
- long long int normal_cutoff = time_msec() - dp_max_idle;
-
- /* We really want to keep flows for special protocols around, so use a more
- * conservative cutoff. */
- long long int special_cutoff = time_msec() - 10000;
-
- struct subfacet *subfacet, *next_subfacet;
- struct subfacet *batch[SUBFACET_DESTROY_MAX_BATCH];
- int n_batch;
-
- n_batch = 0;
- HMAP_FOR_EACH_SAFE (subfacet, next_subfacet, hmap_node,
- &backer->subfacets) {
- long long int cutoff;
-
- cutoff = (subfacet->facet->xout.slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP
- | SLOW_STP)
- ? special_cutoff
- : normal_cutoff);
- if (subfacet->used < cutoff) {
- if (subfacet->path != SF_NOT_INSTALLED) {
- batch[n_batch++] = subfacet;
- if (n_batch >= SUBFACET_DESTROY_MAX_BATCH) {
- subfacet_destroy_batch(backer, batch, n_batch);
- n_batch = 0;
- }
- } else {
- subfacet_destroy(subfacet);
- }
- }
- }
-
- if (n_batch > 0) {
- subfacet_destroy_batch(backer, batch, n_batch);
- }
-}
-
/* If 'rule' is an OpenFlow rule, that has expired according to OpenFlow rules,
* then delete it entirely. */
static void
rule_expire(struct rule_dpif *rule)
OVS_REQUIRES(ofproto_mutex)
{
- uint16_t idle_timeout, hard_timeout;
+ uint16_t hard_timeout, idle_timeout;
long long int now = time_msec();
- int reason;
-
- ovs_assert(!rule->up.pending);
+ int reason = -1; /* Stays negative unless one of the timeouts has elapsed. */
- /* Has 'rule' expired? */
- ovs_mutex_lock(&rule->up.mutex);
hard_timeout = rule->up.hard_timeout;
idle_timeout = rule->up.idle_timeout;
- if (hard_timeout && now > rule->up.modified + hard_timeout * 1000) {
- reason = OFPRR_HARD_TIMEOUT;
- } else if (idle_timeout && now > rule->up.used + idle_timeout * 1000) {
- reason = OFPRR_IDLE_TIMEOUT;
- } else {
- reason = -1;
- }
- ovs_mutex_unlock(&rule->up.mutex);
-
- if (reason >= 0) {
- COVERAGE_INC(ofproto_dpif_expired);
- ofproto_rule_expire(&rule->up, reason);
- }
-}
-\f
-/* Facets. */
-
-/* Creates and returns a new facet based on 'miss'.
- *
- * The caller must already have determined that no facet with an identical
- * 'miss->flow' exists in 'miss->ofproto'.
- *
- * 'rule' and 'xout' must have been created based on 'miss'.
- *
- * 'facet'' statistics are initialized based on 'stats'.
- *
- * The facet will initially have no subfacets. The caller should create (at
- * least) one subfacet with subfacet_create(). */
-static struct facet *
-facet_create(const struct flow_miss *miss)
-{
- struct ofproto_dpif *ofproto = miss->ofproto;
- struct facet *facet;
- struct match match;
-
- COVERAGE_INC(facet_create);
- facet = xzalloc(sizeof *facet);
- facet->ofproto = miss->ofproto;
- facet->used = miss->stats.used;
- facet->flow = miss->flow;
- facet->learn_rl = time_msec() + 500;
-
- list_init(&facet->subfacets);
-
- xlate_out_copy(&facet->xout, &miss->xout);
-
- match_init(&match, &facet->flow, &facet->xout.wc);
- cls_rule_init(&facet->cr, &match, OFP_DEFAULT_PRIORITY);
- ovs_rwlock_wrlock(&ofproto->facets.rwlock);
- classifier_insert(&ofproto->facets, &facet->cr);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
-
- return facet;
-}
-
-static void
-facet_free(struct facet *facet)
-{
- if (facet) {
- xlate_out_uninit(&facet->xout);
- free(facet);
- }
-}
-
-/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
- * 'flow' must reflect the data in 'packet'. */
-int
-ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
- const struct flow *flow,
- struct rule_dpif *rule,
- const struct ofpact *ofpacts, size_t ofpacts_len,
- struct ofpbuf *packet)
-{
- struct odputil_keybuf keybuf;
- struct dpif_flow_stats stats;
- struct xlate_out xout;
- struct xlate_in xin;
- ofp_port_t in_port;
- struct ofpbuf key;
- int error;
-
- ovs_assert((rule != NULL) != (ofpacts != NULL));
-
- dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
- if (rule) {
- rule_dpif_credit_stats(rule, &stats);
- }
-
- xlate_in_init(&xin, ofproto, flow, rule, stats.tcp_flags, packet);
- xin.ofpacts = ofpacts;
- xin.ofpacts_len = ofpacts_len;
- xin.resubmit_stats = &stats;
- xlate_actions(&xin, &xout);
-
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- in_port = flow->in_port.ofp_port;
- if (in_port == OFPP_NONE) {
- in_port = OFPP_LOCAL;
- }
- odp_flow_key_from_flow(&key, flow, ofp_port_to_odp_port(ofproto, in_port));
-
- error = dpif_execute(ofproto->backer->dpif, key.data, key.size,
- xout.odp_actions.data, xout.odp_actions.size, packet,
- (xout.slow & SLOW_ACTION) != 0);
- xlate_out_uninit(&xout);
-
- return error;
-}
-
-/* Remove 'facet' from its ofproto and free up the associated memory:
- *
- * - If 'facet' was installed in the datapath, uninstalls it and updates its
- * rule's statistics, via subfacet_uninstall().
- *
- * - Removes 'facet' from its rule and from ofproto->facets.
- */
-static void
-facet_remove(struct facet *facet)
-{
- struct subfacet *subfacet, *next_subfacet;
-
- COVERAGE_INC(facet_remove);
- ovs_assert(!list_is_empty(&facet->subfacets));
-
- /* First uninstall all of the subfacets to get final statistics. */
- LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
- subfacet_uninstall(subfacet);
- }
-
- /* Flush the final stats to the rule.
- *
- * This might require us to have at least one subfacet around so that we
- * can use its actions for accounting in facet_account(), which is why we
- * have uninstalled but not yet destroyed the subfacets. */
- facet_flush_stats(facet);
-
- /* Now we're really all done so destroy everything. */
- LIST_FOR_EACH_SAFE (subfacet, next_subfacet, list_node,
- &facet->subfacets) {
- subfacet_destroy__(subfacet);
- }
- ovs_rwlock_wrlock(&facet->ofproto->facets.rwlock);
- classifier_remove(&facet->ofproto->facets, &facet->cr);
- ovs_rwlock_unlock(&facet->ofproto->facets.rwlock);
- cls_rule_destroy(&facet->cr);
- facet_free(facet);
-}
-
-/* Feed information from 'facet' back into the learning table to keep it in
- * sync with what is actually flowing through the datapath. */
-static void
-facet_learn(struct facet *facet)
-{
- long long int now = time_msec();
- if (!facet->xout.has_fin_timeout && now < facet->learn_rl) {
- return;
- }
-
- facet->learn_rl = now + 500;
-
- if (!facet->xout.has_learn
- && !facet->xout.has_normal
- && (!facet->xout.has_fin_timeout
- || !(facet->tcp_flags & (TCP_FIN | TCP_RST)))) {
- return;
- }
-
- facet_push_stats(facet, true);
-}
-
-/* Returns true if the only action for 'facet' is to send to the controller.
- * (We don't report NetFlow expiration messages for such facets because they
- * are just part of the control logic for the network, not real traffic). */
-static bool
-facet_is_controller_flow(struct facet *facet)
-{
- if (facet) {
- struct ofproto_dpif *ofproto = facet->ofproto;
- const struct ofpact *ofpacts;
- struct rule_actions *actions;
- struct rule_dpif *rule;
- size_t ofpacts_len;
- bool is_controller;
-
- rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
- actions = rule_dpif_get_actions(rule);
- rule_dpif_unref(rule);
-
- ofpacts_len = actions->ofpacts_len;
- ofpacts = actions->ofpacts;
- is_controller = ofpacts_len > 0
- && ofpacts->type == OFPACT_CONTROLLER
- && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
- rule_actions_unref(actions);
-
- return is_controller;
- }
- return false;
-}
-
-/* Folds all of 'facet''s statistics into its rule. Also updates the
- * accounting ofhook and emits a NetFlow expiration if appropriate. All of
- * 'facet''s statistics in the datapath should have been zeroed and folded into
- * its packet and byte counts before this function is called. */
-static void
-facet_flush_stats(struct facet *facet)
-{
- struct ofproto_dpif *ofproto = facet->ofproto;
- struct subfacet *subfacet;
-
- LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
- ovs_assert(!subfacet->dp_byte_count);
- ovs_assert(!subfacet->dp_packet_count);
- }
-
- facet_push_stats(facet, false);
-
- if (ofproto->netflow && !facet_is_controller_flow(facet)) {
- netflow_expire(ofproto->netflow, &facet->flow);
- netflow_flow_clear(ofproto->netflow, &facet->flow);
- }
-
- /* Reset counters to prevent double counting if 'facet' ever gets
- * reinstalled. */
- facet_reset_counters(facet);
- facet->tcp_flags = 0;
-}
-
-/* Searches 'ofproto''s table of facets for one which would be responsible for
- * 'flow'. Returns it if found, otherwise a null pointer.
- *
- * The returned facet might need revalidation; use facet_lookup_valid()
- * instead if that is important. */
-static struct facet *
-facet_find(struct ofproto_dpif *ofproto, const struct flow *flow)
-{
- struct cls_rule *cr;
-
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cr = classifier_lookup(&ofproto->facets, flow, NULL);
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- return cr ? CONTAINER_OF(cr, struct facet, cr) : NULL;
-}
-
-/* Searches 'ofproto''s table of facets for one capable that covers
- * 'flow'. Returns it if found, otherwise a null pointer.
- *
- * The returned facet is guaranteed to be valid. */
-static struct facet *
-facet_lookup_valid(struct ofproto_dpif *ofproto, const struct flow *flow)
-{
- struct facet *facet;
-
- facet = facet_find(ofproto, flow);
- if (facet
- && ofproto->backer->need_revalidate
- && !facet_revalidate(facet)) {
- return NULL;
- }
-
- return facet;
-}
-
-static bool
-facet_check_consistency(struct facet *facet)
-{
- static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 15);
-
- struct xlate_out xout;
- struct xlate_in xin;
- bool ok;
-
- /* Check the datapath actions for consistency. */
- xlate_in_init(&xin, facet->ofproto, &facet->flow, NULL, 0, NULL);
- xlate_actions(&xin, &xout);
-
- ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
- && facet->xout.slow == xout.slow;
- if (!ok && !VLOG_DROP_WARN(&rl)) {
- struct ds s = DS_EMPTY_INITIALIZER;
-
- flow_format(&s, &facet->flow);
- ds_put_cstr(&s, ": inconsistency in facet");
+ /* Has 'rule' expired? The timeouts are multiplied by 1000 before being
+ * compared against 'now', which comes from time_msec(). The hard timeout
+ * is tested first; the idle timeout is only considered when the hard
+ * timeout did not fire. */
+ if (hard_timeout) {
+ long long int modified;
- if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
- ds_put_cstr(&s, " (actions were: ");
- format_odp_actions(&s, facet->xout.odp_actions.data,
- facet->xout.odp_actions.size);
- ds_put_cstr(&s, ") (correct actions: ");
- format_odp_actions(&s, xout.odp_actions.data,
- xout.odp_actions.size);
- ds_put_char(&s, ')');
- }
+ ovs_mutex_lock(&rule->up.mutex);
+ modified = rule->up.modified; /* Copied out while holding up.mutex. */
+ ovs_mutex_unlock(&rule->up.mutex);
- if (facet->xout.slow != xout.slow) {
- ds_put_format(&s, " slow path incorrect. should be %d", xout.slow);
+ if (now > modified + hard_timeout * 1000) {
+ reason = OFPRR_HARD_TIMEOUT;
}
-
- ds_destroy(&s);
}
- xlate_out_uninit(&xout);
- return ok;
-}
+ if (reason < 0 && idle_timeout) {
+ long long int used;
-/* Re-searches the classifier for 'facet':
- *
- * - If the rule found is different from 'facet''s current rule, moves
- * 'facet' to the new rule and recompiles its actions.
- *
- * - If the rule found is the same as 'facet''s current rule, leaves 'facet'
- * where it is and recompiles its actions anyway.
- *
- * - If any of 'facet''s subfacets correspond to a new flow according to
- * xlate_receive(), 'facet' is removed.
- *
- * Returns true if 'facet' is still valid. False if 'facet' was removed. */
-static bool
-facet_revalidate(struct facet *facet)
-{
- struct ofproto_dpif *ofproto = facet->ofproto;
- struct rule_dpif *new_rule;
- struct subfacet *subfacet;
- struct flow_wildcards wc;
- struct xlate_out xout;
- struct xlate_in xin;
-
- COVERAGE_INC(facet_revalidate);
-
- /* Check that child subfacets still correspond to this facet. Tunnel
- * configuration changes could cause a subfacet's OpenFlow in_port to
- * change. */
- LIST_FOR_EACH (subfacet, list_node, &facet->subfacets) {
- struct ofproto_dpif *recv_ofproto;
- struct flow recv_flow;
- int error;
+ ovs_mutex_lock(&rule->stats_mutex);
+ used = rule->stats.used; /* Copied out while holding stats_mutex. */
+ ovs_mutex_unlock(&rule->stats_mutex);
- error = xlate_receive(ofproto->backer, NULL, subfacet->key,
- subfacet->key_len, &recv_flow, NULL,
- &recv_ofproto, NULL, NULL, NULL, NULL);
- if (error
- || recv_ofproto != ofproto
- || facet != facet_find(ofproto, &recv_flow)) {
- facet_remove(facet);
- return false;
+ if (now > used + idle_timeout * 1000) {
+ reason = OFPRR_IDLE_TIMEOUT;
}
}
- flow_wildcards_init_catchall(&wc);
- rule_dpif_lookup(ofproto, &facet->flow, &wc, &new_rule);
-
- /* Calculate new datapath actions.
- *
- * We do not modify any 'facet' state yet, because we might need to, e.g.,
- * emit a NetFlow expiration and, if so, we need to have the old state
- * around to properly compose it. */
- xlate_in_init(&xin, ofproto, &facet->flow, new_rule, 0, NULL);
- xlate_actions(&xin, &xout);
- flow_wildcards_or(&xout.wc, &xout.wc, &wc);
- /* Make sure non -packet fields are not masked. If not cleared,
- * the memcmp() below may fail, causing an otherwise valid facet
- * to be removed. */
- flow_wildcards_clear_non_packet_fields(&xout.wc);
-
- /* A facet's slow path reason should only change under dramatic
- * circumstances. Rather than try to update everything, it's simpler to
- * remove the facet and start over.
- *
- * More importantly, if a facet's wildcards change, it will be relatively
- * difficult to figure out if its subfacets still belong to it, and if not
- * which facet they may belong to. Again, to avoid the complexity, we
- * simply give up instead. */
- if (facet->xout.slow != xout.slow
- || memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) {
- facet_remove(facet);
- xlate_out_uninit(&xout);
- rule_dpif_unref(new_rule);
- return false;
- }
-
- if (!ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)) {
- LIST_FOR_EACH(subfacet, list_node, &facet->subfacets) {
- if (subfacet->path == SF_FAST_PATH) {
- struct dpif_flow_stats stats;
-
- subfacet_install(subfacet, &xout.odp_actions, &stats);
- subfacet_update_stats(subfacet, &stats);
- }
- }
-
- facet_flush_stats(facet);
-
- ofpbuf_clear(&facet->xout.odp_actions);
- ofpbuf_put(&facet->xout.odp_actions, xout.odp_actions.data,
- xout.odp_actions.size);
+ if (reason >= 0) {
+ COVERAGE_INC(ofproto_dpif_expired);
+ ofproto_rule_expire(&rule->up, reason);
}
-
- /* Update 'facet' now that we've taken care of all the old state. */
- facet->xout.slow = xout.slow;
- facet->xout.has_learn = xout.has_learn;
- facet->xout.has_normal = xout.has_normal;
- facet->xout.has_fin_timeout = xout.has_fin_timeout;
- facet->xout.nf_output_iface = xout.nf_output_iface;
- facet->xout.mirrors = xout.mirrors;
-
- ovs_mutex_lock(&new_rule->up.mutex);
- facet->used = MAX(facet->used, new_rule->up.created);
- ovs_mutex_unlock(&new_rule->up.mutex);
-
- xlate_out_uninit(&xout);
- rule_dpif_unref(new_rule);
- return true;
}
-static void
-facet_reset_counters(struct facet *facet)
-{
- facet->packet_count = 0;
- facet->byte_count = 0;
- facet->prev_packet_count = 0;
- facet->prev_byte_count = 0;
-}
-
-static void
-flow_push_stats(struct ofproto_dpif *ofproto, struct flow *flow,
- struct dpif_flow_stats *stats, bool may_learn)
-{
- struct xlate_in xin;
-
- xlate_in_init(&xin, ofproto, flow, NULL, stats->tcp_flags, NULL);
- xin.resubmit_stats = stats;
- xin.may_learn = may_learn;
- xlate_actions_for_side_effects(&xin);
-}
-
-static void
-facet_push_stats(struct facet *facet, bool may_learn)
+/* Translates the actions of 'rule' or the 'ofpacts_len' bytes of 'ofpacts'
+ * (exactly one of 'rule' and 'ofpacts' must be nonnull) for 'flow', then
+ * executes the resulting datapath actions on 'packet' through 'ofproto''s
+ * backer dpif.
+ *
+ * 'recurse' and 'resubmits' are stored into the translation input as its
+ * 'recurse' and 'resubmits' members before translation starts.
+ *
+ * If 'rule' is nonnull, it is credited with the statistics extracted from
+ * 'packet'.
+ *
+ * Returns EINVAL if translation does not yield XLATE_OK, otherwise the value
+ * returned by dpif_execute(). */
+int
+ofproto_dpif_execute_actions__(struct ofproto_dpif *ofproto,
+ const struct flow *flow,
+ struct rule_dpif *rule,
+ const struct ofpact *ofpacts, size_t ofpacts_len,
+ int recurse, int resubmits,
+ struct dp_packet *packet)
{
struct dpif_flow_stats stats;
+ struct xlate_out xout;
+ struct xlate_in xin;
+ ofp_port_t in_port;
+ struct dpif_execute execute;
+ int error;
- ovs_assert(facet->packet_count >= facet->prev_packet_count);
- ovs_assert(facet->byte_count >= facet->prev_byte_count);
- ovs_assert(facet->used >= facet->prev_used);
+ ovs_assert((rule != NULL) != (ofpacts != NULL));
- stats.n_packets = facet->packet_count - facet->prev_packet_count;
- stats.n_bytes = facet->byte_count - facet->prev_byte_count;
- stats.used = facet->used;
- stats.tcp_flags = facet->tcp_flags;
+ dpif_flow_stats_extract(flow, packet, time_msec(), &stats);
- if (may_learn || stats.n_packets || facet->used > facet->prev_used) {
- facet->prev_packet_count = facet->packet_count;
- facet->prev_byte_count = facet->byte_count;
- facet->prev_used = facet->used;
- flow_push_stats(facet->ofproto, &facet->flow, &stats, may_learn);
+ if (rule) {
+ rule_dpif_credit_stats(rule, &stats);
}
-}
-
-static void
-push_all_stats(void)
-{
- static long long int rl = LLONG_MIN;
- struct ofproto_dpif *ofproto;
- if (time_msec() < rl) {
- return;
+ uint64_t odp_actions_stub[1024 / 8];
+ struct ofpbuf odp_actions = OFPBUF_STUB_INITIALIZER(odp_actions_stub);
+ xlate_in_init(&xin, ofproto, flow, flow->in_port.ofp_port, rule,
+ stats.tcp_flags, packet, NULL, &odp_actions);
+ xin.ofpacts = ofpacts;
+ xin.ofpacts_len = ofpacts_len;
+ xin.resubmit_stats = &stats;
+ xin.recurse = recurse;
+ xin.resubmits = resubmits;
+ if (xlate_actions(&xin, &xout) != XLATE_OK) {
+ error = EINVAL;
+ goto out; /* 'xout' and 'odp_actions' are still released at 'out'. */
}
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- struct cls_cursor cursor;
- struct facet *facet;
+ execute.actions = odp_actions.data;
+ execute.actions_len = odp_actions.size;
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
- facet_push_stats(facet, false);
- }
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
+ pkt_metadata_from_flow(&packet->md, flow);
+ execute.packet = packet;
+ execute.needs_help = (xout.slow & SLOW_ACTION) != 0;
+ execute.probe = false;
+ execute.mtu = 0;
+
+ /* Fix up in_port: when 'flow''s OpenFlow in_port is OFPP_NONE, the
+ * packet metadata's datapath in_port is derived from OFPP_LOCAL
+ * instead. */
+ in_port = flow->in_port.ofp_port;
+ if (in_port == OFPP_NONE) {
+ in_port = OFPP_LOCAL;
}
+ execute.packet->md.in_port.odp_port = ofp_port_to_odp_port(ofproto, in_port);
- rl = time_msec() + 100;
+ error = dpif_execute(ofproto->backer->dpif, &execute);
+out:
+ xlate_out_uninit(&xout);
+ ofpbuf_uninit(&odp_actions);
+
+ return error;
+}
+
+/* Executes, within 'ofproto', the actions in 'rule' or 'ofpacts' on 'packet'.
+ * 'flow' must reflect the data in 'packet'.
+ *
+ * This is a thin wrapper that calls ofproto_dpif_execute_actions__() with
+ * both its 'recurse' and 'resubmits' arguments passed as 0, and returns that
+ * function's result unchanged. */
+int
+ofproto_dpif_execute_actions(struct ofproto_dpif *ofproto,
+ const struct flow *flow,
+ struct rule_dpif *rule,
+ const struct ofpact *ofpacts, size_t ofpacts_len,
+ struct dp_packet *packet)
+{
+ return ofproto_dpif_execute_actions__(ofproto, flow, rule, ofpacts,
+ ofpacts_len, 0, 0, packet);
}
void
const struct dpif_flow_stats *stats)
{
ovs_mutex_lock(&rule->stats_mutex);
- rule->packet_count += stats->n_packets;
- rule->byte_count += stats->n_bytes;
- rule->up.used = MAX(rule->up.used, stats->used);
+ if (OVS_UNLIKELY(rule->new_rule)) {
+ rule_dpif_credit_stats(rule->new_rule, stats);
+ } else {
+ rule->stats.n_packets += stats->n_packets;
+ rule->stats.n_bytes += stats->n_bytes;
+ rule->stats.used = MAX(rule->stats.used, stats->used);
+ }
ovs_mutex_unlock(&rule->stats_mutex);
}
-bool
-rule_dpif_is_fail_open(const struct rule_dpif *rule)
-{
- return is_fail_open_rule(&rule->up);
-}
-
-bool
-rule_dpif_is_table_miss(const struct rule_dpif *rule)
-{
- return rule_is_table_miss(&rule->up);
-}
-
ovs_be64
rule_dpif_get_flow_cookie(const struct rule_dpif *rule)
OVS_REQUIRES(rule->up.mutex)
ofproto_rule_reduce_timeouts(&rule->up, idle_timeout, hard_timeout);
}
-/* Returns 'rule''s actions. The caller owns a reference on the returned
- * actions and must eventually release it (with rule_actions_unref()) to avoid
- * a memory leak. */
-struct rule_actions *
+/* Returns 'rule''s actions, as obtained from rule_get_actions() on the
+ * underlying 'rule->up'. The returned actions are RCU-protected, and can
+ * be read until the calling thread quiesces. The pointer is returned as
+ * const. */
+const struct rule_actions *
rule_dpif_get_actions(const struct rule_dpif *rule)
{
return rule_get_actions(&rule->up);
}
-\f
-/* Subfacets. */
-
-static struct subfacet *
-subfacet_find(struct dpif_backer *backer, const struct nlattr *key,
- size_t key_len, uint32_t key_hash)
-{
- struct subfacet *subfacet;
-
- HMAP_FOR_EACH_WITH_HASH (subfacet, hmap_node, key_hash,
- &backer->subfacets) {
- if (subfacet->key_len == key_len
- && !memcmp(key, subfacet->key, key_len)) {
- return subfacet;
- }
- }
-
- return NULL;
-}
-
-/* Creates and returns a new subfacet within 'facet' for the flow in 'miss'.
- * 'key_hash' must be a hash over miss->key. The caller must have already
- * ensured that no subfacet subfacet already exists. */
-static struct subfacet *
-subfacet_create(struct facet *facet, struct flow_miss *miss, uint32_t key_hash)
-{
- struct dpif_backer *backer = miss->ofproto->backer;
- const struct nlattr *key = miss->key;
- size_t key_len = miss->key_len;
- struct subfacet *subfacet;
-
- subfacet = (list_is_empty(&facet->subfacets)
- ? &facet->one_subfacet
- : xmalloc(sizeof *subfacet));
-
- COVERAGE_INC(subfacet_create);
- hmap_insert(&backer->subfacets, &subfacet->hmap_node, key_hash);
- list_push_back(&facet->subfacets, &subfacet->list_node);
- subfacet->facet = facet;
- subfacet->key = xmemdup(key, key_len);
- subfacet->key_len = key_len;
- subfacet->used = miss->stats.used;
- subfacet->created = subfacet->used;
- subfacet->dp_packet_count = 0;
- subfacet->dp_byte_count = 0;
- subfacet->path = SF_NOT_INSTALLED;
- subfacet->backer = backer;
-
- return subfacet;
-}
-
-/* Uninstalls 'subfacet' from the datapath, if it is installed, removes it from
- * its facet within 'ofproto', and frees it. */
-static void
-subfacet_destroy__(struct subfacet *subfacet)
-{
- struct facet *facet = subfacet->facet;
-
- COVERAGE_INC(subfacet_destroy);
- subfacet_uninstall(subfacet);
- hmap_remove(&subfacet->backer->subfacets, &subfacet->hmap_node);
- list_remove(&subfacet->list_node);
- free(subfacet->key);
- if (subfacet != &facet->one_subfacet) {
- free(subfacet);
- }
-}
-/* Destroys 'subfacet', as with subfacet_destroy__(), and then if this was the
- * last remaining subfacet in its facet destroys the facet too. */
+/* Sets 'rule''s recirculation id to 'id'.
+ *
+ * 'rule' must either have no recirculation id yet (a zero 'recirc_id') or
+ * already have 'id'; any other combination trips the assertion. When 'rule'
+ * already has 'id', nothing is stored and 'id' is instead passed to
+ * recirc_free_id(). */
static void
-subfacet_destroy(struct subfacet *subfacet)
+rule_dpif_set_recirc_id(struct rule_dpif *rule, uint32_t id)
+ OVS_REQUIRES(rule->up.mutex)
{
- struct facet *facet = subfacet->facet;
-
- if (list_is_singleton(&facet->subfacets)) {
- /* facet_remove() needs at least one subfacet (it will remove it). */
- facet_remove(facet);
+ ovs_assert(!rule->recirc_id || rule->recirc_id == id);
+ if (rule->recirc_id == id) {
+ /* Release the new reference to the same id. */
+ recirc_free_id(id);
} else {
- subfacet_destroy__(subfacet);
+ rule->recirc_id = id;
}
}
-static void
-subfacet_destroy_batch(struct dpif_backer *backer,
- struct subfacet **subfacets, int n)
+/* Sets 'rule_''s recirculation id to 'id' while holding the rule's mutex. */
+void
+rule_set_recirc_id(struct rule *rule_, uint32_t id)
{
- struct dpif_op ops[SUBFACET_DESTROY_MAX_BATCH];
- struct dpif_op *opsp[SUBFACET_DESTROY_MAX_BATCH];
- struct dpif_flow_stats stats[SUBFACET_DESTROY_MAX_BATCH];
- int i;
-
- for (i = 0; i < n; i++) {
- ops[i].type = DPIF_OP_FLOW_DEL;
- ops[i].u.flow_del.key = subfacets[i]->key;
- ops[i].u.flow_del.key_len = subfacets[i]->key_len;
- ops[i].u.flow_del.stats = &stats[i];
- opsp[i] = &ops[i];
- }
-
- dpif_operate(backer->dpif, opsp, n);
- for (i = 0; i < n; i++) {
- subfacet_reset_dp_stats(subfacets[i], &stats[i]);
- subfacets[i]->path = SF_NOT_INSTALLED;
- subfacet_destroy(subfacets[i]);
- }
-}
-
-/* Updates 'subfacet''s datapath flow, setting its actions to 'actions_len'
- * bytes of actions in 'actions'. If 'stats' is non-null, statistics counters
- * in the datapath will be zeroed and 'stats' will be updated with traffic new
- * since 'subfacet' was last updated.
- *
- * Returns 0 if successful, otherwise a positive errno value. */
-static int
-subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions,
- struct dpif_flow_stats *stats)
-{
- struct facet *facet = subfacet->facet;
- enum subfacet_path path = facet->xout.slow ? SF_SLOW_PATH : SF_FAST_PATH;
- const struct nlattr *actions = odp_actions->data;
- size_t actions_len = odp_actions->size;
- struct odputil_keybuf maskbuf;
- struct ofpbuf mask;
-
- uint64_t slow_path_stub[128 / 8];
- enum dpif_flow_put_flags flags;
- int ret;
-
- flags = subfacet->path == SF_NOT_INSTALLED ? DPIF_FP_CREATE
- : DPIF_FP_MODIFY;
- if (stats) {
- flags |= DPIF_FP_ZERO_STATS;
- }
-
- if (path == SF_SLOW_PATH) {
- compose_slow_path(facet->ofproto, &facet->flow, facet->xout.slow,
- slow_path_stub, sizeof slow_path_stub,
- &actions, &actions_len);
- }
-
- ofpbuf_use_stack(&mask, &maskbuf, sizeof maskbuf);
- if (enable_megaflows) {
- odp_flow_key_from_mask(&mask, &facet->xout.wc.masks,
- &facet->flow, UINT32_MAX);
- }
-
- ret = dpif_flow_put(subfacet->backer->dpif, flags, subfacet->key,
- subfacet->key_len, mask.data, mask.size,
- actions, actions_len, stats);
-
- if (stats) {
- subfacet_reset_dp_stats(subfacet, stats);
- }
+ struct rule_dpif *rule = rule_dpif_cast(rule_);
- if (ret) {
- COVERAGE_INC(subfacet_install_fail);
- } else {
- subfacet->path = path;
- }
- return ret;
+ ovs_mutex_lock(&rule->up.mutex);
+ rule_dpif_set_recirc_id(rule, id);
+ ovs_mutex_unlock(&rule->up.mutex);
}
-/* If 'subfacet' is installed in the datapath, uninstalls it. */
-static void
-subfacet_uninstall(struct subfacet *subfacet)
+cls_version_t
+ofproto_dpif_get_tables_version(struct ofproto_dpif *ofproto OVS_UNUSED)
{
- if (subfacet->path != SF_NOT_INSTALLED) {
- struct ofproto_dpif *ofproto = subfacet->facet->ofproto;
- struct dpif_flow_stats stats;
- int error;
-
- error = dpif_flow_del(ofproto->backer->dpif, subfacet->key,
- subfacet->key_len, &stats);
- subfacet_reset_dp_stats(subfacet, &stats);
- if (!error) {
- subfacet_update_stats(subfacet, &stats);
- }
- subfacet->path = SF_NOT_INSTALLED;
- } else {
- ovs_assert(subfacet->dp_packet_count == 0);
- ovs_assert(subfacet->dp_byte_count == 0);
- }
-}
+ cls_version_t version;
-/* Resets 'subfacet''s datapath statistics counters. This should be called
- * when 'subfacet''s statistics are cleared in the datapath. If 'stats' is
- * non-null, it should contain the statistics returned by dpif when 'subfacet'
- * was reset in the datapath. 'stats' will be modified to include only
- * statistics new since 'subfacet' was last updated. */
-static void
-subfacet_reset_dp_stats(struct subfacet *subfacet,
- struct dpif_flow_stats *stats)
-{
- if (stats
- && subfacet->dp_packet_count <= stats->n_packets
- && subfacet->dp_byte_count <= stats->n_bytes) {
- stats->n_packets -= subfacet->dp_packet_count;
- stats->n_bytes -= subfacet->dp_byte_count;
- }
+ atomic_read_relaxed(&ofproto->tables_version, &version);
- subfacet->dp_packet_count = 0;
- subfacet->dp_byte_count = 0;
+ return version;
}
-/* Folds the statistics from 'stats' into the counters in 'subfacet'.
+/* The returned rule (if any) is valid at least until the next RCU quiescent
+ * period. If the rule needs to stay around longer, the caller should take
+ * a reference.
*
- * Because of the meaning of a subfacet's counters, it only makes sense to do
- * this if 'stats' are not tracked in the datapath, that is, if 'stats'
- * represents a packet that was sent by hand or if it represents statistics
- * that have been cleared out of the datapath. */
-static void
-subfacet_update_stats(struct subfacet *subfacet,
- const struct dpif_flow_stats *stats)
-{
- if (stats->n_packets || stats->used > subfacet->used) {
- struct facet *facet = subfacet->facet;
-
- subfacet->used = MAX(subfacet->used, stats->used);
- facet->used = MAX(facet->used, stats->used);
- facet->packet_count += stats->n_packets;
- facet->byte_count += stats->n_bytes;
- facet->tcp_flags |= stats->tcp_flags;
- }
-}
-\f
-/* Rules. */
-
-/* Lookup 'flow' in 'ofproto''s classifier. If 'wc' is non-null, sets
- * the fields that were relevant as part of the lookup. */
-void
-rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
- struct flow_wildcards *wc, struct rule_dpif **rule)
-{
- struct ofport_dpif *port;
-
- if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
- return;
- }
- port = get_ofp_port(ofproto, flow->in_port.ofp_port);
- if (!port) {
- VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
- flow->in_port.ofp_port);
+ * 'flow' is non-const to allow for temporary modifications during the lookup.
+ * Any changes are restored before returning. */
+static struct rule_dpif *
+rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto, cls_version_t version,
+ uint8_t table_id, struct flow *flow,
+ struct flow_wildcards *wc)
+{
+ /* 'table_id' indexes ofproto->up.tables[] directly; no range check is
+ * done here, so the caller must pass a valid table. */
+ struct classifier *cls = &ofproto->up.tables[table_id].cls;
+ /* One classifier lookup in this single table; the resulting cls_rule is
+ * converted back to its rule and then to the enclosing rule_dpif. */
+ return rule_dpif_cast(rule_from_cls_rule(classifier_lookup(cls, version,
+ flow, wc)));
+}
+
+/* Look up 'flow' in 'ofproto''s classifier version 'version', starting from
+ * table '*table_id'. Returns the rule that was found, which may be one of the
+ * special rules according to packet miss handling. If 'may_packet_in' is
+ * false, returning of the miss_rule (which issues packet ins for the
+ * controller) is avoided. Updates 'wc', if nonnull, to reflect the fields
+ * that were used during the lookup.
+ *
+ * If 'honor_table_miss' is true, the first lookup occurs in '*table_id', but
+ * if none is found then the table miss configuration for that table is
+ * honored, which can result in additional lookups in other OpenFlow tables.
+ * In this case the function updates '*table_id' to reflect the final OpenFlow
+ * table that was searched.
+ *
+ * If 'honor_table_miss' is false, then only one table lookup occurs, in
+ * '*table_id'.
+ *
+ * The returned rule is valid at least until the next RCU quiescent period.
+ * If the rule needs to stay around longer, the caller must take a
+ * reference.
+ *
+ * 'in_port' allows the lookup to take place as if the in port had the value
+ * 'in_port'. This is needed for resubmit action support.
+ *
+ * 'flow' is non-const to allow for temporary modifications during the lookup.
+ * Any changes are restored before returning. */
+struct rule_dpif *
+rule_dpif_lookup_from_table(struct ofproto_dpif *ofproto,
+ cls_version_t version, struct flow *flow,
+ struct flow_wildcards *wc,
+ const struct dpif_flow_stats *stats,
+ uint8_t *table_id, ofp_port_t in_port,
+ bool may_packet_in, bool honor_table_miss)
+{
+ ovs_be16 old_tp_src = flow->tp_src, old_tp_dst = flow->tp_dst;
+ ofp_port_t old_in_port = flow->in_port.ofp_port;
+ enum ofputil_table_miss miss_config;
+ struct rule_dpif *rule;
+ uint8_t next_id;
+
+ /* We always unwildcard nw_frag (for IP), so it need not be
+ * unwildcarded here. */
+ if (flow->nw_frag & FLOW_NW_FRAG_ANY
+ && ofproto->up.frag_handling != OFPC_FRAG_NX_MATCH) {
+ if (ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
+ /* We must pretend that transport ports are unavailable. */
+ flow->tp_src = htons(0);
+ flow->tp_dst = htons(0);
+ } else {
+ /* Must be OFPC_FRAG_DROP (we don't have OFPC_FRAG_REASM).
+ * Use the drop_frags_rule (which cannot disappear). */
+ rule = ofproto->drop_frags_rule;
+ if (stats) {
+ struct oftable *tbl = &ofproto->up.tables[*table_id];
+ unsigned long orig;
+
+ atomic_add_relaxed(&tbl->n_matched, stats->n_packets, &orig);
+ }
+ return rule;
+ }
}
- choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
- ofproto->no_packet_in_rule, rule);
-}
+ /* Look up a flow with 'in_port' as the input port. Then restore the
+ * original input port (otherwise OFPP_NORMAL and OFPP_IN_PORT will
+ * have surprising behavior). */
+ flow->in_port.ofp_port = in_port;
-bool
-rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
- const struct flow *flow, struct flow_wildcards *wc,
- uint8_t table_id, struct rule_dpif **rule)
-{
- const struct cls_rule *cls_rule;
- struct classifier *cls;
- bool frag;
+ /* Our current implementation depends on n_tables == N_TABLES, and
+ * TBL_INTERNAL being the last table. */
+ BUILD_ASSERT_DECL(N_TABLES == TBL_INTERNAL + 1);
- *rule = NULL;
- if (table_id >= N_TABLES) {
- return false;
- }
+ miss_config = OFPUTIL_TABLE_MISS_CONTINUE;
- if (wc) {
- memset(&wc->masks.dl_type, 0xff, sizeof wc->masks.dl_type);
- wc->masks.nw_frag |= FLOW_NW_FRAG_MASK;
- }
+ for (next_id = *table_id;
+ next_id < ofproto->up.n_tables;
+ next_id++, next_id += (next_id == TBL_INTERNAL))
+ {
+ *table_id = next_id;
+ rule = rule_dpif_lookup_in_table(ofproto, version, next_id, flow, wc);
+ if (stats) {
+ struct oftable *tbl = &ofproto->up.tables[next_id];
+ unsigned long orig;
- cls = &ofproto->up.tables[table_id].cls;
- ovs_rwlock_rdlock(&cls->rwlock);
- frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
- if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
- /* We must pretend that transport ports are unavailable. */
- struct flow ofpc_normal_flow = *flow;
- ofpc_normal_flow.tp_src = htons(0);
- ofpc_normal_flow.tp_dst = htons(0);
- cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
- } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
- cls_rule = &ofproto->drop_frags_rule->up.cr;
- /* Frag mask in wc already set above. */
- } else {
- cls_rule = classifier_lookup(cls, flow, wc);
+ atomic_add_relaxed(rule ? &tbl->n_matched : &tbl->n_missed,
+ stats->n_packets, &orig);
+ }
+ if (rule) {
+ goto out; /* Match. */
+ }
+ if (honor_table_miss) {
+ miss_config = ofproto_table_get_miss_config(&ofproto->up,
+ *table_id);
+ if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE) {
+ continue;
+ }
+ }
+ break;
}
-
- *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
- rule_dpif_ref(*rule);
- ovs_rwlock_unlock(&cls->rwlock);
-
- return *rule != NULL;
-}
-
-/* Given a port configuration (specified as zero if there's no port), chooses
- * which of 'miss_rule' and 'no_packet_in_rule' should be used in case of a
- * flow table miss. */
-void
-choose_miss_rule(enum ofputil_port_config config, struct rule_dpif *miss_rule,
- struct rule_dpif *no_packet_in_rule, struct rule_dpif **rule)
-{
- *rule = config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
- rule_dpif_ref(*rule);
-}
-
-void
-rule_dpif_ref(struct rule_dpif *rule)
-{
- if (rule) {
- ofproto_rule_ref(&rule->up);
+ /* Miss. */
+ rule = ofproto->no_packet_in_rule;
+ if (may_packet_in) {
+ if (miss_config == OFPUTIL_TABLE_MISS_CONTINUE
+ || miss_config == OFPUTIL_TABLE_MISS_CONTROLLER) {
+ struct ofport_dpif *port;
+
+ port = ofp_port_to_ofport(ofproto, old_in_port);
+ if (!port) {
+ VLOG_WARN_RL(&rl, "packet-in on unknown OpenFlow port %"PRIu16,
+ old_in_port);
+ } else if (!(port->up.pp.config & OFPUTIL_PC_NO_PACKET_IN)) {
+ rule = ofproto->miss_rule;
+ }
+ } else if (miss_config == OFPUTIL_TABLE_MISS_DEFAULT &&
+ connmgr_wants_packet_in_on_miss(ofproto->up.connmgr)) {
+ rule = ofproto->miss_rule;
+ }
}
-}
+out:
+ /* Restore port numbers, as they may have been modified above. */
+ flow->tp_src = old_tp_src;
+ flow->tp_dst = old_tp_dst;
+ /* Restore the old in port. */
+ flow->in_port.ofp_port = old_in_port;
-void
-rule_dpif_unref(struct rule_dpif *rule)
-{
- if (rule) {
- ofproto_rule_unref(&rule->up);
- }
+ return rule;
}
static void
struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
ofproto->backer->need_revalidate = REV_FLOW_TABLE;
- ofoperation_complete(rule->up.pending, 0);
}
static struct rule_dpif *rule_dpif_cast(const struct rule *rule)
static struct rule *
rule_alloc(void)
{
- struct rule_dpif *rule = xmalloc(sizeof *rule);
+ struct rule_dpif *rule = xzalloc(sizeof *rule);
return &rule->up;
}
free(rule);
}
+/* Checks the connection-tracking fields ('ct_state', 'ct_zone', 'ct_mark'
+ * and 'ct_label') read from 'rule''s match.
+ *
+ * Returns 0 if all of them are zero or acceptable,
+ * OFPERR_OFPBMC_BAD_FIELD if a nonzero field is not flagged in the 'odp'
+ * support structure obtained through ofproto_dpif_get_support(), or
+ * OFPERR_OFPBMC_BAD_MASK if 'ct_state' has bits in CS_UNSUPPORTED_MASK. */
+static enum ofperr
+rule_check(struct rule *rule)
+{
+ uint16_t ct_state, ct_zone;
+ const ovs_u128 *labelp;
+ ovs_u128 ct_label = { { 0, 0 } };
+ uint32_t ct_mark;
+
+ ct_state = MINIFLOW_GET_U16(rule->cr.match.flow, ct_state);
+ ct_zone = MINIFLOW_GET_U16(rule->cr.match.flow, ct_zone);
+ ct_mark = MINIFLOW_GET_U32(rule->cr.match.flow, ct_mark);
+ /* The label getter yields a pointer; when it is null 'ct_label' keeps its
+ * all-zero initializer. */
+ labelp = MINIFLOW_GET_U128_PTR(rule->cr.match.flow, ct_label);
+ if (labelp) {
+ ct_label = *labelp;
+ }
+
+ /* Only consult the datapath support flags when some ct field is set. */
+ if (ct_state || ct_zone || ct_mark
+ || !ovs_u128_is_zero(&ct_label)) {
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->ofproto);
+ const struct odp_support *support = &ofproto_dpif_get_support(ofproto)->odp;
+
+ if ((ct_state && !support->ct_state)
+ || (ct_zone && !support->ct_zone)
+ || (ct_mark && !support->ct_mark)
+ || (!ovs_u128_is_zero(&ct_label) && !support->ct_label)) {
+ return OFPERR_OFPBMC_BAD_FIELD;
+ }
+ if (ct_state & CS_UNSUPPORTED_MASK) {
+ return OFPERR_OFPBMC_BAD_MASK;
+ }
+ }
+ return 0;
+}
+
static enum ofperr
rule_construct(struct rule *rule_)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
- ovs_mutex_init(&rule->stats_mutex);
- ovs_mutex_lock(&rule->stats_mutex);
- rule->packet_count = 0;
- rule->byte_count = 0;
- ovs_mutex_unlock(&rule->stats_mutex);
+ int error;
+
+ error = rule_check(rule_);
+ if (error) {
+ return error;
+ }
+
+ ovs_mutex_init_adaptive(&rule->stats_mutex);
+ rule->stats.n_packets = 0;
+ rule->stats.n_bytes = 0;
+ rule->stats.used = rule->up.modified;
+ rule->recirc_id = 0;
+ rule->new_rule = NULL;
+
return 0;
}
static void
-rule_insert(struct rule *rule_)
+rule_insert(struct rule *rule_, struct rule *old_rule_, bool forward_stats)
OVS_REQUIRES(ofproto_mutex)
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
+
+ if (old_rule_ && forward_stats) {
+ struct rule_dpif *old_rule = rule_dpif_cast(old_rule_);
+
+ ovs_assert(!old_rule->new_rule);
+
+ /* Take a reference to the new rule, and refer all stats updates from
+ * the old rule to the new rule. */
+ rule_dpif_ref(rule);
+
+ ovs_mutex_lock(&old_rule->stats_mutex);
+ ovs_mutex_lock(&rule->stats_mutex);
+ old_rule->new_rule = rule; /* Forward future stats. */
+ rule->stats = old_rule->stats; /* Transfer stats to the new rule. */
+ ovs_mutex_unlock(&rule->stats_mutex);
+ ovs_mutex_unlock(&old_rule->stats_mutex);
+ }
+
complete_operation(rule);
}
static void
rule_destruct(struct rule *rule_)
+ OVS_NO_THREAD_SAFETY_ANALYSIS
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
+
ovs_mutex_destroy(&rule->stats_mutex);
+ /* Release reference to the new rule, if any. */
+ if (rule->new_rule) {
+ rule_dpif_unref(rule->new_rule);
+ }
+ if (rule->recirc_id) {
+ recirc_free_id(rule->recirc_id);
+ }
}
static void
-rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes)
+rule_get_stats(struct rule *rule_, uint64_t *packets, uint64_t *bytes,
+ long long int *used)
{
struct rule_dpif *rule = rule_dpif_cast(rule_);
- push_all_stats();
-
- /* Start from historical data for 'rule' itself that are no longer tracked
- * in facets. This counts, for example, facets that have expired. */
ovs_mutex_lock(&rule->stats_mutex);
- *packets = rule->packet_count;
- *bytes = rule->byte_count;
+ if (OVS_UNLIKELY(rule->new_rule)) {
+ rule_get_stats(&rule->new_rule->up, packets, bytes, used);
+ } else {
+ *packets = rule->stats.n_packets;
+ *bytes = rule->stats.n_bytes;
+ *used = rule->stats.used;
+ }
ovs_mutex_unlock(&rule->stats_mutex);
}
static void
rule_dpif_execute(struct rule_dpif *rule, const struct flow *flow,
- struct ofpbuf *packet)
+ struct dp_packet *packet)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(rule->up.ofproto);
static enum ofperr
rule_execute(struct rule *rule, const struct flow *flow,
- struct ofpbuf *packet)
+ struct dp_packet *packet)
{
rule_dpif_execute(rule_dpif_cast(rule), flow, packet);
- ofpbuf_delete(packet);
+ dp_packet_delete(packet);
return 0;
}
-static void
-rule_modify_actions(struct rule *rule_, bool reset_counters)
- OVS_REQUIRES(ofproto_mutex)
-{
- struct rule_dpif *rule = rule_dpif_cast(rule_);
-
- if (reset_counters) {
- ovs_mutex_lock(&rule->stats_mutex);
- rule->packet_count = 0;
- rule->byte_count = 0;
- ovs_mutex_unlock(&rule->stats_mutex);
- }
-
- complete_operation(rule);
-}
-
static struct group_dpif *group_dpif_cast(const struct ofgroup *group)
{
return group ? CONTAINER_OF(group, struct group_dpif, up) : NULL;
group_construct_stats(struct group_dpif *group)
OVS_REQUIRES(group->stats_mutex)
{
+ struct ofputil_bucket *bucket;
+ const struct ovs_list *buckets;
+
group->packet_count = 0;
group->byte_count = 0;
- if (!group->bucket_stats) {
- group->bucket_stats = xcalloc(group->up.n_buckets,
- sizeof *group->bucket_stats);
- } else {
- memset(group->bucket_stats, 0, group->up.n_buckets *
- sizeof *group->bucket_stats);
+
+ group_dpif_get_buckets(group, &buckets);
+ LIST_FOR_EACH (bucket, list_node, buckets) {
+ bucket->stats.packet_count = 0;
+ bucket->stats.byte_count = 0;
+ }
+}
+
+/* Adds the packet and byte counts in 'stats' to 'group''s totals and to
+ * 'bucket'. If 'bucket' is null, the counts are added to every bucket in
+ * the group instead. 'group''s stats_mutex is held for the whole update. */
+void
+group_dpif_credit_stats(struct group_dpif *group,
+ struct ofputil_bucket *bucket,
+ const struct dpif_flow_stats *stats)
+{
+ ovs_mutex_lock(&group->stats_mutex);
+ group->packet_count += stats->n_packets;
+ group->byte_count += stats->n_bytes;
+ if (bucket) {
+ bucket->stats.packet_count += stats->n_packets;
+ bucket->stats.byte_count += stats->n_bytes;
+ } else { /* Credit to all buckets */
+ const struct ovs_list *buckets;
+
+ group_dpif_get_buckets(group, &buckets);
+ LIST_FOR_EACH (bucket, list_node, buckets) {
+ bucket->stats.packet_count += stats->n_packets;
+ bucket->stats.byte_count += stats->n_bytes;
+ }
}
+ ovs_mutex_unlock(&group->stats_mutex);
}
static enum ofperr
group_construct(struct ofgroup *group_)
{
struct group_dpif *group = group_dpif_cast(group_);
- ovs_mutex_init(&group->stats_mutex);
+
+ ovs_mutex_init_adaptive(&group->stats_mutex);
ovs_mutex_lock(&group->stats_mutex);
group_construct_stats(group);
ovs_mutex_unlock(&group->stats_mutex);
return 0;
}
-static void
-group_destruct__(struct group_dpif *group)
- OVS_REQUIRES(group->stats_mutex)
-{
- free(group->bucket_stats);
- group->bucket_stats = NULL;
-}
-
static void
group_destruct(struct ofgroup *group_)
{
struct group_dpif *group = group_dpif_cast(group_);
- ovs_mutex_lock(&group->stats_mutex);
- group_destruct__(group);
- ovs_mutex_unlock(&group->stats_mutex);
ovs_mutex_destroy(&group->stats_mutex);
}
static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
{
- struct group_dpif *group = group_dpif_cast(group_);
- struct group_dpif *victim = group_dpif_cast(victim_);
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
- ovs_mutex_lock(&group->stats_mutex);
- if (victim->up.n_buckets < group->up.n_buckets) {
- group_destruct__(group);
- }
- group_construct_stats(group);
- ovs_mutex_unlock(&group->stats_mutex);
+ ofproto->backer->need_revalidate = REV_FLOW_TABLE;
return 0;
}
group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
{
struct group_dpif *group = group_dpif_cast(group_);
+ struct ofputil_bucket *bucket;
+ const struct ovs_list *buckets;
+ struct bucket_counter *bucket_stats;
- /* Start from historical data for 'group' itself that are no longer tracked
- * in facets. This counts, for example, facets that have expired. */
ovs_mutex_lock(&group->stats_mutex);
ogs->packet_count = group->packet_count;
ogs->byte_count = group->byte_count;
- memcpy(ogs->bucket_stats, group->bucket_stats,
- group->up.n_buckets * sizeof *group->bucket_stats);
+
+ group_dpif_get_buckets(group, &buckets);
+ bucket_stats = ogs->bucket_stats;
+ LIST_FOR_EACH (bucket, list_node, buckets) {
+ bucket_stats->packet_count = bucket->stats.packet_count;
+ bucket_stats->byte_count = bucket->stats.byte_count;
+ bucket_stats++;
+ }
ovs_mutex_unlock(&group->stats_mutex);
return 0;
}
+/* If the group exists, this function increments the group's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
bool
group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
struct group_dpif **group)
- OVS_TRY_RDLOCK(true, (*group)->up.rwlock)
{
struct ofgroup *ofgroup;
bool found;
- *group = NULL;
found = ofproto_group_lookup(&ofproto->up, group_id, &ofgroup);
*group = found ? group_dpif_cast(ofgroup) : NULL;
return found;
}
-void
-group_dpif_release(struct group_dpif *group)
- OVS_RELEASES(group->up.rwlock)
-{
- ofproto_group_release(&group->up);
-}
-
void
group_dpif_get_buckets(const struct group_dpif *group,
- const struct list **buckets)
+ const struct ovs_list **buckets)
{
*buckets = &group->up.buckets;
}
{
return group->up.type;
}
+
+/* Returns the 'selection_method' string stored in 'group''s properties.
+ * The string is not copied; the returned pointer refers to the group's own
+ * storage. */
+const char *
+group_dpif_get_selection_method(const struct group_dpif *group)
+{
+ return group->up.props.selection_method;
+}
\f
/* Sends 'packet' out 'ofport'.
* May modify 'packet'.
* Returns 0 if successful, otherwise a positive errno value. */
int
-ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct ofpbuf *packet)
+ofproto_dpif_send_packet(const struct ofport_dpif *ofport, struct dp_packet *packet)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofport->up.ofproto);
int error;
ovs_mutex_lock(&ofproto->stats_mutex);
ofproto->stats.tx_packets++;
- ofproto->stats.tx_bytes += packet->size;
+ ofproto->stats.tx_bytes += dp_packet_size(packet);
ovs_mutex_unlock(&ofproto->stats_mutex);
return error;
}
-/* Composes an ODP action for a "slow path" action for 'flow' within 'ofproto'.
- * The action will state 'slow' as the reason that the action is in the slow
- * path. (This is purely informational: it allows a human viewing "ovs-dpctl
- * dump-flows" output to see why a flow is in the slow path.)
- *
- * The 'stub_size' bytes in 'stub' will be used to store the action.
- * 'stub_size' must be large enough for the action.
- *
- * The action and its size will be stored in '*actionsp' and '*actions_lenp',
- * respectively. */
-static void
-compose_slow_path(const struct ofproto_dpif *ofproto, const struct flow *flow,
- enum slow_path_reason slow,
- uint64_t *stub, size_t stub_size,
- const struct nlattr **actionsp, size_t *actions_lenp)
-{
- union user_action_cookie cookie;
- struct ofpbuf buf;
-
- cookie.type = USER_ACTION_COOKIE_SLOW_PATH;
- cookie.slow_path.unused = 0;
- cookie.slow_path.reason = slow;
-
- ofpbuf_use_stack(&buf, stub, stub_size);
- if (slow & (SLOW_CFM | SLOW_BFD | SLOW_LACP | SLOW_STP)) {
- uint32_t pid = dpif_port_get_pid(ofproto->backer->dpif,
- ODPP_NONE);
- odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
- } else {
- odp_port_t odp_port;
- uint32_t pid;
+/* Returns the 'selection_method_param' value stored in 'group''s
+ * properties. */
+uint64_t
+group_dpif_get_selection_method_param(const struct group_dpif *group)
+{
+ return group->up.props.selection_method_param;
+}
+
+/* Returns a pointer to the 'fields' array embedded in 'group''s properties.
+ * The pointer refers to storage inside 'group', not to a copy. */
+const struct field_array *
+group_dpif_get_fields(const struct group_dpif *group)
+{
+ return &group->up.props.fields;
+}
+\f
+/* Returns the version string of the datapath that backs this 'ofproto'
+ * (the backer's 'dp_version_string'). */
+static const char *
+get_datapath_version(const struct ofproto *ofproto_)
+{
+ struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
- odp_port = ofp_port_to_odp_port(ofproto, flow->in_port.ofp_port);
- pid = dpif_port_get_pid(ofproto->backer->dpif, odp_port);
- odp_put_userspace_action(pid, &cookie, sizeof cookie.slow_path, &buf);
- }
- *actionsp = buf.data;
- *actions_lenp = buf.size;
+ return ofproto->backer->dp_version_string;
}
-\f
+
static bool
set_frag_handling(struct ofproto *ofproto_,
enum ofp_config_flags frag_handling)
}
static enum ofperr
-packet_out(struct ofproto *ofproto_, struct ofpbuf *packet,
+packet_out(struct ofproto *ofproto_, struct dp_packet *packet,
const struct flow *flow,
const struct ofpact *ofpacts, size_t ofpacts_len)
{
unixctl_command_reply(conn, "table successfully flushed");
}
+/* Flushes multicast snooping tables and replies on 'conn'.
+ *
+ * With a bridge name in argv[1], flushes only that bridge and replies with
+ * an error if the bridge does not exist or has multicast snooping disabled.
+ * With no argument, flushes every bridge in 'all_ofproto_dpifs' that has
+ * multicast snooping enabled, silently skipping the others.
+ *
+ * NOTE(review): presumably registered as a unixctl command handler
+ * elsewhere in this file -- confirm against the registration code. */
+static void
+ofproto_unixctl_mcast_snooping_flush(struct unixctl_conn *conn, int argc,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ struct ofproto_dpif *ofproto;
+
+ if (argc > 1) {
+ ofproto = ofproto_dpif_lookup(argv[1]);
+ if (!ofproto) {
+ unixctl_command_reply_error(conn, "no such bridge");
+ return;
+ }
+
+ if (!mcast_snooping_enabled(ofproto->ms)) {
+ unixctl_command_reply_error(conn, "multicast snooping is disabled");
+ return;
+ }
+ mcast_snooping_mdb_flush(ofproto->ms);
+ } else {
+ HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
+ if (!mcast_snooping_enabled(ofproto->ms)) {
+ continue;
+ }
+ mcast_snooping_mdb_flush(ofproto->ms);
+ }
+ }
+
+ unixctl_command_reply(conn, "table successfully flushed");
+}
+
static struct ofport_dpif *
ofbundle_get_a_port(const struct ofbundle *bundle)
{
ds_put_cstr(&ds, " port VLAN MAC Age\n");
ovs_rwlock_rdlock(&ofproto->ml->rwlock);
LIST_FOR_EACH (e, lru_node, &ofproto->ml->lrus) {
- struct ofbundle *bundle = e->port.p;
+ struct ofbundle *bundle = mac_entry_get_port(ofproto->ml, e);
char name[OFP_MAX_PORT_NAME_LEN];
ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
ds_destroy(&ds);
}
+/* Replies on 'conn' with a table of the multicast snooping state of the
+ * bridge named by argv[1]: one row per (group, bundle) pair, followed by one
+ * "querier" row per port connected to a multicast router. Replies with an
+ * error if the bridge does not exist or has multicast snooping disabled.
+ *
+ * NOTE(review): argv[1] is read unconditionally, so this assumes the command
+ * is registered with exactly one required argument -- confirm against the
+ * registration code. */
+static void
+ofproto_unixctl_mcast_snooping_show(struct unixctl_conn *conn,
+ int argc OVS_UNUSED,
+ const char *argv[],
+ void *aux OVS_UNUSED)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ const struct ofproto_dpif *ofproto;
+ const struct ofbundle *bundle;
+ const struct mcast_group *grp;
+ struct mcast_group_bundle *b;
+ struct mcast_mrouter_bundle *mrouter;
+
+ ofproto = ofproto_dpif_lookup(argv[1]);
+ if (!ofproto) {
+ unixctl_command_reply_error(conn, "no such bridge");
+ return;
+ }
+
+ if (!mcast_snooping_enabled(ofproto->ms)) {
+ unixctl_command_reply_error(conn, "multicast snooping is disabled");
+ return;
+ }
+
+ ds_put_cstr(&ds, " port VLAN GROUP Age\n");
+ /* Both list walks below run under the snooping read lock. */
+ ovs_rwlock_rdlock(&ofproto->ms->rwlock);
+ LIST_FOR_EACH (grp, group_node, &ofproto->ms->group_lru) {
+ LIST_FOR_EACH(b, bundle_node, &grp->bundle_lru) {
+ char name[OFP_MAX_PORT_NAME_LEN];
+
+ bundle = b->port;
+ ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
+ name, sizeof name);
+ ds_put_format(&ds, "%5s %4d ", name, grp->vlan);
+ ipv6_format_mapped(&grp->addr, &ds);
+ ds_put_format(&ds, " %3d\n",
+ mcast_bundle_age(ofproto->ms, b));
+ }
+ }
+
+ /* Ports connected to multicast routers. */
+ LIST_FOR_EACH(mrouter, mrouter_node, &ofproto->ms->mrouter_lru) {
+ char name[OFP_MAX_PORT_NAME_LEN];
+
+ bundle = mrouter->port;
+ ofputil_port_to_string(ofbundle_get_a_port(bundle)->up.ofp_port,
+ name, sizeof name);
+ ds_put_format(&ds, "%5s %4d querier %3d\n",
+ name, mrouter->vlan,
+ mcast_mrouter_age(ofproto->ms, mrouter));
+ }
+ ovs_rwlock_unlock(&ofproto->ms->rwlock);
+ unixctl_command_reply(conn, ds_cstr(&ds));
+ ds_destroy(&ds);
+}
+
struct trace_ctx {
struct xlate_out xout;
struct xlate_in xin;
+ const struct flow *key;
struct flow flow;
struct ds *result;
+ struct flow_wildcards wc;
+ struct ofpbuf odp_actions;
};
static void
trace_format_rule(struct ds *result, int level, const struct rule_dpif *rule)
{
- struct rule_actions *actions;
+ const struct rule_actions *actions;
ovs_be64 cookie;
ds_put_char_multiple(result, '\t', level);
ds_put_cstr(result, "OpenFlow actions=");
ofpacts_format(actions->ofpacts, actions->ofpacts_len, result);
ds_put_char(result, '\n');
-
- rule_actions_unref(actions);
}
static void
{
ds_put_char_multiple(result, '\t', level);
ds_put_format(result, "%s: ", title);
- if (flow_equal(&trace->xin.flow, &trace->flow)) {
+ /* Do not report unchanged flows for resubmits. */
+ if ((level > 0 && flow_equal(&trace->xin.flow, &trace->flow))
+ || (level == 0 && flow_equal(&trace->xin.flow, trace->key))) {
ds_put_cstr(result, "unchanged");
} else {
flow_format(result, &trace->xin.flow);
trace_format_odp(struct ds *result, int level, const char *title,
struct trace_ctx *trace)
{
- struct ofpbuf *odp_actions = &trace->xout.odp_actions;
+ struct ofpbuf *odp_actions = &trace->odp_actions;
ds_put_char_multiple(result, '\t', level);
ds_put_format(result, "%s: ", title);
ds_put_char(result, '\n');
}
+/* Appends one line to 'result': 'level' tab characters, then "'title': ",
+ * then the match built from 'trace->key' and the wildcards in 'trace->wc',
+ * formatted with OFP_DEFAULT_PRIORITY, then a newline. */
+static void
+trace_format_megaflow(struct ds *result, int level, const char *title,
+ struct trace_ctx *trace)
+{
+ struct match match;
+
+ ds_put_char_multiple(result, '\t', level);
+ ds_put_format(result, "%s: ", title);
+ match_init(&match, trace->key, &trace->wc);
+ match_format(&match, result, OFP_DEFAULT_PRIORITY);
+ ds_put_char(result, '\n');
+}
+
+static void trace_report(struct xlate_in *, int recurse,
+ const char *format, ...)
+ OVS_PRINTF_FORMAT(3, 4);
+static void trace_report_valist(struct xlate_in *, int recurse,
+ const char *format, va_list args)
+ OVS_PRINTF_FORMAT(3, 0);
+
static void
trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
{
struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
struct ds *result = trace->result;
+ if (!recurse) {
+ if (rule == xin->ofproto->miss_rule) {
+ trace_report(xin, recurse,
+ "No match, flow generates \"packet in\"s.");
+ } else if (rule == xin->ofproto->no_packet_in_rule) {
+ trace_report(xin, recurse, "No match, packets dropped because "
+ "OFPPC_NO_PACKET_IN is set on in_port.");
+ } else if (rule == xin->ofproto->drop_frags_rule) {
+ trace_report(xin, recurse, "Packets dropped because they are IP "
+ "fragments and the fragment handling mode is "
+ "\"drop\".");
+ }
+ }
+
ds_put_char(result, '\n');
- trace_format_flow(result, recurse + 1, "Resubmitted flow", trace);
- trace_format_regs(result, recurse + 1, "Resubmitted regs", trace);
- trace_format_odp(result, recurse + 1, "Resubmitted odp", trace);
- trace_format_rule(result, recurse + 1, rule);
+ if (recurse) {
+ trace_format_flow(result, recurse, "Resubmitted flow", trace);
+ trace_format_regs(result, recurse, "Resubmitted regs", trace);
+ trace_format_odp(result, recurse, "Resubmitted odp", trace);
+ trace_format_megaflow(result, recurse, "Resubmitted megaflow", trace);
+ }
+ trace_format_rule(result, recurse, rule);
}
static void
-trace_report(struct xlate_in *xin, const char *s, int recurse)
+trace_report_valist(struct xlate_in *xin, int recurse,
+ const char *format, va_list args)
{
struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
struct ds *result = trace->result;
ds_put_char_multiple(result, '\t', recurse);
- ds_put_cstr(result, s);
+ ds_put_format_valist(result, format, args);
ds_put_char(result, '\n');
}
+/* printf-style front end to trace_report_valist(): collects the variadic
+ * arguments that follow 'format' into a va_list and passes them on together
+ * with 'xin' and 'recurse'. */
+static void
+trace_report(struct xlate_in *xin, int recurse, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ trace_report_valist(xin, recurse, format, args);
+ va_end(args);
+}
+
/* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following
* forms are supported:
*
*
* On success, initializes '*ofprotop' and 'flow' and returns NULL. On failure
* returns a nonnull malloced error message. */
-static char * WARN_UNUSED_RESULT
+static char * OVS_WARN_UNUSED_RESULT
parse_flow_and_packet(int argc, const char *argv[],
struct ofproto_dpif **ofprotop, struct flow *flow,
- struct ofpbuf **packetp)
+ struct dp_packet **packetp)
{
const struct dpif_backer *backer = NULL;
const char *error = NULL;
char *m_err = NULL;
struct simap port_names = SIMAP_INITIALIZER(&port_names);
- struct ofpbuf *packet;
+ struct dp_packet *packet;
struct ofpbuf odp_key;
struct ofpbuf odp_mask;
/* Handle "-generate" or a hex string as the last argument. */
if (!strcmp(argv[argc - 1], "-generate")) {
- packet = ofpbuf_new(0);
+ packet = dp_packet_new(0);
argc--;
} else {
error = eth_from_hex(argv[argc - 1], &packet);
goto exit;
}
- if (xlate_receive(backer, NULL, odp_key.data, odp_key.size, flow,
- NULL, ofprotop, NULL, NULL, NULL, NULL)) {
+ if (odp_flow_key_to_flow(odp_key.data, odp_key.size, flow) == ODP_FIT_ERROR) {
+ error = "Failed to parse datapath flow key";
+ goto exit;
+ }
+
+ *ofprotop = xlate_lookup_ofproto(backer, flow,
+ &flow->in_port.ofp_port);
+ if (*ofprotop == NULL) {
error = "Invalid datapath flow";
goto exit;
}
+
+ vsp_adjust_flow(*ofprotop, flow, NULL);
+
} else {
char *err = parse_ofp_exact_flow(flow, NULL, argv[argc - 1], NULL);
if (err) {
- m_err = xasprintf("Bad flow syntax: %s", err);
+ m_err = xasprintf("Bad openflow flow syntax: %s", err);
free(err);
goto exit;
} else {
/* Generate a packet, if requested. */
if (packet) {
- if (!packet->size) {
+ if (!dp_packet_size(packet)) {
flow_compose(packet, flow);
} else {
- union flow_in_port in_port = flow->in_port;
-
/* Use the metadata from the flow and the packet argument
* to reconstruct the flow. */
- flow_extract(packet, flow->skb_priority, flow->pkt_mark, NULL,
- &in_port, flow);
+ pkt_metadata_from_flow(&packet->md, flow);
+ flow_extract(packet, flow);
}
}
m_err = xstrdup(error);
}
if (m_err) {
- ofpbuf_delete(packet);
+ dp_packet_delete(packet);
packet = NULL;
}
*packetp = packet;
void *aux OVS_UNUSED)
{
struct ofproto_dpif *ofproto;
- struct ofpbuf *packet;
+ struct dp_packet *packet;
char *error;
struct flow flow;
ofproto_trace(ofproto, &flow, packet, NULL, 0, &result);
unixctl_command_reply(conn, ds_cstr(&result));
ds_destroy(&result);
- ofpbuf_delete(packet);
+ dp_packet_delete(packet);
} else {
unixctl_command_reply_error(conn, error);
free(error);
struct ofproto_dpif *ofproto;
bool enforce_consistency;
struct ofpbuf ofpacts;
- struct ofpbuf *packet;
+ struct dp_packet *packet;
struct ds result;
struct flow flow;
uint16_t in_port;
ofpbuf_init(&ofpacts, 0);
/* Parse actions. */
- error = parse_ofpacts(argv[--argc], &ofpacts, &usable_protocols);
+ error = ofpacts_parse_actions(argv[--argc], &ofpacts, &usable_protocols);
if (error) {
unixctl_command_reply_error(conn, error);
free(error);
/* Do the same checks as handle_packet_out() in ofproto.c.
*
- * We pass a 'table_id' of 0 to ofproto_check_ofpacts(), which isn't
+ * We pass a 'table_id' of 0 to ofpacts_check(), which isn't
* strictly correct because these actions aren't in any table, but it's OK
* because it 'table_id' is used only to check goto_table instructions, but
* packet-outs take a list of actions and therefore it can't include
if (enforce_consistency) {
retval = ofpacts_check_consistency(ofpacts.data, ofpacts.size, &flow,
u16_to_ofp(ofproto->up.max_ports),
- 0, 0, usable_protocols);
+ 0, ofproto->up.n_tables,
+ usable_protocols);
} else {
retval = ofpacts_check(ofpacts.data, ofpacts.size, &flow,
- u16_to_ofp(ofproto->up.max_ports), 0, 0,
- &usable_protocols);
+ u16_to_ofp(ofproto->up.max_ports), 0,
+ ofproto->up.n_tables, &usable_protocols);
+ }
+ if (!retval) {
+ retval = ofproto_check_ofpacts(&ofproto->up, ofpacts.data,
+ ofpacts.size);
}
if (retval) {
goto exit;
}
- ofproto_trace(ofproto, &flow, packet, ofpacts.data, ofpacts.size, &result);
+ ofproto_trace(ofproto, &flow, packet,
+ ofpacts.data, ofpacts.size, &result);
unixctl_command_reply(conn, ds_cstr(&result));
exit:
ds_destroy(&result);
- ofpbuf_delete(packet);
+ dp_packet_delete(packet);
ofpbuf_uninit(&ofpacts);
}
* If 'ofpacts' is nonnull then its 'ofpacts_len' bytes specify the actions to
* trace, otherwise the actions are determined by a flow table lookup. */
static void
-ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
- const struct ofpbuf *packet,
+ofproto_trace(struct ofproto_dpif *ofproto, struct flow *flow,
+ const struct dp_packet *packet,
const struct ofpact ofpacts[], size_t ofpacts_len,
struct ds *ds)
{
- struct rule_dpif *rule;
- struct flow_wildcards wc;
+ struct trace_ctx trace;
+ enum xlate_error error;
ds_put_format(ds, "Bridge: %s\n", ofproto->up.name);
ds_put_cstr(ds, "Flow: ");
flow_format(ds, flow);
ds_put_char(ds, '\n');
- flow_wildcards_init_catchall(&wc);
- if (ofpacts) {
- rule = NULL;
- } else {
- rule_dpif_lookup(ofproto, flow, &wc, &rule);
-
- trace_format_rule(ds, 0, rule);
- if (rule == ofproto->miss_rule) {
- ds_put_cstr(ds, "\nNo match, flow generates \"packet in\"s.\n");
- } else if (rule == ofproto->no_packet_in_rule) {
- ds_put_cstr(ds, "\nNo match, packets dropped because "
- "OFPPC_NO_PACKET_IN is set on in_port.\n");
- } else if (rule == ofproto->drop_frags_rule) {
- ds_put_cstr(ds, "\nPackets dropped because they are IP fragments "
- "and the fragment handling mode is \"drop\".\n");
- }
- }
-
- if (rule || ofpacts) {
- uint64_t odp_actions_stub[1024 / 8];
- struct ofpbuf odp_actions;
- struct trace_ctx trace;
- struct match match;
- uint16_t tcp_flags;
-
- tcp_flags = packet ? packet_get_tcp_flags(packet, flow) : 0;
- trace.result = ds;
- trace.flow = *flow;
- ofpbuf_use_stub(&odp_actions,
- odp_actions_stub, sizeof odp_actions_stub);
- xlate_in_init(&trace.xin, ofproto, flow, rule, tcp_flags, packet);
- if (ofpacts) {
- trace.xin.ofpacts = ofpacts;
- trace.xin.ofpacts_len = ofpacts_len;
- }
- trace.xin.resubmit_hook = trace_resubmit;
- trace.xin.report_hook = trace_report;
-
- xlate_actions(&trace.xin, &trace.xout);
- flow_wildcards_or(&trace.xout.wc, &trace.xout.wc, &wc);
-
- ds_put_char(ds, '\n');
- trace_format_flow(ds, 0, "Final flow", &trace);
-
- match_init(&match, flow, &trace.xout.wc);
- ds_put_cstr(ds, "Relevant fields: ");
- match_format(&match, ds, OFP_DEFAULT_PRIORITY);
- ds_put_char(ds, '\n');
-
- ds_put_cstr(ds, "Datapath actions: ");
- format_odp_actions(ds, trace.xout.odp_actions.data,
- trace.xout.odp_actions.size);
-
- if (trace.xout.slow) {
- enum slow_path_reason slow;
+ ofpbuf_init(&trace.odp_actions, 0);
- ds_put_cstr(ds, "\nThis flow is handled by the userspace "
- "slow path because it:");
+ trace.result = ds;
+ trace.key = flow; /* Original flow key, used for megaflow. */
+ trace.flow = *flow; /* May be modified by actions. */
+ xlate_in_init(&trace.xin, ofproto, flow, flow->in_port.ofp_port, NULL,
+ ntohs(flow->tcp_flags), packet, &trace.wc,
+ &trace.odp_actions);
+ trace.xin.ofpacts = ofpacts;
+ trace.xin.ofpacts_len = ofpacts_len;
+ trace.xin.resubmit_hook = trace_resubmit;
+ trace.xin.report_hook = trace_report_valist;
- slow = trace.xout.slow;
- while (slow) {
- enum slow_path_reason bit = rightmost_1bit(slow);
-
- ds_put_format(ds, "\n\t- %s.",
- slow_path_reason_to_explanation(bit));
-
- slow &= ~bit;
- }
- }
-
- xlate_out_uninit(&trace.xout);
- }
+ error = xlate_actions(&trace.xin, &trace.xout);
+ ds_put_char(ds, '\n');
+ trace_format_flow(ds, 0, "Final flow", &trace);
+ trace_format_megaflow(ds, 0, "Megaflow", &trace);
- rule_dpif_unref(rule);
-}
+ ds_put_cstr(ds, "Datapath actions: ");
+ format_odp_actions(ds, trace.odp_actions.data, trace.odp_actions.size);
-/* Runs a self-check of flow translations in 'ofproto'. Appends a message to
- * 'reply' describing the results. */
-static void
-ofproto_dpif_self_check__(struct ofproto_dpif *ofproto, struct ds *reply)
-{
- struct cls_cursor cursor;
- struct facet *facet;
- int errors;
+ if (error != XLATE_OK) {
+ ds_put_format(ds, "\nTranslation failed (%s), packet is dropped.\n",
+ xlate_strerror(error));
+ } else if (trace.xout.slow) {
+ enum slow_path_reason slow;
- errors = 0;
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
- if (!facet_check_consistency(facet)) {
- errors++;
- }
- }
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- if (errors) {
- ofproto->backer->need_revalidate = REV_INCONSISTENCY;
- }
+ ds_put_cstr(ds, "\nThis flow is handled by the userspace "
+ "slow path because it:");
- if (errors) {
- ds_put_format(reply, "%s: self-check failed (%d errors)\n",
- ofproto->up.name, errors);
- } else {
- ds_put_format(reply, "%s: self-check passed\n", ofproto->up.name);
- }
-}
+ slow = trace.xout.slow;
+ while (slow) {
+ enum slow_path_reason bit = rightmost_1bit(slow);
-static void
-ofproto_dpif_self_check(struct unixctl_conn *conn,
- int argc, const char *argv[], void *aux OVS_UNUSED)
-{
- struct ds reply = DS_EMPTY_INITIALIZER;
- struct ofproto_dpif *ofproto;
+ ds_put_format(ds, "\n\t- %s.",
+ slow_path_reason_to_explanation(bit));
- if (argc > 1) {
- ofproto = ofproto_dpif_lookup(argv[1]);
- if (!ofproto) {
- unixctl_command_reply_error(conn, "Unknown ofproto (use "
- "ofproto/list for help)");
- return;
- }
- ofproto_dpif_self_check__(ofproto, &reply);
- } else {
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- ofproto_dpif_self_check__(ofproto, &reply);
+ slow &= ~bit;
}
}
- unixctl_command_reply(conn, ds_cstr(&reply));
- ds_destroy(&reply);
+ xlate_out_uninit(&trace.xout);
+ ofpbuf_uninit(&trace.odp_actions);
}
/* Store the current ofprotos in 'ofproto_shash'. Returns a sorted list
dpif_show_backer(const struct dpif_backer *backer, struct ds *ds)
{
const struct shash_node **ofprotos;
- struct ofproto_dpif *ofproto;
+ struct dpif_dp_stats dp_stats;
struct shash ofproto_shash;
- uint64_t n_hit, n_missed;
size_t i;
- n_hit = n_missed = 0;
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- if (ofproto->backer == backer) {
- n_missed += ofproto->n_missed;
- n_hit += ofproto->n_hit;
- }
- }
+ dpif_get_dp_stats(backer->dpif, &dp_stats);
ds_put_format(ds, "%s: hit:%"PRIu64" missed:%"PRIu64"\n",
- dpif_name(backer->dpif), n_hit, n_missed);
-
- ds_put_format(ds, "\tflows: cur: %"PRIuSIZE", avg: %u, max: %u\n",
- hmap_count(&backer->subfacets), backer->avg_n_subfacet,
- backer->max_n_subfacet);
+ dpif_name(backer->dpif), dp_stats.n_hit, dp_stats.n_missed);
shash_init(&ofproto_shash);
ofprotos = get_ofprotos(&ofproto_shash);
continue;
}
- ds_put_format(ds, "\t%s: hit:%"PRIu64" missed:%"PRIu64"\n",
- ofproto->up.name, ofproto->n_hit, ofproto->n_missed);
+ ds_put_format(ds, "\t%s:\n", ofproto->up.name);
ports = shash_sort(&ofproto->up.port_by_name);
for (j = 0; j < shash_count(&ofproto->up.port_by_name); j++) {
ds_destroy(&ds);
}
-/* Dump the megaflow (facet) cache. This is useful to check the
- * correctness of flow wildcarding, since the same mechanism is used for
- * both xlate caching and kernel wildcarding.
- *
- * It's important to note that in the output the flow description uses
- * OpenFlow (OFP) ports, but the actions use datapath (ODP) ports.
- *
- * This command is only needed for advanced debugging, so it's not
- * documented in the man page. */
+/* unixctl handler for "dpif/dump-flows [-m] bridge".  Dumps the datapath
+ * flows of the backer behind the bridge named by the last argument, keeping
+ * only the flows that xlate_lookup_ofproto() attributes to that bridge.  With
+ * "-m" as the first argument each flow is prefixed by its UFID and formatted
+ * verbosely.  Replies with an error if the bridge is unknown or if the dump
+ * reports a failure. */
static void
-ofproto_unixctl_dpif_dump_megaflows(struct unixctl_conn *conn,
- int argc OVS_UNUSED, const char *argv[],
- void *aux OVS_UNUSED)
+ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
+ int argc, const char *argv[],
+ void *aux OVS_UNUSED)
{
- struct ds ds = DS_EMPTY_INITIALIZER;
const struct ofproto_dpif *ofproto;
- long long int now = time_msec();
- struct cls_cursor cursor;
- struct facet *facet;
-
- ofproto = ofproto_dpif_lookup(argv[1]);
- if (!ofproto) {
- unixctl_command_reply_error(conn, "no such bridge");
- return;
- }
-
- ovs_rwlock_rdlock(&ofproto->facets.rwlock);
- cls_cursor_init(&cursor, &ofproto->facets, NULL);
- CLS_CURSOR_FOR_EACH (facet, cr, &cursor) {
- cls_rule_format(&facet->cr, &ds);
- ds_put_cstr(&ds, ", ");
- ds_put_format(&ds, "n_subfacets:%"PRIuSIZE", ", list_size(&facet->subfacets));
- ds_put_format(&ds, "used:%.3fs, ", (now - facet->used) / 1000.0);
- ds_put_cstr(&ds, "Datapath actions: ");
- if (facet->xout.slow) {
- uint64_t slow_path_stub[128 / 8];
- const struct nlattr *actions;
- size_t actions_len;
-
- compose_slow_path(ofproto, &facet->flow, facet->xout.slow,
- slow_path_stub, sizeof slow_path_stub,
- &actions, &actions_len);
- format_odp_actions(&ds, actions, actions_len);
- } else {
- format_odp_actions(&ds, facet->xout.odp_actions.data,
- facet->xout.odp_actions.size);
- }
- ds_put_cstr(&ds, "\n");
- }
- ovs_rwlock_unlock(&ofproto->facets.rwlock);
- ds_chomp(&ds, '\n');
- unixctl_command_reply(conn, ds_cstr(&ds));
- ds_destroy(&ds);
-}
+ struct ds ds = DS_EMPTY_INITIALIZER;
+ bool verbosity = false;
-/* Disable using the megaflows.
- *
- * This command is only needed for advanced debugging, so it's not
- * documented in the man page. */
-static void
-ofproto_unixctl_dpif_disable_megaflows(struct unixctl_conn *conn,
- int argc OVS_UNUSED,
- const char *argv[] OVS_UNUSED,
- void *aux OVS_UNUSED)
-{
- struct ofproto_dpif *ofproto;
+ struct dpif_port dpif_port;
+ struct dpif_port_dump port_dump;
+ struct hmap portno_names;
- enable_megaflows = false;
+ struct dpif_flow_dump *flow_dump;
+ struct dpif_flow_dump_thread *flow_dump_thread;
+ struct dpif_flow f;
+ int error;
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- flush(&ofproto->up);
+ /* The bridge name is always the last argument, with or without "-m". */
+ ofproto = ofproto_dpif_lookup(argv[argc - 1]);
+ if (!ofproto) {
+ unixctl_command_reply_error(conn, "no such bridge");
+ return;
}
- unixctl_command_reply(conn, "megaflows disabled");
-}
-
-/* Re-enable using megaflows.
- *
- * This command is only needed for advanced debugging, so it's not
- * documented in the man page. */
-static void
-ofproto_unixctl_dpif_enable_megaflows(struct unixctl_conn *conn,
- int argc OVS_UNUSED,
- const char *argv[] OVS_UNUSED,
- void *aux OVS_UNUSED)
-{
- struct ofproto_dpif *ofproto;
-
- enable_megaflows = true;
-
- HMAP_FOR_EACH (ofproto, all_ofproto_dpifs_node, &all_ofproto_dpifs) {
- flush(&ofproto->up);
+ if (argc > 2 && !strcmp(argv[1], "-m")) {
+ verbosity = true;
}
- unixctl_command_reply(conn, "megaflows enabled");
-}
-
-static bool
-ofproto_dpif_contains_flow(const struct ofproto_dpif *ofproto,
- const struct nlattr *key, size_t key_len)
-{
- enum odp_key_fitness fitness;
- struct ofproto_dpif *ofp;
- struct flow flow;
-
- xlate_receive(ofproto->backer, NULL, key, key_len, &flow, &fitness, &ofp,
- NULL, NULL, NULL, NULL);
- return ofp == ofproto;
-}
-
-static void
-ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn,
- int argc OVS_UNUSED, const char *argv[],
- void *aux OVS_UNUSED)
-{
- struct ds ds = DS_EMPTY_INITIALIZER;
- const struct dpif_flow_stats *stats;
- const struct ofproto_dpif *ofproto;
- struct dpif_flow_dump flow_dump;
- const struct nlattr *actions;
- const struct nlattr *mask;
- const struct nlattr *key;
- size_t actions_len;
- size_t mask_len;
- size_t key_len;
-
- ofproto = ofproto_dpif_lookup(argv[1]);
- if (!ofproto) {
- unixctl_command_reply_error(conn, "no such bridge");
- return;
+ /* Collect datapath port names so flows can be printed with them. */
+ hmap_init(&portno_names);
+ DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, ofproto->backer->dpif) {
+ odp_portno_names_set(&portno_names, dpif_port.port_no, dpif_port.name);
}
ds_init(&ds);
- dpif_flow_dump_start(&flow_dump, ofproto->backer->dpif);
- while (dpif_flow_dump_next(&flow_dump, &key, &key_len, &mask, &mask_len,
- &actions, &actions_len, &stats)) {
- if (!ofproto_dpif_contains_flow(ofproto, key, key_len)) {
+ flow_dump = dpif_flow_dump_create(ofproto->backer->dpif, false);
+ flow_dump_thread = dpif_flow_dump_thread_create(flow_dump);
+ while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) {
+ struct flow flow;
+
+ /* Skip flows whose key cannot be parsed or that belong to another
+ * bridge on the same backer. */
+ if (odp_flow_key_to_flow(f.key, f.key_len, &flow) == ODP_FIT_ERROR
+ || xlate_lookup_ofproto(ofproto->backer, &flow, NULL) != ofproto) {
continue;
}
- odp_flow_format(key, key_len, mask, mask_len, NULL, &ds, false);
+ if (verbosity) {
+ odp_format_ufid(&f.ufid, &ds);
+ ds_put_cstr(&ds, " ");
+ }
+ odp_flow_format(f.key, f.key_len, f.mask, f.mask_len,
+ &portno_names, &ds, verbosity);
ds_put_cstr(&ds, ", ");
- dpif_flow_stats_format(stats, &ds);
+ dpif_flow_stats_format(&f.stats, &ds);
ds_put_cstr(&ds, ", actions:");
- format_odp_actions(&ds, actions, actions_len);
+ format_odp_actions(&ds, f.actions, f.actions_len);
ds_put_char(&ds, '\n');
}
+ dpif_flow_dump_thread_destroy(flow_dump_thread);
+ error = dpif_flow_dump_destroy(flow_dump);
- if (dpif_flow_dump_done(&flow_dump)) {
+ if (error) {
ds_clear(&ds);
- ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(errno));
+ /* Report the code returned by dpif_flow_dump_destroy(); 'errno' is not
+ * necessarily what the failed dump set. */
+ ds_put_format(&ds, "dpif/dump_flows failed: %s", ovs_strerror(error));
unixctl_command_reply_error(conn, ds_cstr(&ds));
} else {
unixctl_command_reply(conn, ds_cstr(&ds));
}
+ odp_portno_names_destroy(&portno_names);
+ hmap_destroy(&portno_names);
ds_destroy(&ds);
}
+/* Sets 'need_revalidate' to REV_RECONFIGURE on every backer registered in
+ * 'all_dpif_backers'. */
static void
-ofproto_dpif_unixctl_init(void)
+ofproto_revalidate_all_backers(void)
+{
+ const struct shash_node **backers = shash_sort(&all_dpif_backers);
+ /* The map is not modified by the loop, so count it once; an unsigned index
+ * also avoids comparing a signed 'int' against the count. */
+ size_t n_backers = shash_count(&all_dpif_backers);
+ size_t i;
+
+ for (i = 0; i < n_backers; i++) {
+ struct dpif_backer *backer = backers[i]->data;
+
+ backer->need_revalidate = REV_RECONFIGURE;
+ }
+ free(backers);
+}
+
+/* unixctl handler for "ofproto/tnl-push-pop".  argv[1] is compared
+ * case-insensitively against "off" and "on".  A recognized value updates
+ * 'ofproto_use_tnl_push_pop', acknowledges the command on 'conn' and flags
+ * every backer for revalidation.  Any other value gets an error reply so that
+ * the client is not left without a response. */
+static void
+disable_tnl_push_pop(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[], void *aux OVS_UNUSED)
+{
+ if (!strcasecmp(argv[1], "off")) {
+ ofproto_use_tnl_push_pop = false;
+ unixctl_command_reply(conn, "Tunnel push-pop off");
+ ofproto_revalidate_all_backers();
+ } else if (!strcasecmp(argv[1], "on")) {
+ ofproto_use_tnl_push_pop = true;
+ unixctl_command_reply(conn, "Tunnel push-pop on");
+ ofproto_revalidate_all_backers();
+ } else {
+ /* Unrecognized argument: leave the setting alone and say so. */
+ unixctl_command_reply_error(conn, "Invalid argument");
+ }
+}
+
+static void
+ofproto_unixctl_init(void)
{
static bool registered;
if (registered) {
ofproto_unixctl_fdb_flush, NULL);
unixctl_command_register("fdb/show", "bridge", 1, 1,
ofproto_unixctl_fdb_show, NULL);
- unixctl_command_register("ofproto/self-check", "[bridge]", 0, 1,
- ofproto_dpif_self_check, NULL);
+ unixctl_command_register("mdb/flush", "[bridge]", 0, 1,
+ ofproto_unixctl_mcast_snooping_flush, NULL);
+ unixctl_command_register("mdb/show", "bridge", 1, 1,
+ ofproto_unixctl_mcast_snooping_show, NULL);
unixctl_command_register("dpif/dump-dps", "", 0, 0,
ofproto_unixctl_dpif_dump_dps, NULL);
unixctl_command_register("dpif/show", "", 0, 0, ofproto_unixctl_dpif_show,
NULL);
- unixctl_command_register("dpif/dump-flows", "bridge", 1, 1,
+ unixctl_command_register("dpif/dump-flows", "[-m] bridge", 1, 2,
ofproto_unixctl_dpif_dump_flows, NULL);
- unixctl_command_register("dpif/dump-megaflows", "bridge", 1, 1,
- ofproto_unixctl_dpif_dump_megaflows, NULL);
- unixctl_command_register("dpif/disable-megaflows", "", 0, 0,
- ofproto_unixctl_dpif_disable_megaflows, NULL);
- unixctl_command_register("dpif/enable-megaflows", "", 0, 0,
- ofproto_unixctl_dpif_enable_megaflows, NULL);
+
+ unixctl_command_register("ofproto/tnl-push-pop", "[on]|[off]", 1, 1,
+ disable_tnl_push_pop, NULL);
+}
+
+/* Returns true if 'table_id' is TBL_INTERNAL, the table used for internal
+ * rules, false otherwise. */
+bool
+table_is_internal(uint8_t table_id)
+{
+ return table_id == TBL_INTERNAL;
+}
\f
/* Linux VLAN device support (e.g. "eth0.10" for VLAN 10.)
if (realdev_ofp_port && ofport->bundle) {
/* vlandevs are enslaved to their realdevs, so they are not allowed to
* themselves be part of a bundle. */
- bundle_set(ofport->up.ofproto, ofport->bundle, NULL);
+ bundle_set(ofport_->ofproto, ofport->bundle, NULL);
}
ofport->realdev_ofp_port = realdev_ofp_port;
ofproto_has_vlan_splinters(const struct ofproto_dpif *ofproto)
OVS_EXCLUDED(ofproto->vsp_mutex)
{
- bool ret;
-
- ovs_mutex_lock(&ofproto->vsp_mutex);
- ret = !hmap_is_empty(&ofproto->realdev_vid_map);
- ovs_mutex_unlock(&ofproto->vsp_mutex);
- return ret;
+ /* hmap_is_empty is thread safe. */
+ return !hmap_is_empty(&ofproto->realdev_vid_map);
}
+
static ofp_port_t
vsp_realdev_to_vlandev__(const struct ofproto_dpif *ofproto,
ofp_port_t realdev_ofp_port, ovs_be16 vlan_tci)
{
ofp_port_t ret;
+ /* hmap_is_empty is thread safe, see if we can return immediately. */
+ if (hmap_is_empty(&ofproto->realdev_vid_map)) {
+ return realdev_ofp_port;
+ }
ovs_mutex_lock(&ofproto->vsp_mutex);
ret = vsp_realdev_to_vlandev__(ofproto, realdev_ofp_port, vlan_tci);
ovs_mutex_unlock(&ofproto->vsp_mutex);
/* Given 'flow', a flow representing a packet received on 'ofproto', checks
* whether 'flow->in_port' represents a Linux VLAN device. If so, changes
* 'flow->in_port' to the "real" device backing the VLAN device, sets
- * 'flow->vlan_tci' to the VLAN VID, and returns true. Otherwise (which is
- * always the case unless VLAN splinters are enabled), returns false without
- * making any changes. */
+ * 'flow->vlan_tci' to the VLAN VID, and returns true. Optionally pushes the
+ * appropriate VLAN on 'packet' if provided. Otherwise (which is always the
+ * case unless VLAN splinters are enabled), returns false without making any
+ * changes. */
bool
-vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow)
+vsp_adjust_flow(const struct ofproto_dpif *ofproto, struct flow *flow,
+ struct dp_packet *packet)
OVS_EXCLUDED(ofproto->vsp_mutex)
{
ofp_port_t realdev;
int vid;
+ /* hmap_is_empty is thread safe. */
+ if (hmap_is_empty(&ofproto->vlandev_map)) {
+ return false;
+ }
+
ovs_mutex_lock(&ofproto->vsp_mutex);
realdev = vsp_vlandev_to_realdev(ofproto, flow->in_port.ofp_port, &vid);
ovs_mutex_unlock(&ofproto->vsp_mutex);
* the VLAN device's VLAN ID. */
flow->in_port.ofp_port = realdev;
flow->vlan_tci = htons((vid & VLAN_VID_MASK) | VLAN_CFI);
+
+ if (packet) {
+ /* Make the packet resemble the flow, so that it gets sent to an
+ * OpenFlow controller properly, so that it looks correct for sFlow,
+ * and so that flow_extract() will get the correct vlan_tci if it is
+ * called on 'packet'. */
+ eth_push_vlan(packet, htons(ETH_TYPE_VLAN), flow->vlan_tci);
+ }
+
return true;
}
static odp_port_t
ofp_port_to_odp_port(const struct ofproto_dpif *ofproto, ofp_port_t ofp_port)
{
- const struct ofport_dpif *ofport = get_ofp_port(ofproto, ofp_port);
+ const struct ofport_dpif *ofport = ofp_port_to_ofport(ofproto, ofp_port);
return ofport ? ofport->odp_port : ODPP_NONE;
}
}
}
+/* Adds a flow to table TBL_INTERNAL of 'ofproto' with the given 'match',
+ * 'priority' and 'idle_timeout' and the actions held in 'ofpacts', by issuing
+ * an OFPFC_ADD flow mod through ofproto_flow_mod().
+ *
+ * On success, stores the rule found by a subsequent lookup in TBL_INTERNAL
+ * into '*rulep' and returns 0.  On failure, logs a rate-limited error, stores
+ * NULL into '*rulep' and returns the error from ofproto_flow_mod(). */
+int
+ofproto_dpif_add_internal_flow(struct ofproto_dpif *ofproto,
+ const struct match *match, int priority,
+ uint16_t idle_timeout,
+ const struct ofpbuf *ofpacts,
+ struct rule **rulep)
+{
+ /* NOTE(review): 'ofm' is not zero-initialized, so any member that is not
+ * assigned below is indeterminate -- confirm ofproto_flow_mod() reads none
+ * of them for OFPFC_ADD. */
+ struct ofproto_flow_mod ofm;
+ struct rule_dpif *rule;
+ int error;
+
+ ofm.fm.match = *match;
+ ofm.fm.priority = priority;
+ ofm.fm.new_cookie = htonll(0);
+ ofm.fm.cookie = htonll(0);
+ ofm.fm.cookie_mask = htonll(0);
+ ofm.fm.modify_cookie = false;
+ ofm.fm.table_id = TBL_INTERNAL;
+ ofm.fm.command = OFPFC_ADD;
+ ofm.fm.idle_timeout = idle_timeout;
+ ofm.fm.hard_timeout = 0;
+ ofm.fm.importance = 0;
+ ofm.fm.buffer_id = 0;
+ ofm.fm.out_port = 0;
+ ofm.fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+ ofm.fm.ofpacts = ofpacts->data;
+ ofm.fm.ofpacts_len = ofpacts->size;
+
+ error = ofproto_flow_mod(&ofproto->up, &ofm);
+ if (error) {
+ VLOG_ERR_RL(&rl, "failed to add internal flow (%s)",
+ ofperr_to_string(error));
+ *rulep = NULL;
+ return error;
+ }
+
+ /* Look the rule up again to hand it back to the caller.  The flow mod
+ * just succeeded, so a failed lookup is treated as unreachable. */
+ rule = rule_dpif_lookup_in_table(ofproto,
+ ofproto_dpif_get_tables_version(ofproto),
+ TBL_INTERNAL, &ofm.fm.match.flow,
+ &ofm.fm.match.wc);
+ if (rule) {
+ *rulep = &rule->up;
+ } else {
+ OVS_NOT_REACHED();
+ }
+ return 0;
+}
+
+/* Deletes from table TBL_INTERNAL of 'ofproto' the flow that has exactly the
+ * given 'match' and 'priority', by issuing an OFPFC_DELETE_STRICT flow mod
+ * through ofproto_flow_mod().
+ *
+ * Returns 0 on success.  On failure, logs a rate-limited error and returns the
+ * error from ofproto_flow_mod(). */
+int
+ofproto_dpif_delete_internal_flow(struct ofproto_dpif *ofproto,
+ struct match *match, int priority)
+{
+ /* NOTE(review): 'ofm' is not zero-initialized and, unlike
+ * ofproto_dpif_add_internal_flow(), this function does not assign
+ * fm.out_port, fm.buffer_id, fm.ofpacts or the timeouts -- confirm that
+ * ofproto_flow_mod() does not read them for OFPFC_DELETE_STRICT. */
+ struct ofproto_flow_mod ofm;
+ int error;
+
+ ofm.fm.match = *match;
+ ofm.fm.priority = priority;
+ ofm.fm.new_cookie = htonll(0);
+ ofm.fm.cookie = htonll(0);
+ ofm.fm.cookie_mask = htonll(0);
+ ofm.fm.modify_cookie = false;
+ ofm.fm.table_id = TBL_INTERNAL;
+ ofm.fm.flags = OFPUTIL_FF_HIDDEN_FIELDS | OFPUTIL_FF_NO_READONLY;
+ ofm.fm.command = OFPFC_DELETE_STRICT;
+
+ error = ofproto_flow_mod(&ofproto->up, &ofm);
+ if (error) {
+ VLOG_ERR_RL(&rl, "failed to delete internal flow (%s)",
+ ofperr_to_string(error));
+ return error;
+ }
+
+ return 0;
+}
+
const struct ofproto_class ofproto_dpif_class = {
init,
enumerate_types,
dealloc,
run,
wait,
- get_memory_usage,
+ NULL, /* get_memory_usage. */
type_get_memory_usage,
flush,
- get_features,
- get_tables,
+ query_tables,
+ set_tables_version,
port_alloc,
port_construct,
port_destruct,
port_poll,
port_poll_wait,
port_is_lacp_current,
+ port_get_lacp_stats,
NULL, /* rule_choose_table */
rule_alloc,
rule_construct,
rule_dealloc,
rule_get_stats,
rule_execute,
- rule_modify_actions,
set_frag_handling,
packet_out,
set_netflow,
set_sflow,
set_ipfix,
set_cfm,
+ cfm_status_changed,
get_cfm_status,
+ set_lldp,
+ get_lldp_status,
+ set_aa,
+ aa_mapping_set,
+ aa_mapping_unset,
+ aa_vlan_get_queued,
+ aa_vlan_get_queue_size,
set_bfd,
+ bfd_status_changed,
get_bfd_status,
set_stp,
get_stp_status,
set_stp_port,
get_stp_port_status,
get_stp_port_stats,
+ set_rstp,
+ get_rstp_status,
+ set_rstp_port,
+ get_rstp_port_status,
set_queues,
bundle_set,
bundle_remove,
is_mirror_output_bundle,
forward_bpdu_changed,
set_mac_table_config,
+ set_mcast_snooping,
+ set_mcast_snooping_port,
set_realdev,
NULL, /* meter_get_features */
NULL, /* meter_set */
group_dealloc, /* group_dealloc */
group_modify, /* group_modify */
group_get_stats, /* group_get_stats */
+ get_datapath_version, /* get_datapath_version */
};