/* Bit-mask for hashing a flow down to a bucket. */
#define BOND_MASK 0xff
#define BOND_BUCKETS (BOND_MASK + 1)
-#define RECIRC_RULE_PRIORITY 20 /* Priority level for internal rules */
/* A hash bucket for mapping a flow to a slave.
* "struct bond" has an array of BOND_BUCKETS of these. */
struct bond_slave *slave; /* Assigned slave, NULL if unassigned. */
uint64_t tx_bytes /* Count of bytes recently transmitted. */
OVS_GUARDED_BY(rwlock);
- struct list list_node; /* In bond_slave's 'entries' list. */
+ struct ovs_list list_node; /* In bond_slave's 'entries' list. */
/* Recirculation.
*
/* A bond slave, that is, one of the links comprising a bond. */
struct bond_slave {
struct hmap_node hmap_node; /* In struct bond's slaves hmap. */
- struct list list_node; /* In struct bond's enabled_slaves list. */
+ struct ovs_list list_node; /* In struct bond's enabled_slaves list. */
struct bond *bond; /* The bond that contains this slave. */
void *aux; /* Client-provided handle for this slave. */
struct netdev *netdev; /* Network device, owned by the client. */
unsigned int change_seq; /* Tracks changes in 'netdev'. */
- ofp_port_t ofp_port; /* Open flow port number */
+ ofp_port_t ofp_port; /* OpenFlow port number. */
char *name; /* Name (a copy of netdev_get_name(netdev)). */
/* Link status. */
bool may_enable; /* Client considers this slave bondable. */
/* Rebalancing info. Used only by bond_rebalance(). */
- struct list bal_node; /* In bond_rebalance()'s 'bals' list. */
- struct list entries; /* 'struct bond_entry's assigned here. */
+ struct ovs_list bal_node; /* In bond_rebalance()'s 'bals' list. */
+ struct ovs_list entries; /* 'struct bond_entry's assigned here. */
uint64_t tx_bytes; /* Sum across 'tx_bytes' of entries. */
};
* (To prevent the bond_slave from disappearing they must also hold
* 'rwlock'.) */
struct ovs_mutex mutex OVS_ACQ_AFTER(rwlock);
- struct list enabled_slaves OVS_GUARDED; /* Contains struct bond_slaves. */
+ struct ovs_list enabled_slaves OVS_GUARDED; /* Contains struct bond_slaves. */
/* Bonding info. */
enum bond_mode balance; /* Balancing mode, one of BM_*. */
uint32_t recirc_id; /* Non zero if recirculation can be used.*/
struct hmap pr_rule_ops; /* Helps to maintain post recirculation rules.*/
+ /* Store active slave to OVSDB. */
+ bool active_slave_changed; /* Set to true whenever the bond changes
+ active slave. It will be reset to false
+ after it is stored into OVSDB */
+
+ /* Interface names may not persist across an OS reboot, so use the
+ * MAC address to identify the active slave. */
+ uint8_t active_slave_mac[ETH_ADDR_LEN];
+ /* The MAC address of the active interface. */
/* Legacy compatibility. */
- long long int next_fake_iface_update; /* LLONG_MAX if disabled. */
bool lacp_fallback_ab; /* Fallback to active-backup on LACP failure. */
struct ovs_refcount ref_cnt;
struct match match;
ofp_port_t out_ofport;
enum bond_op op;
- struct rule *pr_rule;
+ struct rule **pr_rule;
};
static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
struct flow_wildcards *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
-static void bond_update_fake_slave_stats(struct bond *)
- OVS_REQ_RDLOCK(rwlock);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
* stores the mode in '*balance' and returns true. Otherwise returns false
hmap_init(&bond->slaves);
list_init(&bond->enabled_slaves);
ovs_mutex_init(&bond->mutex);
- bond->next_fake_iface_update = LLONG_MAX;
ovs_refcount_init(&bond->ref_cnt);
bond->recirc_id = 0;
struct bond_slave *slave, *next_slave;
struct bond_pr_rule_op *pr_op, *next_op;
- if (!bond || ovs_refcount_unref(&bond->ref_cnt) != 1) {
+ if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
}
static void
add_pr_rule(struct bond *bond, const struct match *match,
- ofp_port_t out_ofport, struct rule *rule)
+ ofp_port_t out_ofport, struct rule **rule)
{
uint32_t hash = match_hash(match, 0);
struct bond_pr_rule_op *pr_op;
pr_op->op = DEL;
}
- if ((bond->hash == NULL) || (!bond->recirc_id)) {
- return;
- }
+ if (bond->hash && bond->recirc_id) {
+ for (i = 0; i < BOND_BUCKETS; i++) {
+ struct bond_slave *slave = bond->hash[i].slave;
- for (i = 0; i < BOND_BUCKETS; i++) {
- struct bond_slave *slave = bond->hash[i].slave;
+ if (slave) {
+ match_init_catchall(&match);
+ match_set_recirc_id(&match, bond->recirc_id);
+ match_set_dp_hash_masked(&match, i, BOND_MASK);
- if (slave) {
- match_init_catchall(&match);
- match_set_recirc_id(&match, bond->recirc_id);
- /* recirc_id -> metadata to speed up look ups. */
- match_set_metadata(&match, htonll(bond->recirc_id));
- match_set_dp_hash_masked(&match, i, BOND_MASK);
-
- add_pr_rule(bond, &match, slave->ofp_port,
- bond->hash[i].pr_rule);
+ add_pr_rule(bond, &match, slave->ofp_port,
+ &bond->hash[i].pr_rule);
+ }
}
}
HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
int error;
- struct rule *rule;
switch (pr_op->op) {
case ADD:
ofpbuf_clear(&ofpacts);
ofpact_put_OUTPUT(&ofpacts)->port = pr_op->out_ofport;
error = ofproto_dpif_add_internal_flow(bond->ofproto,
&pr_op->match,
- RECIRC_RULE_PRIORITY,
- &ofpacts, &rule);
+ RECIRC_RULE_PRIORITY, 0,
+ &ofpacts, pr_op->pr_rule);
if (error) {
char *err_s = match_to_string(&pr_op->match,
RECIRC_RULE_PRIORITY);
VLOG_ERR("failed to add post recirculation flow %s", err_s);
free(err_s);
- pr_op->pr_rule = NULL;
- } else {
- pr_op->pr_rule = rule;
}
break;
}
hmap_remove(&bond->pr_rule_ops, &pr_op->hmap_node);
- pr_op->pr_rule = NULL;
+ *pr_op->pr_rule = NULL;
free(pr_op);
break;
}
revalidate = true;
}
- if (s->fake_iface) {
- if (bond->next_fake_iface_update == LLONG_MAX) {
- bond->next_fake_iface_update = time_msec();
- }
- } else {
- bond->next_fake_iface_update = LLONG_MAX;
- }
-
if (bond->bond_revalidate) {
revalidate = true;
bond->bond_revalidate = false;
bond_entry_reset(bond);
}
+ memcpy(bond->active_slave_mac, s->active_slave_mac,
+ sizeof s->active_slave_mac);
+
+ bond->active_slave_changed = false;
+
ovs_rwlock_unlock(&rwlock);
return revalidate;
}
+/* Searches the slaves of 'bond' for one whose netdev reports the Ethernet
+ * address 'mac'.
+ *
+ * A slave for which netdev_get_etheraddr() returns nonzero is skipped.
+ * Returns the first matching slave encountered while iterating
+ * 'bond->slaves', or NULL if no slave matches. */
+static struct bond_slave *
+bond_find_slave_by_mac(const struct bond *bond, const uint8_t mac[ETH_ADDR_LEN])
+{
+    struct bond_slave *candidate;
+
+    HMAP_FOR_EACH(candidate, hmap_node, &bond->slaves) {
+        uint8_t addr[ETH_ADDR_LEN];
+
+        /* Only compare when the address was read successfully. */
+        if (!netdev_get_etheraddr(candidate->netdev, addr)
+            && !memcmp(addr, mac, sizeof addr)) {
+            return candidate;
+        }
+    }
+
+    return NULL;
+}
+
+/* Records the MAC address of the current active slave of 'bond' in
+ * 'bond->active_slave_mac', sets 'bond->active_slave_changed', and calls
+ * seq_change() on the connectivity seq.
+ *
+ * If 'bond' has no active slave, or netdev_get_etheraddr() reports failure
+ * (nonzero return), the recorded MAC is cleared to all-zeros so that
+ * uninitialized stack bytes are never copied into the bond. */
+static void
+bond_active_slave_changed(struct bond *bond)
+{
+    uint8_t mac[ETH_ADDR_LEN];
+
+    if (!bond->active_slave
+        || netdev_get_etheraddr(bond->active_slave->netdev, mac)) {
+        /* No address available: publish a well-defined value. */
+        memset(mac, 0, sizeof mac);
+    }
+
+    memcpy(bond->active_slave_mac, mac, sizeof bond->active_slave_mac);
+    bond->active_slave_changed = true;
+    seq_change(connectivity_seq_get());
+}
+
static void
bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
OVS_REQ_WRLOCK(rwlock)
bond_choose_active_slave(bond);
}
- /* Update fake bond interface stats. */
- if (time_msec() >= bond->next_fake_iface_update) {
- bond_update_fake_slave_stats(bond);
- bond->next_fake_iface_update = time_msec() + 1000;
- }
-
revalidate = bond->bond_revalidate;
bond->bond_revalidate = false;
ovs_rwlock_unlock(&rwlock);
seq_wait(connectivity_seq_get(), slave->change_seq);
}
- if (bond->next_fake_iface_update != LLONG_MAX) {
- poll_timer_wait_until(bond->next_fake_iface_update);
- }
-
if (bond->bond_revalidate) {
poll_immediate_wake();
}
}
/* Maintain bond stats using post recirculation rule byte counters.*/
-void
+static void
bond_recirculation_account(struct bond *bond)
+ OVS_REQ_WRLOCK(rwlock)
{
int i;
- ovs_rwlock_wrlock(&rwlock);
for (i=0; i<=BOND_MASK; i++) {
struct bond_entry *entry = &bond->hash[i];
struct rule *rule = entry->pr_rule;
bond_entry_account(entry, n_bytes);
}
}
- ovs_rwlock_unlock(&rwlock);
}
bool
bond_may_recirc(const struct bond *bond, uint32_t *recirc_id,
uint32_t *hash_bias)
{
- if (bond->balance == BM_TCP) {
+ if (bond->balance == BM_TCP && bond->recirc_id) {
if (recirc_id) {
*recirc_id = bond->recirc_id;
}
}
static struct bond_slave *
-bond_slave_from_bal_node(struct list *bal) OVS_REQ_RDLOCK(rwlock)
+bond_slave_from_bal_node(struct ovs_list *bal) OVS_REQ_RDLOCK(rwlock)
{
return CONTAINER_OF(bal, struct bond_slave, bal_node);
}
static void
-log_bals(struct bond *bond, const struct list *bals)
+log_bals(struct bond *bond, const struct ovs_list *bals)
OVS_REQ_RDLOCK(rwlock)
{
if (VLOG_IS_DBG_ENABLED()) {
/* Inserts 'slave' into 'bals' so that descending order of 'tx_bytes' is
* maintained. */
static void
-insert_bal(struct list *bals, struct bond_slave *slave)
+insert_bal(struct ovs_list *bals, struct bond_slave *slave)
{
struct bond_slave *pos;
/* Removes 'slave' from its current list and then inserts it into 'bals' so
* that descending order of 'tx_bytes' is maintained. */
static void
-reinsert_bal(struct list *bals, struct bond_slave *slave)
+reinsert_bal(struct ovs_list *bals, struct bond_slave *slave)
{
list_remove(&slave->bal_node);
insert_bal(bals, slave);
* The caller should have called bond_account() for each active flow, or in case
* of recirculation is used, have called bond_recirculation_account(bond),
* to ensure that flow data is consistently accounted at this point.
- *
- * Return whether rebalancing took place.*/
-bool
+ */
+void
bond_rebalance(struct bond *bond)
{
struct bond_slave *slave;
struct bond_entry *e;
- struct list bals;
+ struct ovs_list bals;
bool rebalanced = false;
+ bool use_recirc;
ovs_rwlock_wrlock(&rwlock);
if (!bond_is_balanced(bond) || time_msec() < bond->next_rebalance) {
}
bond->next_rebalance = time_msec() + bond->rebalance_interval;
+ use_recirc = ofproto_dpif_get_enable_recirc(bond->ofproto) &&
+ bond_may_recirc(bond, NULL, NULL);
+
+ if (use_recirc) {
+ bond_recirculation_account(bond);
+ }
+
/* Add each bond_entry to its slave's 'entries' list.
* Compute each slave's tx_bytes as the sum of its entries' tx_bytes. */
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
/* Re-sort 'bals'. */
reinsert_bal(&bals, from);
reinsert_bal(&bals, to);
- rebalanced = true;
+ rebalanced = true;
} else {
/* Can't usefully migrate anything away from 'from'.
* Don't reconsider it. */
* take 20 rebalancing runs to decay to 0 and get deleted entirely. */
for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) {
e->tx_bytes /= 2;
- if (!e->tx_bytes) {
- e->slave = NULL;
- }
+ }
+
+ if (use_recirc && rebalanced) {
+ bond_update_post_recirc_rules(bond,true);
}
done:
ovs_rwlock_unlock(&rwlock);
- return rebalanced;
}
\f
/* Bonding unixctl user interface functions. */
break;
}
+ ds_put_cstr(ds, "active slave mac: ");
+ ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac));
+ slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
+ ds_put_format(ds,"(%s)\n", slave ? slave->name : "none");
+
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
shash_add(&slave_shash, slave->name, slave);
}
bond->name, slave->name);
bond->send_learning_packets = true;
unixctl_command_reply(conn, "done");
+ bond_active_slave_changed(bond);
} else {
unixctl_command_reply(conn, "no change");
}
static struct bond_slave *
get_enabled_slave(struct bond *bond)
{
- struct list *node;
+ struct ovs_list *node;
ovs_mutex_lock(&bond->mutex);
if (list_is_empty(&bond->enabled_slaves)) {
{
struct bond_slave *slave, *best;
+ /* Find the last active slave. */
+ slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
+ if (slave && slave->enabled) {
+ return slave;
+ }
+
/* Find an enabled slave. */
HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
if (slave->enabled) {
}
bond->send_learning_packets = true;
+
+ if (bond->active_slave != old_active_slave) {
+ bond_active_slave_changed(bond);
+ }
} else if (old_active_slave) {
VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);
}
}
-/* Attempts to make the sum of the bond slaves' statistics appear on the fake
- * bond interface. */
-static void
-bond_update_fake_slave_stats(struct bond *bond)
+/*
+ * Looks up the bond named 'name' while holding 'rwlock' for writing.
+ *
+ * Returns true if the bond exists and either its 'active_slave_changed' flag
+ * is set or 'force' is true. In that case ETH_ADDR_LEN bytes of the bond's
+ * 'active_slave_mac' are copied into 'mac' and the flag is reset to false.
+ *
+ * Returns false, leaving 'mac' untouched, if no such bond exists or if the
+ * flag is clear and 'force' is false. */
+bool
+bond_get_changed_active_slave(const char *name, uint8_t* mac, bool force)
{
- struct netdev_stats bond_stats;
- struct bond_slave *slave;
- struct netdev *bond_dev;
-
- memset(&bond_stats, 0, sizeof bond_stats);
+ struct bond *bond;
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- struct netdev_stats slave_stats;
-
- if (!netdev_get_stats(slave->netdev, &slave_stats)) {
- /* XXX: We swap the stats here because they are swapped back when
- * reported by the internal device. The reason for this is
- * internal devices normally represent packets going into the
- * system but when used as fake bond device they represent packets
- * leaving the system. We really should do this in the internal
- * device itself because changing it here reverses the counts from
- * the perspective of the switch. However, the internal device
- * doesn't know what type of device it represents so we have to do
- * it here for now. */
- bond_stats.tx_packets += slave_stats.rx_packets;
- bond_stats.tx_bytes += slave_stats.rx_bytes;
- bond_stats.rx_packets += slave_stats.tx_packets;
- bond_stats.rx_bytes += slave_stats.tx_bytes;
+ ovs_rwlock_wrlock(&rwlock);
+ bond = bond_find(name);
+ if (bond) {
+ if (bond->active_slave_changed || force) {
+ memcpy(mac, bond->active_slave_mac, ETH_ADDR_LEN);
+ bond->active_slave_changed = false;
+ ovs_rwlock_unlock(&rwlock);
+ return true;
}
}
+ ovs_rwlock_unlock(&rwlock);
- if (!netdev_open(bond->name, "system", &bond_dev)) {
- netdev_set_stats(bond_dev, &bond_stats);
- netdev_close(bond_dev);
- }
+ return false;
}