-/* Copyright (c) 2013 Nicira, Inc.
+/* Copyright (c) 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <arpa/inet.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <sys/socket.h>
#include "byte-order.h"
#include "connectivity.h"
#include "csum.h"
+#include "dp-packet.h"
#include "dpif.h"
#include "dynamic-string.h"
#include "flow.h"
#include "hmap.h"
#include "list.h"
#include "netdev.h"
-#include "netlink.h"
#include "odp-util.h"
#include "ofpbuf.h"
#include "ovs-thread.h"
#include "unaligned.h"
#include "unixctl.h"
#include "util.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(bfd);
uint32_t rmt_disc; /* bfd.RemoteDiscr. */
- uint8_t eth_dst[ETH_ADDR_LEN];/* Ethernet destination address. */
- bool eth_dst_set; /* 'eth_dst' set through database. */
+ struct eth_addr local_eth_src; /* Local eth src address. */
+ struct eth_addr local_eth_dst; /* Local eth dst address. */
+
+ struct eth_addr rmt_eth_dst; /* Remote eth dst address. */
+
+ ovs_be32 ip_src; /* IPv4 source address. */
+ ovs_be32 ip_dst; /* IPv4 destination address. */
uint16_t udp_src; /* UDP source port. */
int forwarding_override; /* Manual override of 'forwarding' status. */
atomic_bool check_tnl_key; /* Verify tunnel key of inbound packets? */
- atomic_int ref_cnt;
+ struct ovs_refcount ref_cnt;
/* When forward_if_rx is true, bfd_forwarding() will return
* true as long as there are incoming packets received.
bool forwarding_if_rx;
long long int forwarding_if_rx_detect_time;
+ /* When 'bfd->forwarding_if_rx' is set, at least one BFD control packet
+ * must be received every 100 * bfd->cfg_min_rx milliseconds. If no BFD
+ * control packet is received within this interval, 'forwarding_if_rx'
+ * stops contributing to forwarding even if data packets are received. */
+ long long int demand_rx_bfd_time;
+
/* BFD decay related variables. */
bool in_decay; /* True when bfd is in decay. */
int decay_min_rx; /* min_rx is set to decay_min_rx when */
long long int decay_detect_time; /* Decay detection time. */
uint64_t flap_count; /* Counts bfd forwarding flaps. */
+
+ /* True when the variables returned by bfd_get_status() have changed
+ * since the last check. */
+ bool status_changed;
};
static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
static struct hmap all_bfds__ = HMAP_INITIALIZER(&all_bfds__);
static struct hmap *const all_bfds OVS_GUARDED_BY(mutex) = &all_bfds__;
+static bool bfd_lookup_ip(const char *host_name, struct in_addr *)
+ OVS_REQUIRES(mutex);
static bool bfd_forwarding__(struct bfd *) OVS_REQUIRES(mutex);
static bool bfd_in_poll(const struct bfd *) OVS_REQUIRES(mutex);
static void bfd_poll(struct bfd *bfd) OVS_REQUIRES(mutex);
static uint64_t bfd_rx_packets(const struct bfd *) OVS_REQUIRES(mutex);
static void bfd_try_decay(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_decay_update(struct bfd *) OVS_REQUIRES(mutex);
+static void bfd_status_changed(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_forwarding_if_rx_update(struct bfd *) OVS_REQUIRES(mutex);
static void bfd_unixctl_show(struct unixctl_conn *, int argc,
}
}
+/* Returns the current value of 'bfd->status_changed' and then resets the
+ * flag to false. Both the read and the reset happen while 'mutex' is held.
+ * 'bfd' is dereferenced unconditionally, so it must not be null. */
+bool
+bfd_check_status_change(struct bfd *bfd) OVS_EXCLUDED(mutex)
+{
+ bool ret;
+
+ ovs_mutex_lock(&mutex);
+ ret = bfd->status_changed; /* Remember the flag before clearing it. */
+ bfd->status_changed = false;
+ ovs_mutex_unlock(&mutex);
+
+ return ret;
+}
+
/* Returns a 'smap' of key value pairs representing the status of 'bfd'
* intended for the OVS database. */
void
smap_add(smap, "state", bfd_state_str(bfd->state));
smap_add(smap, "diagnostic", bfd_diag_str(bfd->diag));
smap_add_format(smap, "flap_count", "%"PRIu64, bfd->flap_count);
-
- if (bfd->state != STATE_DOWN) {
- smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state));
- smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag));
- }
+ smap_add(smap, "remote_state", bfd_state_str(bfd->rmt_state));
+ smap_add(smap, "remote_diagnostic", bfd_diag_str(bfd->rmt_diag));
ovs_mutex_unlock(&mutex);
}
/* Registers the "bfd/show" and "bfd/set-forwarding" unixctl commands, which
 * are handled by bfd_unixctl_show() and bfd_unixctl_set_forwarding_override()
 * respectively.
 *
 * NOTE(review): the numeric arguments (0, 1 and 1, 2) look like the minimum
 * and maximum argument counts for each command -- confirm against
 * unixctl_command_register(). */
+void
+bfd_init(void)
+{
+ unixctl_command_register("bfd/show", "[interface]", 0, 1,
+ bfd_unixctl_show, NULL);
+ unixctl_command_register("bfd/set-forwarding",
+ "[interface] normal|false|true", 1, 2,
+ bfd_unixctl_set_forwarding_override, NULL);
+}
+
/* Initializes, destroys, or reconfigures the BFD session 'bfd' (named 'name'),
* according to the database configuration contained in 'cfg'. Takes ownership
* of 'bfd', which may be NULL. Returns a BFD object which may be used as a
bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg,
struct netdev *netdev) OVS_EXCLUDED(mutex)
{
- static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
- static atomic_uint16_t udp_src = ATOMIC_VAR_INIT(0);
+ static atomic_count udp_src = ATOMIC_COUNT_INIT(0);
int decay_min_rx;
long long int min_tx, min_rx;
bool need_poll = false;
bool cfg_min_rx_changed = false;
bool cpath_down, forwarding_if_rx;
- const char *hwaddr;
- uint8_t ea[ETH_ADDR_LEN];
-
- if (ovsthread_once_start(&once)) {
- unixctl_command_register("bfd/show", "[interface]", 0, 1,
- bfd_unixctl_show, NULL);
- unixctl_command_register("bfd/set-forwarding",
- "[interface] normal|false|true", 1, 2,
- bfd_unixctl_set_forwarding_override, NULL);
- ovsthread_once_done(&once);
- }
+ const char *hwaddr, *ip_src, *ip_dst;
+ struct in_addr in_addr;
+ struct eth_addr ea;
if (!cfg || !smap_get_bool(cfg, "enable", false)) {
bfd_unref(bfd);
bfd->diag = DIAG_NONE;
bfd->min_tx = 1000;
bfd->mult = 3;
- atomic_init(&bfd->ref_cnt, 1);
+ ovs_refcount_init(&bfd->ref_cnt);
bfd->netdev = netdev_ref(netdev);
bfd->rx_packets = bfd_rx_packets(bfd);
bfd->in_decay = false;
* UDP source port number MUST be used for all BFD Control packets
* associated with a particular session. The source port number SHOULD
* be unique among all BFD sessions on the system. */
- atomic_add(&udp_src, 1, &bfd->udp_src);
- bfd->udp_src = (bfd->udp_src % 16384) + 49152;
+ bfd->udp_src = (atomic_count_inc(&udp_src) % 16384) + 49152;
bfd_set_state(bfd, STATE_DOWN, DIAG_NONE);
- memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
+ bfd_status_changed(bfd);
}
- atomic_store(&bfd->check_tnl_key,
- smap_get_bool(cfg, "check_tnl_key", false));
+ atomic_store_relaxed(&bfd->check_tnl_key,
+ smap_get_bool(cfg, "check_tnl_key", false));
min_tx = smap_get_int(cfg, "min_tx", 100);
- min_tx = MAX(min_tx, 100);
+ min_tx = MAX(min_tx, 1);
if (bfd->cfg_min_tx != min_tx) {
bfd->cfg_min_tx = min_tx;
if (bfd->state != STATE_UP
}
min_rx = smap_get_int(cfg, "min_rx", 1000);
- min_rx = MAX(min_rx, 100);
+ min_rx = MAX(min_rx, 1);
if (bfd->cfg_min_rx != min_rx) {
bfd->cfg_min_rx = min_rx;
if (bfd->state != STATE_UP
cpath_down = smap_get_bool(cfg, "cpath_down", false);
if (bfd->cpath_down != cpath_down) {
bfd->cpath_down = cpath_down;
- if (bfd->diag == DIAG_NONE || bfd->diag == DIAG_CPATH_DOWN) {
- bfd_set_state(bfd, bfd->state, DIAG_NONE);
- }
+ bfd_set_state(bfd, bfd->state, DIAG_NONE);
need_poll = true;
}
- hwaddr = smap_get(cfg, "bfd_dst_mac");
- if (hwaddr && eth_addr_from_string(hwaddr, ea) && !eth_addr_is_zero(ea)) {
- memcpy(bfd->eth_dst, ea, ETH_ADDR_LEN);
- bfd->eth_dst_set = true;
- } else if (bfd->eth_dst_set) {
- memcpy(bfd->eth_dst, eth_addr_bfd, ETH_ADDR_LEN);
- bfd->eth_dst_set = false;
+ hwaddr = smap_get(cfg, "bfd_local_src_mac");
+ if (hwaddr && eth_addr_from_string(hwaddr, &ea)) {
+ bfd->local_eth_src = ea;
+ } else {
+ bfd->local_eth_src = eth_addr_zero;
+ }
+
+ hwaddr = smap_get(cfg, "bfd_local_dst_mac");
+ if (hwaddr && eth_addr_from_string(hwaddr, &ea)) {
+ bfd->local_eth_dst = ea;
+ } else {
+ bfd->local_eth_dst = eth_addr_zero;
+ }
+
+ hwaddr = smap_get(cfg, "bfd_remote_dst_mac");
+ if (hwaddr && eth_addr_from_string(hwaddr, &ea)) {
+ bfd->rmt_eth_dst = ea;
+ } else {
+ bfd->rmt_eth_dst = eth_addr_zero;
+ }
+
+ ip_src = smap_get(cfg, "bfd_src_ip");
+ if (ip_src && bfd_lookup_ip(ip_src, &in_addr)) {
+ memcpy(&bfd->ip_src, &in_addr, sizeof in_addr);
+ } else {
+ bfd->ip_src = htonl(0xA9FE0101); /* 169.254.1.1. */
+ }
+
+ ip_dst = smap_get(cfg, "bfd_dst_ip");
+ if (ip_dst && bfd_lookup_ip(ip_dst, &in_addr)) {
+ memcpy(&bfd->ip_dst, &in_addr, sizeof in_addr);
+ } else {
+ bfd->ip_dst = htonl(0xA9FE0100); /* 169.254.1.0. */
}
forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false);
{
struct bfd *bfd = CONST_CAST(struct bfd *, bfd_);
if (bfd) {
- int orig;
- atomic_add(&bfd->ref_cnt, 1, &orig);
- ovs_assert(orig > 0);
+ ovs_refcount_ref(&bfd->ref_cnt);
}
return bfd;
}
/* Releases one reference to 'bfd'. A null 'bfd' is a no-op.
 *
 * When the reference count check indicates that this was the last reference,
 * the session is torn down while holding 'mutex': a status change is
 * signalled, the session is removed from 'all_bfds', its netdev is closed,
 * and its name and the session itself are freed.
 *
 * NOTE(review): ovs_refcount_unref_relaxed() presumably returns the count
 * from before the decrement, as the replaced atomic_sub() code did -- confirm
 * against its declaration. */
void
bfd_unref(struct bfd *bfd) OVS_EXCLUDED(mutex)
{
- if (bfd) {
- int orig;
-
- atomic_sub(&bfd->ref_cnt, 1, &orig);
- ovs_assert(orig > 0);
- if (orig == 1) {
- ovs_mutex_lock(&mutex);
- hmap_remove(all_bfds, &bfd->node);
- netdev_close(bfd->netdev);
- free(bfd->name);
- free(bfd);
- ovs_mutex_unlock(&mutex);
- }
+ if (bfd && ovs_refcount_unref_relaxed(&bfd->ref_cnt) == 1) {
+ ovs_mutex_lock(&mutex);
+ bfd_status_changed(bfd); /* Bump connectivity seq before teardown. */
+ hmap_remove(all_bfds, &bfd->node);
+ netdev_close(bfd->netdev);
+ free(bfd->name);
+ free(bfd);
+ ovs_mutex_unlock(&mutex);
}
}
/* Arranges for the poll loop to wake up at bfd_wake_time(bfd) by passing
 * that time to poll_timer_wait_until(), and returns the same wake-up time
 * to the caller. */
-void
+long long int
bfd_wait(const struct bfd *bfd) OVS_EXCLUDED(mutex)
{
- poll_timer_wait_until(bfd_wake_time(bfd));
+ long long int wake_time = bfd_wake_time(bfd);
+ poll_timer_wait_until(wake_time);
+ return wake_time;
}
/* Returns the next wake up time. */
}
void
-bfd_put_packet(struct bfd *bfd, struct ofpbuf *p,
- uint8_t eth_src[ETH_ADDR_LEN]) OVS_EXCLUDED(mutex)
+bfd_put_packet(struct bfd *bfd, struct dp_packet *p,
+ const struct eth_addr eth_src) OVS_EXCLUDED(mutex)
{
long long int min_tx, min_rx;
struct udp_header *udp;
* set. */
ovs_assert(!(bfd->flags & FLAG_POLL) || !(bfd->flags & FLAG_FINAL));
- ofpbuf_reserve(p, 2); /* Properly align after the ethernet header. */
- eth = ofpbuf_put_uninit(p, sizeof *eth);
- memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
- memcpy(eth->eth_dst, bfd->eth_dst, ETH_ADDR_LEN);
+ dp_packet_reserve(p, 2); /* Properly align after the ethernet header. */
+ eth = dp_packet_put_uninit(p, sizeof *eth);
+ eth->eth_src = eth_addr_is_zero(bfd->local_eth_src)
+ ? eth_src : bfd->local_eth_src;
+ eth->eth_dst = eth_addr_is_zero(bfd->local_eth_dst)
+ ? eth_addr_bfd : bfd->local_eth_dst;
eth->eth_type = htons(ETH_TYPE_IP);
- ip = ofpbuf_put_zeros(p, sizeof *ip);
+ ip = dp_packet_put_zeros(p, sizeof *ip);
ip->ip_ihl_ver = IP_IHL_VER(5, 4);
ip->ip_tot_len = htons(sizeof *ip + sizeof *udp + sizeof *msg);
ip->ip_ttl = MAXTTL;
ip->ip_tos = IPTOS_LOWDELAY | IPTOS_THROUGHPUT;
ip->ip_proto = IPPROTO_UDP;
- /* Use link local addresses: */
- put_16aligned_be32(&ip->ip_src, htonl(0xA9FE0100)); /* 169.254.1.0. */
- put_16aligned_be32(&ip->ip_dst, htonl(0xA9FE0101)); /* 169.254.1.1. */
+ put_16aligned_be32(&ip->ip_src, bfd->ip_src);
+ put_16aligned_be32(&ip->ip_dst, bfd->ip_dst);
ip->ip_csum = csum(ip, sizeof *ip);
- udp = ofpbuf_put_zeros(p, sizeof *udp);
+ udp = dp_packet_put_zeros(p, sizeof *udp);
udp->udp_src = htons(bfd->udp_src);
udp->udp_dst = htons(BFD_DEST_PORT);
udp->udp_len = htons(sizeof *udp + sizeof *msg);
- msg = ofpbuf_put_uninit(p, sizeof *msg);
+ msg = dp_packet_put_uninit(p, sizeof *msg);
msg->vers_diag = (BFD_VERSION << 5) | bfd->diag;
msg->flags = (bfd->state & STATE_MASK) | bfd->flags;
struct flow_wildcards *wc)
{
struct bfd *bfd = CONST_CAST(struct bfd *, bfd_);
- bool check_tnl_key;
- memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
- if (bfd->eth_dst_set && memcmp(bfd->eth_dst, flow->dl_dst, ETH_ADDR_LEN)) {
- return false;
- }
+ if (!eth_addr_is_zero(bfd->rmt_eth_dst)) {
+ memset(&wc->masks.dl_dst, 0xff, sizeof wc->masks.dl_dst);
- memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
- memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
+ if (!eth_addr_equals(bfd->rmt_eth_dst, flow->dl_dst)) {
+ return false;
+ }
+ }
- atomic_read(&bfd->check_tnl_key, &check_tnl_key);
- if (check_tnl_key) {
- memset(&wc->masks.tunnel.tun_id, 0xff, sizeof wc->masks.tunnel.tun_id);
+ if (flow->dl_type == htons(ETH_TYPE_IP)) {
+ memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
+ if (flow->nw_proto == IPPROTO_UDP) {
+ memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
+ if (flow->tp_dst == htons(BFD_DEST_PORT)) {
+ bool check_tnl_key;
+
+ atomic_read_relaxed(&bfd->check_tnl_key, &check_tnl_key);
+ if (check_tnl_key) {
+ memset(&wc->masks.tunnel.tun_id, 0xff,
+ sizeof wc->masks.tunnel.tun_id);
+ return flow->tunnel.tun_id == htonll(0);
+ }
+ return true;
+ }
+ }
}
- return (flow->dl_type == htons(ETH_TYPE_IP)
- && flow->nw_proto == IPPROTO_UDP
- && flow->tp_dst == htons(BFD_DEST_PORT)
- && (!check_tnl_key || flow->tunnel.tun_id == htonll(0)));
+ return false;
}
void
bfd_process_packet(struct bfd *bfd, const struct flow *flow,
- const struct ofpbuf *p) OVS_EXCLUDED(mutex)
+ const struct dp_packet *p) OVS_EXCLUDED(mutex)
{
uint32_t rmt_min_rx, pkt_your_disc;
enum state rmt_state;
enum flags flags;
uint8_t version;
struct msg *msg;
+ const uint8_t *l7 = dp_packet_get_udp_payload(p);
+
+ if (!l7) {
+ return; /* No UDP payload. */
+ }
/* This function is designed to follow section RFC 5880 6.8.6 closely. */
goto out;
}
- msg = ofpbuf_at(p, (uint8_t *)p->l7 - (uint8_t *)p->data, BFD_PACKET_LEN);
+ msg = dp_packet_at(p, l7 - (uint8_t *)dp_packet_data(p), BFD_PACKET_LEN);
if (!msg) {
VLOG_INFO_RL(&rl, "%s: Received too-short BFD control message (only "
"%"PRIdPTR" bytes long, at least %d required).",
- bfd->name, (uint8_t *) ofpbuf_tail(p) - (uint8_t *) p->l7,
+ bfd->name, (uint8_t *) dp_packet_tail(p) - l7,
BFD_PACKET_LEN);
goto out;
}
* If the Length field is greater than the payload of the encapsulating
* protocol, the packet MUST be discarded.
*
- * Note that we make this check implicity. Above we use ofpbuf_at() to
+ * Note that we make this check implicitly. Above we use dp_packet_at() to
* ensure that there are at least BFD_PACKET_LEN bytes in the payload of
* the encapsulating protocol. Below we require msg->length to be exactly
* BFD_PACKET_LEN bytes. */
goto out;
}
+ if (bfd->rmt_state != rmt_state) {
+ bfd_status_changed(bfd);
+ }
+
bfd->rmt_disc = ntohl(msg->my_disc);
bfd->rmt_state = rmt_state;
bfd->rmt_flags = flags;
rmt_min_rx = MAX(ntohl(msg->min_rx) / 1000, 1);
if (bfd->rmt_min_rx != rmt_min_rx) {
bfd->rmt_min_rx = rmt_min_rx;
- bfd_set_next_tx(bfd);
+ if (bfd->next_tx) {
+ bfd_set_next_tx(bfd);
+ }
log_msg(VLL_INFO, msg, "New remote min_rx", bfd);
}
}
/* XXX: RFC 5880 Section 6.8.6 Demand mode related calculations here. */
+ if (bfd->forwarding_if_rx) {
+ bfd->demand_rx_bfd_time = time_msec() + 100 * bfd->cfg_min_rx;
+ }
+
out:
bfd_forwarding__(bfd);
ovs_mutex_unlock(&mutex);
/* Recomputes and returns whether 'bfd' is considered to be forwarding.
 *
 * If 'bfd->forwarding_override' is not -1, returns whether it equals 1
 * without updating any other state.
 *
 * Otherwise forwarding is true when the session state is STATE_UP, or when
 * 'bfd->forwarding_if_rx' is enabled and both 'forwarding_if_rx_detect_time'
 * and 'demand_rx_bfd_time' are still later than the current time_msec().
 * In either case the remote diagnostic must be none of DIAG_PATH_DOWN,
 * DIAG_CPATH_DOWN or DIAG_RCPATH_DOWN.
 *
 * The result is stored in 'bfd->last_forwarding'. If it differs from the
 * previous value, 'bfd->flap_count' is incremented and bfd_status_changed()
 * is called. */
static bool
bfd_forwarding__(struct bfd *bfd) OVS_REQUIRES(mutex)
{
- long long int time;
+ long long int now = time_msec();
+ bool forwarding_if_rx;
bool last_forwarding = bfd->last_forwarding;
if (bfd->forwarding_override != -1) {
return bfd->forwarding_override == 1;
}
- time = bfd->forwarding_if_rx_detect_time;
- bfd->last_forwarding = (bfd->state == STATE_UP
- || (bfd->forwarding_if_rx && time > time_msec()))
- && bfd->rmt_diag != DIAG_PATH_DOWN
- && bfd->rmt_diag != DIAG_CPATH_DOWN
- && bfd->rmt_diag != DIAG_RCPATH_DOWN;
+ forwarding_if_rx = bfd->forwarding_if_rx
+ && bfd->forwarding_if_rx_detect_time > now
+ && bfd->demand_rx_bfd_time > now; /* Both deadlines must be pending. */
+
+ bfd->last_forwarding = (bfd->state == STATE_UP || forwarding_if_rx)
+ && bfd->rmt_diag != DIAG_PATH_DOWN
+ && bfd->rmt_diag != DIAG_CPATH_DOWN
+ && bfd->rmt_diag != DIAG_RCPATH_DOWN;
if (bfd->last_forwarding != last_forwarding) {
bfd->flap_count++;
- seq_change(connectivity_seq_get());
+ bfd_status_changed(bfd);
}
return bfd->last_forwarding;
}
/* Helpers. */
/* Parses 'host_name' with ip_parse() and stores the result in
 * 'addr->s_addr'. Returns true on success. On failure, logs a rate-limited
 * error naming 'host_name' and returns false.
 *
 * NOTE(review): despite the "lookup" in the name, nothing visible here
 * resolves host names; presumably only literal IP addresses are accepted --
 * confirm against ip_parse(). */
+static bool
+bfd_lookup_ip(const char *host_name, struct in_addr *addr)
+{
+ if (!ip_parse(host_name, &addr->s_addr)) {
+ VLOG_ERR_RL(&rl, "\"%s\" is not a valid IP address", host_name);
+ return false;
+ }
+ return true;
+}
+
static bool
bfd_in_poll(const struct bfd *bfd) OVS_REQUIRES(mutex)
{
bfd_set_state(struct bfd *bfd, enum state state, enum diag diag)
OVS_REQUIRES(mutex)
{
- if (diag == DIAG_NONE && bfd->cpath_down) {
+ if (bfd->cpath_down) {
diag = DIAG_CPATH_DOWN;
}
bfd_decay_update(bfd);
}
- seq_change(connectivity_seq_get());
+ bfd_status_changed(bfd);
}
}
bfd->decay_detect_time = MAX(bfd->decay_min_rx, 2000) + time_msec();
}
+/* Records the status change and changes the global connectivity seq.
+ *
+ * The change is recorded by setting 'bfd->status_changed' to true; that
+ * flag is read and cleared by bfd_check_status_change(). */
+static void
+bfd_status_changed(struct bfd *bfd) OVS_REQUIRES(mutex)
+{
+ seq_change(connectivity_seq_get());
+ bfd->status_changed = true;
+}
+
static void
bfd_forwarding_if_rx_update(struct bfd *bfd) OVS_REQUIRES(mutex)
{
goto out;
}
bfd->forwarding_override = forwarding_override;
+ bfd_status_changed(bfd);
} else {
HMAP_FOR_EACH (bfd, node, all_bfds) {
bfd->forwarding_override = forwarding_override;
+ bfd_status_changed(bfd);
}
}