From: Ben Pfaff Date: Mon, 28 Nov 2011 17:29:18 +0000 (-0800) Subject: dpif-linux: Use poll() internally in dpif_linux_recv(). X-Git-Tag: v1.3.0~14 X-Git-Url: http://git.cascardo.eti.br/?a=commitdiff_plain;h=2a75b1753eabb1b4e123f0b5f1c0dbaba5d0eaba;p=cascardo%2Fovs.git dpif-linux: Use poll() internally in dpif_linux_recv(). Using poll() internally in dpif_linux_recv(), instead of relying on the results of the main loop poll() call, brings netperf CRR performance back within 1% of par versus the code base before the poll_fd_woke() optimizations were introduced. It also increases the ovs-benchmark results by about 5% versus that baseline, too. My theory is that this is because the main loop takes long enough that a significant number of packets can arrive during the main loop itself, so this reduces the time before OVS gets to those packets. --- diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index b1e5e3103..4f6b6ab52 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include #include @@ -63,6 +65,7 @@ BUILD_ASSERT_DECL(IS_POW2(LRU_MAX_PORTS)); enum { N_UPCALL_SOCKS = 16 }; BUILD_ASSERT_DECL(IS_POW2(N_UPCALL_SOCKS)); +BUILD_ASSERT_DECL(N_UPCALL_SOCKS <= 32); /* We use a 32-bit word as a mask. */ /* This ethtool flag was introduced in Linux 2.6.24, so it might be * missing if we have old headers. */ @@ -135,8 +138,8 @@ struct dpif_linux { /* Upcall messages. */ struct nl_sock *upcall_socks[N_UPCALL_SOCKS]; - int last_read_upcall; - unsigned int listen_mask; + uint32_t ready_mask; /* 1-bit for each sock with unread messages. */ + unsigned int listen_mask; /* Mask of DPIF_UC_* bits. */ /* Change notification. */ struct sset changed_ports; /* Ports that have changed. */ @@ -1009,6 +1012,8 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask) return error; } } + + dpif->ready_mask = 0; } dpif->listen_mask = listen_mask; @@ -1088,24 +1093,51 @@ static int dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall) { struct dpif_linux *dpif = dpif_linux_cast(dpif_); - int i; int read_tries = 0; if (!dpif->listen_mask) { return EAGAIN; } - for (i = 0; i < N_UPCALL_SOCKS; i++) { - struct nl_sock *upcall_sock; - int dp_ifindex; + if (!dpif->ready_mask) { + struct pollfd pfds[N_UPCALL_SOCKS]; + int retval; + int i; - dpif->last_read_upcall = (dpif->last_read_upcall + 1) & - (N_UPCALL_SOCKS - 1); - upcall_sock = dpif->upcall_socks[dpif->last_read_upcall]; + for (i = 0; i < N_UPCALL_SOCKS; i++) { + pfds[i].fd = nl_sock_fd(dpif->upcall_socks[i]); + pfds[i].events = POLLIN; + pfds[i].revents = 0; + } + do { + retval = poll(pfds, N_UPCALL_SOCKS, 0); + } while (retval < 0 && errno == EINTR); + if (retval < 0) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "poll failed (%s)", strerror(errno)); + } else if (!retval) { + return EAGAIN; + } + + for (i = 0; i < N_UPCALL_SOCKS; i++) { + if (pfds[i].revents) { + dpif->ready_mask |= 1u << i; + } + } + + assert(dpif->ready_mask); + } + + do { + int indx = ffs(dpif->ready_mask) - 1; + struct nl_sock *upcall_sock = dpif->upcall_socks[indx]; + + dpif->ready_mask &= ~(1u << indx); for (;;) { struct ofpbuf *buf; + int dp_ifindex; int error; if (++read_tries > 50) { @@ -1131,7 +1163,7 @@ dpif_linux_recv(struct dpif *dpif_, struct dpif_upcall *upcall) return error; } } - } + } while (dpif->ready_mask); return EAGAIN; } diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 3e64de39f..7f496261a 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -854,6 +854,19 @@ nl_sock_wait(const struct nl_sock *sock, short int events) poll_fd_wait(sock->fd, events); } +/* Returns the underlying fd for 'sock', for use in "poll()"-like operations + * that can't use nl_sock_wait(). + * + * It's a little tricky to use the returned fd correctly, because nl_sock does + * "copy on write" to allow a single nl_sock to be used for notifications, + * transactions, and dumps. If 'sock' is used only for notifications and + * transactions (and never for dump) then the usage is safe. */ +int +nl_sock_fd(const struct nl_sock *sock) +{ + return sock->fd; +} + /* Returns the PID associated with this socket. */ uint32_t nl_sock_pid(const struct nl_sock *sock) diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h index b6356b9e2..f2a5d3fa8 100644 --- a/lib/netlink-socket.h +++ b/lib/netlink-socket.h @@ -60,6 +60,7 @@ int nl_sock_transact(struct nl_sock *, const struct ofpbuf *request, int nl_sock_drain(struct nl_sock *); void nl_sock_wait(const struct nl_sock *, short int events); +int nl_sock_fd(const struct nl_sock *); uint32_t nl_sock_pid(const struct nl_sock *);