/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <sys/uio.h>
#include <unistd.h>
#include "coverage.h"
-#include "dynamic-string.h"
+#include "openvswitch/dynamic-string.h"
#include "hash.h"
-#include "hmap.h"
+#include "openvswitch/hmap.h"
#include "netlink.h"
#include "netlink-protocol.h"
#include "odp-netlink.h"
-#include "ofpbuf.h"
+#include "openvswitch/ofpbuf.h"
#include "ovs-thread.h"
#include "poll-loop.h"
#include "seq.h"
#define SOL_NETLINK 270
#endif
-#ifdef _WIN32
-static struct ovs_mutex portid_mutex = OVS_MUTEX_INITIALIZER;
-static uint32_t g_last_portid = 0;
-
-/* Port IDs must be unique! */
-static uint32_t
-portid_next(void)
- OVS_GUARDED_BY(portid_mutex)
-{
- g_last_portid++;
- return g_last_portid;
-}
-#endif /* _WIN32 */
-
/* A single (bad) Netlink message can in theory dump out many, many log
* messages, so the burst size is set quite high here to avoid missing useful
* information. Also, at high logging levels we log *all* Netlink messages. */
const void *message, size_t size, int protocol);
#ifdef _WIN32
static int get_sock_pid_from_kernel(struct nl_sock *sock);
+static int set_sock_property(struct nl_sock *sock);
#endif
\f
/* Netlink sockets. */
sock = xmalloc(sizeof *sock);
#ifdef _WIN32
+ sock->overlapped.hEvent = NULL;
sock->handle = CreateFile(OVS_DEVICE_NAME_USER,
GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
if (retval != 0) {
goto error;
}
+ retval = set_sock_property(sock);
+ if (retval != 0) {
+ goto error;
+ }
#else
if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE,
&rcvbuf, sizeof rcvbuf)) {
goto error;
}
sock->rcvbuf = retval;
+ retval = 0;
/* Connect to kernel (pid 0) as remote address. */
memset(&remote, 0, sizeof remote);
#ifdef _WIN32
/* Reads the pid for 'sock' generated in the kernel datapath. The function
- * follows a transaction semantic. Eventually this function should call into
- * nl_transact. */
+ * uses a separate IOCTL instead of a transaction semantic to avoid unnecessary
+ * message overhead.
+ *
+ * On success, stores the pid in 'sock->pid' and returns 0. Returns EINVAL if
+ * the OVS_IOCTL_GET_PID request fails or yields fewer than sizeof(pid) bytes,
+ * in which case 'sock->pid' is left untouched. */
static int
get_sock_pid_from_kernel(struct nl_sock *sock)
{
- struct nl_transaction txn;
- struct ofpbuf request;
- uint64_t request_stub[128];
- struct ofpbuf reply;
- uint64_t reply_stub[128];
- struct ovs_header *ovs_header;
- struct nlmsghdr *nlmsg;
- uint32_t seq;
- int retval;
- DWORD bytes;
- int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
- sizeof (struct ovs_header);
+ uint32_t pid = 0;
+ int retval = 0;
+ DWORD bytes = 0;
- ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
- txn.request = &request;
- ofpbuf_use_stub(&reply, reply_stub, sizeof reply_stub);
- txn.reply = &reply;
+ if (!DeviceIoControl(sock->handle, OVS_IOCTL_GET_PID,
+ NULL, 0, &pid, sizeof(pid),
+ &bytes, NULL)) {
+ retval = EINVAL;
+ } else {
+ /* A short result cannot hold a complete pid, so treat it as failure. */
+ if (bytes < sizeof(pid)) {
+ retval = EINVAL;
+ } else {
+ sock->pid = pid;
+ }
+ }
- seq = nl_sock_allocate_seq(sock, 1);
- nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
- OVS_CTRL_CMD_WIN_GET_PID, OVS_WIN_CONTROL_VERSION);
- nlmsg = nl_msg_nlmsghdr(txn.request);
- nlmsg->nlmsg_seq = seq;
+ return retval;
+}
+/* Sets the properties of 'sock' in the kernel datapath and verifies that the
+ * kernel's view of them matches userspace.
+ *
+ * Two attributes are currently tracked:
+ *
+ * protocol - supplied by userspace based on the netlink family (e.g.
+ * NETLINK_GENERIC or NETLINK_NETFILTER). Windows uses this property to set
+ * the value in the kernel datapath.
+ *
+ * pid - generated by the Windows kernel and set in userspace. This function
+ * does not modify it.
+ *
+ * Returns 0 on success. Returns EINVAL if the transaction fails, if the reply
+ * cannot be parsed, or if the kernel reports a protocol or pid that differs
+ * from the value held in 'sock'. */
+static int
+set_sock_property(struct nl_sock *sock)
+{
+ static const struct nl_policy ovs_socket_policy[] = {
+ [OVS_NL_ATTR_SOCK_PROTO] = { .type = NL_A_BE32, .optional = true },
+ [OVS_NL_ATTR_SOCK_PID] = { .type = NL_A_BE32, .optional = true }
+ };
+
+ struct ofpbuf request, *reply;
+ struct ovs_header *ovs_header;
+ struct nlattr *attrs[ARRAY_SIZE(ovs_socket_policy)];
+ int retval = 0;
+ int error;
+
+ ofpbuf_init(&request, 0);
+ nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
+ OVS_CTRL_CMD_SOCK_PROP, OVS_WIN_CONTROL_VERSION);
ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
ovs_header->dp_ifindex = 0;
- ovs_header = ofpbuf_put_uninit(&reply, ovs_msg_size);
- if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
- txn.request->data, txn.request->size,
- txn.reply->data, txn.reply->size,
- &bytes, NULL)) {
+ nl_msg_put_be32(&request, OVS_NL_ATTR_SOCK_PROTO, sock->protocol);
+ /* The pid is already set as part of get_sock_pid_from_kernel(); it is
+ * included here to maintain consistency. */
+ nl_msg_put_be32(&request, OVS_NL_ATTR_SOCK_PID, sock->pid);
+
+ error = nl_sock_transact(sock, &request, &reply);
+ ofpbuf_uninit(&request);
+ if (error) {
+ /* There is no valid reply to parse on this path, so stop here instead
+ * of handing 'reply' to nl_policy_parse(). NOTE(review): assumes
+ * nl_sock_transact() does not hand back a reply that needs freeing when
+ * it fails -- confirm. */
retval = EINVAL;
- goto done;
- } else {
- if (bytes < ovs_msg_size) {
+ return retval;
+ }
+
+ if (!nl_policy_parse(reply,
+ NLMSG_HDRLEN + GENL_HDRLEN + sizeof *ovs_header,
+ ovs_socket_policy, attrs,
+ ARRAY_SIZE(ovs_socket_policy))) {
+ /* Release the reply exactly once and stop, rather than falling through
+ * to the attribute checks after the buffer has been freed. */
+ ofpbuf_delete(reply);
+ return EINVAL;
+ }
+ /* Verify that the kernel echoed back the values held in userspace.
+ * 'attrs' presumably points into 'reply', so the reply is freed only after
+ * both checks are done. */
+ if (attrs[OVS_NL_ATTR_SOCK_PROTO]) {
+ int protocol = nl_attr_get_be32(attrs[OVS_NL_ATTR_SOCK_PROTO]);
+ if (protocol != sock->protocol) {
+ VLOG_ERR("Invalid protocol returned:%d expected:%d",
+ protocol, sock->protocol);
retval = EINVAL;
- goto done;
}
+ }
- nlmsg = nl_msg_nlmsghdr(txn.reply);
- if (nlmsg->nlmsg_seq != seq) {
+ if (attrs[OVS_NL_ATTR_SOCK_PID]) {
+ int pid = nl_attr_get_be32(attrs[OVS_NL_ATTR_SOCK_PID]);
+ if (pid != sock->pid) {
+ VLOG_ERR("Invalid pid returned:%d expected:%d",
+ pid, sock->pid);
retval = EINVAL;
- goto done;
}
- sock->pid = nlmsg->nlmsg_pid;
}
- retval = 0;
-done:
- ofpbuf_uninit(&request);
- ofpbuf_uninit(&reply);
+
+ /* Free the reply on the success and mismatch paths as well, so that it is
+ * not leaked. */
+ ofpbuf_delete(reply);
return retval;
}
#endif /* _WIN32 */
error = nl_sock_subscribe_packet__(sock, true);
if (error) {
- VLOG_WARN("could not unsubscribe packets (%s)",
- ovs_strerror(errno));
+ VLOG_WARN("could not subscribe packets (%s)",
+ ovs_strerror(error));
return error;
}
sock->read_ioctl = OVS_IOCTL_READ_PACKET;
int error = nl_sock_subscribe_packet__(sock, false);
if (error) {
- VLOG_WARN("could not subscribe to packets (%s)",
- ovs_strerror(errno));
+ VLOG_WARN("could not unsubscribe to packets (%s)",
+ ovs_strerror(error));
return error;
}
retval = -1;
/* XXX: Map to a more appropriate error based on GetLastError(). */
errno = EINVAL;
+ VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
+ ovs_lasterror_to_string());
} else {
retval = msg->size;
}
DWORD bytes;
if (!DeviceIoControl(sock->handle, sock->read_ioctl,
NULL, 0, tail, sizeof tail, &bytes, NULL)) {
+ VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
+ ovs_lasterror_to_string());
retval = -1;
+ /* XXX: Map to a more appropriate error. */
errno = EINVAL;
} else {
retval = bytes;
uint8_t reply_buf[65536];
for (i = 0; i < n; i++) {
DWORD reply_len;
+ bool ret;
struct nl_transaction *txn = transactions[i];
struct nlmsghdr *request_nlmsg, *reply_nlmsg;
- if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
- txn->request->data,
- txn->request->size,
- reply_buf, sizeof reply_buf,
- &reply_len, NULL)) {
+ ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
+ txn->request->data,
+ txn->request->size,
+ reply_buf, sizeof reply_buf,
+ &reply_len, NULL);
+
+ if (ret && reply_len == 0) {
+ /*
+ * The current transaction did not produce any data to read and that
+ * is not an error as such. Continue with the remainder of the
+ * transactions.
+ */
+ txn->error = 0;
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ } else if (!ret) {
/* XXX: Map to a more appropriate error. */
error = EINVAL;
+ VLOG_DBG_RL(&rl, "fatal driver failure: %s",
+ ovs_lasterror_to_string());
break;
}
- if (reply_len < sizeof *reply_nlmsg) {
- nl_sock_record_errors__(transactions, n, 0);
- VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
- " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
- break;
- }
-
- /* Validate the sequence number in the reply. */
- request_nlmsg = nl_msg_nlmsghdr(txn->request);
- reply_nlmsg = (struct nlmsghdr *)reply_buf;
+ if (reply_len != 0) {
+ if (reply_len < sizeof *reply_nlmsg) {
+ nl_sock_record_errors__(transactions, n, 0);
+ VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
+ " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
+ break;
+ }
- if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
- ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
- VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
- ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
- reply_nlmsg->nlmsg_seq);
- break;
- }
+ /* Validate the sequence number in the reply. */
+ request_nlmsg = nl_msg_nlmsghdr(txn->request);
+ reply_nlmsg = (struct nlmsghdr *)reply_buf;
- /* Handle errors embedded within the netlink message. */
- ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
- tmp_reply.size = sizeof reply_buf;
- if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
- if (txn->reply) {
- ofpbuf_clear(txn->reply);
- }
- if (txn->error) {
- VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
- error, ovs_strerror(txn->error));
+ if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
+ ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
+ VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
+ ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
+ reply_nlmsg->nlmsg_seq);
+ break;
}
- } else {
- txn->error = 0;
- if (txn->reply) {
- /* Copy the reply to the buffer specified by the caller. */
- if (reply_len > txn->reply->allocated) {
- ofpbuf_reinit(txn->reply, reply_len);
+
+ /* Handle errors embedded within the netlink message. */
+ ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
+ tmp_reply.size = sizeof reply_buf;
+ if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ if (txn->error) {
+ VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+ error, ovs_strerror(txn->error));
+ }
+ } else {
+ txn->error = 0;
+ if (txn->reply) {
+ /* Copy the reply to the buffer specified by the caller. */
+ if (reply_len > txn->reply->allocated) {
+ ofpbuf_reinit(txn->reply, reply_len);
+ }
+ memcpy(txn->reply->data, reply_buf, reply_len);
+ txn->reply->size = reply_len;
}
- memcpy(txn->reply->data, reply_buf, reply_len);
- txn->reply->size = reply_len;
}
+ ofpbuf_uninit(&tmp_reply);
}
- ofpbuf_uninit(&tmp_reply);
/* Count the number of successful transactions. */
(*done)++;
} else if (error) {
VLOG_ERR_RL(&rl, "transaction error (%s)", ovs_strerror(error));
nl_sock_record_errors__(transactions, n, error);
+ if (error != EAGAIN) {
+ /* A fatal error has occurred. Abort the rest of
+ * transactions. */
+ break;
+ }
}
}
}
struct ovs_header *ovs_header;
struct nlmsghdr *nlmsg;
uint32_t seq;
- int retval;
+ int retval = 0;
int error;
DWORD bytes;
OVERLAPPED *overlapped = CONST_CAST(OVERLAPPED *, &sock->overlapped);
+ uint16_t cmd = OVS_CTRL_CMD_WIN_PEND_PACKET_REQ;
+
+ ovs_assert(sock->read_ioctl == OVS_IOCTL_READ_PACKET ||
+ sock->read_ioctl == OVS_IOCTL_READ_EVENT);
+ if (sock->read_ioctl == OVS_IOCTL_READ_EVENT) {
+ cmd = OVS_CTRL_CMD_WIN_PEND_REQ;
+ }
int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
sizeof (struct ovs_header);
seq = nl_sock_allocate_seq(sock, 1);
nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
- OVS_CTRL_CMD_WIN_PEND_REQ, OVS_WIN_CONTROL_VERSION);
+ cmd, OVS_WIN_CONTROL_VERSION);
nlmsg = nl_msg_nlmsghdr(&request);
nlmsg->nlmsg_seq = seq;
nlmsg->nlmsg_pid = sock->pid;
ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
ovs_header->dp_ifindex = 0;
+ nlmsg->nlmsg_len = request.size;
if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
request.data, request.size,
if (error != ERROR_IO_INCOMPLETE && error != ERROR_IO_PENDING) {
VLOG_ERR("nl_sock_wait failed - %s\n", ovs_format_message(error));
retval = EINVAL;
- goto done;
}
} else {
- /* The I/O was completed synchronously */
- poll_immediate_wake();
+ retval = EAGAIN;
}
- retval = 0;
done:
ofpbuf_uninit(&request);
{
#ifdef _WIN32
if (sock->overlapped.Internal != STATUS_PENDING) {
- pend_io_request(CONST_CAST(struct nl_sock *, sock));
- /* XXX: poll_wevent_wait(sock->overlapped.hEvent); */
+ int ret = pend_io_request(CONST_CAST(struct nl_sock *, sock));
+ if (ret == 0) {
+ poll_wevent_wait(sock->overlapped.hEvent);
+ } else {
+ poll_immediate_wake();
+ }
+ } else {
+ poll_wevent_wait(sock->overlapped.hEvent);
}
- poll_immediate_wake(); /* XXX: temporary. */
#else
poll_fd_wait(sock->fd, events);
#endif
}
+#ifndef _WIN32
/* Returns the underlying fd for 'sock', for use in "poll()"-like operations
* that can't use nl_sock_wait().
*
int
nl_sock_fd(const struct nl_sock *sock)
{
-#ifdef _WIN32
- BUILD_ASSERT_DECL(sizeof sock->handle == sizeof(int));
- return (int)sock->handle;
-#else
return sock->fd;
-#endif
}
+#endif
/* Returns the PID associated with this socket. */
uint32_t
log_nlmsg(const char *function, int error,
const void *message, size_t size, int protocol)
{
- struct ofpbuf buffer;
- char *nlmsg;
-
if (!VLOG_IS_DBG_ENABLED()) {
return;
}
- ofpbuf_use_const(&buffer, message, size);
- nlmsg = nlmsg_to_string(&buffer, protocol);
+ struct ofpbuf buffer = ofpbuf_const_initializer(message, size);
+ char *nlmsg = nlmsg_to_string(&buffer, protocol);
VLOG_DBG_RL(&rl, "%s (%s): %s", function, ovs_strerror(error), nlmsg);
free(nlmsg);
}