#include "seq.h"
#include "socket-util.h"
#include "util.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(netlink_socket);
#ifdef _WIN32
/* Reads the pid for 'sock' generated in the kernel datapath. The function
- * follows a transaction semantic. Eventually this function should call into
- * nl_transact. */
+ * uses a separate IOCTL instead of a transaction semantic to avoid unnecessary
+ * message overhead. */
static int
get_sock_pid_from_kernel(struct nl_sock *sock)
{
- struct nl_transaction txn;
- struct ofpbuf request;
- uint64_t request_stub[128];
- struct ofpbuf reply;
- uint64_t reply_stub[128];
- struct ovs_header *ovs_header;
- struct nlmsghdr *nlmsg;
- uint32_t seq;
- int retval;
- DWORD bytes;
- int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
- sizeof (struct ovs_header);
-
- ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
- txn.request = &request;
- ofpbuf_use_stub(&reply, reply_stub, sizeof reply_stub);
- txn.reply = &reply;
-
- seq = nl_sock_allocate_seq(sock, 1);
- nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
- OVS_CTRL_CMD_WIN_GET_PID, OVS_WIN_CONTROL_VERSION);
- nlmsg = nl_msg_nlmsghdr(txn.request);
- nlmsg->nlmsg_seq = seq;
-
- ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
- ovs_header->dp_ifindex = 0;
- ovs_header = ofpbuf_put_uninit(&reply, ovs_msg_size);
+ uint32_t pid = 0;
+ int retval = 0;
+ DWORD bytes = 0;
- if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
- ofpbuf_data(txn.request), ofpbuf_size(txn.request),
- ofpbuf_data(txn.reply), ofpbuf_size(txn.reply),
+ if (!DeviceIoControl(sock->handle, OVS_IOCTL_GET_PID,
+ NULL, 0, &pid, sizeof(pid),
&bytes, NULL)) {
retval = EINVAL;
- goto done;
} else {
- if (bytes < ovs_msg_size) {
+ if (bytes < sizeof(pid)) {
retval = EINVAL;
- goto done;
- }
-
- nlmsg = nl_msg_nlmsghdr(txn.reply);
- if (nlmsg->nlmsg_seq != seq) {
- retval = EINVAL;
- goto done;
+ } else {
+ sock->pid = pid;
}
- sock->pid = nlmsg->nlmsg_pid;
}
- retval = 0;
-done:
- ofpbuf_uninit(&request);
- ofpbuf_uninit(&reply);
return retval;
}
#endif /* _WIN32 */
return 0;
}
+#ifdef _WIN32
+int
+nl_sock_subscribe_packets(struct nl_sock *sock)
+{
+ int error;
+
+ if (sock->read_ioctl != OVS_IOCTL_READ) {
+ return EINVAL;
+ }
+
+ error = nl_sock_subscribe_packet__(sock, true);
+ if (error) {
+ VLOG_WARN("could not unsubscribe packets (%s)",
+ ovs_strerror(errno));
+ return error;
+ }
+ sock->read_ioctl = OVS_IOCTL_READ_PACKET;
+
+ return 0;
+}
+
+int
+nl_sock_unsubscribe_packets(struct nl_sock *sock)
+{
+ ovs_assert(sock->read_ioctl == OVS_IOCTL_READ_PACKET);
+
+ int error = nl_sock_subscribe_packet__(sock, false);
+ if (error) {
+ VLOG_WARN("could not subscribe to packets (%s)",
+ ovs_strerror(errno));
+ return error;
+ }
+
+ sock->read_ioctl = OVS_IOCTL_READ;
+ return 0;
+}
+
+int
+nl_sock_subscribe_packet__(struct nl_sock *sock, bool subscribe)
+{
+ struct ofpbuf request;
+ uint64_t request_stub[128];
+ struct ovs_header *ovs_header;
+ struct nlmsghdr *nlmsg;
+ int error;
+
+ ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
+ nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
+ OVS_CTRL_CMD_PACKET_SUBSCRIBE_REQ,
+ OVS_WIN_CONTROL_VERSION);
+
+ ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
+ ovs_header->dp_ifindex = 0;
+ nl_msg_put_u8(&request, OVS_NL_ATTR_PACKET_SUBSCRIBE, subscribe ? 1 : 0);
+ nl_msg_put_u32(&request, OVS_NL_ATTR_PACKET_PID, sock->pid);
+
+ error = nl_sock_send(sock, &request, true);
+ ofpbuf_uninit(&request);
+ return error;
+}
+#endif
+
/* Tries to make 'sock' stop listening to 'multicast_group'. Returns 0 if
* successful, otherwise a positive errno value.
*
struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg);
int error;
- nlmsg->nlmsg_len = ofpbuf_size(msg);
+ nlmsg->nlmsg_len = msg->size;
nlmsg->nlmsg_seq = nlmsg_seq;
nlmsg->nlmsg_pid = sock->pid;
do {
DWORD bytes;
if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
- ofpbuf_data(msg), ofpbuf_size(msg), NULL, 0,
+ msg->data, msg->size, NULL, 0,
&bytes, NULL)) {
retval = -1;
/* XXX: Map to a more appropriate error based on GetLastError(). */
errno = EINVAL;
+ VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
+ ovs_lasterror_to_string());
} else {
- retval = ofpbuf_size(msg);
+ retval = msg->size;
}
#else
- retval = send(sock->fd, ofpbuf_data(msg), ofpbuf_size(msg),
+ retval = send(sock->fd, msg->data, msg->size,
wait ? 0 : MSG_DONTWAIT);
#endif
error = retval < 0 ? errno : 0;
} while (error == EINTR);
- log_nlmsg(__func__, error, ofpbuf_data(msg), ofpbuf_size(msg), sock->protocol);
+ log_nlmsg(__func__, error, msg->data, msg->size, sock->protocol);
if (!error) {
COVERAGE_INC(netlink_sent);
}
}
/* Tries to send 'msg', which must contain a Netlink message, to the kernel on
- * 'sock'. nlmsg_len in 'msg' will be finalized to match ofpbuf_size(msg), nlmsg_pid
+ * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid
* will be set to 'sock''s pid, and nlmsg_seq will be initialized to a fresh
* sequence number, before the message is sent.
*
}
/* Tries to send 'msg', which must contain a Netlink message, to the kernel on
- * 'sock'. nlmsg_len in 'msg' will be finalized to match ofpbuf_size(msg), nlmsg_pid
+ * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid
* will be set to 'sock''s pid, and nlmsg_seq will be initialized to
* 'nlmsg_seq', before the message is sent.
*
ovs_assert(buf->allocated >= sizeof *nlmsghdr);
ofpbuf_clear(buf);
- iov[0].iov_base = ofpbuf_base(buf);
+ iov[0].iov_base = buf->base;
iov[0].iov_len = buf->allocated;
iov[1].iov_base = tail;
iov[1].iov_len = sizeof tail;
* anything in the receive buffer in that case, so we can initialize the
* Netlink header with an impossible message length and then, upon success,
* check whether it changed. */
- nlmsghdr = ofpbuf_base(buf);
+ nlmsghdr = buf->base;
do {
nlmsghdr->nlmsg_len = UINT32_MAX;
#ifdef _WIN32
DWORD bytes;
if (!DeviceIoControl(sock->handle, sock->read_ioctl,
NULL, 0, tail, sizeof tail, &bytes, NULL)) {
+ VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
+ ovs_lasterror_to_string());
retval = -1;
+ /* XXX: Map to a more appropriate error. */
errno = EINVAL;
} else {
retval = bytes;
} else {
if (retval >= buf->allocated) {
ofpbuf_reinit(buf, retval);
- nlmsghdr = ofpbuf_base(buf);
+ nlmsghdr = buf->base;
nlmsghdr->nlmsg_len = UINT32_MAX;
}
- memcpy(ofpbuf_data(buf), tail, retval);
- ofpbuf_set_size(buf, retval);
+ memcpy(buf->data, tail, retval);
+ buf->size = retval;
}
}
#else
return EPROTO;
}
#ifndef _WIN32
- ofpbuf_set_size(buf, MIN(retval, buf->allocated));
+ buf->size = MIN(retval, buf->allocated);
if (retval > buf->allocated) {
COVERAGE_INC(netlink_recv_jumbo);
ofpbuf_put(buf, tail, retval - buf->allocated);
}
#endif
- log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol);
+ log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
COVERAGE_INC(netlink_received);
return 0;
struct nl_transaction *txn = transactions[i];
struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(txn->request);
- nlmsg->nlmsg_len = ofpbuf_size(txn->request);
+ nlmsg->nlmsg_len = txn->request->size;
nlmsg->nlmsg_seq = base_seq + i;
nlmsg->nlmsg_pid = sock->pid;
- iovs[i].iov_base = ofpbuf_data(txn->request);
- iovs[i].iov_len = ofpbuf_size(txn->request);
+ iovs[i].iov_base = txn->request->data;
+ iovs[i].iov_len = txn->request->size;
}
#ifndef _WIN32
for (i = 0; i < n; i++) {
struct nl_transaction *txn = transactions[i];
- log_nlmsg(__func__, error, ofpbuf_data(txn->request),
- ofpbuf_size(txn->request), sock->protocol);
+ log_nlmsg(__func__, error, txn->request->data,
+ txn->request->size, sock->protocol);
}
if (!error) {
COVERAGE_ADD(netlink_sent, n);
uint8_t reply_buf[65536];
for (i = 0; i < n; i++) {
DWORD reply_len;
+ bool ret;
struct nl_transaction *txn = transactions[i];
struct nlmsghdr *request_nlmsg, *reply_nlmsg;
- if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
- ofpbuf_data(txn->request),
- ofpbuf_size(txn->request),
- reply_buf, sizeof reply_buf,
- &reply_len, NULL)) {
+ ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT,
+ txn->request->data,
+ txn->request->size,
+ reply_buf, sizeof reply_buf,
+ &reply_len, NULL);
+
+ if (ret && reply_len == 0) {
+ /*
+ * The current transaction did not produce any data to read and that
+ * is not an error as such. Continue with the remainder of the
+ * transactions.
+ */
+ txn->error = 0;
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ } else if (!ret) {
/* XXX: Map to a more appropriate error. */
error = EINVAL;
+ VLOG_DBG_RL(&rl, "fatal driver failure: %s",
+ ovs_lasterror_to_string());
break;
}
- if (reply_len < sizeof *reply_nlmsg) {
- nl_sock_record_errors__(transactions, n, 0);
- VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
- " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
- break;
- }
+ if (reply_len != 0) {
+ if (reply_len < sizeof *reply_nlmsg) {
+ nl_sock_record_errors__(transactions, n, 0);
+ VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
+ " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq);
+ break;
+ }
- /* Validate the sequence number in the reply. */
- request_nlmsg = nl_msg_nlmsghdr(txn->request);
- reply_nlmsg = (struct nlmsghdr *)reply_buf;
+ /* Validate the sequence number in the reply. */
+ request_nlmsg = nl_msg_nlmsghdr(txn->request);
+ reply_nlmsg = (struct nlmsghdr *)reply_buf;
- if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
- ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
- VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
- ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
- reply_nlmsg->nlmsg_seq);
- break;
- }
-
- /* Handle errors embedded within the netlink message. */
- ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
- ofpbuf_set_size(&tmp_reply, sizeof reply_buf);
- if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
- if (txn->reply) {
- ofpbuf_clear(txn->reply);
- }
- if (txn->error) {
- VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
- error, ovs_strerror(txn->error));
+ if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
+ ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq);
+ VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32
+ ", reply %#"PRIx32, request_nlmsg->nlmsg_seq,
+ reply_nlmsg->nlmsg_seq);
+ break;
}
- } else {
- txn->error = 0;
- if (txn->reply) {
- /* Copy the reply to the buffer specified by the caller. */
- if (reply_len > txn->reply->allocated) {
- ofpbuf_reinit(txn->reply, reply_len);
+
+ /* Handle errors embedded within the netlink message. */
+ ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf);
+ tmp_reply.size = sizeof reply_buf;
+ if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) {
+ if (txn->reply) {
+ ofpbuf_clear(txn->reply);
+ }
+ if (txn->error) {
+ VLOG_DBG_RL(&rl, "received NAK error=%d (%s)",
+ error, ovs_strerror(txn->error));
+ }
+ } else {
+ txn->error = 0;
+ if (txn->reply) {
+ /* Copy the reply to the buffer specified by the caller. */
+ if (reply_len > txn->reply->allocated) {
+ ofpbuf_reinit(txn->reply, reply_len);
+ }
+ memcpy(txn->reply->data, reply_buf, reply_len);
+ txn->reply->size = reply_len;
}
- memcpy(ofpbuf_data(txn->reply), reply_buf, reply_len);
- ofpbuf_set_size(txn->reply, reply_len);
}
+ ofpbuf_uninit(&tmp_reply);
}
- ofpbuf_uninit(&tmp_reply);
/* Count the number of successful transactions. */
(*done)++;
#else
enum { MAX_BATCH_BYTES = 4096 - 512 };
#endif
- bytes = ofpbuf_size(transactions[0]->request);
+ bytes = transactions[0]->request->size;
for (count = 1; count < n && count < max_batch_count; count++) {
- if (bytes + ofpbuf_size(transactions[count]->request) > MAX_BATCH_BYTES) {
+ if (bytes + transactions[count]->request->size > MAX_BATCH_BYTES) {
break;
}
- bytes += ofpbuf_size(transactions[count]->request);
+ bytes += transactions[count]->request->size;
}
error = nl_sock_transact_multiple__(sock, transactions, count, &done);
} else if (error) {
VLOG_ERR_RL(&rl, "transaction error (%s)", ovs_strerror(error));
nl_sock_record_errors__(transactions, n, error);
+ if (error != EAGAIN) {
+ /* A fatal error has occurred. Abort the rest of
+ * transactions. */
+ break;
+ }
}
}
}
struct nlmsghdr *nlmsghdr;
int error;
- while (!ofpbuf_size(buffer)) {
+ while (!buffer->size) {
error = nl_sock_recv__(dump->sock, buffer, false);
if (error) {
/* The kernel never blocks providing the results of a dump, so
* initialized. 'buffer' should be at least NL_DUMP_BUFSIZE bytes long.
*
* If successful, returns true and points 'reply->data' and
- * 'ofpbuf_size(reply)' to the message that was retrieved. The caller must not
+ * 'reply->size' to the message that was retrieved. The caller must not
* modify 'reply' (because it points within 'buffer', which will be used by
* future calls to this function).
*
* On failure, returns false and sets 'reply->data' to NULL and
- * 'ofpbuf_size(reply)' to 0. Failure might indicate an actual error or merely
+ * 'reply->size' to 0. Failure might indicate an actual error or merely
* the end of replies. An error status for the entire dump operation is
* provided when it is completed by calling nl_dump_done().
*
* If the buffer is not empty, we don't check the dump's status.
* Otherwise, we could end up skipping some of the dump results if thread A
* hits EOF while thread B is in the midst of processing a batch. */
- if (!ofpbuf_size(buffer)) {
+ if (!buffer->size) {
ovs_mutex_lock(&dump->mutex);
if (!dump->status) {
/* Take the mutex here to avoid an in-kernel race. If two threads
}
if (retval) {
- ofpbuf_set_data(reply, NULL);
- ofpbuf_set_size(reply, 0);
+ reply->data = NULL;
+ reply->size = 0;
}
return !retval;
}
* the overlapped structure event associated with the pending I/O will be set
*/
static int
-pend_io_request(const struct nl_sock *sock)
+pend_io_request(struct nl_sock *sock)
{
struct ofpbuf request;
uint64_t request_stub[128];
OVS_CTRL_CMD_WIN_PEND_REQ, OVS_WIN_CONTROL_VERSION);
nlmsg = nl_msg_nlmsghdr(&request);
nlmsg->nlmsg_seq = seq;
+ nlmsg->nlmsg_pid = sock->pid;
ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
ovs_header->dp_ifindex = 0;
if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
- ofpbuf_data(&request), ofpbuf_size(&request),
+ request.data, request.size,
NULL, 0, &bytes, overlapped)) {
error = GetLastError();
/* Check if the I/O got pended */
#endif /* _WIN32 */
/* Causes poll_block() to wake up when any of the specified 'events' (which is
- * a OR'd combination of POLLIN, POLLOUT, etc.) occur on 'sock'. */
+ * a OR'd combination of POLLIN, POLLOUT, etc.) occur on 'sock'.
+ * On Windows, 'sock' is not treated as const, and may be modified. */
void
nl_sock_wait(const struct nl_sock *sock, short int events)
{
#ifdef _WIN32
if (sock->overlapped.Internal != STATUS_PENDING) {
- pend_io_request(sock);
+ pend_io_request(CONST_CAST(struct nl_sock *, sock));
/* XXX: poll_wevent_wait(sock->overlapped.hEvent); */
}
poll_immediate_wake(); /* XXX: temporary. */
nl_sock_fd(const struct nl_sock *sock)
{
#ifdef _WIN32
- return sock->handle;
+ BUILD_ASSERT_DECL(sizeof sock->handle == sizeof(int));
+ return (int)sock->handle;
#else
return sock->fd;
#endif
/* Set the total length of the netlink message. */
nlmsg = nl_msg_nlmsghdr(reply);
- nlmsg->nlmsg_len = ofpbuf_size(reply);
+ nlmsg->nlmsg_len = reply->size;
if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN,
family_policy, attrs, ARRAY_SIZE(family_policy))
* reply, if any, is discarded.
*
* Before the message is sent, nlmsg_len in 'request' will be finalized to
- * match ofpbuf_size(msg), nlmsg_pid will be set to the pid of the socket used
+ * match msg->size, nlmsg_pid will be set to the pid of the socket used
* for sending the request, and nlmsg_seq will be initialized.
*
* The caller is responsible for destroying 'request'.