X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=lib%2Fnetlink-socket.c;h=1a1b5e42a3082f5cae4f119bb8d87f044681d4b7;hb=HEAD;hp=4d9832f48d701ef566d14586827d01b9f2506659;hpb=ebac7fb759946f3e0660b882bfd4fefd39bc7b17;p=cascardo%2Fovs.git diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 4d9832f48..1a1b5e42a 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ #include "seq.h" #include "socket-util.h" #include "util.h" -#include "vlog.h" +#include "openvswitch/vlog.h" VLOG_DEFINE_THIS_MODULE(netlink_socket); @@ -49,20 +49,6 @@ COVERAGE_DEFINE(netlink_sent); #define SOL_NETLINK 270 #endif -#ifdef _WIN32 -static struct ovs_mutex portid_mutex = OVS_MUTEX_INITIALIZER; -static uint32_t g_last_portid = 0; - -/* Port IDs must be unique! */ -static uint32_t -portid_next(void) - OVS_GUARDED_BY(portid_mutex) -{ - g_last_portid++; - return g_last_portid; -} -#endif /* _WIN32 */ - /* A single (bad) Netlink message can in theory dump out many, many log * messages, so the burst size is set quite high here to avoid missing useful * information. Also, at high logging levels we log *all* Netlink messages. */ @@ -80,6 +66,8 @@ static int get_sock_pid_from_kernel(struct nl_sock *sock); struct nl_sock { #ifdef _WIN32 HANDLE handle; + OVERLAPPED overlapped; + DWORD read_ioctl; #else int fd; #endif @@ -139,18 +127,25 @@ nl_sock_create(int protocol, struct nl_sock **sockp) sock = xmalloc(sizeof *sock); #ifdef _WIN32 - sock->handle = CreateFileA("\\\\.\\OpenVSwitchDevice", - GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, NULL); - - int last_error = GetLastError(); + sock->handle = CreateFile(OVS_DEVICE_NAME_USER, + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, + FILE_FLAG_OVERLAPPED, NULL); if (sock->handle == INVALID_HANDLE_VALUE) { - VLOG_ERR("fcntl: %s", ovs_strerror(last_error)); + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string()); goto error; } + + memset(&sock->overlapped, 0, sizeof sock->overlapped); + sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); + if (sock->overlapped.hEvent == NULL) { + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string()); + goto error; + } + /* Initialize the type/ioctl to Generic */ + sock->read_ioctl = OVS_IOCTL_READ; #else sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol); if (sock->fd < 0) { @@ -186,6 +181,7 @@ nl_sock_create(int protocol, struct nl_sock **sockp) goto error; } sock->rcvbuf = retval; + retval = 0; /* Connect to kernel (pid 0) as remote address. */ memset(&remote, 0, sizeof remote); @@ -221,6 +217,9 @@ error: } } #ifdef _WIN32 + if (sock->overlapped.hEvent) { + CloseHandle(sock->overlapped.hEvent); + } if (sock->handle != INVALID_HANDLE_VALUE) { CloseHandle(sock->handle); } @@ -248,6 +247,9 @@ nl_sock_destroy(struct nl_sock *sock) { if (sock) { #ifdef _WIN32 + if (sock->overlapped.hEvent) { + CloseHandle(sock->overlapped.hEvent); + } CloseHandle(sock->handle); #else close(sock->fd); @@ -258,67 +260,58 @@ nl_sock_destroy(struct nl_sock *sock) #ifdef _WIN32 /* Reads the pid for 'sock' generated in the kernel datapath. The function - * follows a transaction semantic. Eventually this function should call into - * nl_transact. */ + * uses a separate IOCTL instead of a transaction semantic to avoid unnecessary + * message overhead. */ static int get_sock_pid_from_kernel(struct nl_sock *sock) { - struct nl_transaction txn; + uint32_t pid = 0; + int retval = 0; + DWORD bytes = 0; + + if (!DeviceIoControl(sock->handle, OVS_IOCTL_GET_PID, + NULL, 0, &pid, sizeof(pid), + &bytes, NULL)) { + retval = EINVAL; + } else { + if (bytes < sizeof(pid)) { + retval = EINVAL; + } else { + sock->pid = pid; + } + } + + return retval; +} +#endif /* _WIN32 */ + +#ifdef _WIN32 +static int __inline +nl_sock_mcgroup(struct nl_sock *sock, unsigned int multicast_group, bool join) +{ struct ofpbuf request; uint64_t request_stub[128]; - struct ofpbuf reply; - uint64_t reply_stub[128]; struct ovs_header *ovs_header; struct nlmsghdr *nlmsg; - uint32_t seq; - int retval; - DWORD bytes; - int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) + - sizeof (struct ovs_header); + int error; ofpbuf_use_stub(&request, request_stub, sizeof request_stub); - txn.request = &request; - ofpbuf_use_stub(&reply, reply_stub, sizeof reply_stub); - txn.reply = &reply; - seq = nl_sock_allocate_seq(sock, 1); nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0, - OVS_CTRL_CMD_WIN_GET_PID, OVS_WIN_CONTROL_VERSION); - nlmsg = nl_msg_nlmsghdr(txn.request); - nlmsg->nlmsg_seq = seq; + OVS_CTRL_CMD_MC_SUBSCRIBE_REQ, + OVS_WIN_CONTROL_VERSION); ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header); ovs_header->dp_ifindex = 0; - ovs_header = ofpbuf_put_uninit(&reply, ovs_msg_size); - if (!DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT, - ofpbuf_data(txn.request), ofpbuf_size(txn.request), - ofpbuf_data(txn.reply), ofpbuf_size(txn.reply), - &bytes, NULL)) { - retval = EINVAL; - goto done; - } else { - if (bytes < ovs_msg_size) { - retval = EINVAL; - goto done; - } + nl_msg_put_u32(&request, OVS_NL_ATTR_MCAST_GRP, multicast_group); + nl_msg_put_u8(&request, OVS_NL_ATTR_MCAST_JOIN, join ? 1 : 0); - nlmsg = nl_msg_nlmsghdr(txn.reply); - if (nlmsg->nlmsg_seq != seq) { - retval = EINVAL; - goto done; - } - sock->pid = nlmsg->nlmsg_pid; - } - retval = 0; - -done: + error = nl_sock_send(sock, &request, true); ofpbuf_uninit(&request); - ofpbuf_uninit(&reply); - return retval; + return error; } -#endif /* _WIN32 */ - +#endif /* Tries to add 'sock' as a listener for 'multicast_group'. Returns 0 if * successful, otherwise a positive errno value. * @@ -334,31 +327,15 @@ int nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { #ifdef _WIN32 -#define OVS_VPORT_MCGROUP_FALLBACK_ID 33 - struct ofpbuf msg_buf; - struct message_multicast - { - struct nlmsghdr; - /* if true, join; if else, leave */ - unsigned char join; - unsigned int groupId; - }; - - struct message_multicast msg = { 0 }; - - msg.nlmsg_len = sizeof(struct message_multicast); - msg.nlmsg_type = OVS_VPORT_MCGROUP_FALLBACK_ID; - msg.nlmsg_flags = 0; - msg.nlmsg_seq = 0; - msg.nlmsg_pid = sock->pid; - - msg.join = 1; - msg.groupId = multicast_group; - msg_buf.base_ = &msg; - msg_buf.data_ = &msg; - msg_buf.size_ = msg.nlmsg_len; - - nl_sock_send__(sock, &msg_buf, msg.nlmsg_seq, 0); + /* Set the socket type as a "multicast" socket */ + sock->read_ioctl = OVS_IOCTL_READ_EVENT; + int error = nl_sock_mcgroup(sock, multicast_group, true); + if (error) { + sock->read_ioctl = OVS_IOCTL_READ; + VLOG_WARN("could not join multicast group %u (%s)", + multicast_group, ovs_strerror(error)); + return error; + } #else if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { @@ -370,6 +347,68 @@ nl_sock_join_mcgroup(struct nl_sock *sock, unsigned int multicast_group) return 0; } +#ifdef _WIN32 +int +nl_sock_subscribe_packets(struct nl_sock *sock) +{ + int error; + + if (sock->read_ioctl != OVS_IOCTL_READ) { + return EINVAL; + } + + error = nl_sock_subscribe_packet__(sock, true); + if (error) { + VLOG_WARN("could not subscribe packets (%s)", + ovs_strerror(error)); + return error; + } + sock->read_ioctl = OVS_IOCTL_READ_PACKET; + + return 0; +} + +int +nl_sock_unsubscribe_packets(struct nl_sock *sock) +{ + ovs_assert(sock->read_ioctl == OVS_IOCTL_READ_PACKET); + + int error = nl_sock_subscribe_packet__(sock, false); + if (error) { + VLOG_WARN("could not unsubscribe to packets (%s)", + ovs_strerror(error)); + return error; + } + + sock->read_ioctl = OVS_IOCTL_READ; + return 0; +} + +int +nl_sock_subscribe_packet__(struct nl_sock *sock, bool subscribe) +{ + struct ofpbuf request; + uint64_t request_stub[128]; + struct ovs_header *ovs_header; + struct nlmsghdr *nlmsg; + int error; + + ofpbuf_use_stub(&request, request_stub, sizeof request_stub); + nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0, + OVS_CTRL_CMD_PACKET_SUBSCRIBE_REQ, + OVS_WIN_CONTROL_VERSION); + + ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header); + ovs_header->dp_ifindex = 0; + nl_msg_put_u8(&request, OVS_NL_ATTR_PACKET_SUBSCRIBE, subscribe ? 1 : 0); + nl_msg_put_u32(&request, OVS_NL_ATTR_PACKET_PID, sock->pid); + + error = nl_sock_send(sock, &request, true); + ofpbuf_uninit(&request); + return error; +} +#endif + /* Tries to make 'sock' stop listening to 'multicast_group'. Returns 0 if * successful, otherwise a positive errno value. * @@ -384,24 +423,13 @@ int nl_sock_leave_mcgroup(struct nl_sock *sock, unsigned int multicast_group) { #ifdef _WIN32 - struct ofpbuf msg_buf; - struct message_multicast - { - struct nlmsghdr; - /* if true, join; if else, leave*/ - unsigned char join; - }; - - struct message_multicast msg = { 0 }; - nl_msg_put_nlmsghdr(&msg, sizeof(struct message_multicast), - multicast_group, 0); - msg.join = 0; - - msg_buf.base_ = &msg; - msg_buf.data_ = &msg; - msg_buf.size_ = msg.nlmsg_len; - - nl_sock_send__(sock, &msg_buf, msg.nlmsg_seq, 0); + int error = nl_sock_mcgroup(sock, multicast_group, false); + if (error) { + VLOG_WARN("could not leave multicast group %u (%s)", + multicast_group, ovs_strerror(error)); + return error; + } + sock->read_ioctl = OVS_IOCTL_READ; #else if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &multicast_group, sizeof multicast_group) < 0) { @@ -420,27 +448,32 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(msg); int error; - nlmsg->nlmsg_len = ofpbuf_size(msg); + nlmsg->nlmsg_len = msg->size; nlmsg->nlmsg_seq = nlmsg_seq; nlmsg->nlmsg_pid = sock->pid; do { int retval; #ifdef _WIN32 - bool result; - DWORD last_error = 0; - result = WriteFile(sock->handle, ofpbuf_data(msg), ofpbuf_size(msg), - &retval, NULL); - last_error = GetLastError(); - if (last_error != ERROR_SUCCESS && !result) { + DWORD bytes; + + if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE, + msg->data, msg->size, NULL, 0, + &bytes, NULL)) { retval = -1; - errno = EAGAIN; + /* XXX: Map to a more appropriate error based on GetLastError(). */ + errno = EINVAL; + VLOG_DBG_RL(&rl, "fatal driver failure in write: %s", + ovs_lasterror_to_string()); + } else { + retval = msg->size; } #else - retval = send(sock->fd, ofpbuf_data(msg), ofpbuf_size(msg), wait ? 0 : MSG_DONTWAIT); + retval = send(sock->fd, msg->data, msg->size, + wait ? 0 : MSG_DONTWAIT); #endif error = retval < 0 ? errno : 0; } while (error == EINTR); - log_nlmsg(__func__, error, ofpbuf_data(msg), ofpbuf_size(msg), sock->protocol); + log_nlmsg(__func__, error, msg->data, msg->size, sock->protocol); if (!error) { COVERAGE_INC(netlink_sent); } @@ -448,7 +481,7 @@ nl_sock_send__(struct nl_sock *sock, const struct ofpbuf *msg, } /* Tries to send 'msg', which must contain a Netlink message, to the kernel on - * 'sock'. nlmsg_len in 'msg' will be finalized to match ofpbuf_size(msg), nlmsg_pid + * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid * will be set to 'sock''s pid, and nlmsg_seq will be initialized to a fresh * sequence number, before the message is sent. * @@ -462,7 +495,7 @@ nl_sock_send(struct nl_sock *sock, const struct ofpbuf *msg, bool wait) } /* Tries to send 'msg', which must contain a Netlink message, to the kernel on - * 'sock'. nlmsg_len in 'msg' will be finalized to match ofpbuf_size(msg), nlmsg_pid + * 'sock'. nlmsg_len in 'msg' will be finalized to match msg->size, nlmsg_pid * will be set to 'sock''s pid, and nlmsg_seq will be initialized to * 'nlmsg_seq', before the message is sent. * @@ -488,12 +521,7 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) * 'tail' to allow Netlink messages to be up to 64 kB long (a reasonable * figure since that's the maximum length of a Netlink attribute). */ struct nlmsghdr *nlmsghdr; -#ifdef _WIN32 -#define MAX_STACK_LENGTH 81920 - uint8_t tail[MAX_STACK_LENGTH]; -#else uint8_t tail[65536]; -#endif struct iovec iov[2]; struct msghdr msg; ssize_t retval; @@ -502,7 +530,7 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) ovs_assert(buf->allocated >= sizeof *nlmsghdr); ofpbuf_clear(buf); - iov[0].iov_base = ofpbuf_base(buf); + iov[0].iov_base = buf->base; iov[0].iov_len = buf->allocated; iov[1].iov_base = tail; iov[1].iov_len = sizeof tail; @@ -518,19 +546,32 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) * anything in the receive buffer in that case, so we can initialize the * Netlink header with an impossible message length and then, upon success, * check whether it changed. */ - nlmsghdr = ofpbuf_base(buf); + nlmsghdr = buf->base; do { nlmsghdr->nlmsg_len = UINT32_MAX; #ifdef _WIN32 - boolean result = false; - DWORD last_error = 0; - result = ReadFile(sock->handle, tail, MAX_STACK_LENGTH, &retval, NULL); - last_error = GetLastError(); - if (last_error != ERROR_SUCCESS && !result) { + DWORD bytes; + if (!DeviceIoControl(sock->handle, sock->read_ioctl, + NULL, 0, tail, sizeof tail, &bytes, NULL)) { + VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s", + ovs_lasterror_to_string()); retval = -1; - errno = EAGAIN; + /* XXX: Map to a more appropriate error. */ + errno = EINVAL; } else { - ofpbuf_put(buf, tail, retval); + retval = bytes; + if (retval == 0) { + retval = -1; + errno = EAGAIN; + } else { + if (retval >= buf->allocated) { + ofpbuf_reinit(buf, retval); + nlmsghdr = buf->base; + nlmsghdr->nlmsg_len = UINT32_MAX; + } + memcpy(buf->data, tail, retval); + buf->size = retval; + } } #else retval = recvmsg(sock->fd, &msg, wait ? 0 : MSG_DONTWAIT); @@ -563,14 +604,14 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) return EPROTO; } #ifndef _WIN32 - ofpbuf_set_size(buf, MIN(retval, buf->allocated)); + buf->size = MIN(retval, buf->allocated); if (retval > buf->allocated) { COVERAGE_INC(netlink_recv_jumbo); ofpbuf_put(buf, tail, retval - buf->allocated); } #endif - log_nlmsg(__func__, 0, ofpbuf_data(buf), ofpbuf_size(buf), sock->protocol); + log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol); COVERAGE_INC(netlink_received); return 0; @@ -636,42 +677,27 @@ nl_sock_transact_multiple__(struct nl_sock *sock, struct nl_transaction *txn = transactions[i]; struct nlmsghdr *nlmsg = nl_msg_nlmsghdr(txn->request); - nlmsg->nlmsg_len = ofpbuf_size(txn->request); + nlmsg->nlmsg_len = txn->request->size; nlmsg->nlmsg_seq = base_seq + i; nlmsg->nlmsg_pid = sock->pid; - iovs[i].iov_base = ofpbuf_data(txn->request); - iovs[i].iov_len = ofpbuf_size(txn->request); + iovs[i].iov_base = txn->request->data; + iovs[i].iov_len = txn->request->size; } +#ifndef _WIN32 memset(&msg, 0, sizeof msg); msg.msg_iov = iovs; msg.msg_iovlen = n; do { -#ifdef _WIN32 - DWORD last_error = 0; - bool result = FALSE; - for (i = 0; i < n; i++) { - result = WriteFile((HANDLE)sock->handle, iovs[i].iov_base, iovs[i].iov_len, - &error, NULL); - last_error = GetLastError(); - if (last_error != ERROR_SUCCESS && !result) { - error = EAGAIN; - errno = EAGAIN; - } else { - error = 0; - } - } -#else error = sendmsg(sock->fd, &msg, 0) < 0 ? errno : 0; -#endif } while (error == EINTR); for (i = 0; i < n; i++) { struct nl_transaction *txn = transactions[i]; - log_nlmsg(__func__, error, ofpbuf_data(txn->request), ofpbuf_size(txn->request), - sock->protocol); + log_nlmsg(__func__, error, txn->request->data, + txn->request->size, sock->protocol); } if (!error) { COVERAGE_ADD(netlink_sent, n); @@ -750,6 +776,93 @@ nl_sock_transact_multiple__(struct nl_sock *sock, base_seq += i + 1; } ofpbuf_uninit(&tmp_reply); +#else + error = 0; + uint8_t reply_buf[65536]; + for (i = 0; i < n; i++) { + DWORD reply_len; + bool ret; + struct nl_transaction *txn = transactions[i]; + struct nlmsghdr *request_nlmsg, *reply_nlmsg; + + ret = DeviceIoControl(sock->handle, OVS_IOCTL_TRANSACT, + txn->request->data, + txn->request->size, + reply_buf, sizeof reply_buf, + &reply_len, NULL); + + if (ret && reply_len == 0) { + /* + * The current transaction did not produce any data to read and that + * is not an error as such. Continue with the remainder of the + * transactions. + */ + txn->error = 0; + if (txn->reply) { + ofpbuf_clear(txn->reply); + } + } else if (!ret) { + /* XXX: Map to a more appropriate error. */ + error = EINVAL; + VLOG_DBG_RL(&rl, "fatal driver failure: %s", + ovs_lasterror_to_string()); + break; + } + + if (reply_len != 0) { + if (reply_len < sizeof *reply_nlmsg) { + nl_sock_record_errors__(transactions, n, 0); + VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32 + " for seq: %#"PRIx32, reply_len, request_nlmsg->nlmsg_seq); + break; + } + + /* Validate the sequence number in the reply. */ + request_nlmsg = nl_msg_nlmsghdr(txn->request); + reply_nlmsg = (struct nlmsghdr *)reply_buf; + + if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) { + ovs_assert(request_nlmsg->nlmsg_seq == reply_nlmsg->nlmsg_seq); + VLOG_DBG_RL(&rl, "mismatched seq request %#"PRIx32 + ", reply %#"PRIx32, request_nlmsg->nlmsg_seq, + reply_nlmsg->nlmsg_seq); + break; + } + + /* Handle errors embedded within the netlink message. */ + ofpbuf_use_stub(&tmp_reply, reply_buf, sizeof reply_buf); + tmp_reply.size = sizeof reply_buf; + if (nl_msg_nlmsgerr(&tmp_reply, &txn->error)) { + if (txn->reply) { + ofpbuf_clear(txn->reply); + } + if (txn->error) { + VLOG_DBG_RL(&rl, "received NAK error=%d (%s)", + error, ovs_strerror(txn->error)); + } + } else { + txn->error = 0; + if (txn->reply) { + /* Copy the reply to the buffer specified by the caller. */ + if (reply_len > txn->reply->allocated) { + ofpbuf_reinit(txn->reply, reply_len); + } + memcpy(txn->reply->data, reply_buf, reply_len); + txn->reply->size = reply_len; + } + } + ofpbuf_uninit(&tmp_reply); + } + + /* Count the number of successful transactions. */ + (*done)++; + + } + + if (!error) { + COVERAGE_ADD(netlink_sent, n); + } +#endif return error; } @@ -788,12 +901,12 @@ nl_sock_transact_multiple(struct nl_sock *sock, #else enum { MAX_BATCH_BYTES = 4096 - 512 }; #endif - bytes = ofpbuf_size(transactions[0]->request); + bytes = transactions[0]->request->size; for (count = 1; count < n && count < max_batch_count; count++) { - if (bytes + ofpbuf_size(transactions[count]->request) > MAX_BATCH_BYTES) { + if (bytes + transactions[count]->request->size > MAX_BATCH_BYTES) { break; } - bytes += ofpbuf_size(transactions[count]->request); + bytes += transactions[count]->request->size; } error = nl_sock_transact_multiple__(sock, transactions, count, &done); @@ -805,6 +918,11 @@ nl_sock_transact_multiple(struct nl_sock *sock, } else if (error) { VLOG_ERR_RL(&rl, "transaction error (%s)", ovs_strerror(error)); nl_sock_record_errors__(transactions, n, error); + if (error != EAGAIN) { + /* A fatal error has occurred. Abort the rest of + * transactions. */ + break; + } } } } @@ -885,7 +1003,7 @@ nl_dump_refill(struct nl_dump *dump, struct ofpbuf *buffer) struct nlmsghdr *nlmsghdr; int error; - while (!ofpbuf_size(buffer)) { + while (!buffer->size) { error = nl_sock_recv__(dump->sock, buffer, false); if (error) { /* The kernel never blocks providing the results of a dump, so @@ -935,12 +1053,12 @@ nl_dump_next__(struct ofpbuf *reply, struct ofpbuf *buffer) * initialized. 'buffer' should be at least NL_DUMP_BUFSIZE bytes long. * * If successful, returns true and points 'reply->data' and - * 'ofpbuf_size(reply)' to the message that was retrieved. The caller must not + * 'reply->size' to the message that was retrieved. The caller must not * modify 'reply' (because it points within 'buffer', which will be used by * future calls to this function). * * On failure, returns false and sets 'reply->data' to NULL and - * 'ofpbuf_size(reply)' to 0. Failure might indicate an actual error or merely + * 'reply->size' to 0. Failure might indicate an actual error or merely * the end of replies. An error status for the entire dump operation is * provided when it is completed by calling nl_dump_done(). * @@ -960,7 +1078,7 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply, struct ofpbuf *buffer) * If the buffer is not empty, we don't check the dump's status. * Otherwise, we could end up skipping some of the dump results if thread A * hits EOF while thread B is in the midst of processing a batch. */ - if (!ofpbuf_size(buffer)) { + if (!buffer->size) { ovs_mutex_lock(&dump->mutex); if (!dump->status) { /* Take the mutex here to avoid an in-kernel race. If two threads @@ -990,8 +1108,8 @@ nl_dump_next(struct nl_dump *dump, struct ofpbuf *reply, struct ofpbuf *buffer) } if (retval) { - ofpbuf_set_data(reply, NULL); - ofpbuf_set_size(reply, 0); + reply->data = NULL; + reply->size = 0; } return !retval; } @@ -1034,18 +1152,88 @@ nl_dump_done(struct nl_dump *dump) return status == EOF ? 0 : status; } +#ifdef _WIN32 +/* Pend an I/O request in the driver. The driver completes the I/O whenever + * an event or a packet is ready to be read. Once the I/O is completed + * the overlapped structure event associated with the pending I/O will be set + */ +static int +pend_io_request(struct nl_sock *sock) +{ + struct ofpbuf request; + uint64_t request_stub[128]; + struct ovs_header *ovs_header; + struct nlmsghdr *nlmsg; + uint32_t seq; + int retval = 0; + int error; + DWORD bytes; + OVERLAPPED *overlapped = CONST_CAST(OVERLAPPED *, &sock->overlapped); + uint16_t cmd = OVS_CTRL_CMD_WIN_PEND_PACKET_REQ; + + ovs_assert(sock->read_ioctl == OVS_IOCTL_READ_PACKET || + sock->read_ioctl == OVS_IOCTL_READ_EVENT); + if (sock->read_ioctl == OVS_IOCTL_READ_EVENT) { + cmd = OVS_CTRL_CMD_WIN_PEND_REQ; + } + + int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) + + sizeof (struct ovs_header); + + ofpbuf_use_stub(&request, request_stub, sizeof request_stub); + + seq = nl_sock_allocate_seq(sock, 1); + nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0, + cmd, OVS_WIN_CONTROL_VERSION); + nlmsg = nl_msg_nlmsghdr(&request); + nlmsg->nlmsg_seq = seq; + nlmsg->nlmsg_pid = sock->pid; + + ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header); + ovs_header->dp_ifindex = 0; + + if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE, + request.data, request.size, + NULL, 0, &bytes, overlapped)) { + error = GetLastError(); + /* Check if the I/O got pended */ + if (error != ERROR_IO_INCOMPLETE && error != ERROR_IO_PENDING) { + VLOG_ERR("nl_sock_wait failed - %s\n", ovs_format_message(error)); + retval = EINVAL; + } + } else { + retval = EAGAIN; + } + +done: + ofpbuf_uninit(&request); + return retval; +} +#endif /* _WIN32 */ + /* Causes poll_block() to wake up when any of the specified 'events' (which is - * a OR'd combination of POLLIN, POLLOUT, etc.) occur on 'sock'. */ + * a OR'd combination of POLLIN, POLLOUT, etc.) occur on 'sock'. + * On Windows, 'sock' is not treated as const, and may be modified. */ void nl_sock_wait(const struct nl_sock *sock, short int events) { #ifdef _WIN32 - poll_fd_wait(sock->handle, events); + if (sock->overlapped.Internal != STATUS_PENDING) { + int ret = pend_io_request(CONST_CAST(struct nl_sock *, sock)); + if (ret == 0) { + poll_wevent_wait(sock->overlapped.hEvent); + } else { + poll_immediate_wake(); + } + } else { + poll_wevent_wait(sock->overlapped.hEvent); + } #else poll_fd_wait(sock->fd, events); #endif } +#ifndef _WIN32 /* Returns the underlying fd for 'sock', for use in "poll()"-like operations * that can't use nl_sock_wait(). * @@ -1056,12 +1244,9 @@ nl_sock_wait(const struct nl_sock *sock, short int events) int nl_sock_fd(const struct nl_sock *sock) { -#ifdef _WIN32 - return sock->handle; -#else return sock->fd; -#endif } +#endif /* Returns the PID associated with this socket. */ uint32_t @@ -1171,7 +1356,6 @@ static int do_lookup_genl_family(const char *name, struct nlattr **attrs, struct ofpbuf **replyp) { - struct nl_sock *sock; struct nlmsghdr *nlmsg; struct ofpbuf *reply; int error; @@ -1213,6 +1397,11 @@ do_lookup_genl_family(const char *name, struct nlattr **attrs, family_name = OVS_FLOW_FAMILY; family_version = OVS_FLOW_VERSION; family_attrmax = OVS_FLOW_ATTR_MAX; + } else if (!strcmp(name, OVS_WIN_NETDEV_FAMILY)) { + family_id = OVS_WIN_NL_NETDEV_FAMILY_ID; + family_name = OVS_WIN_NETDEV_FAMILY; + family_version = OVS_WIN_NETDEV_VERSION; + family_attrmax = OVS_WIN_NETDEV_ATTR_MAX; } else { ofpbuf_delete(reply); return EINVAL; @@ -1240,12 +1429,11 @@ do_lookup_genl_family(const char *name, struct nlattr **attrs, /* Set the total length of the netlink message. */ nlmsg = nl_msg_nlmsghdr(reply); - nlmsg->nlmsg_len = ofpbuf_size(reply); + nlmsg->nlmsg_len = reply->size; if (!nl_policy_parse(reply, NLMSG_HDRLEN + GENL_HDRLEN, family_policy, attrs, ARRAY_SIZE(family_policy)) || nl_attr_get_u16(attrs[CTRL_ATTR_FAMILY_ID]) == 0) { - nl_sock_destroy(sock); ofpbuf_delete(reply); return EPROTO; } @@ -1393,7 +1581,7 @@ nl_pool_release(struct nl_sock *sock) * reply, if any, is discarded. * * Before the message is sent, nlmsg_len in 'request' will be finalized to - * match ofpbuf_size(msg), nlmsg_pid will be set to the pid of the socket used + * match msg->size, nlmsg_pid will be set to the pid of the socket used * for sending the request, and nlmsg_seq will be initialized. * * The caller is responsible for destroying 'request'. @@ -1599,15 +1787,12 @@ static void log_nlmsg(const char *function, int error, const void *message, size_t size, int protocol) { - struct ofpbuf buffer; - char *nlmsg; - if (!VLOG_IS_DBG_ENABLED()) { return; } - ofpbuf_use_const(&buffer, message, size); - nlmsg = nlmsg_to_string(&buffer, protocol); + struct ofpbuf buffer = ofpbuf_const_initializer(message, size); + char *nlmsg = nlmsg_to_string(&buffer, protocol); VLOG_DBG_RL(&rl, "%s (%s): %s", function, ovs_strerror(error), nlmsg); free(nlmsg); }