/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "poll-loop.h"
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>
#include "dynamic-string.h"
#include "fatal-signal.h"
#include "ovs-thread.h"
#include "socket-util.h"
/* Declares the vlog module used by the VLOG*() calls in this file. */
VLOG_DEFINE_THIS_MODULE(poll_loop);

/* Coverage counters: 'poll_create_node' is bumped on every call to
 * poll_create_node(), 'poll_zero_timeout' whenever poll_block() is entered
 * with an immediate-wakeup timeout. */
COVERAGE_DEFINE(poll_create_node);
COVERAGE_DEFINE(poll_zero_timeout);
42 struct hmap_node hmap_node;
43 struct pollfd pollfd; /* Events to pass to time_poll(). */
44 HANDLE wevent; /* Events for WaitForMultipleObjects(). */
45 const char *where; /* Where poll_node was created. */
49 /* All active poll waiters. */
50 struct hmap poll_nodes;
52 /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
53 * wake up immediately, or LLONG_MAX to wait forever. */
54 long long int timeout_when; /* In msecs as returned by time_msec(). */
55 const char *timeout_where; /* Where 'timeout_when' was set. */
58 static struct poll_loop *poll_loop(void);
60 /* Look up the node with same fd and wevent. */
61 static struct poll_node *
62 find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent)
64 struct poll_node *node;
66 HMAP_FOR_EACH_WITH_HASH (node, hmap_node,
67 hash_2words(fd, (uint32_t)wevent),
69 if (node->pollfd.fd == fd && node->wevent == wevent) {
76 /* On Unix based systems:
78 * Registers 'fd' as waiting for the specified 'events' (which should be
79 * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to
80 * poll_block() will wake up when 'fd' becomes ready for one or more of the
81 * requested events. The 'fd's are given to poll() function later.
85 * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and
86 * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified,
87 * it is assumed that it is unrelated to any sockets and poll_block()
88 * will wake up on any event on that 'wevent'. It is an error to pass
89 * both 'wevent' and 'fd'.
91 * The event registration is one-shot: only the following call to
92 * poll_block() is affected. The event will need to be re-registered after
93 * poll_block() is called if it is to persist.
95 * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
96 * automatically provide the caller's source file and line number for
99 poll_create_node(int fd, HANDLE wevent, short int events, const char *where)
101 struct poll_loop *loop = poll_loop();
102 struct poll_node *node;
104 COVERAGE_INC(poll_create_node);
106 /* Both 'fd' and 'wevent' cannot be set. */
107 ovs_assert(!fd != !wevent);
109 /* Check for duplicate. If found, "or" the events. */
110 node = find_poll_node(loop, fd, wevent);
112 node->pollfd.events |= events;
114 node = xzalloc(sizeof *node);
115 hmap_insert(&loop->poll_nodes, &node->hmap_node,
116 hash_2words(fd, (uint32_t)wevent));
117 node->pollfd.fd = fd;
118 node->pollfd.events = events;
121 wevent = CreateEvent(NULL, FALSE, FALSE, NULL);
124 node->wevent = wevent;
/* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
 * or POLLOUT or POLLIN | POLLOUT).  The following call to poll_block() will
 * wake up when 'fd' becomes ready for one or more of the requested events.
 *
 * On Windows, 'fd' must be a socket.
 *
 * The event registration is one-shot: only the following call to poll_block()
 * is affected.  The event will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_fd_wait() to
 * automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_fd_wait_at(int fd, short int events, const char *where)
{
    /* No event handle is supplied; only 'fd' identifies the registration. */
    poll_create_node(fd, 0, events, where);
}
#ifdef _WIN32
/* Registers for the next call to poll_block() to wake up when 'wevent' is
 * signaled.
 *
 * The event registration is one-shot: only the following call to poll_block()
 * is affected.  The event will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_wevent_wait() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    /* No fd and no poll events: the registration is keyed on 'wevent' only. */
    poll_create_node(0, wevent, 0, where);
}
#endif /* _WIN32 */
/* Causes the following call to poll_block() to block for no more than 'msec'
 * milliseconds.  If 'msec' is nonpositive, the following call to poll_block()
 * will not block at all.
 *
 * The timer registration is one-shot: only the following call to poll_block()
 * is affected.  The timer will need to be re-registered after poll_block() is
 * called if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_timer_wait()
 * to automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();
    long long int when;

    if (msec <= 0) {
        /* Wake up immediately. */
        when = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* Normal case: the sum was checked in unsigned arithmetic, so the
         * signed addition below cannot overflow. */
        when = now + msec;
    } else {
        /* now + msec would overflow. */
        when = LLONG_MAX;
    }

    poll_timer_wait_until_at(when, where);
}
197 /* Causes the following call to poll_block() to wake up when the current time,
198 * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier
199 * than the current time, the following call to poll_block() will not block at
202 * The timer registration is one-shot: only the following call to poll_block()
203 * is affected. The timer will need to be re-registered after poll_block() is
204 * called if it is to persist.
206 * ('where' is used in debug logging. Commonly one would use
207 * poll_timer_wait_until() to automatically provide the caller's source file
208 * and line number for 'where'.) */
210 poll_timer_wait_until_at(long long int when, const char *where)
212 struct poll_loop *loop = poll_loop();
213 if (when < loop->timeout_when) {
214 loop->timeout_when = when;
215 loop->timeout_where = where;
/* Causes the following call to poll_block() to wake up immediately, without
 * blocking.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_immediate_wake() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* A zero-millisecond timer is the "do not block" request. */
    poll_timer_wait_at(0, where);
}
231 /* Logs, if appropriate, that the poll loop was awakened by an event
232 * registered at 'where' (typically a source file and line number). The other
233 * arguments have two possible interpretations:
235 * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused
236 * the wakeup. 'timeout' is ignored.
238 * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after
239 * which the poll loop woke up.
242 log_wakeup(const char *where, const struct pollfd *pollfd, int timeout)
244 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10);
245 enum vlog_level level;
249 cpu_usage = get_cpu_usage();
250 if (VLOG_IS_DBG_ENABLED()) {
252 } else if (cpu_usage > 50 && !VLOG_DROP_INFO(&rl)) {
259 ds_put_cstr(&s, "wakeup due to ");
261 char *description = describe_fd(pollfd->fd);
262 if (pollfd->revents & POLLIN) {
263 ds_put_cstr(&s, "[POLLIN]");
265 if (pollfd->revents & POLLOUT) {
266 ds_put_cstr(&s, "[POLLOUT]");
268 if (pollfd->revents & POLLERR) {
269 ds_put_cstr(&s, "[POLLERR]");
271 if (pollfd->revents & POLLHUP) {
272 ds_put_cstr(&s, "[POLLHUP]");
274 if (pollfd->revents & POLLNVAL) {
275 ds_put_cstr(&s, "[POLLNVAL]");
277 ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description);
280 ds_put_format(&s, "%d-ms timeout", timeout);
283 ds_put_format(&s, " at %s", where);
285 if (cpu_usage >= 0) {
286 ds_put_format(&s, " (%d%% CPU usage)", cpu_usage);
288 VLOG(level, "%s", ds_cstr(&s));
293 free_poll_nodes(struct poll_loop *loop)
295 struct poll_node *node, *next;
297 HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) {
298 hmap_remove(&loop->poll_nodes, &node->hmap_node);
300 if (node->wevent && node->pollfd.fd) {
301 WSAEventSelect(node->pollfd.fd, NULL, 0);
302 CloseHandle(node->wevent);
309 /* Blocks until one or more of the events registered with poll_fd_wait()
310 * occurs, or until the minimum duration registered with poll_timer_wait()
311 * elapses, or not at all if poll_immediate_wake() has been called. */
315 struct poll_loop *loop = poll_loop();
316 struct poll_node *node;
317 struct pollfd *pollfds;
318 HANDLE *wevents = NULL;
323 /* Register fatal signal events before actually doing any real work for
327 if (loop->timeout_when == LLONG_MIN) {
328 COVERAGE_INC(poll_zero_timeout);
332 pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds);
335 wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents);
338 /* Populate with all the fds and events. */
340 HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
341 pollfds[i] = node->pollfd;
343 wevents[i] = node->wevent;
344 if (node->pollfd.fd && node->wevent) {
345 short int wsa_events = 0;
346 if (node->pollfd.events & POLLIN) {
347 wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE;
349 if (node->pollfd.events & POLLOUT) {
350 wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE;
352 WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events);
358 retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents,
359 loop->timeout_when, &elapsed);
361 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
362 VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval));
363 } else if (!retval) {
364 log_wakeup(loop->timeout_where, NULL, elapsed);
365 } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
367 HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) {
368 if (pollfds[i].revents) {
369 log_wakeup(node->where, &pollfds[i], 0);
375 free_poll_nodes(loop);
376 loop->timeout_when = LLONG_MAX;
377 loop->timeout_where = NULL;
381 /* Handle any pending signals before doing anything else. */
388 free_poll_loop(void *loop_)
390 struct poll_loop *loop = loop_;
392 free_poll_nodes(loop);
393 hmap_destroy(&loop->poll_nodes);
397 static struct poll_loop *
400 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
401 static pthread_key_t key;
402 struct poll_loop *loop;
404 if (ovsthread_once_start(&once)) {
405 xpthread_key_create(&key, free_poll_loop);
406 ovsthread_once_done(&once);
409 loop = pthread_getspecific(key);
411 loop = xzalloc(sizeof *loop);
412 hmap_init(&loop->poll_nodes);
413 xpthread_setspecific(key, loop);