From: Ben Pfaff Date: Wed, 2 Dec 2009 19:49:53 +0000 (-0800) Subject: Merge "master" branch into "db". X-Git-Tag: v1.0.0~259^2~475 X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=58fda1dab104041fc693032475ec4662c1a52849 Merge "master" branch into "db". --- 58fda1dab104041fc693032475ec4662c1a52849 diff --cc Makefile.am index 301fc7c14,de51e10f7..164ca3e1a --- a/Makefile.am +++ b/Makefile.am @@@ -29,10 -29,10 +30,11 @@@ CLEANFILES DISTCLEANFILES = EXTRA_DIST = INSTALL.bridge \ INSTALL.Linux \ + INSTALL.userspace \ INSTALL.OpenFlow \ INSTALL.SSL \ - INSTALL.XenServer + INSTALL.XenServer \ + README-gcov bin_PROGRAMS = sbin_PROGRAMS = bin_SCRIPTS = diff --cc lib/lockfile.c index e5a041eed,000000000..9bb7c6b18 mode 100644,000000..100644 --- a/lib/lockfile.c +++ b/lib/lockfile.c @@@ -1,280 -1,0 +1,280 @@@ + /* Copyright (c) 2008, 2009 Nicira Networks + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#include "lockfile.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "coverage.h" +#include "hash.h" +#include "hmap.h" +#include "timeval.h" +#include "util.h" + +#define THIS_MODULE VLM_lockfile +#include "vlog.h" + +struct lockfile { + struct hmap_node hmap_node; + char *name; + dev_t device; + ino_t inode; + int fd; +}; + +/* Lock table. 
+ * + * We have to do this stupid dance because POSIX says that closing *any* file + * descriptor for a file on which a process holds a lock drops *all* locks on + * that file. That means that we can't afford to open a lockfile more than + * once. */ +static struct hmap lock_table = HMAP_INITIALIZER(&lock_table); + +static void lockfile_unhash(struct lockfile *); +static int lockfile_try_lock(const char *name, bool block, + struct lockfile **lockfilep); + +/* Returns the name of the lockfile that would be created for locking a file + * named 'file_name'. The caller is responsible for freeing the returned + * name, with free(), when it is no longer needed. */ +char * +lockfile_name(const char *file_name) +{ + const char *slash = strrchr(file_name, '/'); + return (slash - ? xasprintf("%.*s/.%s.~lock~", slash - file_name, file_name, - slash + 1) ++ ? xasprintf("%.*s/.%s.~lock~", ++ (int) (slash - file_name), file_name, slash + 1) + : xasprintf(".%s.~lock~", file_name)); +} + +/* Locks the configuration file against modification by other processes and + * re-reads it from disk. + * + * The 'timeout' specifies the maximum number of milliseconds to wait for the + * config file to become free. Use 0 to avoid waiting or INT_MAX to wait + * forever. + * + * Returns 0 on success, otherwise a positive errno value. On success, + * '*lockfilep' is set to point to a new "struct lockfile *" that may be + * unlocked with lockfile_unlock(). On failure, '*lockfilep' is set to + * NULL. */ +int +lockfile_lock(const char *file, int timeout, struct lockfile **lockfilep) +{ + /* Only exclusive ("write") locks are supported. This is not a problem + * because the Open vSwitch code that currently uses lock files does so in + * stylized ways such that any number of readers may access a file while it + * is being written. 
*/ + long long int start, elapsed; + char *lock_name; + int error; + + COVERAGE_INC(lockfile_lock); + + lock_name = lockfile_name(file); + time_refresh(); + start = time_msec(); + + do { + error = lockfile_try_lock(lock_name, timeout > 0, lockfilep); + time_refresh(); + elapsed = time_msec() - start; + } while (error == EINTR && (timeout == INT_MAX || elapsed < timeout)); + + if (!error) { + if (elapsed) { + VLOG_WARN("%s: waited %lld ms for lock file", + lock_name, elapsed); + } + } else if (error == EINTR) { + COVERAGE_INC(lockfile_timeout); + VLOG_WARN("%s: giving up on lock file after %lld ms", + lock_name, elapsed); + error = ETIMEDOUT; + } else { + COVERAGE_INC(lockfile_error); + if (error == EACCES) { + error = EAGAIN; + } + VLOG_WARN("%s: failed to lock file " + "(after %lld ms, with %d-ms timeout): %s", + lock_name, elapsed, timeout, strerror(error)); + } + + free(lock_name); + return error; +} + +/* Unlocks 'lockfile', which must have been created by a call to + * lockfile_lock(), and frees 'lockfile'. */ +void +lockfile_unlock(struct lockfile *lockfile) +{ + if (lockfile) { + COVERAGE_INC(lockfile_unlock); + lockfile_unhash(lockfile); + free(lockfile->name); + free(lockfile); + } +} + +/* Marks all the currently locked lockfiles as no longer locked. It makes + * sense to call this function after fork(), because a child created by fork() + * does not hold its parents' locks. 
*/ +void +lockfile_postfork(void) +{ + struct lockfile *lockfile; + + HMAP_FOR_EACH (lockfile, struct lockfile, hmap_node, &lock_table) { + if (lockfile->fd >= 0) { + VLOG_WARN("%s: child does not inherit lock", lockfile->name); + lockfile_unhash(lockfile); + } + } +} + +static uint32_t +lockfile_hash(dev_t device, ino_t inode) +{ + return hash_bytes(&device, sizeof device, + hash_bytes(&inode, sizeof inode, 0)); +} + +static struct lockfile * +lockfile_find(dev_t device, ino_t inode) +{ + struct lockfile *lockfile; + + HMAP_FOR_EACH_WITH_HASH (lockfile, struct lockfile, hmap_node, + lockfile_hash(device, inode), &lock_table) { + if (lockfile->device == device && lockfile->inode == inode) { + return lockfile; + } + } + return NULL; +} + +static void +lockfile_unhash(struct lockfile *lockfile) +{ + if (lockfile->fd >= 0) { + close(lockfile->fd); + lockfile->fd = -1; + hmap_remove(&lock_table, &lockfile->hmap_node); + } +} + +static struct lockfile * +lockfile_register(const char *name, dev_t device, ino_t inode, int fd) +{ + struct lockfile *lockfile; + + lockfile = lockfile_find(device, inode); + if (lockfile) { + VLOG_ERR("%s: lock file disappeared and reappeared!", name); + lockfile_unhash(lockfile); + } + + lockfile = xmalloc(sizeof *lockfile); + lockfile->name = xstrdup(name); + lockfile->device = device; + lockfile->inode = inode; + lockfile->fd = fd; + hmap_insert(&lock_table, &lockfile->hmap_node, + lockfile_hash(device, inode)); + return lockfile; +} + +static int +lockfile_try_lock(const char *name, bool block, struct lockfile **lockfilep) +{ + struct flock l; + struct stat s; + int error; + int fd; + + *lockfilep = NULL; + + /* Open the lock file, first creating it if necessary. */ + for (;;) { + /* Check whether we've already got a lock on that file. 
*/ + if (!stat(name, &s)) { + if (lockfile_find(s.st_dev, s.st_ino)) { + return EDEADLK; + } + } else if (errno != ENOENT) { + VLOG_WARN("%s: failed to stat lock file: %s", + name, strerror(errno)); + return errno; + } + + /* Try to open an existing lock file. */ + fd = open(name, O_RDWR); + if (fd >= 0) { + break; + } else if (errno != ENOENT) { + VLOG_WARN("%s: failed to open lock file: %s", + name, strerror(errno)); + return errno; + } + + /* Try to create a new lock file. */ + VLOG_INFO("%s: lock file does not exist, creating", name); + fd = open(name, O_RDWR | O_CREAT | O_EXCL, 0600); + if (fd >= 0) { + break; + } else if (errno != EEXIST) { + VLOG_WARN("%s: failed to create lock file: %s", + name, strerror(errno)); + return errno; + } + + /* Someone else created the lock file. Try again. */ + } + + /* Get the inode and device number for the lock table. */ + if (fstat(fd, &s)) { + VLOG_ERR("%s: failed to fstat lock file: %s", name, strerror(errno)); + close(fd); + return errno; + } + + /* Try to lock the file. */ + memset(&l, 0, sizeof l); + l.l_type = F_WRLCK; + l.l_whence = SEEK_SET; + l.l_start = 0; + l.l_len = 0; + + time_disable_restart(); + error = fcntl(fd, block ? F_SETLKW : F_SETLK, &l) == -1 ? errno : 0; + time_enable_restart(); + + if (!error) { + *lockfilep = lockfile_register(name, s.st_dev, s.st_ino, fd); + } else { + close(fd); + } + return error; +} + diff --cc lib/netdev-linux.c index c33405fd3,61994c2fe..47d89efa1 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@@ -203,11 -286,11 +286,11 @@@ netdev_linux_open(const char *name, in int error; /* Allocate network device. 
*/ - netdev = xcalloc(1, sizeof *netdev); + netdev = xzalloc(sizeof *netdev); - netdev_init(&netdev->netdev, suffix, &netdev_linux_class); + netdev_init(&netdev->netdev, name, &netdev_linux_class); netdev->netdev_fd = -1; netdev->tap_fd = -1; - netdev->cache = shash_find_data(&cache_map, suffix); + netdev->cache = shash_find_data(&cache_map, name); if (!netdev->cache) { if (shash_is_empty(&cache_map)) { int error = rtnetlink_notifier_register( diff --cc lib/stream-tcp.c index ecd96865f,000000000..947be9f19 mode 100644,000000..100644 --- a/lib/stream-tcp.c +++ b/lib/stream-tcp.c @@@ -1,137 -1,0 +1,137 @@@ +/* + * Copyright (c) 2008, 2009 Nicira Networks. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include "stream.h" +#include <arpa/inet.h> +#include <errno.h> +#include <inttypes.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <stdlib.h> +#include <string.h> +#include "packets.h" +#include "socket-util.h" +#include "util.h" +#include "stream-provider.h" +#include "stream-fd.h" + +#include "vlog.h" +#define THIS_MODULE VLM_stream_tcp + +/* Active TCP. 
*/ + +static int +new_tcp_stream(const char *name, int fd, int connect_status, + const struct sockaddr_in *remote, struct stream **streamp) +{ + struct sockaddr_in local; + socklen_t local_len = sizeof local; + int on = 1; + int retval; + + /* Get the local IP and port information */ + retval = getsockname(fd, (struct sockaddr *)&local, &local_len); + if (retval) { + memset(&local, 0, sizeof local); + } + + retval = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof on); + if (retval) { + VLOG_ERR("%s: setsockopt(TCP_NODELAY): %s", name, strerror(errno)); + close(fd); + return errno; + } + + retval = new_fd_stream(name, fd, connect_status, NULL, streamp); + if (!retval) { + struct stream *stream = *streamp; + stream_set_remote_ip(stream, remote->sin_addr.s_addr); + stream_set_remote_port(stream, remote->sin_port); + stream_set_local_ip(stream, local.sin_addr.s_addr); + stream_set_local_port(stream, local.sin_port); + } + return retval; +} + +static int +tcp_open(const char *name, char *suffix, struct stream **streamp) +{ + struct sockaddr_in sin; + int fd, error; + - error = tcp_open_active(suffix, 0, &sin, &fd); ++ error = inet_open_active(SOCK_STREAM, suffix, 0, &sin, &fd); + if (fd >= 0) { + return new_tcp_stream(name, fd, error, &sin, streamp); + } else { + VLOG_ERR("%s: connect: %s", name, strerror(error)); + return error; + } +} + +struct stream_class tcp_stream_class = { + "tcp", /* name */ + tcp_open, /* open */ + NULL, /* close */ + NULL, /* connect */ + NULL, /* recv */ + NULL, /* send */ + NULL, /* wait */ +}; + +/* Passive TCP. 
*/ + +static int ptcp_accept(int fd, const struct sockaddr *sa, size_t sa_len, + struct stream **streamp); + +static int +ptcp_open(const char *name UNUSED, char *suffix, struct pstream **pstreamp) +{ + int fd; + - fd = tcp_open_passive(suffix, 0); ++ fd = inet_open_passive(SOCK_STREAM, suffix, 0); + if (fd < 0) { + return -fd; + } else { + return new_fd_pstream("ptcp", fd, ptcp_accept, NULL, pstreamp); + } +} + +static int +ptcp_accept(int fd, const struct sockaddr *sa, size_t sa_len, + struct stream **streamp) +{ + const struct sockaddr_in *sin = (const struct sockaddr_in *) sa; + char name[128]; + + if (sa_len == sizeof(struct sockaddr_in) && sin->sin_family == AF_INET) { + sprintf(name, "tcp:"IP_FMT, IP_ARGS(&sin->sin_addr)); + sprintf(strchr(name, '\0'), ":%"PRIu16, ntohs(sin->sin_port)); + } else { + strcpy(name, "tcp"); + } + return new_tcp_stream(name, fd, 0, sin, streamp); +} + +struct pstream_class ptcp_pstream_class = { + "ptcp", + ptcp_open, + NULL, + NULL, + NULL +}; + diff --cc lib/timeval.c index 84abdfae4,8ad8d0607..5e4238758 --- a/lib/timeval.c +++ b/lib/timeval.c @@@ -43,8 -43,7 +43,8 @@@ static struct timeval now /* Time at which to die with SIGALRM (if not TIME_MIN). */ static time_t deadline = TIME_MIN; - static void setup_timer(void); - static void setup_signal(int flags); + static void set_up_timer(void); ++static void set_up_signal(int flags); static void sigalrm_handler(int); static void refresh_if_ticked(void); static time_t time_add(time_t, time_t); @@@ -67,15 -68,7 +67,15 @@@ time_init(void gettimeofday(&now, NULL); tick = false; - setup_signal(SA_RESTART); - setup_timer(); - /* Set up signal handler. 
*/ ++ set_up_signal(SA_RESTART); ++ set_up_timer(); +} + +static void - setup_signal(int flags) ++set_up_signal(int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof sa); sa.sa_handler = sigalrm_handler; sigemptyset(&sa.sa_mask); @@@ -83,33 -76,9 +83,33 @@@ if (sigaction(SIGALRM, &sa, NULL)) { ovs_fatal(errno, "sigaction(SIGALRM) failed"); } +} - /* Set up periodic signal. */ - set_up_timer(); +/* Remove SA_RESTART from the flags for SIGALRM, so that any system call that + * is interrupted by the periodic timer interrupt will return EINTR instead of + * continuing after the signal handler returns. + * + * time_disable_restart() and time_enable_restart() may be usefully wrapped + * around function calls that might otherwise block forever unless interrupted + * by a signal, e.g.: + * + * time_disable_restart(); + * fcntl(fd, F_SETLKW, &lock); + * time_enable_restart(); + */ +void +time_disable_restart(void) +{ - setup_signal(0); ++ set_up_signal(0); +} + +/* Add SA_RESTART to the flags for SIGALRM, so that any system call that + * is interrupted by the periodic timer interrupt will continue after the + * signal handler returns instead of returning EINTR. 
*/ +void +time_enable_restart(void) +{ - setup_signal(SA_RESTART); ++ set_up_signal(SA_RESTART); } static void diff --cc vswitchd/bridge.c index 516439f3d,dbcf31250..df4169fe5 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@@ -965,20 -1046,10 +1049,10 @@@ bridge_create(const char *name int error; assert(!bridge_lookup(name)); - br = xcalloc(1, sizeof *br); + br = xzalloc(sizeof *br); - error = dpif_create(name, &br->dpif); - if (error == EEXIST || error == EBUSY) { - error = dpif_open(name, &br->dpif); - if (error) { - VLOG_ERR("datapath %s already exists but cannot be opened: %s", - name, strerror(error)); - free(br); - return NULL; - } - dpif_flow_flush(br->dpif); - } else if (error) { - VLOG_ERR("failed to create datapath %s: %s", name, strerror(error)); + error = dpif_create_and_open(name, &br->dpif); + if (error) { free(br); return NULL; }