coverage: Make ovs-appctl command more useful and less alarming.
authorBen Pfaff <blp@nicira.com>
Fri, 20 Apr 2012 21:09:30 +0000 (14:09 -0700)
committerBen Pfaff <blp@nicira.com>
Thu, 26 Apr 2012 21:07:29 +0000 (14:07 -0700)
I've had a few complaints that ovs-vswitchd logs its coverage counters
at WARN level, but this is mainly wrong: ovs-vswitchd only logs coverage
counters at WARN level when the "coverage/log" command is used through
ovs-appctl.  This was even documented.

The reason to log at such a high level was to make it fairly certain that
these messages specifically requested by the admin would not be filtered
out before making it to the log.  But it's even better if the admin just
gets the coverage counters as a reply to the ovs-appctl command.  So that
is what this commit does.

This commit also improves the documentation of the ovs-appctl command.

Signed-off-by: Ben Pfaff <blp@nicira.com>
NEWS
lib/automake.mk
lib/coverage-unixctl.man [new file with mode: 0644]
lib/coverage.c
lib/coverage.h
lib/timeval.c
manpages.mk
ovsdb/ovsdb-server.1.in
vswitchd/ovs-vswitchd.8.in

diff --git a/NEWS b/NEWS
index 9c73352..e717a4a 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ post-v1.6.0
         - Added support for spawning ovs-test server from the client.
         - Now ovs-test is able to automatically create test bridges and ports.
     - "ovs-dpctl dump-flows" now prints observed TCP flags in TCP flows.
+    - The "coverage/log" command previously available through ovs-appctl
+      has been replaced by "coverage/show".  The new command replies with
+      coverage counter values, instead of logging them.
 
 
 v1.6.0 - xx xxx xxxx
index 5ce287f..2d2617e 100644 (file)
@@ -253,6 +253,7 @@ EXTRA_DIST += \
 MAN_FRAGMENTS += \
        lib/common.man \
        lib/common-syn.man \
+       lib/coverage-unixctl.man \
        lib/daemon.man \
        lib/daemon-syn.man \
        lib/leak-checker.man \
diff --git a/lib/coverage-unixctl.man b/lib/coverage-unixctl.man
new file mode 100644 (file)
index 0000000..9718894
--- /dev/null
@@ -0,0 +1,11 @@
+.SS "COVERAGE COMMANDS"
+These commands manage \fB\*(PN\fR's ``coverage counters,'' which count
+the number of times particular events occur during a daemon's runtime.
+In addition to these commands, \fB\*(PN\fR automatically logs coverage
+counter values, at \fBINFO\fR level, when it detects that the daemon's
+main loop takes unusually long to run.
+.PP
+Coverage counters are useful mainly for performance analysis and
+debugging.
+.IP "\fBcoverage/show\fR"
+Displays the values of all of the coverage counters.
index ff20f5e..ee27af0 100644 (file)
@@ -20,6 +20,7 @@
 #include <stdlib.h>
 #include "dynamic-string.h"
 #include "hash.h"
+#include "svec.h"
 #include "timeval.h"
 #include "unixctl.h"
 #include "util.h"
@@ -48,19 +49,28 @@ struct coverage_counter *coverage_counters[] = {
 
 static unsigned int epoch;
 
+static void coverage_read(struct svec *);
+
 static void
-coverage_unixctl_log(struct unixctl_conn *conn, int argc OVS_UNUSED,
+coverage_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                      const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
 {
-    coverage_log(VLL_WARN, false);
-    unixctl_command_reply(conn, NULL);
+    struct svec lines;
+    char *reply;
+
+    svec_init(&lines);
+    coverage_read(&lines);
+    reply = svec_join(&lines, "\n", "\n");
+    unixctl_command_reply(conn, reply);
+    free(reply);
+    svec_destroy(&lines);
 }
 
 void
 coverage_init(void)
 {
-    unixctl_command_register("coverage/log", "", 0, 0,
-                             coverage_unixctl_log, NULL);
+    unixctl_command_register("coverage/show", "", 0, 0,
+                             coverage_unixctl_show, NULL);
 }
 
 /* Sorts coverage counters in descending order by count, within equal counts
@@ -144,60 +154,75 @@ coverage_hit(uint32_t hash)
     }
 }
 
+/* Logs the coverage counters, unless a similar set of events has already been
+ * logged.
+ *
+ * This function logs at log level VLL_INFO.  Use care before adjusting this
+ * level, because depending on its configuration, syslogd can write changes
+ * synchronously, which can cause the coverage messages to take several seconds
+ * to write. */
+void
+coverage_log(void)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3);
+
+    if (!VLOG_DROP_INFO(&rl)) {
+        uint32_t hash = coverage_hash();
+        if (coverage_hit(hash)) {
+            VLOG_INFO("Skipping details of duplicate event coverage for "
+                      "hash=%08"PRIx32" in epoch %u", hash, epoch);
+        } else {
+            struct svec lines;
+            const char *line;
+            size_t i;
+
+            svec_init(&lines);
+            coverage_read(&lines);
+            SVEC_FOR_EACH (i, line, &lines) {
+                VLOG_INFO("%s", line);
+            }
+            svec_destroy(&lines);
+        }
+    }
+}
+
 static void
-coverage_log_counter(enum vlog_level level, const struct coverage_counter *c)
+coverage_read_counter(struct svec *lines, const struct coverage_counter *c)
 {
-    VLOG(level, "%-24s %5u / %9llu", c->name, c->count, c->count + c->total);
+    svec_add_nocopy(lines, xasprintf("%-24s %5u / %9llu",
+                                     c->name, c->count, c->count + c->total));
 }
 
-/* Logs the coverage counters at the given vlog 'level'.  If
- * 'suppress_dups' is true, then duplicate events are not displayed.
- * Care should be taken in the value used for 'level'.  Depending on the
- * configuration, syslog can write changes synchronously, which can
- * cause the coverage messages to take several seconds to write. */
-void
-coverage_log(enum vlog_level level, bool suppress_dups)
+/* Adds coverage counter information to 'lines'. */
+static void
+coverage_read(struct svec *lines)
 {
-    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 3);
     size_t n_never_hit;
     uint32_t hash;
     size_t i;
 
-    if (suppress_dups
-        ? !vlog_is_enabled(THIS_MODULE, level)
-        : vlog_should_drop(THIS_MODULE, level, &rl)) {
-        return;
-    }
-
     hash = coverage_hash();
-    if (suppress_dups) {
-        if (coverage_hit(hash)) {
-            VLOG(level, "Skipping details of duplicate event coverage for "
-                 "hash=%08"PRIx32" in epoch %u", hash, epoch);
-            return;
-        }
-    }
 
     n_never_hit = 0;
-    VLOG(level, "Event coverage (epoch %u/entire run), hash=%08"PRIx32":",
-         epoch, hash);
+    svec_add_nocopy(lines, xasprintf("Event coverage (epoch %u/entire run), "
+                                     "hash=%08"PRIx32":", epoch, hash));
     for (i = 0; i < n_coverage_counters; i++) {
         struct coverage_counter *c = coverage_counters[i];
         if (c->count) {
-            coverage_log_counter(level, c);
+            coverage_read_counter(lines, c);
         }
     }
     for (i = 0; i < n_coverage_counters; i++) {
         struct coverage_counter *c = coverage_counters[i];
         if (!c->count) {
             if (c->total) {
-                coverage_log_counter(level, c);
+                coverage_read_counter(lines, c);
             } else {
                 n_never_hit++;
             }
         }
     }
-    VLOG(level, "%zu events never hit", n_never_hit);
+    svec_add_nocopy(lines, xasprintf("%zu events never hit", n_never_hit));
 }
 
 /* Advances to the next epoch of coverage, resetting all the counters to 0. */
index b7db6c4..0f389bf 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011 Nicira Networks.
+ * Copyright (c) 2009, 2010, 2011, 2012 Nicira Networks.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -56,7 +56,7 @@ struct coverage_counter {
 #define COVERAGE_ADD(COUNTER, AMOUNT) counter_##COUNTER.count += (AMOUNT);
 
 void coverage_init(void);
-void coverage_log(enum vlog_level, bool suppress_dups);
+void coverage_log(void);
 void coverage_clear(void);
 
 /* Implementation detail. */
index 9829100..b60ece9 100644 (file)
@@ -499,11 +499,7 @@ log_poll_interval(long long int last_wakeup)
                           rusage.ru_nivcsw - last_rusage->ru_nivcsw);
             }
         }
-        /* Care should be taken in the value chosen for logging.  Depending
-         * on the configuration, syslog can write changes synchronously,
-         * which can cause the coverage messages to take longer to log
-         * than the processing delay that triggered it. */
-        coverage_log(VLL_INFO, true);
+        coverage_log();
     }
 
     /* Update exponentially weighted moving average.  With these parameters, a
index 14bb41f..1773263 100644 (file)
@@ -34,6 +34,7 @@ ovsdb/ovsdb-server.1: \
        ovsdb/ovsdb-server.1.in \
        lib/common-syn.man \
        lib/common.man \
+       lib/coverage-unixctl.man \
        lib/daemon-syn.man \
        lib/daemon.man \
        lib/ssl-bootstrap-syn.man \
@@ -51,6 +52,7 @@ ovsdb/ovsdb-server.1: \
 ovsdb/ovsdb-server.1.in:
 lib/common-syn.man:
 lib/common.man:
+lib/coverage-unixctl.man:
 lib/daemon-syn.man:
 lib/daemon.man:
 lib/ssl-bootstrap-syn.man:
index 9e2d79a..dfe9208 100644 (file)
@@ -123,6 +123,7 @@ This command might be useful for debugging issues with database
 clients.
 .
 .so lib/vlog-unixctl.man
+.so lib/coverage-unixctl.man
 .so lib/stress-unixctl.man
 .SH "SEE ALSO"
 .
index 1abae6f..6c9f3e4 100644 (file)
@@ -108,8 +108,6 @@ how to configure Open vSwitch.
 .SS "GENERAL COMMANDS"
 .IP "\fBexit\fR"
 Causes \fBovs\-vswitchd\fR to gracefully terminate.
-.IP "\fBcoverage/log\fR"
-Logs coverage counters at level warn.
 .IP "\fBqos/show\fR \fIinterface\fR"
 Queries the kernel for Quality of Service configuration and statistics
 associated with the given \fIinterface\fR.