CHROMIUM: printk: prevent unbounded time inside console_unlock
authorMandeep Singh Baines <msb@chromium.org>
Wed, 15 Aug 2012 06:16:50 +0000 (23:16 -0700)
committerGerrit <chrome-bot@google.com>
Wed, 15 Aug 2012 20:03:39 +0000 (13:03 -0700)
We are seeing softlockups inside console_unlock():

BUG: soft lockup - CPU#0 stuck for 12s! [rsyslogd:285]
PC is at _raw_spin_lock_irqsave+0x10/0x28
LR is at down_trylock+0x1c/0x3c
(_raw_spin_lock_irqsave+0x10/0x28) from [<8004d830>] (down_trylock+0x1c/0x3c)
(down_trylock+0x1c/0x3c) from [<80028e40>] (console_trylock+0x1c/0x60)
(console_trylock+0x1c/0x60) from [<800292f8>] (console_unlock+0x1c8/0x1f4)
(console_unlock+0x1c8/0x1f4) from [<80268e74>] (do_con_write.part.14+0x1cc8/0x1cf4)
(do_con_write.part.14+0x1cc8/0x1cf4) from [<80268f34>] (con_write+0x40/0x54)
(con_write+0x40/0x54) from [<802547c0>] (do_output_char+0xa0/0x1c8)
(do_output_char+0xa0/0x1c8) from [<80254928>] (process_output+0x40/0x5c)
(process_output+0x40/0x5c) from [<80255170>] (n_tty_write+0x28c/0x3b4)
(n_tty_write+0x28c/0x3b4) from [<80251ea8>] (tty_write+0x198/0x238)
(tty_write+0x198/0x238) from [<800ed208>] (vfs_write+0xc4/0x140)
(vfs_write+0xc4/0x140) from [<800ed484>] (sys_write+0x4c/0x78)
(sys_write+0x4c/0x78) from [<8000e7c0>] (ret_fast_syscall+0x0/0x30)

The softlockup can be caused by a printk storm. Since console_unlock
can retry an unbounded number of times, we can get stuck in inside
for an unbounded amount of time. The fix is to schedule work instead
of retrying inside console_unlock() when there is more work to do.

Upstream:

printk.c upstream is dramatically different from 3.4 so I'll need to
write a new patch to send upstream.

BUG=chrome-os-partner:12565
TEST=See below.

$ dmesg # verify OK
$ reboot

*reboot*

$ cat /dev/pstore/console-ramoops # verify OK
$ dmesg # verify OK
$ echo bug > /proc/breakme

*crash*

$ cat /dev/pstore/console-ramoops # verify OK
$ cat /var/spool/*.kcrash # verify OK

Change-Id: If2955af0bd96b8a27b87f6a4306094c019a86dc7
Signed-off-by: Mandeep Singh Baines <msb@chromium.org>
Reviewed-on: https://gerrit.chromium.org/gerrit/30384
Reviewed-by: Olof Johansson <olofj@chromium.org>
kernel/printk.c

index b663c2c..55e39d3 100644 (file)
@@ -41,6 +41,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/rculist.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 
@@ -1253,6 +1254,14 @@ void wake_up_klogd(void)
                this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
 }
 
+static void console_unlock_work_fn(struct work_struct *work)
+{
+       if (console_trylock())
+               console_unlock();
+}
+
+static DECLARE_WORK(console_unlock_work, console_unlock_work_fn);
+
 /**
  * console_unlock - unlock the console system
  *
@@ -1271,7 +1280,7 @@ void console_unlock(void)
 {
        unsigned long flags;
        unsigned _con_start, _log_end;
-       unsigned wake_klogd = 0, retry = 0;
+       unsigned wake_klogd = 0;
 
        if (console_suspended) {
                up(&console_sem);
@@ -1280,12 +1289,9 @@ void console_unlock(void)
 
        console_may_schedule = 0;
 
-again:
-       for ( ; ; ) {
-               raw_spin_lock_irqsave(&logbuf_lock, flags);
-               wake_klogd |= log_start - log_end;
-               if (con_start == log_end)
-                       break;                  /* Nothing to print */
+       raw_spin_lock_irqsave(&logbuf_lock, flags);
+       wake_klogd |= log_start - log_end;
+       if (con_start != log_end) {
                _con_start = con_start;
                _log_end = log_end;
                con_start = log_end;            /* Flush */
@@ -1293,32 +1299,28 @@ again:
                stop_critical_timings();        /* don't trace print latency */
                call_console_drivers(_con_start, _log_end);
                start_critical_timings();
-               local_irq_restore(flags);
+       } else {
+               raw_spin_unlock(&logbuf_lock);
        }
+
        console_locked = 0;
 
        /* Release the exclusive_console once it is used */
        if (unlikely(exclusive_console))
                exclusive_console = NULL;
 
-       raw_spin_unlock(&logbuf_lock);
-
        up(&console_sem);
 
        /*
-        * Someone could have filled up the buffer again, so re-check if there's
-        * something to flush. In case we cannot trylock the console_sem again,
-        * there's a new owner and the console_unlock() from them will do the
-        * flush, no worries.
+        * Someone could have filled up the buffer again, so re-check and
+        * schedule work if there's something to flush.
         */
        raw_spin_lock(&logbuf_lock);
        if (con_start != log_end)
-               retry = 1;
+               if (!work_pending(&console_unlock_work))
+                       schedule_work(&console_unlock_work);
        raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
-       if (retry && console_trylock())
-               goto again;
-
        if (wake_klogd)
                wake_up_klogd();
 }