datapath: STT: Fix nf-hook softlockup.
author Pravin B Shelar <pshelar@nicira.com>
Thu, 14 Jan 2016 00:42:10 +0000 (16:42 -0800)
committer Pravin B Shelar <pshelar@nicira.com>
Thu, 14 Jan 2016 19:20:25 +0000 (11:20 -0800)
The nf-hook is not unregistered when an STT device is deleted, but when
an STT device is created a second time the nf-hook is registered again,
which causes the following soft lockup.
This patch fixes it by registering the nf-hook only when the very
first STT device is created.

---8<---

BUG: soft lockup - CPU#1 stuck for 22s! [ovs-vswitchd:11293]
RIP: 0010:[<ffffffffa0e48308>]  [<ffffffffa0e48308>] nf_ip_hook+0xf8/0x180 [openvswitch]
Stack:
 <IRQ>
 [<ffffffff8163bf60>] ? ip_rcv_finish+0x350/0x350
 [<ffffffff8163572a>] nf_iterate+0x9a/0xb0
 [<ffffffff8163bf60>] ? ip_rcv_finish+0x350/0x350
 [<ffffffff816357bc>] nf_hook_slow+0x7c/0x120
 [<ffffffff8163bf60>] ? ip_rcv_finish+0x350/0x350
 [<ffffffff8163c343>] ip_local_deliver+0x73/0x80
 [<ffffffff8163bc8d>] ip_rcv_finish+0x7d/0x350
 [<ffffffff8163c5e8>] ip_rcv+0x298/0x3d0
 [<ffffffff81605f26>] __netif_receive_skb_core+0x696/0x880
 [<ffffffff81606128>] __netif_receive_skb+0x18/0x60
 [<ffffffff81606cce>] process_backlog+0xae/0x180
 [<ffffffff81606512>] net_rx_action+0x152/0x270
 [<ffffffff8106accc>] __do_softirq+0xec/0x300
 [<ffffffff81710a1c>] do_softirq_own_stack+0x1c/0x30

Fixes: fee43fa2 ("datapath: Fix deadlock on STT device destroy.")
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Tested-by: Joe Stringer <joe@ovn.org>
datapath/linux/compat/stt.c

index 98d6d5b..527dfee 100644 (file)
@@ -153,6 +153,9 @@ struct stt_net {
        struct list_head stt_list;
        struct list_head stt_up_list;   /* Devices which are in IFF_UP state. */
        int n_tunnels;
+#ifdef HAVE_NF_REGISTER_NET_HOOK
+       bool nf_hook_reg_done;
+#endif
 };
 
 static int stt_net_id;
@@ -1553,12 +1556,23 @@ static int stt_start(struct net *net)
         * rtnl-lock, which results in dead lock in stt-dev-create. Therefore
         * use this new API.
         */
+
+       if (sn->nf_hook_reg_done)
+               goto out;
+
        err = nf_register_net_hook(net, &nf_hook_ops);
+       if (!err)
+               sn->nf_hook_reg_done = true;
 #else
+       /* Register STT only on very first STT device addition. */
+       if (!list_empty(&nf_hook_ops.list))
+               goto out;
+
        err = nf_register_hook(&nf_hook_ops);
 #endif
        if (err)
                goto dec_n_tunnel;
+out:
        sn->n_tunnels++;
        return 0;
 
@@ -1854,6 +1868,9 @@ static int stt_init_net(struct net *net)
 
        INIT_LIST_HEAD(&sn->stt_list);
        INIT_LIST_HEAD(&sn->stt_up_list);
+#ifdef HAVE_NF_REGISTER_NET_HOOK
+       sn->nf_hook_reg_done = false;
+#endif
        return 0;
 }
 
@@ -1868,7 +1885,7 @@ static void stt_exit_net(struct net *net)
        /* Ideally this should be done from stt_stop(), But on some kernels
         * nf-unreg operation needs RTNL-lock, which can cause deallock.
         * So it is done from here. */
-       if (!list_empty(&nf_hook_ops.list))
+       if (sn->nf_hook_reg_done)
                nf_unregister_net_hook(net, &nf_hook_ops);
 #endif