IB/ipoib: Save only IPOIB_MAX_PATH_REC_QUEUE skb's
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3..7cad4dd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
 
        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-       if (ipoib_ib_dev_open(dev, 1)) {
+       if (ipoib_ib_dev_open(dev)) {
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
                goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
        return 0;
 
 err_stop:
-       ipoib_ib_dev_stop(dev, 1);
+       ipoib_ib_dev_stop(dev);
 
 err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
 
        netif_stop_queue(dev);
 
-       ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev, 0);
+       ipoib_ib_dev_down(dev);
+       ipoib_ib_dev_stop(dev);
 
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
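
The first three hunks drop the flush argument from the IB-device helpers. This tree carries the per-interface workqueue rework alongside the titled skb-cap change, and once every device owns its workqueue the callers no longer need to pick a flush mode. Implied prototypes, as a sketch (the real declarations live in ipoib.h, which is not part of this diff; the void returns are assumptions):

int  ipoib_ib_dev_open(struct net_device *dev);  /* was (dev, int flush) */
void ipoib_ib_dev_stop(struct net_device *dev);  /* was (dev, int flush) */
void ipoib_ib_dev_down(struct net_device *dev);  /* was (dev, int flush) */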
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
                if (!path->query && path_rec_start(dev, path))
                        goto err_path;
-
-               __skb_queue_tail(&neigh->queue, skb);
+               if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+                       __skb_queue_tail(&neigh->queue, skb);
+               else
+                       goto err_drop;
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
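
While a path-record query is outstanding, neigh->queue is now capped at IPOIB_MAX_PATH_REC_QUEUE entries (defined in ipoib.h; 3 in contemporary trees); anything past the cap takes the err_drop exit, which is not shown here but ends in the same drop-and-count handling seen in the next hunk. The pattern as a self-contained sketch; the helper name is invented (ipoib open-codes the check), and the caller must hold the lock serializing the queue because __skb_queue_tail is the unlocked variant:

#include <linux/skbuff.h>

/*
 * Illustrative only: enqueue while below @max, otherwise report
 * failure so the caller can drop and account the packet.  The
 * caller must hold the lock protecting @q.
 */
static bool skb_queue_tail_bounded(struct sk_buff_head *q,
                                   struct sk_buff *skb, u32 max)
{
        if (skb_queue_len(q) >= max)
                return false;
        __skb_queue_tail(q, skb);
        return true;
}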
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                        new_path = 1;
                }
                if (path) {
-                       __skb_queue_tail(&path->queue, skb);
+                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+                               __skb_queue_tail(&path->queue, skb);
+                       } else {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                       }
 
                        if (!path->query && path_rec_start(dev, path)) {
                                spin_unlock_irqrestore(&priv->lock, flags);
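
The unicast ARP path gets the same cap, handled inline: overflow is counted against the device and the skb is freed with dev_kfree_skb_any(), which is safe from any context. With the hypothetical helper sketched above, the call site would read:

        if (!skb_queue_tail_bounded(&path->queue, skb,
                                    IPOIB_MAX_PATH_REC_QUEUE)) {
                ++dev->stats.tx_dropped;   /* visible in ip -s link */
                dev_kfree_skb_any(skb);    /* hard/soft-irq safe */
        }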
@@ -839,7 +846,14 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                return;
        }
 
-       queue_work(ipoib_workqueue, &priv->restart_task);
+       queue_work(priv->wq, &priv->restart_task);
+}
+
+static int ipoib_get_iflink(const struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       return priv->parent->ifindex;
 }
 
 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
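
ipoib_get_iflink is the handler for the (then new) ndo_get_iflink hook, wired into ipoib_netdev_ops further down: a child interface reports its parent's ifindex, which is what lets ip link print names like ib0.8001@ib0. As written it dereferences priv->parent unconditionally, so it relies on being called only for interfaces that have a parent. The core dispatch, quoted from memory from net/core/dev.c:

int dev_get_iflink(const struct net_device *dev)
{
        if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
                return dev->netdev_ops->ndo_get_iflink(dev);

        return dev->ifindex;    /* default: device is its own lower link */
}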
@@ -954,7 +968,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
        __ipoib_reap_neigh(priv);
 
        if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                                   arp_tbl.gc_interval);
 }
 
@@ -1133,7 +1147,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
        /* start garbage collection */
        clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                           arp_tbl.gc_interval);
 
        return 0;
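
Both garbage-collection call sites move from the global ipoib_workqueue to the device's private priv->wq. The reaper is a self-rearming delayed work: each pass requeues itself until IPOIB_STOP_NEIGH_GC is set. The generic shape, with invented names (not the driver's):

#include <linux/workqueue.h>

struct gc_ctx {                         /* hypothetical */
        struct workqueue_struct *wq;    /* the per-device queue */
        struct delayed_work      task;
        unsigned long            flags; /* bit 0 = stop */
};

static void gc_handler(struct work_struct *work)
{
        struct gc_ctx *ctx = container_of(to_delayed_work(work),
                                          struct gc_ctx, task);

        /* ... reap stale entries ... */

        if (!test_bit(0, &ctx->flags))  /* re-arm unless stopping */
                queue_delayed_work(ctx->wq, &ctx->task, 30 * HZ);
}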
@@ -1262,15 +1276,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out;
        /* Allocate RX/TX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                       ca->name, ipoib_recvq_size);
-               goto out_neigh_hash_cleanup;
+               goto out;
        }
 
        priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1297,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        if (ipoib_ib_dev_init(dev, ca, port))
                goto out_tx_ring_cleanup;
 
+       /*
+        * Must be after ipoib_ib_dev_init so we can allocate a per
+        * device wq there and use it here
+        */
+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out_dev_uninit;
+
        return 0;
 
+out_dev_uninit:
+       ipoib_ib_dev_cleanup(dev);
+
 out_tx_ring_cleanup:
        vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
        kfree(priv->rx_ring);
 
-out_neigh_hash_cleanup:
-       ipoib_neigh_hash_uninit(dev);
 out:
        return -ENOMEM;
 }
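
The motivation for moving ipoib_neigh_hash_init is ordering, as the new comment says: it starts the GC task on priv->wq, and priv->wq is now created inside ipoib_ib_dev_init, so the hash init must run after it, with the error ladder growing a matching rung. Condensed, with placeholder helpers standing in for the ring allocations above:

static int ipoib_dev_init_sketch(struct net_device *dev,
                                 struct ib_device *ca, int port)
{
        struct ipoib_dev_priv *priv = netdev_priv(dev);

        if (alloc_rings(priv))                  /* rx_ring + tx_ring */
                goto out;
        if (ipoib_ib_dev_init(dev, ca, port))   /* creates priv->wq */
                goto out_rings;
        if (ipoib_neigh_hash_init(priv) < 0)    /* queues GC on priv->wq */
                goto out_ib;
        return 0;

out_ib:
        ipoib_ib_dev_cleanup(dev);              /* destroys priv->wq */
out_rings:
        free_rings(priv);
out:
        return -ENOMEM;
}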
@@ -1317,6 +1337,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
        }
        unregister_netdevice_many(&head);
 
+       /*
+        * Must be before ipoib_ib_dev_cleanup or we delete an in use
+        * work queue
+        */
+       ipoib_neigh_hash_uninit(dev);
+
        ipoib_ib_dev_cleanup(dev);
 
        kfree(priv->rx_ring);
@@ -1324,8 +1350,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
        priv->rx_ring = NULL;
        priv->tx_ring = NULL;
-
-       ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
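
Cleanup gets the mirror-image reorder: the neigh hash, and with it the GC work, has to be quiesced while priv->wq still exists, i.e. before ipoib_ib_dev_cleanup destroys it. Continuing the gc_ctx sketch from above, the invariant is quiesce first, destroy second:

static void gc_teardown(struct gc_ctx *ctx)
{
        set_bit(0, &ctx->flags);                /* block re-arming */
        cancel_delayed_work_sync(&ctx->task);   /* wait out a running pass */
        destroy_workqueue(ctx->wq);             /* nothing can queue now */
}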
@@ -1341,6 +1365,7 @@ static const struct net_device_ops ipoib_netdev_ops = {
        .ndo_start_xmit          = ipoib_start_xmit,
        .ndo_tx_timeout          = ipoib_timeout,
        .ndo_set_rx_mode         = ipoib_set_mcast_list,
+       .ndo_get_iflink          = ipoib_get_iflink,
 };
 
 void ipoib_setup(struct net_device *dev)
@@ -1633,10 +1658,11 @@ sysfs_failed:
 
 register_failed:
        ib_unregister_event_handler(&priv->event_handler);
+       flush_workqueue(ipoib_workqueue);
        /* Stop GC if started before flush */
        set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
        cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
 
 event_failed:
        ipoib_dev_cleanup(priv->dev);
@@ -1699,6 +1725,7 @@ static void ipoib_remove_one(struct ib_device *device)
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
                ib_unregister_event_handler(&priv->event_handler);
+               flush_workqueue(ipoib_workqueue);
 
                rtnl_lock();
                dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1707,7 +1734,7 @@ static void ipoib_remove_one(struct ib_device *device)
                /* Stop GC */
                set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(ipoib_workqueue);
+               flush_workqueue(priv->wq);
 
                unregister_netdev(priv->dev);
                free_netdev(priv->dev);
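
The register-error unwind and ipoib_remove_one now flush in the same two-stage order: first the global queue, whose flush tasks may still queue work onto priv->wq, then, once the GC can no longer re-arm, the device's own queue. Flushing priv->wq first could leave it holding freshly queued work when it is later destroyed. Annotated, the common shape is:

        ib_unregister_event_handler(&priv->event_handler);
        flush_workqueue(ipoib_workqueue);            /* 1: drain flush events  */
        set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);  /* 2: stop GC re-arming   */
        cancel_delayed_work(&priv->neigh_reap_task);
        flush_workqueue(priv->wq);                   /* 3: drain device's work */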
@@ -1742,14 +1769,16 @@ static int __init ipoib_init_module(void)
                return ret;
 
        /*
-        * We create our own workqueue mainly because we want to be
-        * able to flush it when devices are being removed.  We can't
-        * use schedule_work()/flush_scheduled_work() because both
-        * unregister_netdev() and linkwatch_event take the rtnl lock,
-        * so flush_scheduled_work() can deadlock during device
-        * removal.
+        * We create a global workqueue here that is used for all flush
+        * operations.  However, if you attempt to flush a workqueue
+        * from a task on that same workqueue, it deadlocks the system.
+        * We want to be able to flush the tasks associated with a
+        * specific net device, so we also create a workqueue for each
+        * netdevice.  We queue up the tasks for that device only on
+        * its private workqueue, and we only queue up flush events
+        * on our global flush workqueue.  This avoids the deadlocks.
         */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
        if (!ipoib_workqueue) {
                ret = -ENOMEM;
                goto err_fs;
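
The rewritten comment spells out the final design: one global singlethreaded queue that only ever runs flush operations, plus a private queue per netdevice for its regular tasks, so a flush handler can flush priv->wq without ever flushing the queue it is running on. The two creation sites end up as below; the per-device one lives in the IB transport init outside this file, and its name string here is an assumption:

/* global, ipoib_init_module (this hunk): */
ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");

/* per device, during ipoib_ib_dev_init (not in this diff): */
priv->wq = create_singlethread_workqueue("ipoib_wq");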