net: less interrupt masking in NAPI
diff --git a/net/core/dev.c b/net/core/dev.c
index ebf778d..40be481 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4316,20 +4316,28 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
                local_irq_enable();
 }
 
+static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+       return sd->rps_ipi_list != NULL;
+#else
+       return false;
+#endif
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
        int work = 0;
        struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
-#ifdef CONFIG_RPS
        /* Check if we have pending ipi, its better to send them now,
         * not waiting net_rx_action() end.
         */
-       if (sd->rps_ipi_list) {
+       if (sd_has_rps_ipi_waiting(sd)) {
                local_irq_disable();
                net_rps_action_and_irq_enable(sd);
        }
-#endif
+
        napi->weight = weight_p;
        local_irq_disable();
        while (1) {
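
The sd_has_rps_ipi_waiting() helper added above hoists the CONFIG_RPS #ifdef out of process_backlog(): with RPS compiled out it returns a constant false, the compiler drops the branch, and the call site stays free of preprocessor conditionals. A standalone sketch of the same pattern, assuming made-up names (CONFIG_FOO, foo_pending, do_work), not kernel code:

/* Sketch: hide a config #ifdef behind a tiny helper so callers stay clean. */
#include <stdbool.h>
#include <stdio.h>

struct state {
#ifdef CONFIG_FOO
        int foo_count;          /* only exists when the feature is built in */
#endif
        int work;
};

static bool foo_pending(const struct state *s)
{
#ifdef CONFIG_FOO
        return s->foo_count != 0;
#else
        (void)s;
        return false;           /* constant: the caller's branch folds away */
#endif
}

static void do_work(struct state *s)
{
        if (foo_pending(s))     /* no #ifdef needed at the call site */
                printf("flush pending foo work first\n");
        s->work++;
}

int main(void)
{
        struct state s = { .work = 0 };

        do_work(&s);
        printf("work done: %d\n", s.work);
        return 0;
}
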
@@ -4356,7 +4364,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
                         * We can use a plain write instead of clear_bit(),
                         * and we dont need an smp_mb() memory barrier.
                         */
-                       list_del(&napi->poll_list);
                        napi->state = 0;
                        rps_unlock(sd);
 
@@ -4406,7 +4413,7 @@ void __napi_complete(struct napi_struct *n)
        BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
        BUG_ON(n->gro_list);
 
-       list_del(&n->poll_list);
+       list_del_init(&n->poll_list);
        smp_mb__before_atomic();
        clear_bit(NAPI_STATE_SCHED, &n->state);
 }
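
Replacing list_del() with list_del_init() in __napi_complete() matters for the napi_complete() change in the next hunk: list_del() leaves the removed entry's links unusable (the kernel poisons them), so a later list_empty() on that entry tells you nothing, whereas list_del_init() re-points the entry at itself, making list_empty(&n->poll_list) a reliable "not queued on any poll list" test. A minimal userspace mock of the relevant list_head operations, assuming a simplified re-implementation rather than <linux/list.h>:

/* Toy doubly linked list mirroring list_add_tail()/list_del_init()/list_empty(). */
#include <stdbool.h>
#include <stdio.h>

struct list_head {
        struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

static void INIT_LIST_HEAD(struct list_head *h)
{
        h->next = h;
        h->prev = h;
}

static void list_add_tail(struct list_head *node, struct list_head *head)
{
        node->prev = head->prev;
        node->next = head;
        head->prev->next = node;
        head->prev = node;
}

static void list_del_init(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        INIT_LIST_HEAD(entry);          /* entry now points at itself */
}

static bool list_empty(const struct list_head *h)
{
        return h->next == h;
}

int main(void)
{
        struct list_head poll_list = LIST_HEAD_INIT(poll_list);
        struct list_head napi = LIST_HEAD_INIT(napi);

        list_add_tail(&napi, &poll_list);
        list_del_init(&napi);
        /* With a plain unlink the pointers would be stale and this test
         * meaningless; after list_del_init() it is reliably true.
         */
        printf("detached: %s\n", list_empty(&napi) ? "yes" : "no");
        return 0;
}
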
@@ -4424,9 +4431,15 @@ void napi_complete(struct napi_struct *n)
                return;
 
        napi_gro_flush(n, false);
-       local_irq_save(flags);
-       __napi_complete(n);
-       local_irq_restore(flags);
+
+       if (likely(list_empty(&n->poll_list))) {
+               WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+       } else {
+               /* If n->poll_list is not empty, we need to mask irqs */
+               local_irq_save(flags);
+               __napi_complete(n);
+               local_irq_restore(flags);
+       }
 }
 EXPORT_SYMBOL(napi_complete);
 
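Because net_rx_action() below now detaches the napi_struct from its poll list (list_del_init()) before calling ->poll(), the common completion case finds n->poll_list empty, and finishing it reduces to atomically clearing NAPI_STATE_SCHED; test_and_clear_bit() does that without any interrupt masking, and the WARN_ON_ONCE() catches a completion that was never scheduled. Only an instance that is still queued somewhere takes the old irq-masked __napi_complete() path. A standalone sketch of that fast path using C11 atomics in place of the kernel bit ops; the names are illustrative:

/* Sketch: "complete" an instance by clearing its SCHED bit atomically,
 * and warn if the bit was not set (completion without a prior schedule).
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define STATE_SCHED     (1UL << 0)

static bool test_and_clear_sched(atomic_ulong *state)
{
        unsigned long old = atomic_fetch_and(state, ~STATE_SCHED);

        return old & STATE_SCHED;       /* true if the bit had been set */
}

int main(void)
{
        atomic_ulong state = STATE_SCHED;       /* a "scheduled" instance */

        if (!test_and_clear_sched(&state))
                fprintf(stderr, "completing an unscheduled instance?\n");
        printf("state now: %#lx\n", (unsigned long)atomic_load(&state));
        return 0;
}
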
@@ -4520,29 +4533,28 @@ static void net_rx_action(struct softirq_action *h)
        struct softnet_data *sd = this_cpu_ptr(&softnet_data);
        unsigned long time_limit = jiffies + 2;
        int budget = netdev_budget;
+       LIST_HEAD(list);
+       LIST_HEAD(repoll);
        void *have;
 
        local_irq_disable();
+       list_splice_init(&sd->poll_list, &list);
+       local_irq_enable();
 
-       while (!list_empty(&sd->poll_list)) {
+       while (!list_empty(&list)) {
                struct napi_struct *n;
                int work, weight;
 
-               /* If softirq window is exhuasted then punt.
+               /* If softirq window is exhausted then punt.
                 * Allow this to run for 2 jiffies since which will allow
                 * an average latency of 1.5/HZ.
                 */
                if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
                        goto softnet_break;
 
-               local_irq_enable();
 
-               /* Even though interrupts have been re-enabled, this
-                * access is safe because interrupts can only add new
-                * entries to the tail of this list, and only ->poll()
-                * calls can remove this head entry from the list.
-                */
-               n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
+               n = list_first_entry(&list, struct napi_struct, poll_list);
+               list_del_init(&n->poll_list);
 
                have = netpoll_poll_lock(n);
 
@@ -4564,8 +4576,6 @@ static void net_rx_action(struct softirq_action *h)
 
                budget -= work;
 
-               local_irq_disable();
-
                /* Drivers must not modify the NAPI state if they
                 * consume the entire weight.  In such cases this code
                 * still "owns" the NAPI instance and therefore can
@@ -4573,32 +4583,40 @@ static void net_rx_action(struct softirq_action *h)
                 */
                if (unlikely(work == weight)) {
                        if (unlikely(napi_disable_pending(n))) {
-                               local_irq_enable();
                                napi_complete(n);
-                               local_irq_disable();
                        } else {
                                if (n->gro_list) {
                                        /* flush too old packets
                                         * If HZ < 1000, flush all packets.
                                         */
-                                       local_irq_enable();
                                        napi_gro_flush(n, HZ >= 1000);
-                                       local_irq_disable();
                                }
-                               list_move_tail(&n->poll_list, &sd->poll_list);
+                               list_add_tail(&n->poll_list, &repoll);
                        }
                }
 
                netpoll_poll_unlock(have);
        }
+
+       if (!sd_has_rps_ipi_waiting(sd) &&
+           list_empty(&list) &&
+           list_empty(&repoll))
+               return;
 out:
+       local_irq_disable();
+
+       list_splice_tail_init(&sd->poll_list, &list);
+       list_splice_tail(&repoll, &list);
+       list_splice(&list, &sd->poll_list);
+       if (!list_empty(&sd->poll_list))
+               __raise_softirq_irqoff(NET_RX_SOFTIRQ);
+
        net_rps_action_and_irq_enable(sd);
 
        return;
 
 softnet_break:
        sd->time_squeeze++;
-       __raise_softirq_irqoff(NET_RX_SOFTIRQ);
        goto out;
 }
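
Taken together, net_rx_action() now masks interrupts only twice per run: once to splice sd->poll_list onto a private list, and once at out: to splice any leftovers (new arrivals on sd->poll_list, the unprocessed tail of list, and the repoll entries) back and re-raise NET_RX_SOFTIRQ if anything remains, which is why the softnet_break path no longer raises it itself. Each ->poll() runs with interrupts enabled, and when nothing is left over and no RPS IPIs are pending the function returns without touching the interrupt flag at all. A standalone sketch of that shape, with a mutex standing in for local_irq_disable()/local_irq_enable() and all names (item, poll_one, rx_action) made up:

/* Sketch: take ownership of a shared queue once, poll with the lock dropped,
 * park unfinished entries on a local "repoll" list, then splice back and
 * re-arm only if work remains.  Build with -pthread.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct item {
        struct item *next;
        int work_left;
};

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *queue;              /* shared, like sd->poll_list */

static void append(struct item **list, struct item *it)
{
        struct item **p = list;

        it->next = NULL;
        while (*p)
                p = &(*p)->next;
        *p = it;
}

static bool poll_one(struct item *it, int quota)
{
        if (it->work_left > quota)
                it->work_left -= quota;
        else
                it->work_left = 0;
        return it->work_left > 0;       /* true: used the full quota, poll again */
}

static void rx_action(void)
{
        struct item *list, *repoll = NULL, *it;

        /* One short critical section takes the whole queue, mirroring the
         * single local_irq_disable()/list_splice_init() pair.
         */
        pthread_mutex_lock(&queue_lock);
        list = queue;
        queue = NULL;
        pthread_mutex_unlock(&queue_lock);

        while ((it = list) != NULL) {
                list = it->next;
                if (poll_one(it, 64))           /* lock not held while polling */
                        append(&repoll, it);
        }

        /* Splice leftovers back and "re-raise the softirq" only if needed. */
        pthread_mutex_lock(&queue_lock);
        while ((it = repoll) != NULL) {
                repoll = it->next;
                append(&queue, it);
        }
        if (queue)
                printf("re-arm: more work pending\n");
        pthread_mutex_unlock(&queue_lock);
}

int main(void)
{
        struct item a = { .work_left = 100 };
        struct item b = { .work_left = 10 };

        append(&queue, &a);
        append(&queue, &b);
        rx_action();            /* leaves 'a' queued, prints the re-arm line */
        rx_action();            /* finishes 'a', nothing left afterwards */
        return 0;
}

Keeping both critical sections short and bounded is the point of the reshuffle: the polling itself, which dominates the cost, no longer runs with interrupts off.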