An incoming packet must be either in the backlog queue or inside an
RCU read-side section. Otherwise, the final sequence of
flush_backlog() and synchronize_net() may miss packets that are
being processed without a device reference:
CPU 1                            CPU 2
                                 skb->dev: no reference
                                 process_backlog:__skb_dequeue
                                 process_backlog:local_irq_enable

on_each_cpu for
flush_backlog => IPI(hardirq): flush_backlog
  - packet not found in backlog

                                 CPU delayed ...
synchronize_net
  - no ongoing RCU read-side sections

netdev_run_todo, rcu_barrier:
  - no ongoing callbacks

                                 __netif_receive_skb_core:rcu_read_lock
                                   - too late
free dev
                                 process packet for freed dev
Fixes: 6e583ce5242f ("net: eliminate refcounting in backlog queue")
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
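The invariant can be demonstrated outside the kernel. Below is a
minimal userspace analogue using liburcu (https://liburcu.org); the
struct, the variable names and the scenario are illustrative
assumptions, not taken from the patch. The point it shows is the one
the diagram relies on: synchronize_rcu() only waits for read-side
sections that are already open, so the reader must enter its section
before it picks up the pointer.

/* demo.c - userspace analogue of the invariant, using liburcu.
 * All names here are illustrative, not kernel code.
 * Build (assuming liburcu is installed):
 *   gcc demo.c -o demo -lurcu -pthread
 */
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <urcu.h>

struct dev { int ifindex; };

static struct dev *gdev;	/* stands in for skb->dev */

static void *reader(void *arg)
{
	rcu_register_thread();

	/* Correct: enter the read-side section BEFORE loading the
	 * pointer; synchronize_rcu() in main() cannot return until
	 * this section ends, so "d" cannot be freed under us.
	 * Loading the pointer first and locking afterwards would
	 * reopen the "too late" window from the diagram above. */
	rcu_read_lock();
	struct dev *d = rcu_dereference(gdev);
	if (d)
		printf("processing packet for ifindex %d\n", d->ifindex);
	rcu_read_unlock();

	rcu_unregister_thread();
	return NULL;
}

int main(void)
{
	pthread_t t;
	struct dev *old;

	gdev = malloc(sizeof(*gdev));
	gdev->ifindex = 1;
	pthread_create(&t, NULL, reader, NULL);

	/* "Unregister" path: unpublish, wait for readers, then free. */
	old = gdev;
	rcu_assign_pointer(gdev, NULL);
	synchronize_rcu();	/* waits only for already-open sections */
	free(old);

	pthread_join(t, NULL);
	return 0;
}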
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ ... @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 
 	pt_prev = NULL;
 
-	rcu_read_lock();
-
 another_round:
 	skb->skb_iif = skb->dev->ifindex;
@@ ... @@ another_round:
 	    skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
 		skb = skb_vlan_untag(skb);
 		if (unlikely(!skb))
-			goto unlock;
+			goto out;
 	}
 
 #ifdef CONFIG_NET_CLS_ACT
@@ ... @@ skip_taps:
 	if (static_key_false(&ingress_needed)) {
 		skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
 		if (!skb)
-			goto unlock;
+			goto out;
 
 		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
-			goto unlock;
+			goto out;
 	}
 #endif
 #ifdef CONFIG_NET_CLS_ACT
@@ ... @@ ncls:
 		if (vlan_do_receive(&skb))
 			goto another_round;
 		else if (unlikely(!skb))
-			goto unlock;
+			goto out;
 	}
 
 	rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ ... @@ ncls:
 		switch (rx_handler(&skb)) {
 		case RX_HANDLER_CONSUMED:
 			ret = NET_RX_SUCCESS;
-			goto unlock;
+			goto out;
 		case RX_HANDLER_ANOTHER:
 			goto another_round;
 		case RX_HANDLER_EXACT:
@@ ... @@
-unlock:
-	rcu_read_unlock();
 out:
 	return ret;
 }
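Taken together, the hunks above strip all RCU handling out of
__netif_receive_skb_core(): the lock/unlock pair is gone, every
former goto unlock lands on out, and the function now requires its
caller to hold the read lock. Roughly, its resulting shape (an
abridged sketch, not the full function):

/* Caller must hold rcu_read_lock(): skb->dev, the rx_handler and
 * the protocol handlers looked up here are only RCU-protected. */
static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
{
	int ret = NET_RX_DROP;

	/* ... taps, ingress, VLAN and rx_handler processing;
	 * every error path now does "goto out" ... */
out:
	return ret;
}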
@@ ... @@ static int __netif_receive_skb(struct sk_buff *skb)
 
 static int netif_receive_skb_internal(struct sk_buff *skb)
 {
+	int ret;
+
 	net_timestamp_check(netdev_tstamp_prequeue, skb);
 
 	if (skb_defer_rx_timestamp(skb))
 		return NET_RX_SUCCESS;
 
+	rcu_read_lock();
+
 #ifdef CONFIG_RPS
 	if (static_key_false(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
-		int cpu, ret;
-
-		rcu_read_lock();
-
-		cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
 			return ret;
 		}
-		rcu_read_unlock();
 	}
 #endif
-	return __netif_receive_skb(skb);
+	ret = __netif_receive_skb(skb);
+	rcu_read_unlock();
+	return ret;
 }
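Assembled from the two hunks above, netif_receive_skb_internal() now
reads as follows: one rcu_read_lock() taken up front covers both the
RPS classification and the direct call into __netif_receive_skb(),
and each return path drops the lock exactly once.

static int netif_receive_skb_internal(struct sk_buff *skb)
{
	int ret;

	net_timestamp_check(netdev_tstamp_prequeue, skb);

	if (skb_defer_rx_timestamp(skb))
		return NET_RX_SUCCESS;

	rcu_read_lock();

#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu = get_rps_cpu(skb->dev, skb, &rflow);

		if (cpu >= 0) {
			/* requeued under the lock: the packet is never
			 * outside both the backlog queue and an RCU
			 * read-side section */
			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
			rcu_read_unlock();
			return ret;
		}
	}
#endif
	ret = __netif_receive_skb(skb);
	rcu_read_unlock();
	return ret;
}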
@@ ... @@ static int process_backlog(struct napi_struct *napi, int quota)
 		struct sk_buff *skb;
 
 		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			rcu_read_lock();
 			local_irq_enable();
 			__netif_receive_skb(skb);
+			rcu_read_unlock();
 			local_irq_disable();
 			input_queue_head_incr(sd);
 			if (++work >= quota) {
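The ordering in this last hunk is the crux of the fix: rcu_read_lock()
is taken while interrupts are still disabled, before the dequeued
packet becomes invisible to the flush_backlog() IPI. Since
on_each_cpu() waits for the IPI handler to complete, flush_backlog()
either still finds the packet in the backlog queue or is delayed
until after the read-side section has begun, and the subsequent
synchronize_net() then waits for that section to end. The resulting
loop (sketch assembled from the hunk above, tail abridged):

	while ((skb = __skb_dequeue(&sd->process_queue))) {
		rcu_read_lock();	/* opened before IRQs come back on */
		local_irq_enable();
		__netif_receive_skb(skb);
		rcu_read_unlock();	/* only now may synchronize_net() return */
		local_irq_disable();
		input_queue_head_incr(sd);
		...
	}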