From a643b5d41c95164c14b111d19c05d6913bedb80b Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 25 Apr 2016 14:44:49 -0700 Subject: [PATCH] tcp: Handle eor bit when coalescing skb This patch: 1. Prevent next_skb from coalescing to the prev_skb if TCP_SKB_CB(prev_skb)->eor is set 2. Update the TCP_SKB_CB(prev_skb)->eor if coalescing is allowed Packetdrill script for testing: ~~~~~~ +0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10` +0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1` +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 0.100 < S 0:0(0) win 32792 0.100 > S. 0:0(0) ack 1 0.200 < . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 0.200 sendto(4, ..., 730, MSG_EOR, ..., ...) = 730 0.200 sendto(4, ..., 730, MSG_EOR, ..., ...) = 730 0.200 write(4, ..., 11680) = 11680 0.200 > P. 1:731(730) ack 1 0.200 > P. 731:1461(730) ack 1 0.200 > . 1461:8761(7300) ack 1 0.200 > P. 8761:13141(4380) ack 1 0.300 < . 1:1(0) ack 1 win 257 0.300 > P. 1:731(730) ack 1 0.300 > P. 731:1461(730) ack 1 0.400 < . 1:1(0) ack 13141 win 257 0.400 close(4) = 0 0.400 > F. 13141:13141(0) ack 1 0.500 < F. 1:1(0) ack 13142 win 257 0.500 > . 13142:13142(0) ack 2 Signed-off-by: Martin KaFai Lau Cc: Eric Dumazet Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Cc: Yuchung Cheng Acked-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 4 ++++ net/ipv4/tcp_output.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 70c370b93762..1fb19c91e091 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1303,6 +1303,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, } TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + TCP_SKB_CB(prev)->eor = TCP_SKB_CB(skb)->eor; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) TCP_SKB_CB(prev)->end_seq++; @@ -1368,6 +1369,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; + if (!tcp_skb_can_collapse_to(prev)) + goto fallback; + in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b3a31b4df57c..77c6cbb897e6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2499,6 +2499,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; + TCP_SKB_CB(skb)->eor = TCP_SKB_CB(next_skb)->eor; /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); @@ -2550,6 +2551,9 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, if (!tcp_can_collapse(sk, skb)) break; + if (!tcp_skb_can_collapse_to(to)) + break; + space -= skb->len; if (first) { -- 2.20.1