tcp: improve RTT from SACK for CC
author Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
Thu, 30 Apr 2015 23:10:58 +0000 (01:10 +0200)
committer David S. Miller <davem@davemloft.net>
Mon, 4 May 2015 03:18:01 +0000 (23:18 -0400)
tcp_sacktag_one() always picks the earliest sequence SACKed for RTT.
This might not make sense for congestion control in cases where:

  1. ACKs are lost, i.e. a SACK following a lost SACK covers both
     new and old segments at the receiver.
  2. The receiver disregards the RFC 5681 recommendation to immediately
     ACK out-of-order segments.

Give congestion control an RTT for the latest segment SACKed, which is the
most accurate RTT estimate, but preserve the conservative (earliest-SACKed)
RTT for the RTO.

Removes the call to skb_mstamp_get() in tcp_sacktag_one().
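
To illustrate the idea, here is a minimal user-space sketch (not kernel code;
the struct, function names and plain microsecond timestamps stand in for the
skb_mstamp machinery): while tagging newly SACKed, never-retransmitted
segments, remember the transmit time of both the earliest and the latest one.
At ACK time the earliest yields the conservative sample kept for the RTO, the
latest yields the freshest sample handed to congestion control.

/*
 * Illustrative sketch only: track the xmit time of the earliest and the
 * latest newly SACKed, never-retransmitted segment, then derive two RTT
 * samples from them.
 */
#include <stdio.h>
#include <stdint.h>

struct sack_rtt_state {
	uint64_t first_sackt_us;	/* xmit time of earliest newly SACKed seg, 0 = unset */
	uint64_t last_sackt_us;		/* xmit time of latest newly SACKed seg */
};

/* Called once per newly SACKed, never-retransmitted segment. */
static void sack_record_xmit_time(struct sack_rtt_state *s, uint64_t xmit_us)
{
	if (s->first_sackt_us == 0)
		s->first_sackt_us = xmit_us;	/* keep only the earliest */
	s->last_sackt_us = xmit_us;		/* always overwrite with the latest */
}

int main(void)
{
	struct sack_rtt_state s = { 0 };
	uint64_t now_us = 1000000;

	/* Segments sent at 900000, 940000 and 980000 us all SACKed by one ACK. */
	sack_record_xmit_time(&s, 900000);
	sack_record_xmit_time(&s, 940000);
	sack_record_xmit_time(&s, 980000);

	/* Conservative sample for the RTO vs. fresh sample for congestion control. */
	printf("rto RTT sample: %llu us\n",
	       (unsigned long long)(now_us - s.first_sackt_us));	/* 100000 */
	printf("cc  RTT sample: %llu us\n",
	       (unsigned long long)(now_us - s.last_sackt_us));	/* 20000 */
	return 0;
}
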

Cc: Yuchung Cheng <ycheng@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kenneth Klette Jonassen <kennetkl@ifi.uio.no>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/ipv4/tcp_input.c

index 9902cf1..f563d2a 100644
@@ -1130,7 +1130,12 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 struct tcp_sacktag_state {
        int     reord;
        int     fack_count;
-       long    rtt_us; /* RTT measured by SACKing never-retransmitted data */
+       /* Timestamps for earliest and latest never-retransmitted segment
+        * that was SACKed. RTO needs the earliest RTT to stay conservative,
+        * but congestion control should still get an accurate delay signal.
+        */
+       struct skb_mstamp first_sackt;
+       struct skb_mstamp last_sackt;
        int     flag;
 };
 
@@ -1233,14 +1238,9 @@ static u8 tcp_sacktag_one(struct sock *sk,
                                                           state->reord);
                                if (!after(end_seq, tp->high_seq))
                                        state->flag |= FLAG_ORIG_SACK_ACKED;
-                               /* Pick the earliest sequence sacked for RTT */
-                               if (state->rtt_us < 0) {
-                                       struct skb_mstamp now;
-
-                                       skb_mstamp_get(&now);
-                                       state->rtt_us = skb_mstamp_us_delta(&now,
-                                                               xmit_time);
-                               }
+                               if (state->first_sackt.v64 == 0)
+                                       state->first_sackt = *xmit_time;
+                               state->last_sackt = *xmit_time;
                        }
 
                        if (sacked & TCPCB_LOST) {
@@ -3049,7 +3049,8 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
  * arrived at the other end.
  */
 static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                              u32 prior_snd_una, long sack_rtt_us)
+                              u32 prior_snd_una,
+                              struct tcp_sacktag_state *sack)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct skb_mstamp first_ackt, last_ackt, now;
@@ -3057,8 +3058,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
        u32 prior_sacked = tp->sacked_out;
        u32 reord = tp->packets_out;
        bool fully_acked = true;
-       long ca_seq_rtt_us = -1L;
+       long sack_rtt_us = -1L;
        long seq_rtt_us = -1L;
+       long ca_rtt_us = -1L;
        struct sk_buff *skb;
        u32 pkts_acked = 0;
        bool rtt_update;
@@ -3147,7 +3149,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
        skb_mstamp_get(&now);
        if (likely(first_ackt.v64)) {
                seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
-               ca_seq_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+               ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+       }
+       if (sack->first_sackt.v64) {
+               sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
+               ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
        }
 
        rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us);
@@ -3178,10 +3184,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
 
                tp->fackets_out -= min(pkts_acked, tp->fackets_out);
 
-               if (ca_ops->pkts_acked) {
-                       long rtt_us = min_t(ulong, ca_seq_rtt_us, sack_rtt_us);
-                       ca_ops->pkts_acked(sk, pkts_acked, rtt_us);
-               }
+               if (ca_ops->pkts_acked)
+                       ca_ops->pkts_acked(sk, pkts_acked, ca_rtt_us);
 
        } else if (skb && rtt_update && sack_rtt_us >= 0 &&
                   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
@@ -3466,7 +3470,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        const int prior_unsacked = tp->packets_out - tp->sacked_out;
        int acked = 0; /* Number of packets newly acked */
 
-       sack_state.rtt_us = -1L;
+       sack_state.first_sackt.v64 = 0;
 
        /* We very likely will need to access write queue head. */
        prefetchw(sk->sk_write_queue.next);
@@ -3555,7 +3559,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        /* See if we can take anything off of the retransmit queue. */
        acked = tp->packets_out;
        flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
-                                   sack_state.rtt_us);
+                                   &sack_state);
        acked -= tp->packets_out;
 
        /* Advance cwnd if state allows */
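
For completeness, a hedged user-space sketch of how tcp_clean_rtx_queue()
ends up choosing its samples after this patch (plain microsecond integers
stand in for struct skb_mstamp; names are illustrative): a cumulative ACK
provides a baseline ca_rtt_us from the last newly acked segment, and a SACK
sample, when present, overrides it with the even fresher last_sackt
measurement, while seq_rtt_us/sack_rtt_us still feed the conservative RTT
estimator used for the RTO.

/*
 * Sketch of the post-patch sample selection: the SACK-based sample, when
 * available, replaces the cumulative-ACK sample as the delay signal passed
 * to congestion control.  -1 marks "no sample"; 0 marks "no timestamp".
 */
#include <stdio.h>
#include <stdint.h>

static long pick_rtts(uint64_t now, uint64_t first_ackt, uint64_t last_ackt,
		      uint64_t first_sackt, uint64_t last_sackt,
		      long *seq_rtt_us, long *sack_rtt_us)
{
	long ca_rtt_us = -1L;

	*seq_rtt_us = -1L;
	*sack_rtt_us = -1L;

	if (first_ackt) {			/* something was cumulatively ACKed */
		*seq_rtt_us = (long)(now - first_ackt);
		ca_rtt_us = (long)(now - last_ackt);
	}
	if (first_sackt) {			/* something new was SACKed */
		*sack_rtt_us = (long)(now - first_sackt);
		ca_rtt_us = (long)(now - last_sackt);	/* freshest sample wins */
	}
	return ca_rtt_us;			/* handed to ca_ops->pkts_acked() */
}

int main(void)
{
	long seq_rtt_us, sack_rtt_us;
	long ca_rtt_us = pick_rtts(1000000, 880000, 920000, 900000, 980000,
				   &seq_rtt_us, &sack_rtt_us);

	printf("seq_rtt_us=%ld sack_rtt_us=%ld ca_rtt_us=%ld\n",
	       seq_rtt_us, sack_rtt_us, ca_rtt_us);	/* 120000 100000 20000 */
	return 0;
}
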