tcp-tso: do not split TSO packets at retransmit time

[cascardo/linux.git] / net / ipv4 / tcp_input.c
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c

index 3b2c8e9..dcad8f9 100644 (file)
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -80,9 +80,7 @@ int sysctl_tcp_timestamps __read_mostly = 1;
  int sysctl_tcp_window_scaling __read_mostly = 1;
  int sysctl_tcp_sack __read_mostly = 1;
  int sysctl_tcp_fack __read_mostly = 1;
-int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
  int sysctl_tcp_max_reordering __read_mostly = 300;
-EXPORT_SYMBOL(sysctl_tcp_reordering);
  int sysctl_tcp_dsack __read_mostly = 1;
  int sysctl_tcp_app_win __read_mostly = 31;
  int sysctl_tcp_adv_win_scale __read_mostly = 1;
@@ -126,6 +124,10 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
  #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
  #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
  
+#define REXMIT_NONE    0 /* no loss recovery to do */
+#define REXMIT_LOST    1 /* retransmit packets marked lost */
+#define REXMIT_NEW     2 /* FRTO-style transmit of unsent/new packets */
+
  /* Adapt the MSS value used to make delayed ack decision to the
   * real world.
   */
@@ -1210,6 +1212,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
                 sacked |= TCPCB_SACKED_ACKED;
                 state->flag |= FLAG_DATA_SACKED;
                 tp->sacked_out += pcount;
+               tp->delivered += pcount;  /* Out-of-order packets delivered */
  
                 fack_count += pcount;
  
@@ -1306,6 +1309,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
         if (skb == tcp_highest_sack(sk))
                 tcp_advance_highest_sack(sk, skb);
  
+       tcp_skb_collapse_tstamp(prev, skb);
         tcp_unlink_write_queue(skb, sk);
         sk_wmem_free_skb(sk, skb);
  
@@ -1821,8 +1825,12 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
  static void tcp_add_reno_sack(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
+       u32 prior_sacked = tp->sacked_out;
+
         tp->sacked_out++;
         tcp_check_reno_reordering(sk, 0);
+       if (tp->sacked_out > prior_sacked)
+               tp->delivered++; /* Some out-of-order packet is delivered */
         tcp_verify_left_out(tp);
  }
  
@@ -1834,6 +1842,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, int acked)
  
         if (acked > 0) {
                 /* One ACK acked hole. The rest eat duplicate ACKs. */
+               tp->delivered += max_t(int, acked - tp->sacked_out, 1);
                 if (acked - 1 >= tp->sacked_out)
                         tp->sacked_out = 0;
                 else
@@ -1873,6 +1882,7 @@ void tcp_enter_loss(struct sock *sk)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
         struct sk_buff *skb;
         bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
         bool is_reneg;                  /* is receiver reneging on SACKs? */
@@ -1923,9 +1933,9 @@ void tcp_enter_loss(struct sock *sk)
          * suggests that the degree of reordering is over-estimated.
          */
         if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
-           tp->sacked_out >= sysctl_tcp_reordering)
+           tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
                 tp->reordering = min_t(unsigned int, tp->reordering,
-                                      sysctl_tcp_reordering);
+                                      net->ipv4.sysctl_tcp_reordering);
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         tcp_ecn_queue_cwr(tp);
@@ -2109,6 +2119,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         __u32 packets_out;
+       int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
  
         /* Trick#1: The loss is proven. */
         if (tp->lost_out)
@@ -2123,7 +2134,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
          */
         packets_out = tp->packets_out;
         if (packets_out <= tp->reordering &&
-           tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
+           tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) &&
             !tcp_may_send_now(sk)) {
                 /* We have nothing to send. This connection is limited
                  * either by receiver window or by application.
@@ -2242,16 +2253,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
         }
  }
  
-/* CWND moderation, preventing bursts due to too big ACKs
- * in dubious situations.
- */
-static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
-{
-       tp->snd_cwnd = min(tp->snd_cwnd,
-                          tcp_packets_in_flight(tp) + tcp_max_burst(tp));
-       tp->snd_cwnd_stamp = tcp_time_stamp;
-}
-
  static bool tcp_tsopt_ecr_before(const struct tcp_sock *tp, u32 when)
  {
         return tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
@@ -2400,7 +2401,6 @@ static bool tcp_try_undo_recovery(struct sock *sk)
                 /* Hold old state until something *above* high_seq
                  * is ACKed. For Reno it is MUST to prevent false
                  * fast retransmits (RFC2582). SACK TCP is safe. */
-               tcp_moderate_cwnd(tp);
                 if (!tcp_any_retrans_done(sk))
                         tp->retrans_stamp = 0;
                 return true;
@@ -2467,14 +2467,12 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
         tcp_ecn_queue_cwr(tp);
  }
  
-static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
-                              int fast_rexmit, int flag)
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+                              int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         int sndcnt = 0;
         int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp);
-       int newly_acked_sacked = prior_unsacked -
-                                (tp->packets_out - tp->sacked_out);
  
         if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd))
                 return;
@@ -2492,7 +2490,8 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
         } else {
                 sndcnt = min(delta, newly_acked_sacked);
         }
-       sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
+       /* Force a fast retransmit upon entering fast recovery */
+       sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
         tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
  }
  
@@ -2537,7 +2536,7 @@ static void tcp_try_keep_open(struct sock *sk)
         }
  }
  
-static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
+static void tcp_try_to_open(struct sock *sk, int flag)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -2551,8 +2550,6 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
  
         if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                 tcp_try_keep_open(sk);
-       } else {
-               tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
         }
  }
  
@@ -2662,7 +2659,8 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
  /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are
   * recovered or spurious. Otherwise retransmits more on partial ACKs.
   */
-static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
+static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
+                            int *rexmit)
  {
         struct tcp_sock *tp = tcp_sk(sk);
         bool recovered = !before(tp->snd_una, tp->high_seq);
@@ -2684,10 +2682,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
                                 tp->frto = 0; /* Step 3.a. loss was real */
                 } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) {
                         tp->high_seq = tp->snd_nxt;
-                       __tcp_push_pending_frames(sk, tcp_current_mss(sk),
-                                                 TCP_NAGLE_OFF);
-                       if (after(tp->snd_nxt, tp->high_seq))
-                               return; /* Step 2.b */
+                       /* Step 2.b. Try send new data (but deferred until cwnd
+                        * is updated in tcp_ack()). Otherwise fall back to
+                        * the conventional recovery.
+                        */
+                       if (tcp_send_head(sk) &&
+                           after(tcp_wnd_end(tp), tp->snd_nxt)) {
+                               *rexmit = REXMIT_NEW;
+                               return;
+                       }
                         tp->frto = 0;
                 }
         }
@@ -2706,12 +2709,11 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
                 else if (flag & FLAG_SND_UNA_ADVANCED)
                         tcp_reset_reno_sack(tp);
         }
-       tcp_xmit_retransmit_queue(sk);
+       *rexmit = REXMIT_LOST;
  }
  
  /* Undo during fast recovery after partial ACK. */
-static bool tcp_try_undo_partial(struct sock *sk, const int acked,
-                                const int prior_unsacked, int flag)
+static bool tcp_try_undo_partial(struct sock *sk, const int acked)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -2726,10 +2728,8 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
                  * can undo. Otherwise we clock out new packets but do not
                  * mark more packets lost or retransmit more.
                  */
-               if (tp->retrans_out) {
-                       tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
+               if (tp->retrans_out)
                         return true;
-               }
  
                 if (!tcp_any_retrans_done(sk))
                         tp->retrans_stamp = 0;
@@ -2748,21 +2748,21 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
   * taking into account both packets sitting in receiver's buffer and
   * packets lost by network.
   *
- * Besides that it does CWND reduction, when packet loss is detected
- * and changes state of machine.
+ * Besides that it updates the congestion state when packet loss or ECN
+ * is detected. But it does not reduce the cwnd, it is done by the
+ * congestion control later.
   *
   * It does _not_ decide what to send, it is made in function
   * tcp_xmit_retransmit_queue().
   */
  static void tcp_fastretrans_alert(struct sock *sk, const int acked,
-                                 const int prior_unsacked,
-                                 bool is_dupack, int flag)
+                                 bool is_dupack, int *ack_flag, int *rexmit)
  {
         struct inet_connection_sock *icsk = inet_csk(sk);
         struct tcp_sock *tp = tcp_sk(sk);
+       int fast_rexmit = 0, flag = *ack_flag;
         bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
                                     (tcp_fackets_out(tp) > tp->reordering));
-       int fast_rexmit = 0;
  
         if (WARN_ON(!tp->packets_out && tp->sacked_out))
                 tp->sacked_out = 0;
@@ -2809,8 +2809,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
  
         /* Use RACK to detect loss */
         if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS &&
-           tcp_rack_mark_lost(sk))
+           tcp_rack_mark_lost(sk)) {
                 flag |= FLAG_LOST_RETRANS;
+               *ack_flag |= FLAG_LOST_RETRANS;
+       }
  
         /* E. Process state. */
         switch (icsk->icsk_ca_state) {
@@ -2819,7 +2821,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                         if (tcp_is_reno(tp) && is_dupack)
                                 tcp_add_reno_sack(sk);
                 } else {
-                       if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
+                       if (tcp_try_undo_partial(sk, acked))
                                 return;
                         /* Partial ACK arrived. Force fast retransmit. */
                         do_lost = tcp_is_reno(tp) ||
@@ -2831,7 +2833,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                 }
                 break;
         case TCP_CA_Loss:
-               tcp_process_loss(sk, flag, is_dupack);
+               tcp_process_loss(sk, flag, is_dupack, rexmit);
                 if (icsk->icsk_ca_state != TCP_CA_Open &&
                     !(flag & FLAG_LOST_RETRANS))
                         return;
@@ -2848,7 +2850,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                         tcp_try_undo_dsack(sk);
  
                 if (!tcp_time_to_recover(sk, flag)) {
-                       tcp_try_to_open(sk, flag, prior_unsacked);
+                       tcp_try_to_open(sk, flag);
                         return;
                 }
  
@@ -2870,8 +2872,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
  
         if (do_lost)
                 tcp_update_scoreboard(sk, fast_rexmit);
-       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
-       tcp_xmit_retransmit_queue(sk);
+       *rexmit = REXMIT_LOST;
  }
  
  /* Kathleen Nichols' algorithm for tracking the minimum value of
@@ -3082,12 +3083,13 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
         const struct skb_shared_info *shinfo;
  
         /* Avoid cache line misses to get skb_shinfo() and shinfo->tx_flags */
-       if (likely(!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)))
+       if (likely(!TCP_SKB_CB(skb)->txstamp_ack))
                 return;
  
         shinfo = skb_shinfo(skb);
         if ((shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
-           between(shinfo->tskey, prior_snd_una, tcp_sk(sk)->snd_una - 1))
+           !before(shinfo->tskey, prior_snd_una) &&
+           before(shinfo->tskey, tcp_sk(sk)->snd_una))
                 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
  }
  
@@ -3096,7 +3098,7 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
   * arrived at the other end.
   */
  static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
-                              u32 prior_snd_una,
+                              u32 prior_snd_una, int *acked,
                                struct tcp_sacktag_state *sack)
  {
         const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -3154,10 +3156,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                                 flag |= FLAG_ORIG_SACK_ACKED;
                 }
  
-               if (sacked & TCPCB_SACKED_ACKED)
+               if (sacked & TCPCB_SACKED_ACKED) {
                         tp->sacked_out -= acked_pcount;
-               else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb))
-                       tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+               } else if (tcp_is_sack(tp)) {
+                       tp->delivered += acked_pcount;
+                       if (!tcp_skb_spurious_retrans(tp, skb))
+                               tcp_rack_advance(tp, &skb->skb_mstamp, sacked);
+               }
                 if (sacked & TCPCB_LOST)
                         tp->lost_out -= acked_pcount;
  
@@ -3266,6 +3271,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                 }
         }
  #endif
+       *acked = pkts_acked;
         return flag;
  }
  
@@ -3299,21 +3305,36 @@ static inline bool tcp_ack_is_dubious(const struct sock *sk, const int flag)
  /* Decide wheather to run the increase function of congestion control. */
  static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
  {
-       if (tcp_in_cwnd_reduction(sk))
-               return false;
-
         /* If reordering is high then always grow cwnd whenever data is
          * delivered regardless of its ordering. Otherwise stay conservative
          * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/
          * new SACK or ECE mark may first advance cwnd here and later reduce
          * cwnd in tcp_fastretrans_alert() based on more states.
          */
-       if (tcp_sk(sk)->reordering > sysctl_tcp_reordering)
+       if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
                 return flag & FLAG_FORWARD_PROGRESS;
  
         return flag & FLAG_DATA_ACKED;
  }
  
+/* The "ultimate" congestion control function that aims to replace the rigid
+ * cwnd increase and decrease control (tcp_cong_avoid, tcp_*cwnd_reduction).
+ * Called toward the end of ACK processing with acked_sacked, the count of
+ * newly delivered packets. All (re)transmissions happen only afterwards.
+ */
+static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
+                            int flag)
+{
+       if (tcp_in_cwnd_reduction(sk)) {
+               /* Reduce cwnd if state mandates */
+               tcp_cwnd_reduction(sk, acked_sacked, flag);
+       } else if (tcp_may_raise_cwnd(sk, flag)) {
+               /* Advance cwnd if state allows */
+               tcp_cong_avoid(sk, ack, acked_sacked);
+       }
+       tcp_update_pacing_rate(sk);
+}
+
  /* Check that window update is acceptable.
   * The function assumes that snd_una<=ack<=snd_next.
   */
@@ -3509,6 +3530,27 @@ static inline void tcp_in_ack_event(struct sock *sk, u32 flags)
                 icsk->icsk_ca_ops->in_ack_event(sk, flags);
  }
  
+/* Called after congestion control has updated cwnd. REXMIT_NEW (FRTO) first
+ * pushes unsent data and is done if snd_nxt moved past high_seq; otherwise
+ * it clears frto and, like REXMIT_LOST, retransmits from the queue.
+ */
+static void tcp_xmit_recovery(struct sock *sk, int rexmit)
+{
+       struct tcp_sock *tp = tcp_sk(sk);
+
+       if (rexmit == REXMIT_NONE)
+               return;
+
+       if (unlikely(rexmit == REXMIT_NEW)) {
+               __tcp_push_pending_frames(sk, tcp_current_mss(sk),
+                                         TCP_NAGLE_OFF);
+               if (after(tp->snd_nxt, tp->high_seq))
+                       return;
+               tp->frto = 0;
+       }
+       tcp_xmit_retransmit_queue(sk);
+}
+
  /* This routine deals with incoming acks, but not outgoing ones. */
  static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  {
@@ -3521,8 +3563,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
         bool is_dupack = false;
         u32 prior_fackets;
         int prior_packets = tp->packets_out;
-       const int prior_unsacked = tp->packets_out - tp->sacked_out;
+       u32 prior_delivered = tp->delivered;
         int acked = 0; /* Number of packets newly acked */
+       int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
  
         sack_state.first_sackt.v64 = 0;
  
@@ -3611,23 +3654,16 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                 goto no_queue;
  
         /* See if we can take anything off of the retransmit queue. */
-       acked = tp->packets_out;
-       flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una,
+       flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
                                     &sack_state);
-       acked -= tp->packets_out;
  
         if (tcp_ack_is_dubious(sk, flag)) {
                 is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
-               tcp_fastretrans_alert(sk, acked, prior_unsacked,
-                                     is_dupack, flag);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
         }
         if (tp->tlp_high_seq)
                 tcp_process_tlp_ack(sk, ack, flag);
  
-       /* Advance cwnd if state allows */
-       if (tcp_may_raise_cwnd(sk, flag))
-               tcp_cong_avoid(sk, ack, acked);
-
         if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
                 struct dst_entry *dst = __sk_dst_get(sk);
                 if (dst)
@@ -3636,14 +3672,14 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
  
         if (icsk->icsk_pending == ICSK_TIME_RETRANS)
                 tcp_schedule_loss_probe(sk);
-       tcp_update_pacing_rate(sk);
+       tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+       tcp_xmit_recovery(sk, rexmit);
         return 1;
  
  no_queue:
         /* If data was DSACKed, see if we can undo a cwnd reduction. */
         if (flag & FLAG_DSACKING_ACK)
-               tcp_fastretrans_alert(sk, acked, prior_unsacked,
-                                     is_dupack, flag);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
         /* If this ack opens up a zero window, clear backoff.  It was
          * being used to time the probes, and is probably far higher than
          * it needs to be for normal retransmission.
@@ -3666,8 +3702,8 @@ old_ack:
         if (TCP_SKB_CB(skb)->sacked) {
                 flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
                                                 &sack_state);
-               tcp_fastretrans_alert(sk, acked, prior_unsacked,
-                                     is_dupack, flag);
+               tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
+               tcp_xmit_recovery(sk, rexmit);
         }
  
         SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
@@ -3998,7 +4034,7 @@ void tcp_reset(struct sock *sk)
   *
   *     If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
   */
-static void tcp_fin(struct sock *sk)
+void tcp_fin(struct sock *sk)
  {
         struct tcp_sock *tp = tcp_sk(sk);
  
@@ -4273,6 +4309,12 @@ static bool tcp_try_coalesce(struct sock *sk,
         return true;
  }
  
+static void tcp_drop(struct sock *sk, struct sk_buff *skb)
+{
+       sk_drops_add(sk, skb);  /* NOTE(review): presumably counts the drop on sk */
+       __kfree_skb(skb);       /* free the skb, as the replaced call sites did */
+}
+
  /* This one checks to see if we can put data from the
   * out_of_order queue into the receive_queue.
   */
@@ -4297,7 +4339,7 @@ static void tcp_ofo_queue(struct sock *sk)
                 __skb_unlink(skb, &tp->out_of_order_queue);
                 if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
                         SOCK_DEBUG(sk, "ofo packet was already received\n");
-                       __kfree_skb(skb);
+                       tcp_drop(sk, skb);
                         continue;
                 }
                 SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
@@ -4349,7 +4391,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
  
         if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFODROP);
-               __kfree_skb(skb);
+               tcp_drop(sk, skb);
                 return;
         }
  
@@ -4413,7 +4455,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                 if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
                         /* All the bits are present. Drop. */
                         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-                       __kfree_skb(skb);
+                       tcp_drop(sk, skb);
                         skb = NULL;
                         tcp_dsack_set(sk, seq, end_seq);
                         goto add_sack;
@@ -4452,7 +4494,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
                 tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
                                  TCP_SKB_CB(skb1)->end_seq);
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
-               __kfree_skb(skb1);
+               tcp_drop(sk, skb1);
         }
  
  add_sack:
@@ -4535,12 +4577,13 @@ err:
  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
  {
         struct tcp_sock *tp = tcp_sk(sk);
-       int eaten = -1;
         bool fragstolen = false;
+       int eaten = -1;
  
-       if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
-               goto drop;
-
+       if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
+               __kfree_skb(skb);
+               return;
+       }
         skb_dst_drop(skb);
         __skb_pull(skb, tcp_hdr(skb)->doff * 4);
  
@@ -4622,7 +4665,7 @@ out_of_window:
                 tcp_enter_quickack_mode(sk);
                 inet_csk_schedule_ack(sk);
  drop:
-               __kfree_skb(skb);
+               tcp_drop(sk, skb);
                 return;
         }
  
@@ -5199,7 +5242,7 @@ syn_challenge:
         return true;
  
  discard:
-       __kfree_skb(skb);
+       tcp_drop(sk, skb);
         return false;
  }
  
@@ -5417,7 +5460,7 @@ csum_error:
         TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
  
  discard:
-       __kfree_skb(skb);
+       tcp_drop(sk, skb);
  }
  EXPORT_SYMBOL(tcp_rcv_established);
  
@@ -5502,7 +5545,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
         if (data) { /* Retransmit unacked data in SYN */
                 tcp_for_write_queue_from(data, sk) {
                         if (data == tcp_send_head(sk) ||
-                           __tcp_retransmit_skb(sk, data))
+                           __tcp_retransmit_skb(sk, data, 1))
                                 break;
                 }
                 tcp_rearm_rto(sk);
@@ -5512,6 +5555,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
         tp->syn_data_acked = tp->syn_data;
         if (tp->syn_data_acked)
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
+
+       tcp_fastopen_add_skb(sk, synack);
+
         return false;
  }
  
@@ -5645,7 +5691,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                                   TCP_DELACK_MAX, TCP_RTO_MAX);
  
  discard:
-                       __kfree_skb(skb);
+                       tcp_drop(sk, skb);
                         return 0;
                 } else {
                         tcp_send_ack(sk);
@@ -5752,8 +5798,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
         int queued = 0;
         bool acceptable;
  
-       tp->rx_opt.saw_tstamp = 0;
-
         switch (sk->sk_state) {
         case TCP_CLOSE:
                 goto discard;
@@ -5794,6 +5838,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 goto discard;
  
         case TCP_SYN_SENT:
+               tp->rx_opt.saw_tstamp = 0;
                 queued = tcp_rcv_synsent_state_process(sk, skb, th);
                 if (queued >= 0)
                         return queued;
@@ -5805,6 +5850,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
                 return 0;
         }
  
+       tp->rx_opt.saw_tstamp = 0;
         req = tp->fastopen_rsk;
         if (req) {
                 WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
@@ -6006,7 +6052,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
  
         if (!queued) {
  discard:
-               __kfree_skb(skb);
+               tcp_drop(sk, skb);
         }
         return 0;
  }
@@ -6118,9 +6164,10 @@ static bool tcp_syn_flood_action(const struct sock *sk,
         struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
         const char *msg = "Dropping request";
         bool want_cookie = false;
+       struct net *net = sock_net(sk);
  
  #ifdef CONFIG_SYN_COOKIES
-       if (sysctl_tcp_syncookies) {
+       if (net->ipv4.sysctl_tcp_syncookies) {
                 msg = "Sending cookies";
                 want_cookie = true;
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
@@ -6129,7 +6176,7 @@ static bool tcp_syn_flood_action(const struct sock *sk,
                 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
  
         if (!queue->synflood_warned &&
-           sysctl_tcp_syncookies != 2 &&
+           net->ipv4.sysctl_tcp_syncookies != 2 &&
             xchg(&queue->synflood_warned, 1) == 0)
                 pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
                         proto, ntohs(tcp_hdr(skb)->dest), msg);
@@ -6162,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
         __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
         struct tcp_options_received tmp_opt;
         struct tcp_sock *tp = tcp_sk(sk);
+       struct net *net = sock_net(sk);
         struct sock *fastopen_sk = NULL;
         struct dst_entry *dst = NULL;
         struct request_sock *req;
@@ -6172,7 +6220,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
          * limitations, they conserve resources and peer is
          * evidently real one.
          */
-       if ((sysctl_tcp_syncookies == 2 ||
+       if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
              inet_csk_reqsk_queue_is_full(sk)) && !isn) {
                 want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name);
                 if (!want_cookie)
@@ -6238,7 +6286,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                         }
                 }
                 /* Kill the following clause, if you dislike this way. */
-               else if (!sysctl_tcp_syncookies &&
+               else if (!net->ipv4.sysctl_tcp_syncookies &&
                          (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
                           (sysctl_max_syn_backlog >> 2)) &&
                          !tcp_peer_is_proven(req, dst, false,
@@ -6281,7 +6329,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
         }
         if (fastopen_sk) {
                 af_ops->send_synack(fastopen_sk, dst, &fl, req,
-                                   &foc, false);
+                                   &foc, TCP_SYNACK_FASTOPEN);
                 /* Add the child socket directly into the accept queue */
                 inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
                 sk->sk_data_ready(sk);
@@ -6291,10 +6339,13 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
                 tcp_rsk(req)->tfo_listener = false;
                 if (!want_cookie)
                         inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-               af_ops->send_synack(sk, dst, &fl, req,
-                                   &foc, !want_cookie);
-               if (want_cookie)
-                       goto drop_and_free;
+               af_ops->send_synack(sk, dst, &fl, req, &foc,
+                                   !want_cookie ? TCP_SYNACK_NORMAL :
+                                                  TCP_SYNACK_COOKIE);
+               if (want_cookie) {
+                       reqsk_free(req);
+                       return 0;
+               }
         }
         reqsk_put(req);
         return 0;
@@ -6304,7 +6355,7 @@ drop_and_release:
  drop_and_free:
         reqsk_free(req);
  drop:
-       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+       tcp_listendrop(sk);
         return 0;
  }
  EXPORT_SYMBOL(tcp_conn_request);