ipv4: disable BH when changing ip local port range
[cascardo/linux.git] / net / ipv4 / tcp_ipv4.c
index 56f8c63..1c2648b 100644 (file)
@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq)
 
        if (seq != tcp_rsk(req)->snt_isn) {
                NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
-               reqsk_put(req);
        } else {
                /*
                 * Still in SYN_RECV, just remove it silently.
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq)
                 * created socket, and POSIX does not want network
                 * errors returned from accept().
                 */
-               NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
                inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+               NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
        }
+       reqsk_put(req);
 }
 EXPORT_SYMBOL(tcp_req_err);
 
@@ -803,7 +803,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
         */
        tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
                        tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
-                       tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+                       tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
                        tcp_time_stamp,
                        req->ts_recent,
                        0,
@@ -821,8 +821,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
 static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
                              struct flowi *fl,
                              struct request_sock *req,
-                             u16 queue_mapping,
-                             struct tcp_fastopen_cookie *foc)
+                             struct tcp_fastopen_cookie *foc,
+                                 bool attach_req)
 {
        const struct inet_request_sock *ireq = inet_rsk(req);
        struct flowi4 fl4;
@@ -833,12 +833,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
        if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
                return -1;
 
-       skb = tcp_make_synack(sk, dst, req, foc);
+       skb = tcp_make_synack(sk, dst, req, foc, attach_req);
 
        if (skb) {
                __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
 
-               skb_set_queue_mapping(skb, queue_mapping);
                err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
                                            ireq->ir_rmt_addr,
                                            ireq->opt);
@@ -1224,7 +1223,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
        .route_req      =       tcp_v4_route_req,
        .init_seq       =       tcp_v4_init_sequence,
        .send_synack    =       tcp_v4_send_synack,
-       .queue_hash_add =       inet_csk_reqsk_queue_hash_add,
 };
 
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -1249,7 +1247,9 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
  */
 struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
                                  struct request_sock *req,
-                                 struct dst_entry *dst)
+                                 struct dst_entry *dst,
+                                 struct request_sock *req_unhash,
+                                 bool *own_req)
 {
        struct inet_request_sock *ireq;
        struct inet_sock *newinet;
@@ -1325,7 +1325,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 
        if (__inet_inherit_port(sk, newsk) < 0)
                goto put_and_exit;
-       __inet_hash_nolisten(newsk, NULL);
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
 
        return newsk;
 
@@ -1343,34 +1343,11 @@ put_and_exit:
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
 
-static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
 {
+#ifdef CONFIG_SYN_COOKIES
        const struct tcphdr *th = tcp_hdr(skb);
-       const struct iphdr *iph = ip_hdr(skb);
-       struct request_sock *req;
-       struct sock *nsk;
-
-       req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
-       if (req) {
-               nsk = tcp_check_req(sk, skb, req, false);
-               if (!nsk || nsk == sk)
-                       reqsk_put(req);
-               return nsk;
-       }
 
-       nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
-                       th->source, iph->daddr, th->dest, inet_iif(skb));
-
-       if (nsk) {
-               if (nsk->sk_state != TCP_TIME_WAIT) {
-                       bh_lock_sock(nsk);
-                       return nsk;
-               }
-               inet_twsk_put(inet_twsk(nsk));
-               return NULL;
-       }
-
-#ifdef CONFIG_SYN_COOKIES
        if (!th->syn)
                sk = cookie_v4_check(sk, skb);
 #endif
@@ -1378,7 +1355,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
 }
 
 /* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
  *
  * We have a potential double-lock case here, so even when
  * doing backlog processing we use the BH locking scheme.
@@ -1409,10 +1386,10 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
                goto csum_err;
 
        if (sk->sk_state == TCP_LISTEN) {
-               struct sock *nsk = tcp_v4_hnd_req(sk, skb);
+               struct sock *nsk = tcp_v4_cookie_check(sk, skb);
+
                if (!nsk)
                        goto discard;
-
                if (nsk != sk) {
                        sock_rps_save_rxhash(nsk, skb);
                        sk_mark_napi_id(nsk, skb);
@@ -1595,6 +1572,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
        TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
        TCP_SKB_CB(skb)->sacked  = 0;
 
+lookup:
        sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
        if (!sk)
                goto no_tcp_socket;
@@ -1603,6 +1581,33 @@ process:
        if (sk->sk_state == TCP_TIME_WAIT)
                goto do_time_wait;
 
+       if (sk->sk_state == TCP_NEW_SYN_RECV) {
+               struct request_sock *req = inet_reqsk(sk);
+               struct sock *nsk = NULL;
+
+               sk = req->rsk_listener;
+               if (tcp_v4_inbound_md5_hash(sk, skb))
+                       goto discard_and_relse;
+               if (likely(sk->sk_state == TCP_LISTEN)) {
+                       nsk = tcp_check_req(sk, skb, req, false);
+               } else {
+                       inet_csk_reqsk_queue_drop_and_put(sk, req);
+                       goto lookup;
+               }
+               if (!nsk) {
+                       reqsk_put(req);
+                       goto discard_it;
+               }
+               if (nsk == sk) {
+                       sock_hold(sk);
+                       reqsk_put(req);
+               } else if (tcp_child_process(sk, nsk, skb)) {
+                       tcp_v4_send_reset(nsk, skb);
+                       goto discard_it;
+               } else {
+                       return 0;
+               }
+       }
        if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
                NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
                goto discard_and_relse;
@@ -1619,9 +1624,15 @@ process:
        if (sk_filter(sk, skb))
                goto discard_and_relse;
 
-       sk_incoming_cpu_update(sk);
        skb->dev = NULL;
 
+       if (sk->sk_state == TCP_LISTEN) {
+               ret = tcp_v4_do_rcv(sk, skb);
+               goto put_and_return;
+       }
+
+       sk_incoming_cpu_update(sk);
+
        bh_lock_sock_nested(sk);
        tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
        ret = 0;
@@ -1636,6 +1647,7 @@ process:
        }
        bh_unlock_sock(sk);
 
+put_and_return:
        sock_put(sk);
 
        return ret;
@@ -1830,35 +1842,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
        ++st->num;
        ++st->offset;
 
-       if (st->state == TCP_SEQ_STATE_OPENREQ) {
-               struct request_sock *req = cur;
-
-               icsk = inet_csk(st->syn_wait_sk);
-               req = req->dl_next;
-               while (1) {
-                       while (req) {
-                               if (req->rsk_ops->family == st->family) {
-                                       cur = req;
-                                       goto out;
-                               }
-                               req = req->dl_next;
-                       }
-                       if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
-                               break;
-get_req:
-                       req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
-               }
-               sk        = sk_nulls_next(st->syn_wait_sk);
-               st->state = TCP_SEQ_STATE_LISTENING;
-               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-       } else {
-               icsk = inet_csk(sk);
-               spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               if (reqsk_queue_len(&icsk->icsk_accept_queue))
-                       goto start_req;
-               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               sk = sk_nulls_next(sk);
-       }
+       sk = sk_nulls_next(sk);
 get_sk:
        sk_nulls_for_each_from(sk, node) {
                if (!net_eq(sock_net(sk), net))
@@ -1868,16 +1852,6 @@ get_sk:
                        goto out;
                }
                icsk = inet_csk(sk);
-               spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
-start_req:
-                       st->uid         = sock_i_uid(sk);
-                       st->syn_wait_sk = sk;
-                       st->state       = TCP_SEQ_STATE_OPENREQ;
-                       st->sbucket     = 0;
-                       goto get_req;
-               }
-               spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
        }
        spin_unlock_bh(&ilb->lock);
        st->offset = 0;
@@ -2009,7 +1983,6 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
        void *rc = NULL;
 
        switch (st->state) {
-       case TCP_SEQ_STATE_OPENREQ:
        case TCP_SEQ_STATE_LISTENING:
                if (st->bucket >= INET_LHTABLE_SIZE)
                        break;
@@ -2068,7 +2041,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        }
 
        switch (st->state) {
-       case TCP_SEQ_STATE_OPENREQ:
        case TCP_SEQ_STATE_LISTENING:
                rc = listening_get_next(seq, v);
                if (!rc) {
@@ -2093,11 +2065,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
        struct tcp_iter_state *st = seq->private;
 
        switch (st->state) {
-       case TCP_SEQ_STATE_OPENREQ:
-               if (v) {
-                       struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
-                       spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
-               }
        case TCP_SEQ_STATE_LISTENING:
                if (v != SEQ_START_TOKEN)
                        spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
@@ -2151,7 +2118,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
 EXPORT_SYMBOL(tcp_proc_unregister);
 
 static void get_openreq4(const struct request_sock *req,
-                        struct seq_file *f, int i, kuid_t uid)
+                        struct seq_file *f, int i)
 {
        const struct inet_request_sock *ireq = inet_rsk(req);
        long delta = req->rsk_timer.expires - jiffies;
@@ -2168,7 +2135,8 @@ static void get_openreq4(const struct request_sock *req,
                1,    /* timers active (only the expire timer) */
                jiffies_delta_to_clock_t(delta),
                req->num_timeout,
-               from_kuid_munged(seq_user_ns(f), uid),
+               from_kuid_munged(seq_user_ns(f),
+                                sock_i_uid(req->rsk_listener)),
                0,  /* non standard timer */
                0, /* open_requests have no inode */
                0,
@@ -2269,18 +2237,12 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
        }
        st = seq->private;
 
-       switch (st->state) {
-       case TCP_SEQ_STATE_LISTENING:
-       case TCP_SEQ_STATE_ESTABLISHED:
-               if (sk->sk_state == TCP_TIME_WAIT)
-                       get_timewait4_sock(v, seq, st->num);
-               else
-                       get_tcp4_sock(v, seq, st->num);
-               break;
-       case TCP_SEQ_STATE_OPENREQ:
-               get_openreq4(v, seq, st->num, st->uid);
-               break;
-       }
+       if (sk->sk_state == TCP_TIME_WAIT)
+               get_timewait4_sock(v, seq, st->num);
+       else if (sk->sk_state == TCP_NEW_SYN_RECV)
+               get_openreq4(v, seq, st->num);
+       else
+               get_tcp4_sock(v, seq, st->num);
 out:
        seq_pad(seq, '\n');
        return 0;