soreuseport: fast reuseport TCP socket selection
[cascardo/linux.git] / net / ipv4 / inet_connection_sock.c
index 46b9c88..c16a2e6 100644 (file)
@@ -24,6 +24,7 @@
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 #include <net/tcp.h>
+#include <net/sock_reuseport.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -67,7 +68,8 @@ int inet_csk_bind_conflict(const struct sock *sk,
                        if ((!reuse || !sk2->sk_reuse ||
                            sk2->sk_state == TCP_LISTEN) &&
                            (!reuseport || !sk2->sk_reuseport ||
-                           (sk2->sk_state != TCP_TIME_WAIT &&
+                            rcu_access_pointer(sk->sk_reuseport_cb) ||
+                            (sk2->sk_state != TCP_TIME_WAIT &&
                             !uid_eq(uid, sock_i_uid(sk2))))) {
 
                                if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr ||
@@ -132,6 +134,7 @@ again:
                                              sk->sk_state != TCP_LISTEN) ||
                                             (tb->fastreuseport > 0 &&
                                              sk->sk_reuseport &&
+                                             !rcu_access_pointer(sk->sk_reuseport_cb) &&
                                              uid_eq(tb->fastuid, uid))) &&
                                            (tb->num_owners < smallest_size || smallest_size == -1)) {
                                                smallest_size = tb->num_owners;
@@ -193,15 +196,18 @@ tb_found:
                if (((tb->fastreuse > 0 &&
                      sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
                     (tb->fastreuseport > 0 &&
-                     sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
-                   smallest_size == -1) {
+                     sk->sk_reuseport &&
+                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
+                     uid_eq(tb->fastuid, uid))) && smallest_size == -1) {
                        goto success;
                } else {
                        ret = 1;
                        if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) {
                                if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) ||
                                     (tb->fastreuseport > 0 &&
-                                     sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
+                                     sk->sk_reuseport &&
+                                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
+                                     uid_eq(tb->fastuid, uid))) &&
                                    smallest_size != -1 && --attempts >= 0) {
                                        spin_unlock(&head->lock);
                                        goto again;
@@ -482,10 +488,6 @@ EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
 #define AF_INET_FAMILY(fam) true
 #endif
 
-/* Only thing we need from tcp.h */
-extern int sysctl_tcp_synack_retries;
-
-
 /* Decide when to expire the request and when to resend SYN-ACK */
 static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
                                  const int max_retries,
@@ -557,6 +559,7 @@ static void reqsk_timer_handler(unsigned long data)
 {
        struct request_sock *req = (struct request_sock *)data;
        struct sock *sk_listener = req->rsk_listener;
+       struct net *net = sock_net(sk_listener);
        struct inet_connection_sock *icsk = inet_csk(sk_listener);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        int qlen, expire = 0, resend = 0;
@@ -566,7 +569,7 @@ static void reqsk_timer_handler(unsigned long data)
        if (sk_state_load(sk_listener) != TCP_LISTEN)
                goto drop;
 
-       max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+       max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        thresh = max_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
@@ -737,6 +740,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
+       int err = -EADDRINUSE;
 
        reqsk_queue_alloc(&icsk->icsk_accept_queue);
 
@@ -754,13 +758,14 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
                inet->inet_sport = htons(inet->inet_num);
 
                sk_dst_reset(sk);
-               sk->sk_prot->hash(sk);
+               err = sk->sk_prot->hash(sk);
 
-               return 0;
+               if (likely(!err))
+                       return 0;
        }
 
        sk->sk_state = TCP_CLOSE;
-       return -EADDRINUSE;
+       return err;
 }
 EXPORT_SYMBOL_GPL(inet_csk_listen_start);