netfilter: conntrack: introduce nf_ct_acct_update()
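
Consolidate the two identical accounting chunks in __nf_ct_refresh_acct()
and __nf_ct_kill_acct() into a single helper, nf_ct_acct_update(). One
side effect visible in the diff: __nf_ct_kill_acct() now accounts
skb->len rather than skb->len - skb_network_offset(skb).

The combined diff shown here (afde5f5..25e0c26) also folds in the
preceding changes that make the conntrack hash table global: the
per-netns net->ct.hash, net->ct.htable_size and net->ct.generation
become nf_conntrack_hash, nf_conntrack_htable_size and
nf_conntrack_generation; the hash seed is initialized lazily with
get_random_once() and mixed with net_hash_mix() so namespaces hash
differently; nf_ct_key_equal() gains a net_eq() check; and early_drop()
walks up to NF_CT_EVICTION_RANGE buckets of the shared table.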
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index afde5f5..25e0c26 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -12,6 +12,8 @@
  * published by the Free Software Foundation.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/types.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
@@ -52,6 +54,7 @@
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_core.h>
 #include <net/netfilter/nf_nat_helper.h>
+#include <net/netns/hash.h>
 
 #define NF_CONNTRACK_VERSION   "0.5.0"
 
@@ -66,7 +69,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_locks);
 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock);
 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock);
 
+struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_hash);
+
 static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
+static __read_mostly seqcount_t nf_conntrack_generation;
 static __read_mostly bool nf_conntrack_locks_all;
 
 void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -105,7 +112,7 @@ static bool nf_conntrack_double_lock(struct net *net, unsigned int h1,
                spin_lock_nested(&nf_conntrack_locks[h1],
                                 SINGLE_DEPTH_NESTING);
        }
-       if (read_seqcount_retry(&net->ct.generation, sequence)) {
+       if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
                nf_conntrack_double_unlock(h1, h2);
                return true;
        }
@@ -139,43 +146,43 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
 EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
 
-unsigned int nf_conntrack_hash_rnd __read_mostly;
-EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
+static unsigned int nf_conntrack_hash_rnd __read_mostly;
 
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+                             const struct net *net)
 {
        unsigned int n;
+       u32 seed;
+
+       get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
        /* The direction must be ignored, so we hash everything up to the
         * destination ports (which is a multiple of 4) and treat the last
         * three bytes manually.
         */
+       seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
        n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
-       return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
+       return jhash2((u32 *)tuple, n, seed ^
                      (((__force __u16)tuple->dst.u.all << 16) |
                      tuple->dst.protonum));
 }
 
-static u32 __hash_bucket(u32 hash, unsigned int size)
-{
-       return reciprocal_scale(hash, size);
-}
-
-static u32 hash_bucket(u32 hash, const struct net *net)
+static u32 scale_hash(u32 hash)
 {
-       return __hash_bucket(hash, net->ct.htable_size);
+       return reciprocal_scale(hash, nf_conntrack_htable_size);
 }
 
-static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
-                                 unsigned int size)
+static u32 __hash_conntrack(const struct net *net,
+                           const struct nf_conntrack_tuple *tuple,
+                           unsigned int size)
 {
-       return __hash_bucket(hash_conntrack_raw(tuple), size);
+       return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
 }
 
-static inline u_int32_t hash_conntrack(const struct net *net,
-                                      const struct nf_conntrack_tuple *tuple)
+static u32 hash_conntrack(const struct net *net,
+                         const struct nf_conntrack_tuple *tuple)
 {
-       return __hash_conntrack(tuple, net->ct.htable_size);
+       return scale_hash(hash_conntrack_raw(tuple, net));
 }
 
 bool
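
A note on the bucket mapping above: jhash2() hashes the tuple's leading
u32 words (source triple plus destination address), the trailing three
bytes (the 16-bit destination port and the 8-bit protocol number) are
folded into the seed, and reciprocal_scale() then maps the 32-bit hash
onto the table without a modulo. A minimal userspace sketch of that
mapping, reusing the kernel's reciprocal_scale() computation (the bucket
count and sample hashes are made up for illustration):

#include <stdint.h>
#include <stdio.h>

/* same computation as the kernel's reciprocal_scale():
 * multiplicative range reduction, (val * ep_ro) >> 32 */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
	uint32_t htable_size = 16384;	/* hypothetical bucket count */
	uint32_t hashes[] = { 0x00000000u, 0x12345678u, 0xdeadbeefu, 0xffffffffu };
	unsigned int i;

	for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
		printf("hash %08x -> bucket %u\n", hashes[i],
		       reciprocal_scale(hashes[i], htable_size));
	return 0;
}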
@@ -356,7 +363,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
        }
        rcu_read_lock();
        l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
-       if (l4proto && l4proto->destroy)
+       if (l4proto->destroy)
                l4proto->destroy(ct);
 
        rcu_read_unlock();
@@ -391,7 +398,7 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
 
        local_bh_disable();
        do {
-               sequence = read_seqcount_begin(&net->ct.generation);
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
                reply_hash = hash_conntrack(net,
@@ -443,7 +450,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
 static inline bool
 nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
                const struct nf_conntrack_tuple *tuple,
-               const struct nf_conntrack_zone *zone)
+               const struct nf_conntrack_zone *zone,
+               const struct net *net)
 {
        struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 
@@ -452,7 +460,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
         */
        return nf_ct_tuple_equal(tuple, &h->tuple) &&
               nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
-              nf_ct_is_confirmed(ct);
+              nf_ct_is_confirmed(ct) &&
+              net_eq(net, nf_ct_net(ct));
 }
 
 /*
@@ -465,21 +474,23 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple, u32 hash)
 {
        struct nf_conntrack_tuple_hash *h;
+       struct hlist_nulls_head *ct_hash;
        struct hlist_nulls_node *n;
-       unsigned int bucket = hash_bucket(hash, net);
+       unsigned int bucket, sequence;
 
-       /* Disable BHs the entire time since we normally need to disable them
-        * at least once for the stats anyway.
-        */
-       local_bh_disable();
 begin:
-       hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
-               if (nf_ct_key_equal(h, tuple, zone)) {
-                       NF_CT_STAT_INC(net, found);
-                       local_bh_enable();
+       do {
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
+               bucket = scale_hash(hash);
+               ct_hash = nf_conntrack_hash;
+       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+       hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+               if (nf_ct_key_equal(h, tuple, zone, net)) {
+                       NF_CT_STAT_INC_ATOMIC(net, found);
                        return h;
                }
-               NF_CT_STAT_INC(net, searched);
+               NF_CT_STAT_INC_ATOMIC(net, searched);
        }
        /*
         * if the nulls value we got at the end of this lookup is
@@ -487,10 +498,9 @@ begin:
         * We probably met an item that was moved to another chain.
         */
        if (get_nulls_value(n) != bucket) {
-               NF_CT_STAT_INC(net, search_restart);
+               NF_CT_STAT_INC_ATOMIC(net, search_restart);
                goto begin;
        }
-       local_bh_enable();
 
        return NULL;
 }
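
The lookup above snapshots both the bucket and the nf_conntrack_hash
pointer inside a seqcount read section, so a concurrent resize (which
swaps the table under write_seqcount_begin/end) forces a retry instead
of letting the reader walk a stale table. A minimal userspace sketch of
that read-side pattern, with C11 atomics standing in for the kernel's
seqcount_t (illustrative only; a writer would make the counter odd while
updating and even again when done):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic unsigned int generation; /* even: stable, odd: write in progress */
static int table_size = 1024;		/* stand-in for the protected state */

static unsigned int read_begin(void)
{
	unsigned int seq;

	do {	/* wait until no writer is active */
		seq = atomic_load_explicit(&generation, memory_order_acquire);
	} while (seq & 1);
	return seq;
}

static bool read_retry(unsigned int seq)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&generation, memory_order_relaxed) != seq;
}

int main(void)
{
	unsigned int seq;
	int size;

	do {
		seq = read_begin();
		size = table_size;	/* snapshot, like bucket + ct_hash above */
	} while (read_retry(seq));

	printf("consistent snapshot: size=%d\n", size);
	return 0;
}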
@@ -512,7 +522,7 @@ begin:
                             !atomic_inc_not_zero(&ct->ct_general.use)))
                        h = NULL;
                else {
-                       if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
+                       if (unlikely(!nf_ct_key_equal(h, tuple, zone, net))) {
                                nf_ct_put(ct);
                                goto begin;
                        }
@@ -528,7 +538,7 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
                      const struct nf_conntrack_tuple *tuple)
 {
        return __nf_conntrack_find_get(net, zone, tuple,
-                                      hash_conntrack_raw(tuple));
+                                      hash_conntrack_raw(tuple, net));
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
 
@@ -536,12 +546,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
                                       unsigned int hash,
                                       unsigned int reply_hash)
 {
-       struct net *net = nf_ct_net(ct);
-
        hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
-                          &net->ct.hash[hash]);
+                          &nf_conntrack_hash[hash]);
        hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
-                          &net->ct.hash[reply_hash]);
+                          &nf_conntrack_hash[reply_hash]);
 }
 
 int
@@ -558,7 +566,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
 
        local_bh_disable();
        do {
-               sequence = read_seqcount_begin(&net->ct.generation);
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
                hash = hash_conntrack(net,
                                      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
                reply_hash = hash_conntrack(net,
@@ -566,17 +574,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
        } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
 
        /* See if there's one in the list already, including reverse */
-       hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-               if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-                                     &h->tuple) &&
-                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-                                    NF_CT_DIRECTION(h)))
+       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+               if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                   zone, net))
                        goto out;
-       hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-               if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-                                     &h->tuple) &&
-                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-                                    NF_CT_DIRECTION(h)))
+
+       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+               if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                   zone, net))
                        goto out;
 
        add_timer(&ct->timeout);
@@ -597,6 +602,21 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
 
+static inline void nf_ct_acct_update(struct nf_conn *ct,
+                                    enum ip_conntrack_info ctinfo,
+                                    unsigned int len)
+{
+       struct nf_conn_acct *acct;
+
+       acct = nf_conn_acct_find(ct);
+       if (acct) {
+               struct nf_conn_counter *counter = acct->counter;
+
+               atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
+               atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
+       }
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff *skb)
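
nf_ct_acct_update() bumps one {packets, bytes} pair per conntrack
direction, chosen via CTINFO2DIR(). A userspace sketch of the same
per-direction accounting, with C11 atomics standing in for the kernel's
atomic64_t (the names mirror the kernel's, but the program is
illustrative):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

enum ip_conntrack_dir { IP_CT_DIR_ORIGINAL, IP_CT_DIR_REPLY, IP_CT_DIR_MAX };

struct nf_conn_counter {
	_Atomic uint64_t packets;
	_Atomic uint64_t bytes;
};

/* same shape as nf_ct_acct_update(): one counter pair per direction */
static void acct_update(struct nf_conn_counter *counter,
			enum ip_conntrack_dir dir, unsigned int len)
{
	atomic_fetch_add_explicit(&counter[dir].packets, 1, memory_order_relaxed);
	atomic_fetch_add_explicit(&counter[dir].bytes, len, memory_order_relaxed);
}

int main(void)
{
	struct nf_conn_counter ctr[IP_CT_DIR_MAX] = { 0 };

	acct_update(ctr, IP_CT_DIR_ORIGINAL, 1500);	/* e.g. a full-MTU packet */
	acct_update(ctr, IP_CT_DIR_REPLY, 60);		/* e.g. a bare ACK */

	printf("orig:  %llu pkts, %llu bytes\n",
	       (unsigned long long)ctr[IP_CT_DIR_ORIGINAL].packets,
	       (unsigned long long)ctr[IP_CT_DIR_ORIGINAL].bytes);
	printf("reply: %llu pkts, %llu bytes\n",
	       (unsigned long long)ctr[IP_CT_DIR_REPLY].packets,
	       (unsigned long long)ctr[IP_CT_DIR_REPLY].bytes);
	return 0;
}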
@@ -626,10 +646,10 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        local_bh_disable();
 
        do {
-               sequence = read_seqcount_begin(&net->ct.generation);
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
                /* reuse the hash saved before */
                hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
-               hash = hash_bucket(hash, net);
+               hash = scale_hash(hash);
                reply_hash = hash_conntrack(net,
                                           &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
@@ -659,17 +679,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
        /* See if there's one in the list already, including reverse:
           NAT could have grabbed it without realizing, since we're
           not in the hash.  If there is, we lost race. */
-       hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
-               if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
-                                     &h->tuple) &&
-                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-                                    NF_CT_DIRECTION(h)))
+       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+               if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+                                   zone, net))
                        goto out;
-       hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
-               if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
-                                     &h->tuple) &&
-                   nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
-                                    NF_CT_DIRECTION(h)))
+
+       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+               if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+                                   zone, net))
                        goto out;
 
        /* Timer relative to confirmation time, not original
@@ -724,29 +741,31 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
        struct net *net = nf_ct_net(ignored_conntrack);
        const struct nf_conntrack_zone *zone;
        struct nf_conntrack_tuple_hash *h;
+       struct hlist_nulls_head *ct_hash;
+       unsigned int hash, sequence;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;
-       unsigned int hash;
 
        zone = nf_ct_zone(ignored_conntrack);
-       hash = hash_conntrack(net, tuple);
 
-       /* Disable BHs the entire time since we need to disable them at
-        * least once for the stats anyway.
-        */
-       rcu_read_lock_bh();
-       hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
+       rcu_read_lock();
+       do {
+               sequence = read_seqcount_begin(&nf_conntrack_generation);
+               hash = hash_conntrack(net, tuple);
+               ct_hash = nf_conntrack_hash;
+       } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+       hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
                ct = nf_ct_tuplehash_to_ctrack(h);
                if (ct != ignored_conntrack &&
-                   nf_ct_tuple_equal(tuple, &h->tuple) &&
-                   nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
-                       NF_CT_STAT_INC(net, found);
-                       rcu_read_unlock_bh();
+                   nf_ct_key_equal(h, tuple, zone, net)) {
+                       NF_CT_STAT_INC_ATOMIC(net, found);
+                       rcu_read_unlock();
                        return 1;
                }
-               NF_CT_STAT_INC(net, searched);
+               NF_CT_STAT_INC_ATOMIC(net, searched);
        }
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
 
        return 0;
 }
@@ -760,71 +779,63 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 {
        /* Use oldest entry, which is roughly LRU */
        struct nf_conntrack_tuple_hash *h;
-       struct nf_conn *ct = NULL, *tmp;
+       struct nf_conn *tmp;
        struct hlist_nulls_node *n;
-       unsigned int i = 0, cnt = 0;
-       int dropped = 0;
-       unsigned int hash, sequence;
+       unsigned int i, hash, sequence;
+       struct nf_conn *ct = NULL;
        spinlock_t *lockp;
+       bool ret = false;
+
+       i = 0;
 
        local_bh_disable();
 restart:
-       sequence = read_seqcount_begin(&net->ct.generation);
-       hash = hash_bucket(_hash, net);
-       for (; i < net->ct.htable_size; i++) {
+       sequence = read_seqcount_begin(&nf_conntrack_generation);
+       for (; i < NF_CT_EVICTION_RANGE; i++) {
+               hash = scale_hash(_hash++);
                lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
                nf_conntrack_lock(lockp);
-               if (read_seqcount_retry(&net->ct.generation, sequence)) {
+               if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
                        spin_unlock(lockp);
                        goto restart;
                }
-               hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
-                                        hnnode) {
+               hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
+                                              hnnode) {
                        tmp = nf_ct_tuplehash_to_ctrack(h);
-                       if (!test_bit(IPS_ASSURED_BIT, &tmp->status) &&
-                           !nf_ct_is_dying(tmp) &&
-                           atomic_inc_not_zero(&tmp->ct_general.use)) {
+
+                       if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+                           !net_eq(nf_ct_net(tmp), net) ||
+                           nf_ct_is_dying(tmp))
+                               continue;
+
+                       if (atomic_inc_not_zero(&tmp->ct_general.use)) {
                                ct = tmp;
                                break;
                        }
-                       cnt++;
                }
 
-               hash = (hash + 1) % net->ct.htable_size;
                spin_unlock(lockp);
-
-               if (ct || cnt >= NF_CT_EVICTION_RANGE)
+               if (ct)
                        break;
-
        }
+
        local_bh_enable();
 
        if (!ct)
-               return dropped;
+               return false;
 
-       if (del_timer(&ct->timeout)) {
+       /* kill only if in same netns -- might have moved due to
+        * SLAB_DESTROY_BY_RCU rules
+        */
+       if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
                if (nf_ct_delete(ct, 0, 0)) {
-                       dropped = 1;
                        NF_CT_STAT_INC_ATOMIC(net, early_drop);
+                       ret = true;
                }
        }
-       nf_ct_put(ct);
-       return dropped;
-}
-
-void init_nf_conntrack_hash_rnd(void)
-{
-       unsigned int rand;
 
-       /*
-        * Why not initialize nf_conntrack_rnd in a "init()" function ?
-        * Because there isn't enough entropy when system initializing,
-        * and we initialize it as late as possible.
-        */
-       do {
-               get_random_bytes(&rand, sizeof(rand));
-       } while (!rand);
-       cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
+       nf_ct_put(ct);
+       return ret;
 }
 
 static struct nf_conn *
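
Worth spelling out: the net_eq() test inside the bucket walk above only
filters candidates. Conntrack objects come from a SLAB_DESTROY_BY_RCU
cache, so between that test and atomic_inc_not_zero() the entry can be
freed and its memory reused for a fresh conntrack, possibly in another
namespace. That is why nf_ct_net(ct) is checked again after the
reference has been taken, before del_timer()/nf_ct_delete().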
@@ -836,12 +847,6 @@ __nf_conntrack_alloc(struct net *net,
 {
        struct nf_conn *ct;
 
-       if (unlikely(!nf_conntrack_hash_rnd)) {
-               init_nf_conntrack_hash_rnd();
-               /* recompute the hash as nf_conntrack_hash_rnd is initialized */
-               hash = hash_conntrack_raw(orig);
-       }
-
        /* We don't want any race condition at early drop stage */
        atomic_inc(&net->ct.count);
 
@@ -966,7 +971,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
 
        if (!l4proto->new(ct, skb, dataoff, timeouts)) {
                nf_conntrack_free(ct);
-               pr_debug("init conntrack: can't track with proto module\n");
+               pr_debug("can't track with proto module\n");
                return NULL;
        }
 
@@ -988,7 +993,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
                spin_lock(&nf_conntrack_expect_lock);
                exp = nf_ct_find_expectation(net, zone, tuple);
                if (exp) {
-                       pr_debug("conntrack: expectation arrives ct=%p exp=%p\n",
+                       pr_debug("expectation arrives ct=%p exp=%p\n",
                                 ct, exp);
                        /* Welcome, Mr. Bond.  We've been expecting you... */
                        __set_bit(IPS_EXPECTED_BIT, &ct->status);
@@ -1053,13 +1058,13 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
                             dataoff, l3num, protonum, net, &tuple, l3proto,
                             l4proto)) {
-               pr_debug("resolve_normal_ct: Can't get tuple\n");
+               pr_debug("Can't get tuple\n");
                return NULL;
        }
 
        /* look for tuple match */
        zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
-       hash = hash_conntrack_raw(&tuple);
+       hash = hash_conntrack_raw(&tuple, net);
        h = __nf_conntrack_find_get(net, zone, &tuple, hash);
        if (!h) {
                h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1079,14 +1084,13 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
        } else {
                /* Once we've had two way comms, always ESTABLISHED. */
                if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
-                       pr_debug("nf_conntrack_in: normal packet for %p\n", ct);
+                       pr_debug("normal packet for %p\n", ct);
                        *ctinfo = IP_CT_ESTABLISHED;
                } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
-                       pr_debug("nf_conntrack_in: related packet for %p\n",
-                                ct);
+                       pr_debug("related packet for %p\n", ct);
                        *ctinfo = IP_CT_RELATED;
                } else {
-                       pr_debug("nf_conntrack_in: new packet for %p\n", ct);
+                       pr_debug("new packet for %p\n", ct);
                        *ctinfo = IP_CT_NEW;
                }
                *set_reply = 0;
@@ -1269,17 +1273,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
        }
 
 acct:
-       if (do_acct) {
-               struct nf_conn_acct *acct;
-
-               acct = nf_conn_acct_find(ct);
-               if (acct) {
-                       struct nf_conn_counter *counter = acct->counter;
-
-                       atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-                       atomic64_add(skb->len, &counter[CTINFO2DIR(ctinfo)].bytes);
-               }
-       }
+       if (do_acct)
+               nf_ct_acct_update(ct, ctinfo, skb->len);
 }
 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
 
@@ -1288,18 +1283,8 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
                       const struct sk_buff *skb,
                       int do_acct)
 {
-       if (do_acct) {
-               struct nf_conn_acct *acct;
-
-               acct = nf_conn_acct_find(ct);
-               if (acct) {
-                       struct nf_conn_counter *counter = acct->counter;
-
-                       atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
-                       atomic64_add(skb->len - skb_network_offset(skb),
-                                    &counter[CTINFO2DIR(ctinfo)].bytes);
-               }
-       }
+       if (do_acct)
+               nf_ct_acct_update(ct, ctinfo, skb->len);
 
        if (del_timer(&ct->timeout)) {
                ct->timeout.function((unsigned long)ct);
@@ -1395,16 +1380,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
        int cpu;
        spinlock_t *lockp;
 
-       for (; *bucket < net->ct.htable_size; (*bucket)++) {
+       for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
                lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
                local_bh_disable();
                nf_conntrack_lock(lockp);
-               if (*bucket < net->ct.htable_size) {
-                       hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
+               if (*bucket < nf_conntrack_htable_size) {
+                       hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
                                if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
                                        continue;
                                ct = nf_ct_tuplehash_to_ctrack(h);
-                               if (iter(ct, data))
+                               if (net_eq(nf_ct_net(ct), net) &&
+                                   iter(ct, data))
                                        goto found;
                        }
                }
@@ -1442,6 +1428,9 @@ void nf_ct_iterate_cleanup(struct net *net,
 
        might_sleep();
 
+       if (atomic_read(&net->ct.count) == 0)
+               return;
+
        while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
                /* Time to push up daises... */
                if (del_timer(&ct->timeout))
@@ -1493,6 +1482,8 @@ void nf_conntrack_cleanup_end(void)
        while (untrack_refs() > 0)
                schedule();
 
+       nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
+
 #ifdef CONFIG_NF_CONNTRACK_ZONES
        nf_ct_extend_unregister(&nf_ct_zone_extend);
 #endif
@@ -1543,7 +1534,6 @@ i_see_dead_people:
        }
 
        list_for_each_entry(net, net_exit_list, exit_list) {
-               nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
                nf_conntrack_proto_pernet_fini(net);
                nf_conntrack_helper_pernet_fini(net);
                nf_conntrack_ecache_pernet_fini(net);
@@ -1606,7 +1596,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 
        local_bh_disable();
        nf_conntrack_all_lock();
-       write_seqcount_begin(&init_net.ct.generation);
+       write_seqcount_begin(&nf_conntrack_generation);
 
        /* Lookups in the old hash might happen in parallel, which means we
         * might get false negatives during connection lookup. New connections
@@ -1614,26 +1604,28 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
         * though since that required taking the locks.
         */
 
-       for (i = 0; i < init_net.ct.htable_size; i++) {
-               while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
-                       h = hlist_nulls_entry(init_net.ct.hash[i].first,
-                                       struct nf_conntrack_tuple_hash, hnnode);
+       for (i = 0; i < nf_conntrack_htable_size; i++) {
+               while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+                       h = hlist_nulls_entry(nf_conntrack_hash[i].first,
+                                             struct nf_conntrack_tuple_hash, hnnode);
                        ct = nf_ct_tuplehash_to_ctrack(h);
                        hlist_nulls_del_rcu(&h->hnnode);
-                       bucket = __hash_conntrack(&h->tuple, hashsize);
+                       bucket = __hash_conntrack(nf_ct_net(ct),
+                                                 &h->tuple, hashsize);
                        hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
                }
        }
-       old_size = init_net.ct.htable_size;
-       old_hash = init_net.ct.hash;
+       old_size = nf_conntrack_htable_size;
+       old_hash = nf_conntrack_hash;
 
-       init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
-       init_net.ct.hash = hash;
+       nf_conntrack_hash = hash;
+       nf_conntrack_htable_size = hashsize;
 
-       write_seqcount_end(&init_net.ct.generation);
+       write_seqcount_end(&nf_conntrack_generation);
        nf_conntrack_all_unlock();
        local_bh_enable();
 
+       synchronize_net();
        nf_ct_free_hashtable(old_hash, old_size);
        return 0;
 }
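
The synchronize_net() added before nf_ct_free_hashtable() is what keeps
the lock-free lookups safe across a resize: readers snapshot the
nf_conntrack_hash pointer under RCU, so the old table may only be freed
once every such reader has left its RCU read-side section.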
@@ -1656,6 +1648,8 @@ int nf_conntrack_init_start(void)
        int max_factor = 8;
        int i, ret, cpu;
 
+       seqcount_init(&nf_conntrack_generation);
+
        for (i = 0; i < CONNTRACK_LOCKS; i++)
                spin_lock_init(&nf_conntrack_locks[i]);
 
@@ -1681,6 +1675,11 @@ int nf_conntrack_init_start(void)
                 * entries. */
                max_factor = 4;
        }
+
+       nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
+       if (!nf_conntrack_hash)
+               return -ENOMEM;
+
        nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
        printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
@@ -1759,6 +1758,7 @@ err_tstamp:
 err_acct:
        nf_conntrack_expect_fini();
 err_expect:
+       nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
        return ret;
 }
 
@@ -1782,7 +1782,6 @@ int nf_conntrack_init_net(struct net *net)
        int cpu;
 
        atomic_set(&net->ct.count, 0);
-       seqcount_init(&net->ct.generation);
 
        net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
        if (!net->ct.pcpu_lists)
@@ -1812,12 +1811,6 @@ int nf_conntrack_init_net(struct net *net)
                goto err_cache;
        }
 
-       net->ct.htable_size = nf_conntrack_htable_size;
-       net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
-       if (!net->ct.hash) {
-               printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
-               goto err_hash;
-       }
        ret = nf_conntrack_expect_pernet_init(net);
        if (ret < 0)
                goto err_expect;
@@ -1849,8 +1842,6 @@ err_tstamp:
 err_acct:
        nf_conntrack_expect_pernet_fini(net);
 err_expect:
-       nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
-err_hash:
        kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
        kfree(net->ct.slabname);