datapath: compat: Do not use upstream fill-meta-data function for compat tunnel
[cascardo/ovs.git] / datapath / linux / compat / stt.c
index 8e00112..c899517 100644 (file)
 #define STT_DST_PORT 7471
 
 #ifdef OVS_STT
+#ifdef CONFIG_SLUB
+/*
+ * We saw better performance with skipping zero copy in case of SLUB.
+ * So skip zero copy for SLUB case.
+ */
+#define SKIP_ZERO_COPY
+#endif
+
 #define STT_VER 0
 
 /* @list: Per-net list of STT ports.
@@ -62,6 +70,7 @@ struct stt_dev {
        struct net_device       *dev;
        struct net              *net;
        struct list_head        next;
+       struct list_head        up_next;
        struct socket           *sock;
        __be16                  dst_port;
 };
@@ -150,7 +159,11 @@ struct frag_skb_cb {
 /* per-network namespace private data for this module */
 struct stt_net {
        struct list_head stt_list;
+       struct list_head stt_up_list;   /* Devices which are in IFF_UP state. */
        int n_tunnels;
+#ifdef HAVE_NF_REGISTER_NET_HOOK
+       bool nf_hook_reg_done;
+#endif
 };
 
 static int stt_net_id;
@@ -167,12 +180,12 @@ static DEFINE_PER_CPU(u32, pkt_seq_counter);
 static void clean_percpu(struct work_struct *work);
 static DECLARE_DELAYED_WORK(clean_percpu_wq, clean_percpu);
 
-static struct stt_dev *stt_find_sock(struct net *net, __be16 port)
+static struct stt_dev *stt_find_up_dev(struct net *net, __be16 port)
 {
        struct stt_net *sn = net_generic(net, stt_net_id);
        struct stt_dev *stt_dev;
 
-       list_for_each_entry_rcu(stt_dev, &sn->stt_list, next) {
+       list_for_each_entry_rcu(stt_dev, &sn->stt_up_list, up_next) {
                if (stt_dev->dst_port == port)
                        return stt_dev;
        }
@@ -214,73 +227,6 @@ static int clear_gso(struct sk_buff *skb)
        return 0;
 }
 
-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
-                                          struct sk_buff **skbp)
-{
-       struct sk_buff *skb = *skbp;
-       struct sk_buff *last;
-
-       do {
-               struct sk_buff *frags;
-
-               if (skb_shared(skb)) {
-                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-                       if (unlikely(!nskb))
-                               return ERR_PTR(-ENOMEM);
-
-                       nskb->next = skb->next;
-                       consume_skb(skb);
-                       skb = nskb;
-                       *skbp = skb;
-               }
-
-               if (head) {
-                       head->len -= skb->len;
-                       head->data_len -= skb->len;
-                       head->truesize -= skb->truesize;
-               }
-
-               frags = skb_shinfo(skb)->frag_list;
-               if (frags) {
-                       int err;
-
-                       err = skb_unclone(skb, GFP_ATOMIC);
-                       if (unlikely(err))
-                               return ERR_PTR(err);
-
-                       last = normalize_frag_list(skb, &frags);
-                       if (IS_ERR(last))
-                               return last;
-
-                       skb_shinfo(skb)->frag_list = NULL;
-                       last->next = skb->next;
-                       skb->next = frags;
-               } else {
-                       last = skb;
-               }
-
-               skbp = &skb->next;
-       } while ((skb = skb->next));
-
-       return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
-       struct sk_buff *err_skb;
-
-       err_skb = normalize_frag_list(NULL, skbp);
-       if (IS_ERR(err_skb))
-               return PTR_ERR(err_skb);
-
-       return 0;
-}
-
 static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
 {
        to->protocol = from->protocol;
@@ -460,6 +406,74 @@ static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
        return 0;
 }
 
+#ifndef SKIP_ZERO_COPY
+static struct sk_buff *normalize_frag_list(struct sk_buff *head,
+                                          struct sk_buff **skbp)
+{
+       struct sk_buff *skb = *skbp;
+       struct sk_buff *last;
+
+       do {
+               struct sk_buff *frags;
+
+               if (skb_shared(skb)) {
+                       struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+                       if (unlikely(!nskb))
+                               return ERR_PTR(-ENOMEM);
+
+                       nskb->next = skb->next;
+                       consume_skb(skb);
+                       skb = nskb;
+                       *skbp = skb;
+               }
+
+               if (head) {
+                       head->len -= skb->len;
+                       head->data_len -= skb->len;
+                       head->truesize -= skb->truesize;
+               }
+
+               frags = skb_shinfo(skb)->frag_list;
+               if (frags) {
+                       int err;
+
+                       err = skb_unclone(skb, GFP_ATOMIC);
+                       if (unlikely(err))
+                               return ERR_PTR(err);
+
+                       last = normalize_frag_list(skb, &frags);
+                       if (IS_ERR(last))
+                               return last;
+
+                       skb_shinfo(skb)->frag_list = NULL;
+                       last->next = skb->next;
+                       skb->next = frags;
+               } else {
+                       last = skb;
+               }
+
+               skbp = &skb->next;
+       } while ((skb = skb->next));
+
+       return last;
+}
+
+/* Takes a linked list of skbs, which potentially contain frag_list
+ * (whose members in turn potentially contain frag_lists, etc.) and
+ * converts them into a single linear linked list.
+ */
+static int straighten_frag_list(struct sk_buff **skbp)
+{
+       struct sk_buff *err_skb;
+
+       err_skb = normalize_frag_list(NULL, skbp);
+       if (IS_ERR(err_skb))
+               return PTR_ERR(err_skb);
+
+       return 0;
+}
+
 static int coalesce_skb(struct sk_buff **headp)
 {
        struct sk_buff *frag, *head, *prev;
@@ -505,6 +519,34 @@ static int coalesce_skb(struct sk_buff **headp)
        head->next = NULL;
        return 0;
 }
+#else
+static int coalesce_skb(struct sk_buff **headp)
+{
+       struct sk_buff *frag, *head = *headp, *next;
+       int delta = FRAG_CB(head)->first.tot_len - skb_headlen(head);
+       int err;
+
+       if (unlikely(!head->next))
+               return 0;
+
+       err = pskb_expand_head(head, 0, delta, GFP_ATOMIC);
+       if (unlikely(err))
+               return err;
+
+       if (unlikely(!__pskb_pull_tail(head, head->data_len)))
+               BUG();
+
+       for (frag = head->next; frag; frag = next) {
+               skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
+               next = frag->next;
+               kfree_skb(frag);
+       }
+
+       head->next = NULL;
+       head->truesize = SKB_TRUESIZE(head->len);
+       return 0;
+}
+#endif
 
 static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
                            bool ipv4, bool tcp, int l4_offset)
@@ -517,6 +559,12 @@ static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 
 static int try_to_segment(struct sk_buff *skb)
 {
+#ifdef SKIP_ZERO_COPY
+       /* Since coalesce_skb() does not generate a frag-list, there is
+        * no need to linearize it here.
+        */
+       return 0;
+#else
        struct stthdr *stth = stt_hdr(skb);
        bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
        bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
@@ -524,16 +572,19 @@ static int try_to_segment(struct sk_buff *skb)
        int l4_offset = stth->l4_offset;
 
        return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
+#endif
 }
 
 static int segment_skb(struct sk_buff **headp, bool csum_partial,
                       bool ipv4, bool tcp, int l4_offset)
 {
+#ifndef SKIP_ZERO_COPY
        int err;
 
        err = coalesce_skb(headp);
        if (err)
                return err;
+#endif
 
        if (skb_shinfo(*headp)->frag_list)
                return __try_to_segment(*headp, csum_partial,
@@ -800,11 +851,9 @@ error:
        return ERR_PTR(err);
 }
 
-static int skb_list_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src,
-                        __be32 dst, __u8 tos, __u8 ttl, __be16 df)
+static void skb_list_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src,
+                         __be32 dst, __u8 tos, __u8 ttl, __be16 df)
 {
-       int len = 0;
-
        while (skb) {
                struct sk_buff *next = skb->next;
 
@@ -812,12 +861,11 @@ static int skb_list_xmit(struct rtable *rt, struct sk_buff *skb, __be32 src,
                        dst_clone(&rt->dst);
 
                skb->next = NULL;
-               len += iptunnel_xmit(NULL, rt, skb, src, dst, IPPROTO_TCP,
-                                    tos, ttl, df, false);
+               iptunnel_xmit(NULL, rt, skb, src, dst, IPPROTO_TCP,
+                             tos, ttl, df, false);
 
                skb = next;
        }
-       return len;
 }
 
 static u8 parse_ipv6_l4_proto(struct sk_buff *skb)
@@ -858,9 +906,9 @@ static u8 skb_get_l4_proto(struct sk_buff *skb, __be16 l3_proto)
 }
 
 static int stt_xmit_skb(struct sk_buff *skb, struct rtable *rt,
-                __be32 src, __be32 dst, __u8 tos,
-                __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
-                __be64 tun_id)
+                       __be32 src, __be32 dst, __u8 tos,
+                       __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port,
+                       __be64 tun_id)
 {
        struct ethhdr *eh = eth_hdr(skb);
        int ret = 0, min_headroom;
@@ -915,13 +963,13 @@ static int stt_xmit_skb(struct sk_buff *skb, struct rtable *rt,
                }
 
                /* Push IP header. */
-               ret += skb_list_xmit(rt, skb, src, dst, tos, ttl, df);
+               skb_list_xmit(rt, skb, src, dst, tos, ttl, df);
 
 next:
                skb = next_skb;
        }
 
-       return ret;
+       return 0;
 
 err_free_rt:
        ip_rt_put(rt);
@@ -929,6 +977,24 @@ err_free_rt:
        return ret;
 }
 
+static struct rtable *stt_get_rt(struct sk_buff *skb,
+                                struct net_device *dev,
+                                struct flowi4 *fl,
+                                const struct ip_tunnel_key *key)
+{
+       struct net *net = dev_net(dev);
+
+       /* Route lookup */
+       memset(fl, 0, sizeof(*fl));
+       fl->daddr = key->u.ipv4.dst;
+       fl->saddr = key->u.ipv4.src;
+       fl->flowi4_tos = RT_TOS(key->tos);
+       fl->flowi4_mark = skb->mark;
+       fl->flowi4_proto = IPPROTO_TCP;
+
+       return ip_route_output_key(net, fl);
+}
+
 netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
@@ -951,14 +1017,7 @@ netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
 
        tun_key = &tun_info->key;
 
-       /* Route lookup */
-       memset(&fl, 0, sizeof(fl));
-       fl.daddr = tun_key->u.ipv4.dst;
-       fl.saddr = tun_key->u.ipv4.src;
-       fl.flowi4_tos = RT_TOS(tun_key->tos);
-       fl.flowi4_mark = skb->mark;
-       fl.flowi4_proto = IPPROTO_TCP;
-       rt = ip_route_output_key(net, &fl);
+       rt = stt_get_rt(skb, dev, &fl, tun_key);
        if (IS_ERR(rt)) {
                err = PTR_ERR(rt);
                goto error;
@@ -968,10 +1027,9 @@ netdev_tx_t ovs_stt_xmit(struct sk_buff *skb)
        sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
        skb->ignore_df = 1;
 
-       err = stt_xmit_skb(skb, rt, fl.saddr, tun_key->u.ipv4.dst,
-                           tun_key->tos, tun_key->ttl,
-                           df, sport, dport, tun_key->tun_id);
-       iptunnel_xmit_stats(err, &dev->stats, (struct pcpu_sw_netstats __percpu *)dev->tstats);
+       stt_xmit_skb(skb, rt, fl.saddr, tun_key->u.ipv4.dst,
+                   tun_key->tos, tun_key->ttl,
+                   df, sport, dport, tun_key->tun_id);
        return NETDEV_TX_OK;
 error:
        kfree_skb(skb);
@@ -1049,16 +1107,58 @@ static struct pkt_frag *lookup_frag(struct net *net,
        return victim_frag;
 }
 
+#ifdef SKIP_ZERO_COPY
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                     int *delta, bool *headstolen)
+{
+       int err;
+
+       if (unlikely(to->next))
+               return -EINVAL;
+
+       if (unlikely(FRAG_CB(to)->offset))
+               return -EINVAL;
+
+       if (unlikely(skb_unclone(to, GFP_ATOMIC)))
+               return -ENOMEM;
+
+       if (skb_try_coalesce(to, from, headstolen, delta))
+               return 0;
+
+       *headstolen = false;
+       err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
+       if (unlikely(err))
+               return err;
+
+       if (unlikely(!__pskb_pull_tail(to, to->data_len)))
+               BUG();
+
+       skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
+
+       *delta = from->len;
+       to->truesize += from->len;
+       return 0;
+}
+#else
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+                     int *delta, bool *headstolen)
+{
+       *headstolen = false;
+       return -EINVAL;
+}
+#endif
+
 static struct sk_buff *reassemble(struct sk_buff *skb)
 {
        struct iphdr *iph = ip_hdr(skb);
        struct tcphdr *tcph = tcp_hdr(skb);
        u32 seq = ntohl(tcph->seq);
        struct stt_percpu *stt_percpu;
-       struct sk_buff *last_skb;
+       struct sk_buff *last_skb, *copied_skb = NULL;
        struct pkt_frag *frag;
        struct pkt_key key;
-       int tot_len;
+       int tot_len, delta = skb->truesize;
+       bool headstolen;
        u32 hash;
 
        tot_len = seq >> STT_SEQ_LEN_SHIFT;
@@ -1098,7 +1198,6 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                FRAG_CB(skb)->first.set_ecn_ce = false;
                list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
                stt_percpu->frag_mem_used += skb->truesize;
-
                skb = NULL;
                goto unlock;
        }
@@ -1109,8 +1208,13 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
        last_skb = FRAG_CB(frag->skbs)->first.last_skb;
        if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
                   FRAG_CB(skb)->offset)) {
-               last_skb->next = skb;
-               FRAG_CB(frag->skbs)->first.last_skb = skb;
+
+               if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
+                       copied_skb = skb;
+               } else {
+                       last_skb->next = skb;
+                       FRAG_CB(frag->skbs)->first.last_skb = skb;
+               }
        } else {
                struct sk_buff *prev = NULL, *next;
 
@@ -1149,8 +1253,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 
        FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
        FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
-       FRAG_CB(frag->skbs)->first.mem_used += skb->truesize;
-       stt_percpu->frag_mem_used += skb->truesize;
+       stt_percpu->frag_mem_used += delta;
+       FRAG_CB(frag->skbs)->first.mem_used += delta;
 
        if (FRAG_CB(frag->skbs)->first.tot_len ==
            FRAG_CB(frag->skbs)->first.rcvd_len) {
@@ -1169,6 +1273,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
                skb = NULL;
        }
 
+       if (copied_skb)
+               kfree_skb_partial(copied_skb, headstolen);
        goto unlock;
 
 unlock_free:
@@ -1302,12 +1408,12 @@ static void rcv_list(struct net_device *dev, struct sk_buff *skb,
        } while ((skb = next));
 }
 
-#ifndef HAVE_METADATA_DST
+#ifndef USE_UPSTREAM_TUNNEL
 static int __stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
 {
        struct metadata_dst tun_dst;
 
-       ovs_ip_tun_rx_dst(&tun_dst.u.tun_info, skb, TUNNEL_KEY | TUNNEL_CSUM,
+       ovs_ip_tun_rx_dst(&tun_dst, skb, TUNNEL_KEY | TUNNEL_CSUM,
                          get_unaligned(&stt_hdr(skb)->key), 0);
        tun_dst.u.tun_info.key.tp_src = tcp_hdr(skb)->source;
        tun_dst.u.tun_info.key.tp_dst = tcp_hdr(skb)->dest;
@@ -1342,6 +1448,7 @@ static void stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
        if (unlikely(!validate_checksum(skb)))
                goto drop;
 
+       __skb_pull(skb, sizeof(struct tcphdr));
        skb = reassemble(skb);
        if (!skb)
                return;
@@ -1351,7 +1458,8 @@ static void stt_rcv(struct stt_dev *stt_dev, struct sk_buff *skb)
 
        err = iptunnel_pull_header(skb,
                                   sizeof(struct stthdr) + STT_ETH_PAD,
-                                  htons(ETH_P_TEB));
+                                  htons(ETH_P_TEB),
+                                  !net_eq(stt_dev->net, dev_net(stt_dev->dev)));
        if (unlikely(err))
                goto drop;
 
@@ -1481,11 +1589,11 @@ static unsigned int nf_ip_hook(FIRST_PARAM, struct sk_buff *skb, LAST_PARAM)
 
        skb_set_transport_header(skb, ip_hdr_len);
 
-       stt_dev = stt_find_sock(dev_net(skb->dev), tcp_hdr(skb)->dest);
+       stt_dev = stt_find_up_dev(dev_net(skb->dev), tcp_hdr(skb)->dest);
        if (!stt_dev)
                return NF_ACCEPT;
 
-       __skb_pull(skb, ip_hdr_len + sizeof(struct tcphdr));
+       __skb_pull(skb, ip_hdr_len);
        stt_rcv(stt_dev, skb);
        return NF_STOLEN;
 }
@@ -1551,12 +1659,23 @@ static int stt_start(struct net *net)
         * rtnl-lock, which results in dead lock in stt-dev-create. Therefore
         * use this new API.
         */
+
+       if (sn->nf_hook_reg_done)
+               goto out;
+
        err = nf_register_net_hook(net, &nf_hook_ops);
+       if (!err)
+               sn->nf_hook_reg_done = true;
 #else
+       /* Register STT only on very first STT device addition. */
+       if (!list_empty(&nf_hook_ops.list))
+               goto out;
+
        err = nf_register_hook(&nf_hook_ops);
 #endif
        if (err)
                goto dec_n_tunnel;
+out:
        sn->n_tunnels++;
        return 0;
 
@@ -1584,12 +1703,6 @@ static void stt_cleanup(struct net *net)
        sn->n_tunnels--;
        if (sn->n_tunnels)
                goto out;
-#ifdef HAVE_NF_REGISTER_NET_HOOK
-       nf_unregister_net_hook(net, &nf_hook_ops);
-#else
-       nf_unregister_hook(&nf_hook_ops);
-#endif
-
 out:
        n_tunnels--;
        if (n_tunnels)
@@ -1615,7 +1728,7 @@ out:
 
 static netdev_tx_t stt_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-#ifdef HAVE_METADATA_DST
+#ifdef USE_UPSTREAM_TUNNEL
        return ovs_stt_xmit(skb);
 #else
        /* Drop All packets coming from networking stack. OVS-CB is
@@ -1646,6 +1759,7 @@ static int stt_open(struct net_device *dev)
 {
        struct stt_dev *stt = netdev_priv(dev);
        struct net *net = stt->net;
+       struct stt_net *sn = net_generic(net, stt_net_id);
        int err;
 
        err = stt_start(net);
@@ -1655,6 +1769,7 @@ static int stt_open(struct net_device *dev)
        err = tcp_sock_create4(net, stt->dst_port, &stt->sock);
        if (err)
                return err;
+       list_add_rcu(&stt->up_next, &sn->stt_up_list);
        return 0;
 }
 
@@ -1663,12 +1778,63 @@ static int stt_stop(struct net_device *dev)
        struct stt_dev *stt_dev = netdev_priv(dev);
        struct net *net = stt_dev->net;
 
+       list_del_rcu(&stt_dev->up_next);
+       synchronize_net();
        tcp_sock_release(stt_dev->sock);
        stt_dev->sock = NULL;
        stt_cleanup(net);
        return 0;
 }
 
+static int __stt_change_mtu(struct net_device *dev, int new_mtu, bool strict)
+{
+       int max_mtu = IP_MAX_MTU - STT_HEADER_LEN - sizeof(struct iphdr)
+                     - dev->hard_header_len;
+
+       if (new_mtu < 68)
+               return -EINVAL;
+
+       if (new_mtu > max_mtu) {
+               if (strict)
+                       return -EINVAL;
+
+               new_mtu = max_mtu;
+       }
+
+       dev->mtu = new_mtu;
+       return 0;
+}
+
+static int stt_change_mtu(struct net_device *dev, int new_mtu)
+{
+       return __stt_change_mtu(dev, new_mtu, true);
+}
+
+int ovs_stt_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+       struct ip_tunnel_info *info = skb_tunnel_info(skb);
+       struct stt_dev *stt_dev = netdev_priv(dev);
+       struct net *net = stt_dev->net;
+       __be16 dport = stt_dev->dst_port;
+       struct flowi4 fl4;
+       struct rtable *rt;
+
+       if (ip_tunnel_info_af(info) != AF_INET)
+               return -EINVAL;
+
+       rt = stt_get_rt(skb, dev, &fl4, &info->key);
+       if (IS_ERR(rt))
+               return PTR_ERR(rt);
+
+       ip_rt_put(rt);
+
+       info->key.u.ipv4.src = fl4.saddr;
+       info->key.tp_src = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
+       info->key.tp_dst = dport;
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ovs_stt_fill_metadata_dst);
+
 static const struct net_device_ops stt_netdev_ops = {
        .ndo_init               = stt_init,
        .ndo_uninit             = stt_uninit,
@@ -1676,9 +1842,14 @@ static const struct net_device_ops stt_netdev_ops = {
        .ndo_stop               = stt_stop,
        .ndo_start_xmit         = stt_dev_xmit,
        .ndo_get_stats64        = ip_tunnel_get_stats64,
-       .ndo_change_mtu         = eth_change_mtu,
+       .ndo_change_mtu         = stt_change_mtu,
        .ndo_validate_addr      = eth_validate_addr,
        .ndo_set_mac_address    = eth_mac_addr,
+#ifdef USE_UPSTREAM_TUNNEL
+#ifdef HAVE_NDO_FILL_METADATA_DST
+       .ndo_fill_metadata_dst  = stt_fill_metadata_dst,
+#endif
+#endif
 };
 
 static void stt_get_drvinfo(struct net_device *dev,
@@ -1717,7 +1888,7 @@ static void stt_setup(struct net_device *dev)
        dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
        dev->hw_features |= NETIF_F_GSO_SOFTWARE;
 
-#ifdef HAVE_METADATA_DST
+#ifdef USE_UPSTREAM_TUNNEL
        netif_keep_dst(dev);
 #endif
        dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
@@ -1768,11 +1939,15 @@ static int stt_configure(struct net *net, struct net_device *dev,
        if (find_dev(net, dst_port))
                return -EBUSY;
 
+       err = __stt_change_mtu(dev, IP_MAX_MTU, false);
+       if (err)
+               return err;
+
        err = register_netdevice(dev);
        if (err)
                return err;
 
-       list_add_rcu(&stt->next, &sn->stt_list);
+       list_add(&stt->next, &sn->stt_list);
        return 0;
 }
 
@@ -1791,7 +1966,7 @@ static void stt_dellink(struct net_device *dev, struct list_head *head)
 {
        struct stt_dev *stt = netdev_priv(dev);
 
-       list_del_rcu(&stt->next);
+       list_del(&stt->next);
        unregister_netdevice_queue(dev, head);
 }
 
@@ -1853,6 +2028,10 @@ static int stt_init_net(struct net *net)
        struct stt_net *sn = net_generic(net, stt_net_id);
 
        INIT_LIST_HEAD(&sn->stt_list);
+       INIT_LIST_HEAD(&sn->stt_up_list);
+#ifdef HAVE_NF_REGISTER_NET_HOOK
+       sn->nf_hook_reg_done = false;
+#endif
        return 0;
 }
 
@@ -1863,6 +2042,14 @@ static void stt_exit_net(struct net *net)
        struct net_device *dev, *aux;
        LIST_HEAD(list);
 
+#ifdef HAVE_NF_REGISTER_NET_HOOK
+       /* Ideally this should be done from stt_stop(), but on some kernels
+        * the nf-unreg operation needs the RTNL-lock, which can cause deadlock.
+        * So it is done from here. */
+       if (sn->nf_hook_reg_done)
+               nf_unregister_net_hook(net, &nf_hook_ops);
+#endif
+
        rtnl_lock();
 
        /* gather any stt devices that were moved into this ns */
@@ -1902,6 +2089,7 @@ int stt_init_module(void)
        if (rc)
                goto out2;
 
+       INIT_LIST_HEAD(&nf_hook_ops.list);
        pr_info("STT tunneling driver\n");
        return 0;
 out2:
@@ -1912,6 +2100,10 @@ out1:
 
 void stt_cleanup_module(void)
 {
+#ifndef HAVE_NF_REGISTER_NET_HOOK
+       if (!list_empty(&nf_hook_ops.list))
+               nf_unregister_hook(&nf_hook_ops);
+#endif
        rtnl_link_unregister(&stt_link_ops);
        unregister_pernet_subsys(&stt_net_ops);
 }