datapath: Sync OVS recursive loop counter with upstream.
index 117bdae..3f2ba4c 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -78,9 +78,7 @@ struct action_fifo {
 };
 
 static struct action_fifo __percpu *action_fifos;
-#define EXEC_ACTIONS_LEVEL_LIMIT 4   /* limit used to detect packet
-                                     * looping by the network stack
-                                     */
+
 static DEFINE_PER_CPU(int, exec_actions_level);
 
 static void action_fifo_init(struct action_fifo *fifo)
@@ -139,11 +137,23 @@ static bool is_flow_key_valid(const struct sw_flow_key *key)
        return !!key->eth.type;
 }
 
+static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr,
+                            __be16 ethertype)
+{
+       if (skb->ip_summed == CHECKSUM_COMPLETE) {
+               __be16 diff[] = { ~(hdr->h_proto), ethertype };
+
+               skb->csum = ~csum_partial((char *)diff, sizeof(diff),
+                                       ~skb->csum);
+       }
+
+       hdr->h_proto = ethertype;
+}
+
 static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
                     const struct ovs_action_push_mpls *mpls)
 {
        __be32 *new_mpls_lse;
-       struct ethhdr *hdr;
 
        /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
        if (skb->encapsulation)
@@ -160,12 +170,9 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
        new_mpls_lse = (__be32 *)skb_mpls_header(skb);
        *new_mpls_lse = mpls->mpls_lse;
 
-       if (skb->ip_summed == CHECKSUM_COMPLETE)
-               skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
-                                                            MPLS_HLEN, 0));
+       skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN);
 
-       hdr = eth_hdr(skb);
-       hdr->h_proto = mpls->mpls_ethertype;
+       update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype);
        if (!ovs_skb_get_inner_protocol(skb))
                ovs_skb_set_inner_protocol(skb, skb->protocol);
        skb->protocol = mpls->mpls_ethertype;
@@ -196,7 +203,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
         * field correctly in the presence of VLAN tags.
         */
        hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);
-       hdr->h_proto = ethertype;
+       update_ethertype(skb, hdr, ethertype);
        if (eth_p_mpls(skb->protocol))
                skb->protocol = ethertype;
 
@@ -281,7 +288,7 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
        ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
                               mask->eth_dst);
 
-       ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
+       skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
 
        ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
        ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
@@ -624,7 +631,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 
 static int ovs_vport_output(OVS_VPORT_OUTPUT_PARAMS)
 {
-       struct ovs_frag_data *data = get_pcpu_ptr(ovs_frag_data_storage);
+       struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
        struct vport *vport = data->vport;
 
        if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -641,7 +648,7 @@ static int ovs_vport_output(OVS_VPORT_OUTPUT_PARAMS)
        /* Reconstruct the MAC header.  */
        skb_push(skb, data->l2_len);
        memcpy(skb->data, &data->l2_data, data->l2_len);
-       ovs_skb_postpush_rcsum(skb, skb->data, data->l2_len);
+       skb_postpush_rcsum(skb, skb->data, data->l2_len);
        skb_reset_mac_header(skb);
 
        ovs_vport_send(vport, skb);
@@ -667,7 +674,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
        unsigned int hlen = skb_network_offset(skb);
        struct ovs_frag_data *data;
 
-       data = get_pcpu_ptr(ovs_frag_data_storage);
+       data = this_cpu_ptr(&ovs_frag_data_storage);
        data->dst = (unsigned long) skb_dst(skb);
        data->vport = vport;
        data->cb = *OVS_GSO_CB(skb);
@@ -681,8 +688,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
        skb_pull(skb, hlen);
 }
 
-static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
-                        __be16 ethertype)
+static void ovs_fragment(struct net *net, struct vport *vport,
+                        struct sk_buff *skb, u16 mru, __be16 ethertype)
 {
        if (skb_network_offset(skb) > MAX_L2_LEN) {
                OVS_NLERR(1, "L2 header too long to fragment");
@@ -702,7 +709,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
                skb_dst_set_noref(skb, &ovs_dst);
                IPCB(skb)->frag_max_size = mru;
 
-               ip_do_fragment(skb->sk, skb, ovs_vport_output);
+               ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
                refdst_drop(orig_dst);
        } else if (ethertype == htons(ETH_P_IPV6)) {
                const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
@@ -744,10 +751,19 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
 
        if (likely(vport)) {
                u16 mru = OVS_CB(skb)->mru;
+               u32 cutlen = OVS_CB(skb)->cutlen;
+
+               if (unlikely(cutlen > 0)) {
+                       if (skb->len - cutlen > ETH_HLEN)
+                               pskb_trim(skb, skb->len - cutlen);
+                       else
+                               pskb_trim(skb, ETH_HLEN);
+               }
 
                if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
                        ovs_vport_send(vport, skb);
                } else if (mru <= vport->dev->mtu) {
+                       struct net *net = ovs_dp_get_net(dp);
                        __be16 ethertype = key->eth.type;
 
                        if (!is_flow_key_valid(key)) {
@@ -757,7 +773,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
                                        ethertype = vlan_get_protocol(skb);
                        }
 
-                       ovs_fragment(vport, skb, mru, ethertype);
+                       ovs_fragment(net, vport, skb, mru, ethertype);
                } else {
                        OVS_NLERR(true, "Cannot fragment IP frames");
                        kfree_skb(skb);
@@ -766,19 +782,21 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
                kfree_skb(skb);
        }
 }
+
 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                            struct sw_flow_key *key, const struct nlattr *attr,
-                           const struct nlattr *actions, int actions_len)
+                           const struct nlattr *actions, int actions_len,
+                           uint32_t cutlen)
 {
-       struct ip_tunnel_info info;
        struct dp_upcall_info upcall;
        const struct nlattr *a;
-       int rem;
+       int rem, err;
 
        memset(&upcall, 0, sizeof(upcall));
        upcall.cmd = OVS_PACKET_CMD_ACTION;
        upcall.mru = OVS_CB(skb)->mru;
 
+       SKB_INIT_FILL_METADATA_DST(skb);
        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
                 a = nla_next(a, &rem)) {
                switch (nla_type(a)) {
@@ -798,11 +816,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                        if (vport) {
                                int err;
 
-                               upcall.egress_tun_info = &info;
-                               err = ovs_vport_get_egress_tun_info(vport, skb,
-                                                                   &upcall);
-                               if (err)
-                                       upcall.egress_tun_info = NULL;
+                               err = dev_fill_metadata_dst(vport->dev, skb);
+                               if (!err)
+                                       upcall.egress_tun_info = skb_tunnel_info(skb);
                        }
 
                        break;
@@ -818,7 +834,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
                } /* End of switch. */
        }
 
-       return ovs_dp_upcall(dp, skb, key, &upcall);
+       err = ovs_dp_upcall(dp, skb, key, &upcall, cutlen);
+       SKB_RESTORE_FILL_METADATA_DST(skb);
+       return err;
 }
 
 static int sample(struct datapath *dp, struct sk_buff *skb,
@@ -828,6 +846,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
        const struct nlattr *acts_list = NULL;
        const struct nlattr *a;
        int rem;
+       u32 cutlen = 0;
 
        for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
                 a = nla_next(a, &rem)) {
@@ -854,13 +873,24 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
                return 0;
 
        /* The only known usage of sample action is having a single user-space
+        * action, or having a truncate action followed by a single user-space
         * action. Treat this usage as a special case.
         * The output_userspace() should clone the skb to be sent to the
         * user space. This skb will be consumed by its caller.
         */
+       if (unlikely(nla_type(a) == OVS_ACTION_ATTR_TRUNC)) {
+               struct ovs_action_trunc *trunc = nla_data(a);
+
+               if (skb->len > trunc->max_len)
+                       cutlen = skb->len - trunc->max_len;
+
+               a = nla_next(a, &rem);
+       }
+
        if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
                   nla_is_last(a, rem)))
-               return output_userspace(dp, skb, key, a, actions, actions_len);
+               return output_userspace(dp, skb, key, a, actions,
+                                       actions_len, cutlen);
 
        skb = skb_clone(skb, GFP_ATOMIC);
        if (!skb)
@@ -1047,6 +1077,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        if (out_skb)
                                do_output(dp, out_skb, prev_port, key);
 
+                       OVS_CB(skb)->cutlen = 0;
                        prev_port = -1;
                }
 
@@ -1055,8 +1086,18 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        prev_port = nla_get_u32(a);
                        break;
 
+               case OVS_ACTION_ATTR_TRUNC: {
+                       struct ovs_action_trunc *trunc = nla_data(a);
+
+                       if (skb->len > trunc->max_len)
+                               OVS_CB(skb)->cutlen = skb->len - trunc->max_len;
+                       break;
+               }
+
                case OVS_ACTION_ATTR_USERSPACE:
-                       output_userspace(dp, skb, key, a, attr, len);
+                       output_userspace(dp, skb, key, a, attr,
+                                                    len, OVS_CB(skb)->cutlen);
+                       OVS_CB(skb)->cutlen = 0;
                        break;
 
                case OVS_ACTION_ATTR_HASH:
@@ -1164,31 +1205,26 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct sw_flow_actions *acts,
                        struct sw_flow_key *key)
 {
-       int level = this_cpu_read(exec_actions_level);
-       int err;
-
-       if (unlikely(level >= EXEC_ACTIONS_LEVEL_LIMIT)) {
-               if (net_ratelimit())
-                       pr_warn("%s: packet loop detected, dropping.\n",
-                               ovs_dp_name(dp));
+       static const int ovs_recursion_limit = 4;
+       int err, level;
 
+       level = __this_cpu_inc_return(exec_actions_level);
+       if (unlikely(level > ovs_recursion_limit)) {
+               net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
+                                    ovs_dp_name(dp));
                kfree_skb(skb);
-               return -ELOOP;
+               err = -ENETDOWN;
+               goto out;
        }
 
-       this_cpu_inc(exec_actions_level);
        err = do_execute_actions(dp, skb, key,
                                 acts->actions, acts->actions_len);
 
-       if (!level)
+       if (level == 1)
                process_deferred_actions(dp);
 
-       this_cpu_dec(exec_actions_level);
-
-       /* This return status currently does not reflect the errors
-        * encounted during deferred actions execution. Probably needs to
-        * be fixed in the future.
-        */
+out:
+       __this_cpu_dec(exec_actions_level);
        return err;
 }