datapath: Include datapath actions with sampled-packet upcall to userspace.
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/version.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "vlan.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

int ovs_net_id __read_mostly;
EXPORT_SYMBOL_GPL(ovs_net_id);

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family, struct genl_multicast_group *grp,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, genl_info_net(info),
                    info->snd_portid, GROUP_ID(grp), info->nlhdr, GFP_KERNEL);
}

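/* Typical handler usage of the notify helpers (illustrative sketch only;
 * the real call sites are the flow and datapath command handlers below):
 *
 *	reply = ovs_flow_cmd_alloc_info(acts, sfid, info, false, ufid_flags);
 *	if (reply)	/* NULL when ovs_must_notify() says nobody listens *​/
 *		ovs_notify(&dp_flow_genl_family,
 *			   &ovs_dp_flow_multicast_group, reply, info);
 *
 * The alloc step skips building a reply when the request lacks NLM_F_ECHO
 * and the multicast group has no listeners.
 */
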
/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath or port, set operations on
 * vports, etc.) and writes to other state (flow table modifications,
 * setting miscellaneous datapath parameters, etc.) are protected by
 * ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */
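
/* Canonical write-side pattern under ovs_lock (illustrative sketch only;
 * 'new_acts' is a hypothetical replacement action list):
 *
 *	ovs_lock();
 *	old_acts = ovsl_dereference(flow->sf_acts);
 *	rcu_assign_pointer(flow->sf_acts, new_acts);
 *	ovs_unlock();
 *
 * Readers use rcu_read_lock() with rcu_dereference() instead.
 */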

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
EXPORT_SYMBOL_GPL(lockdep_ovsl_is_held);
#endif

static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock. */
static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
{
        struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);

        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        return vport->dp;
        }

        return NULL;
}

/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp;

        WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
        rcu_read_lock();
        dp = get_dp_rcu(net, dp_ifindex);
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = netdev_vport_priv(local)->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
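
/* Note: DP_VPORT_HASH_BUCKETS must be a power of two for the mask above
 * to select a valid bucket.
 */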

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

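/* Per-packet receive fast path: a flow-table hit executes the cached
 * actions in place; a miss hands the packet to userspace as an
 * OVS_PACKET_CMD_MISS upcall and is counted in n_missed.
 */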
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                         &n_mask_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;
                int error;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.portid = ovs_vport_find_upcall_portid(p, skb);
                error = ovs_dp_upcall(dp, skb, key, &upcall);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        ovs_execute_actions(dp, skb, sf_acts, key);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        u64_stats_update_end(&stats->syncp);
}

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

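/* GSO packets cannot be queued to userspace as-is: they are software
 * segmented first and each resulting segment is sent as its own upcall.
 * For UDP GSO, segments after the first are re-marked as later IP
 * fragments, since the extracted key only describes the first fragment.
 */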
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        struct ovs_skb_cb ovs_cb;
        int err;

        ovs_cb = *OVS_CB(skb);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        *OVS_CB(skb) = ovs_cb;
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for the first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb = segs;
        do {
                *OVS_CB(skb) = ovs_cb;
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info);
                if (err)
                        break;

        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(upcall_info->actions_len);

        return size;
}

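/* Builds and unicasts one OVS_PACKET_* message to 'upcall_info->portid'.
 * The message layout matches upcall_msg_size() above, roughly:
 *
 *	struct ovs_header              (dp_ifindex)
 *	OVS_PACKET_ATTR_KEY            (nested flow key)
 *	OVS_PACKET_ATTR_USERDATA       (optional, copied from the action)
 *	OVS_PACKET_ATTR_EGRESS_TUN_KEY (optional)
 *	OVS_PACKET_ATTR_ACTIONS        (optional)
 *	OVS_PACKET_ATTR_PACKET         (packet data, zerocopied when possible)
 */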
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        struct genl_info info = {
#ifdef HAVE_GENLMSG_NEW_UNICAST
                .dst_sk = ovs_dp_get_net(dp)->genl_sock,
#endif
                .snd_portid = upcall_info->portid,
        };
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                vlan_set_tci(nskb, 0);

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen);
        user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        /* Should not fail, since 'user_skb' was sized by upcall_msg_size();
         * check anyway rather than dereference a NULL header.
         */
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        BUG_ON(err);

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                err = ovs_nla_put_egress_tunnel_key(user_skb,
                                                    upcall_info->egress_tun_info);
                BUG_ON(err);
                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Only reserve room for the attribute header; the packet data is
         * added in skb_zerocopy().
         */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len);

        err = skb_zerocopy(user_skb, skb, skb->len, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;

                if (plen > 0)
                        memset(skb_put(user_skb, plen), 0, plen);
        }

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

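/* OVS_PACKET_CMD_EXECUTE handler: userspace supplies a packet together
 * with a flow key and an action list; the packet is executed as if it
 * had been received on the key's input port (falling back to OVSP_LOCAL).
 */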
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct ethhdr *eth;
        struct vport *input_vport;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have.
         */
        if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
                                             &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        OVS_CB(packet)->egress_tun_info = NULL;
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_packet_genl_ops,
        .n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};

static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
        }
}

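/* OVS_UFID_F_OMIT_* flags let userspace request slimmer flow replies:
 * when a unique flow ID (UFID) is in use, the key, mask and/or actions
 * can be omitted from the message.
 */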
static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->actions_len);

        return len
                + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
                                        GROUP_ID(&ovs_dp_flow_multicast_group)))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        BUG_ON(retval < 0);
        return skb;
}

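/* OVS_FLOW_CMD_NEW handler: inserts a new flow, or, if an identical or
 * overlapping flow already exists and NLM_F_CREATE/NLM_F_EXCL is absent,
 * replaces that flow's action list in place.
 */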
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_key key;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, so do it before
         * taking the lock.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        ovs_flow_mask_key(&new_flow->key, &key, &mask);

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &key, log);
        if (error)
                goto err_kfree_flow;

        /* Validate actions. */
        error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
                                     &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "-Wframe-larger-than=1024" warning. */
static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
                                                const struct sw_flow_key *key,
                                                const struct sw_flow_mask *mask,
                                                bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, mask);
        error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attribute not present in set flow.");
                goto error;
        }

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        ovs_match_init(&match, &key, &mask);
        error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
                                        log);
                if (IS_ERR(acts)) {
                        error = PTR_ERR(acts);
                        goto error;
                }

                /* We can allocate the reply before locking because we
                 * already have the actions.
                 */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Couldn't allocate the reply before locking without the
                 * actions, so build it now.
                 */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_NEW, false,
                                                ufid_flags);

                if (unlikely(IS_ERR(reply))) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        kfree(acts);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_NEW, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, NULL);
                err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
                                        &flow->id, info, false, ufid_flags);

        if (likely(reply)) {
                if (likely(!IS_ERR(reply))) {
                        rcu_read_lock();        /* To keep the RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        BUG_ON(err < 0);
                        ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
                } else {
                        genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
                                     GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));

                }
        }

        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

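/* Flow dump: iterates the table one flow per ovs_flow_tbl_dump_next()
 * call, using cb->args[0]/cb->args[1] as a (bucket, object) cursor so the
 * dump can resume across Netlink messages.
 */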
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
                            OVS_FLOW_ATTR_MAX, flow_policy);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true,
        .parallel_ops = true,
        .ops = dp_flow_genl_ops,
        .n_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
};

static size_t ovs_dp_cmd_msg_size(void)
{
        size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));

        msgsize += nla_total_size(IFNAMSIZ);
        msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
        msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
        msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */

        return msgsize;
}
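
/* The size computed above must cover every attribute emitted by
 * ovs_dp_cmd_fill_info() below, or reply allocation may run short.
 */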

/* Called with ovs_mutex. */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        struct ovs_dp_megaflow_stats dp_megaflow_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                   flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
                        &dp_stats))
                goto nla_put_failure;

        if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
                        sizeof(struct ovs_dp_megaflow_stats),
                        &dp_megaflow_stats))
                goto nla_put_failure;

        if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
                goto nla_put_failure;

        genlmsg_end(skb, ovs_header);
        return 0;

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
        return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}

/* Called with rcu_read_lock or ovs_mutex. */
static struct datapath *lookup_datapath(struct net *net,
                                        const struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
{
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return;

        WARN(dp->user_features, "Dropping previously announced user features\n");
        dp->user_features = 0;
}

static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
{
        if (a[OVS_DP_ATTR_USER_FEATURES])
                dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
}

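/* OVS_DP_CMD_NEW handler: allocates the datapath, its flow table and
 * per-CPU stats, then creates the OVSP_LOCAL internal vport that gives
 * the datapath its name and ifindex.
 */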
1507 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1508 {
1509         struct nlattr **a = info->attrs;
1510         struct vport_parms parms;
1511         struct sk_buff *reply;
1512         struct datapath *dp;
1513         struct vport *vport;
1514         struct ovs_net *ovs_net;
1515         int err, i;
1516
1517         err = -EINVAL;
1518         if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1519                 goto err;
1520
1521         reply = ovs_dp_cmd_alloc_info(info);
1522         if (!reply)
1523                 return -ENOMEM;
1524
1525         err = -ENOMEM;
1526         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1527         if (!dp)
1528                 goto err_free_reply;
1529
1530         ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));
1531
1532         /* Allocate the flow table. */
1533         err = ovs_flow_tbl_init(&dp->table);
1534         if (err)
1535                 goto err_free_dp;
1536
1537         dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1538         if (!dp->stats_percpu) {
1539                 err = -ENOMEM;
1540                 goto err_destroy_table;
1541         }
1542
1543         dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
1544                             GFP_KERNEL);
1545         if (!dp->ports) {
1546                 err = -ENOMEM;
1547                 goto err_destroy_percpu;
1548         }
1549
1550         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1551                 INIT_HLIST_HEAD(&dp->ports[i]);
1552
1553         /* Set up our datapath device. */
1554         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1555         parms.type = OVS_VPORT_TYPE_INTERNAL;
1556         parms.options = NULL;
1557         parms.dp = dp;
1558         parms.port_no = OVSP_LOCAL;
1559         parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1560
1561         ovs_dp_change(dp, a);
1562
1563         /* So far only local changes have been made; now we need the lock. */
1564         ovs_lock();
1565
1566         vport = new_vport(&parms);
1567         if (IS_ERR(vport)) {
1568                 err = PTR_ERR(vport);
1569                 if (err == -EBUSY)
1570                         err = -EEXIST;
1571
1572                 if (err == -EEXIST) {
1573                         /* An outdated user space instance that does not understand
1574                          * the concept of user_features has attempted to create a new
1575                          * datapath and is likely to reuse it. Drop all user features.
1576                          */
1577                         if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1578                                 ovs_dp_reset_user_features(skb, info);
1579                 }
1580
1581                 goto err_destroy_ports_array;
1582         }
1583
1584         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1585                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1586         BUG_ON(err < 0);
1587
1588         ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1589         list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1590         ovs_unlock();
1591
1592         ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
1593         return 0;
1594
1595 err_destroy_ports_array:
1596         ovs_unlock();
1597         kfree(dp->ports);
1598 err_destroy_percpu:
1599         free_percpu(dp->stats_percpu);
1600 err_destroy_table:
1601         ovs_flow_tbl_destroy(&dp->table);
1602 err_free_dp:
1603         release_net(ovs_dp_get_net(dp));
1604         kfree(dp);
1605 err_free_reply:
1606         kfree_skb(reply);
1607 err:
1608         return err;
1609 }
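
/* A minimal userspace sketch of the request this handler serves, using
 * libnl-3 (error handling omitted; the datapath name "dp0" and the
 * upcall portid 0 are illustrative values, not defaults):
 *
 *	#include <netlink/genl/genl.h>
 *	#include <netlink/genl/ctrl.h>
 *	#include <linux/openvswitch.h>
 *
 *	struct nl_sock *sk = nl_socket_alloc();
 *	genl_connect(sk);
 *	int family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 *
 *	struct nl_msg *msg = nlmsg_alloc();
 *	struct ovs_header *hdr =
 *		genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
 *			    sizeof(*hdr), NLM_F_ECHO, OVS_DP_CMD_NEW,
 *			    OVS_DATAPATH_VERSION);
 *	hdr->dp_ifindex = 0;	/* not consulted for OVS_DP_CMD_NEW */
 *	nla_put_string(msg, OVS_DP_ATTR_NAME, "dp0");
 *	nla_put_u32(msg, OVS_DP_ATTR_UPCALL_PID, 0);
 *	nl_send_auto(sk, msg);
 */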
1610
1611 /* Called with ovs_mutex. */
1612 static void __dp_destroy(struct datapath *dp)
1613 {
1614         int i;
1615
1616         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1617                 struct vport *vport;
1618                 struct hlist_node *n;
1619
1620                 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1621                         if (vport->port_no != OVSP_LOCAL)
1622                                 ovs_dp_detach_port(vport);
1623         }
1624
1625         list_del_rcu(&dp->list_node);
1626
1627         /* OVSP_LOCAL is the datapath's internal port. Make sure all other
1628          * ports in the datapath are destroyed before the datapath is freed.
1629          */
1630         ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1631
1632         /* Destroy the flow table and free the dp after an RCU grace period. */
1633         call_rcu(&dp->rcu, destroy_dp_rcu);
1634 }
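
/* call_rcu() defers the remaining teardown until every pre-existing RCU
 * read-side critical section has finished, so a reader that found the dp
 * under rcu_read_lock() can complete safely. The generic shape of the
 * pattern (struct foo and foo_free_rcu() are hypothetical):
 *
 *	static void foo_free_rcu(struct rcu_head *rcu)
 *	{
 *		struct foo *f = container_of(rcu, struct foo, rcu);
 *		kfree(f);
 *	}
 *
 *	list_del_rcu(&f->node);
 *	call_rcu(&f->rcu, foo_free_rcu);
 */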
1635
1636 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1637 {
1638         struct sk_buff *reply;
1639         struct datapath *dp;
1640         int err;
1641
1642         reply = ovs_dp_cmd_alloc_info(info);
1643         if (!reply)
1644                 return -ENOMEM;
1645
1646         ovs_lock();
1647         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1648         err = PTR_ERR(dp);
1649         if (IS_ERR(dp))
1650                 goto err_unlock_free;
1651
1652         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1653                                    info->snd_seq, 0, OVS_DP_CMD_DEL);
1654         BUG_ON(err < 0);
1655
1656         __dp_destroy(dp);
1657         ovs_unlock();
1658
1659         ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
1660         return 0;
1661
1662 err_unlock_free:
1663         ovs_unlock();
1664         kfree_skb(reply);
1665         return err;
1666 }
1667
1668 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1669 {
1670         struct sk_buff *reply;
1671         struct datapath *dp;
1672         int err;
1673
1674         reply = ovs_dp_cmd_alloc_info(info);
1675         if (!reply)
1676                 return -ENOMEM;
1677
1678         ovs_lock();
1679         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1680         err = PTR_ERR(dp);
1681         if (IS_ERR(dp))
1682                 goto err_unlock_free;
1683
1684         ovs_dp_change(dp, info->attrs);
1685
1686         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1687                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1688         BUG_ON(err < 0);
1689         ovs_unlock();
1690
1691         ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
1692         return 0;
1693
1694 err_unlock_free:
1695         ovs_unlock();
1696         kfree_skb(reply);
1697         return err;
1698 }
1699
1700 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1701 {
1702         struct sk_buff *reply;
1703         struct datapath *dp;
1704         int err;
1705
1706         reply = ovs_dp_cmd_alloc_info(info);
1707         if (!reply)
1708                 return -ENOMEM;
1709
1710         ovs_lock();
1711         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1712         if (IS_ERR(dp)) {
1713                 err = PTR_ERR(dp);
1714                 goto err_unlock_free;
1715         }
1716         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1717                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1718         BUG_ON(err < 0);
1719         ovs_unlock();
1720
1721         return genlmsg_reply(reply, info);
1722
1723 err_unlock_free:
1724         ovs_unlock();
1725         kfree_skb(reply);
1726         return err;
1727 }
1728
1729 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1730 {
1731         struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1732         struct datapath *dp;
1733         int skip = cb->args[0];
1734         int i = 0;
1735
1736         ovs_lock();
1737         list_for_each_entry(dp, &ovs_net->dps, list_node) {
1738                 if (i >= skip &&
1739                     ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1740                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1741                                          OVS_DP_CMD_NEW) < 0)
1742                         break;
1743                 i++;
1744         }
1745         ovs_unlock();
1746
1747         cb->args[0] = i;
1748
1749         return skb->len;
1750 }
1751
1752 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1753         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1754         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1755         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1756 };
1757
1758 static struct genl_ops dp_datapath_genl_ops[] = {
1759         { .cmd = OVS_DP_CMD_NEW,
1760           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1761           .policy = datapath_policy,
1762           .doit = ovs_dp_cmd_new
1763         },
1764         { .cmd = OVS_DP_CMD_DEL,
1765           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1766           .policy = datapath_policy,
1767           .doit = ovs_dp_cmd_del
1768         },
1769         { .cmd = OVS_DP_CMD_GET,
1770           .flags = 0,               /* OK for unprivileged users. */
1771           .policy = datapath_policy,
1772           .doit = ovs_dp_cmd_get,
1773           .dumpit = ovs_dp_cmd_dump
1774         },
1775         { .cmd = OVS_DP_CMD_SET,
1776           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1777           .policy = datapath_policy,
1778           .doit = ovs_dp_cmd_set,
1779         },
1780 };
1781
1782 static struct genl_family dp_datapath_genl_family = {
1783         .id = GENL_ID_GENERATE,
1784         .hdrsize = sizeof(struct ovs_header),
1785         .name = OVS_DATAPATH_FAMILY,
1786         .version = OVS_DATAPATH_VERSION,
1787         .maxattr = OVS_DP_ATTR_MAX,
1788         .netnsok = true,
1789         .parallel_ops = true,
1790         .ops = dp_datapath_genl_ops,
1791         .n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
1792         .mcgrps = &ovs_dp_datapath_multicast_group,
1793         .n_mcgrps = 1,
1794 };
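
/* With .id = GENL_ID_GENERATE the family id is assigned by the kernel at
 * registration time, so userspace must resolve it by name before issuing
 * any of the commands above (libnl-3 sketch):
 *
 *	int family = genl_ctrl_resolve(sk, OVS_DATAPATH_FAMILY);
 */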
1795
1796 /* Called with ovs_mutex or RCU read lock. */
1797 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1798                                    u32 portid, u32 seq, u32 flags, u8 cmd)
1799 {
1800         struct ovs_header *ovs_header;
1801         struct ovs_vport_stats vport_stats;
1802         int err;
1803
1804         ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
1805                                  flags, cmd);
1806         if (!ovs_header)
1807                 return -EMSGSIZE;
1808
1809         ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1810
1811         if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1812             nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1813             nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)))
1814                 goto nla_put_failure;
1815
1816         ovs_vport_get_stats(vport, &vport_stats);
1817         if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1818                     &vport_stats))
1819                 goto nla_put_failure;
1820
1821         if (ovs_vport_get_upcall_portids(vport, skb))
1822                 goto nla_put_failure;
1823
1824         err = ovs_vport_get_options(vport, skb);
1825         if (err == -EMSGSIZE)
1826                 goto error;
1827
1828         genlmsg_end(skb, ovs_header);
1829         return 0;
1830
1831 nla_put_failure:
1832         err = -EMSGSIZE;
1833 error:
1834         genlmsg_cancel(skb, ovs_header);
1835         return err;
1836 }
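
/* As with datapath replies, a vport reply carries, in emission order:
 * OVS_VPORT_ATTR_PORT_NO (u32), OVS_VPORT_ATTR_TYPE (u32),
 * OVS_VPORT_ATTR_NAME (string), OVS_VPORT_ATTR_STATS
 * (struct ovs_vport_stats), the upcall portid attribute(s) added by
 * ovs_vport_get_upcall_portids(), and an optional nested
 * OVS_VPORT_ATTR_OPTIONS.
 */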
1837
1838 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1839 {
1840         return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1841 }
1842
1843 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1844 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1845                                          u32 seq, u8 cmd)
1846 {
1847         struct sk_buff *skb;
1848         int retval;
1849
1850         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1851         if (!skb)
1852                 return ERR_PTR(-ENOMEM);
1853
1854         retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1855         BUG_ON(retval < 0);
1856
1857         return skb;
1858 }
1859
1860 /* Called with ovs_mutex or RCU read lock. */
1861 static struct vport *lookup_vport(struct net *net,
1862                                   const struct ovs_header *ovs_header,
1863                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1864 {
1865         struct datapath *dp;
1866         struct vport *vport;
1867
1868         if (a[OVS_VPORT_ATTR_NAME]) {
1869                 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1870                 if (!vport)
1871                         return ERR_PTR(-ENODEV);
1872                 if (ovs_header->dp_ifindex &&
1873                     ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1874                         return ERR_PTR(-ENODEV);
1875                 return vport;
1876         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1877                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1878
1879                 if (port_no >= DP_MAX_PORTS)
1880                         return ERR_PTR(-EFBIG);
1881
1882                 dp = get_dp(net, ovs_header->dp_ifindex);
1883                 if (!dp)
1884                         return ERR_PTR(-ENODEV);
1885
1886                 vport = ovs_vport_ovsl_rcu(dp, port_no);
1887                 if (!vport)
1888                         return ERR_PTR(-ENODEV);
1889                 return vport;
1890         } else
1891                 return ERR_PTR(-EINVAL);
1892 }
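
/* Note that the by-name branch resolves the vport globally within the
 * net namespace via ovs_vport_locate(), which is why a nonzero
 * dp_ifindex supplied by the caller is cross-checked against the
 * vport's actual datapath above.
 */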
1893
1894 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1895 {
1896         struct nlattr **a = info->attrs;
1897         struct ovs_header *ovs_header = info->userhdr;
1898         struct vport_parms parms;
1899         struct sk_buff *reply;
1900         struct vport *vport;
1901         struct datapath *dp;
1902         u32 port_no;
1903         int err;
1904
1905         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1906             !a[OVS_VPORT_ATTR_UPCALL_PID])
1907                 return -EINVAL;
1908
1909         port_no = a[OVS_VPORT_ATTR_PORT_NO]
1910                 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
1911         if (port_no >= DP_MAX_PORTS)
1912                 return -EFBIG;
1913
1914         reply = ovs_vport_cmd_alloc_info();
1915         if (!reply)
1916                 return -ENOMEM;
1917
1918         ovs_lock();
1919 restart:
1920         dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1921         err = -ENODEV;
1922         if (!dp)
1923                 goto exit_unlock_free;
1924
1925         if (port_no) {
1926                 vport = ovs_vport_ovsl(dp, port_no);
1927                 err = -EBUSY;
1928                 if (vport)
1929                         goto exit_unlock_free;
1930         } else {
1931                 for (port_no = 1; ; port_no++) {
1932                         if (port_no >= DP_MAX_PORTS) {
1933                                 err = -EFBIG;
1934                                 goto exit_unlock_free;
1935                         }
1936                         vport = ovs_vport_ovsl(dp, port_no);
1937                         if (!vport)
1938                                 break;
1939                 }
1940         }
1941
1942         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1943         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1944         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1945         parms.dp = dp;
1946         parms.port_no = port_no;
1947         parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
1948
1949         vport = new_vport(&parms);
1950         err = PTR_ERR(vport);
1951         if (IS_ERR(vport)) {
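		/* new_vport() can return -EAGAIN when ovs_vport_add() had to
		 * request a vport module for an unknown type; retry the
		 * datapath lookup and port scan in that case.
		 */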
1952                 if (err == -EAGAIN)
1953                         goto restart;
1954                 goto exit_unlock_free;
1955         }
1956
1957         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
1958                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
1959         BUG_ON(err < 0);
1960         ovs_unlock();
1961
1962         ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
1963         return 0;
1964
1965 exit_unlock_free:
1966         ovs_unlock();
1967         kfree_skb(reply);
1968         return err;
1969 }
1970
1971 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1972 {
1973         struct nlattr **a = info->attrs;
1974         struct sk_buff *reply;
1975         struct vport *vport;
1976         int err;
1977
1978         reply = ovs_vport_cmd_alloc_info();
1979         if (!reply)
1980                 return -ENOMEM;
1981
1982         ovs_lock();
1983         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1984         err = PTR_ERR(vport);
1985         if (IS_ERR(vport))
1986                 goto exit_unlock_free;
1987
1988         if (a[OVS_VPORT_ATTR_TYPE] &&
1989             nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
1990                 err = -EINVAL;
1991                 goto exit_unlock_free;
1992         }
1993
1994         if (a[OVS_VPORT_ATTR_OPTIONS]) {
1995                 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1996                 if (err)
1997                         goto exit_unlock_free;
1998         }
1999
2000         if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2001                 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2002
2003                 err = ovs_vport_set_upcall_portids(vport, ids);
2004                 if (err)
2005                         goto exit_unlock_free;
2006         }
2007
2008         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2009                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2010         BUG_ON(err < 0);
2011         ovs_unlock();
2012
2013         ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
2014         return 0;
2015
2016 exit_unlock_free:
2017         ovs_unlock();
2018         kfree_skb(reply);
2019         return err;
2020 }
2021
2022 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2023 {
2024         struct nlattr **a = info->attrs;
2025         struct sk_buff *reply;
2026         struct vport *vport;
2027         int err;
2028
2029         reply = ovs_vport_cmd_alloc_info();
2030         if (!reply)
2031                 return -ENOMEM;
2032
2033         ovs_lock();
2034         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2035         err = PTR_ERR(vport);
2036         if (IS_ERR(vport))
2037                 goto exit_unlock_free;
2038
2039         if (vport->port_no == OVSP_LOCAL) {
2040                 err = -EINVAL;
2041                 goto exit_unlock_free;
2042         }
2043
2044         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2045                                       info->snd_seq, 0, OVS_VPORT_CMD_DEL);
2046         BUG_ON(err < 0);
2047         ovs_dp_detach_port(vport);
2048         ovs_unlock();
2049
2050         ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
2051         return 0;
2052
2053 exit_unlock_free:
2054         ovs_unlock();
2055         kfree_skb(reply);
2056         return err;
2057 }
2058
2059 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2060 {
2061         struct nlattr **a = info->attrs;
2062         struct ovs_header *ovs_header = info->userhdr;
2063         struct sk_buff *reply;
2064         struct vport *vport;
2065         int err;
2066
2067         reply = ovs_vport_cmd_alloc_info();
2068         if (!reply)
2069                 return -ENOMEM;
2070
2071         rcu_read_lock();
2072         vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2073         err = PTR_ERR(vport);
2074         if (IS_ERR(vport))
2075                 goto exit_unlock_free;
2076         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2077                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2078         BUG_ON(err < 0);
2079         rcu_read_unlock();
2080
2081         return genlmsg_reply(reply, info);
2082
2083 exit_unlock_free:
2084         rcu_read_unlock();
2085         kfree_skb(reply);
2086         return err;
2087 }
2088
2089 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2090 {
2091         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2092         struct datapath *dp;
2093         int bucket = cb->args[0], skip = cb->args[1];
2094         int i, j = 0;
2095
2096         rcu_read_lock();
2097         dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2098         if (!dp) {
2099                 rcu_read_unlock();
2100                 return -ENODEV;
2101         }
2102         for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2103                 struct vport *vport;
2104
2105                 j = 0;
2106                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2107                         if (j >= skip &&
2108                             ovs_vport_cmd_fill_info(vport, skb,
2109                                                     NETLINK_CB(cb->skb).portid,
2110                                                     cb->nlh->nlmsg_seq,
2111                                                     NLM_F_MULTI,
2112                                                     OVS_VPORT_CMD_NEW) < 0)
2113                                 goto out;
2114
2115                         j++;
2116                 }
2117                 skip = 0;
2118         }
2119 out:
2120         rcu_read_unlock();
2121
2122         cb->args[0] = i;
2123         cb->args[1] = j;
2124
2125         return skb->len;
2126 }
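
/* Netlink dumps are resumable: cb->args[] persists between invocations,
 * so the (bucket, skip) pair saved above records how far the previous
 * pass got before the skb filled up. Illustrative restart sequence:
 *
 *	first call:	bucket = 0, skip = 0	-> stops at (i, j)
 *	next call:	bucket = i, skip = j	-> resumes mid-bucket
 */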
2127
2128 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2129         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2130         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2131         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2132         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2133         [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
2134         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2135 };
2136
2137 static struct genl_ops dp_vport_genl_ops[] = {
2138         { .cmd = OVS_VPORT_CMD_NEW,
2139           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2140           .policy = vport_policy,
2141           .doit = ovs_vport_cmd_new
2142         },
2143         { .cmd = OVS_VPORT_CMD_DEL,
2144           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2145           .policy = vport_policy,
2146           .doit = ovs_vport_cmd_del
2147         },
2148         { .cmd = OVS_VPORT_CMD_GET,
2149           .flags = 0,               /* OK for unprivileged users. */
2150           .policy = vport_policy,
2151           .doit = ovs_vport_cmd_get,
2152           .dumpit = ovs_vport_cmd_dump
2153         },
2154         { .cmd = OVS_VPORT_CMD_SET,
2155           .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2156           .policy = vport_policy,
2157           .doit = ovs_vport_cmd_set,
2158         },
2159 };
2160
2161 struct genl_family dp_vport_genl_family = {
2162         .id = GENL_ID_GENERATE,
2163         .hdrsize = sizeof(struct ovs_header),
2164         .name = OVS_VPORT_FAMILY,
2165         .version = OVS_VPORT_VERSION,
2166         .maxattr = OVS_VPORT_ATTR_MAX,
2167         .netnsok = true,
2168         .parallel_ops = true,
2169         .ops = dp_vport_genl_ops,
2170         .n_ops = ARRAY_SIZE(dp_vport_genl_ops),
2171         .mcgrps = &ovs_dp_vport_multicast_group,
2172         .n_mcgrps = 1,
2173 };
2174
2175 static struct genl_family *dp_genl_families[] = {
2176         &dp_datapath_genl_family,
2177         &dp_vport_genl_family,
2178         &dp_flow_genl_family,
2179         &dp_packet_genl_family,
2180 };
2181
2182 static void dp_unregister_genl(int n_families)
2183 {
2184         int i;
2185
2186         for (i = 0; i < n_families; i++)
2187                 genl_unregister_family(dp_genl_families[i]);
2188 }
2189
2190 static int dp_register_genl(void)
2191 {
2192         int err;
2193         int i;
2194
2195         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2196
2197                 err = genl_register_family(dp_genl_families[i]);
2198                 if (err)
2199                         goto error;
2200         }
2201
2202         return 0;
2203
2204 error:
2205         dp_unregister_genl(i);
2206         return err;
2207 }
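
/* Note the partial unwind: if registration fails at index i, only the i
 * families already registered are removed again by dp_unregister_genl(i).
 */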
2208
2209 static int __net_init ovs_init_net(struct net *net)
2210 {
2211         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2212
2213         INIT_LIST_HEAD(&ovs_net->dps);
2214         INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2215         return 0;
2216 }
2217
2218 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2219                                             struct list_head *head)
2220 {
2221         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2222         struct datapath *dp;
2223
2224         list_for_each_entry(dp, &ovs_net->dps, list_node) {
2225                 int i;
2226
2227                 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2228                         struct vport *vport;
2229
2230                         hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2231                                 struct netdev_vport *netdev_vport;
2232
2233                                 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2234                                         continue;
2235
2236                                 netdev_vport = netdev_vport_priv(vport);
2237                                 if (dev_net(netdev_vport->dev) == dnet)
2238                                         list_add(&vport->detach_list, head);
2239                         }
2240                 }
2241         }
2242 }
2243
2244 static void __net_exit ovs_exit_net(struct net *dnet)
2245 {
2246         struct datapath *dp, *dp_next;
2247         struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2248         struct vport *vport, *vport_next;
2249         struct net *net;
2250         LIST_HEAD(head);
2251
2252         ovs_lock();
2253         list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2254                 __dp_destroy(dp);
2255
2256         rtnl_lock();
2257         for_each_net(net)
2258                 list_vports_from_net(net, dnet, &head);
2259         rtnl_unlock();
2260
2261         /* Detach all vports collected from the given namespace. */
2262         list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2263                 list_del(&vport->detach_list);
2264                 ovs_dp_detach_port(vport);
2265         }
2266
2267         ovs_unlock();
2268
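	/* ovs_dp_notify_wq() itself takes ovs_lock, so wait for pending
	 * notify work only after the lock has been released above.
	 */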
2269         cancel_work_sync(&ovs_net->dp_notify_work);
2270 }
2271
2272 static struct pernet_operations ovs_net_ops = {
2273         .init = ovs_init_net,
2274         .exit = ovs_exit_net,
2275         .id   = &ovs_net_id,
2276         .size = sizeof(struct ovs_net),
2277 };
2278
2279 DEFINE_COMPAT_PNET_REG_FUNC(device);
2280
2281 static int __init dp_init(void)
2282 {
2283         int err;
2284
2285         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2286
2287         pr_info("Open vSwitch switching datapath %s\n", VERSION);
2288
2289         err = action_fifos_init();
2290         if (err)
2291                 goto error;
2292
2293         err = ovs_internal_dev_rtnl_link_register();
2294         if (err)
2295                 goto error_action_fifos_exit;
2296
2297         err = ovs_flow_init();
2298         if (err)
2299                 goto error_unreg_rtnl_link;
2300
2301         err = ovs_vport_init();
2302         if (err)
2303                 goto error_flow_exit;
2304
2305         err = register_pernet_device(&ovs_net_ops);
2306         if (err)
2307                 goto error_vport_exit;
2308
2309         err = register_netdevice_notifier(&ovs_dp_device_notifier);
2310         if (err)
2311                 goto error_netns_exit;
2312
2313         err = ovs_netdev_init();
2314         if (err)
2315                 goto error_unreg_notifier;
2316
2317         err = dp_register_genl();
2318         if (err < 0)
2319                 goto error_unreg_netdev;
2320
2321         return 0;
2322
2323 error_unreg_netdev:
2324         ovs_netdev_exit();
2325 error_unreg_notifier:
2326         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2327 error_netns_exit:
2328         unregister_pernet_device(&ovs_net_ops);
2329 error_vport_exit:
2330         ovs_vport_exit();
2331 error_flow_exit:
2332         ovs_flow_exit();
2333 error_unreg_rtnl_link:
2334         ovs_internal_dev_rtnl_link_unregister();
2335 error_action_fifos_exit:
2336         action_fifos_exit();
2337 error:
2338         return err;
2339 }
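
/* Typical lifecycle once the module is built (illustrative commands; the
 * exact tools shipped depend on the Open vSwitch installation):
 *
 *	# modprobe openvswitch
 *	# ovs-dpctl add-dp dp0
 *	# ovs-dpctl show
 *	# ovs-dpctl del-dp dp0
 *	# rmmod openvswitch
 */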
2340
2341 static void dp_cleanup(void)
2342 {
2343         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2344         ovs_netdev_exit();
2345         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2346         unregister_pernet_device(&ovs_net_ops);
2347         rcu_barrier();
2348         ovs_vport_exit();
2349         ovs_flow_exit();
2350         ovs_internal_dev_rtnl_link_unregister();
2351         action_fifos_exit();
2352 }
2353
2354 module_init(dp_init);
2355 module_exit(dp_cleanup);
2356
2357 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2358 MODULE_LICENSE("GPL");
2359 MODULE_VERSION(VERSION);