Merge remote-tracking branch 'origin/master' into ovn3
[cascardo/ovs.git] / datapath / datapath.c
1 /*
2  * Copyright (c) 2007-2014 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/init.h>
22 #include <linux/module.h>
23 #include <linux/if_arp.h>
24 #include <linux/if_vlan.h>
25 #include <linux/in.h>
26 #include <linux/ip.h>
27 #include <linux/jhash.h>
28 #include <linux/delay.h>
29 #include <linux/time.h>
30 #include <linux/etherdevice.h>
31 #include <linux/genetlink.h>
32 #include <linux/kernel.h>
33 #include <linux/kthread.h>
34 #include <linux/mutex.h>
35 #include <linux/percpu.h>
36 #include <linux/rcupdate.h>
37 #include <linux/tcp.h>
38 #include <linux/udp.h>
39 #include <linux/version.h>
40 #include <linux/ethtool.h>
41 #include <linux/wait.h>
42 #include <asm/div64.h>
43 #include <linux/highmem.h>
44 #include <linux/netfilter_bridge.h>
45 #include <linux/netfilter_ipv4.h>
46 #include <linux/inetdevice.h>
47 #include <linux/list.h>
48 #include <linux/openvswitch.h>
49 #include <linux/rculist.h>
50 #include <linux/dmi.h>
51 #include <net/genetlink.h>
52 #include <net/net_namespace.h>
53 #include <net/netns/generic.h>
54
55 #include "datapath.h"
56 #include "flow.h"
57 #include "flow_table.h"
58 #include "flow_netlink.h"
59 #include "vlan.h"
60 #include "vport-internal_dev.h"
61 #include "vport-netdev.h"
62
/* Per-network-namespace index for OVS private data (see netns generic API). */
int ovs_net_id __read_mostly;

/* Defined later in this file; forward-declared so earlier helpers can
 * reference the generic netlink families.
 */
static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

/* Multicast groups used to broadcast flow/datapath/vport change
 * notifications to interested userspace listeners.
 */
static struct genl_multicast_group ovs_dp_flow_multicast_group = {
	.name = OVS_FLOW_MCGROUP
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
	.name = OVS_DATAPATH_MCGROUP
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
	.name = OVS_VPORT_MCGROUP
};
82
83 /* Check if need to build a reply message.
84  * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
85  */
86 static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
87                             unsigned int group)
88 {
89         return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
90                genl_has_listeners(family, genl_info_net(info), group);
91 }
92
/* Deliver 'skb' as a notification for multicast group 'grp', in the
 * requester's net namespace, using the requester's portid and request
 * header from 'info' (genl_notify handles the NLM_F_ECHO unicast).
 */
static void ovs_notify(struct genl_family *family, struct genl_multicast_group *grp,
		       struct sk_buff *skb, struct genl_info *info)
{
	genl_notify(family, skb, genl_info_net(info),
		    info->snd_portid, GROUP_ID(grp), info->nlhdr, GFP_KERNEL);
}
99
100 /**
101  * DOC: Locking:
102  *
103  * All writes e.g. Writes to device state (add/remove datapath, port, set
104  * operations on vports, etc.), Writes to other state (flow table
105  * modifications, set miscellaneous datapath parameters, etc.) are protected
106  * by ovs_lock.
107  *
108  * Reads are protected by RCU.
109  *
110  * There are a few special cases (mostly stats) that have their own
111  * synchronization but they nest under all of above and don't interact with
112  * each other.
113  *
114  * The RTNL lock nests inside ovs_mutex.
115  */
116
/* Global mutex serializing all datapath modifications (see DOC above). */
static DEFINE_MUTEX(ovs_mutex);

/* Acquire ovs_mutex; required for all writes to datapath state. */
void ovs_lock(void)
{
	mutex_lock(&ovs_mutex);
}
123
/* Release ovs_mutex, taken by ovs_lock(). */
void ovs_unlock(void)
{
	mutex_unlock(&ovs_mutex);
}
128
#ifdef CONFIG_LOCKDEP
/* Lockdep assertion hook for ovs_mutex.  When lockdep has shut itself
 * down (debug_locks == 0) its answer is meaningless, so report "held"
 * to keep ASSERT_OVSL-style checks from firing spuriously.
 */
int lockdep_ovsl_is_held(void)
{
	return debug_locks ? lockdep_is_held(&ovs_mutex) : 1;
}
#endif
138
/* Forward declarations for the two upcall delivery paths: GSO packets
 * are segmented first, everything else is queued directly.
 */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
			     const struct sw_flow_key *,
			     const struct dp_upcall_info *);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
				  const struct sw_flow_key *,
				  const struct dp_upcall_info *);
145
146 /* Must be called with rcu_read_lock. */
147 static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex)
148 {
149         struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex);
150
151         if (dev) {
152                 struct vport *vport = ovs_internal_dev_get_vport(dev);
153                 if (vport)
154                         return vport->dp;
155         }
156
157         return NULL;
158 }
159
/* The caller must hold either ovs_mutex or rcu_read_lock to keep the
 * returned dp pointer valid.
 */
static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
{
	struct datapath *dp;

	/* Catch callers holding neither lock; their dp would be unprotected. */
	WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held());
	/* The local rcu_read_lock only covers the lookup itself; the
	 * caller's own lock keeps 'dp' alive after we drop it.
	 */
	rcu_read_lock();
	dp = get_dp_rcu(net, dp_ifindex);
	rcu_read_unlock();

	return dp;
}
174
175 /* Must be called with rcu_read_lock or ovs_mutex. */
176 const char *ovs_dp_name(const struct datapath *dp)
177 {
178         struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
179         return vport->ops->get_name(vport);
180 }
181
182 static int get_dpifindex(const struct datapath *dp)
183 {
184         struct vport *local;
185         int ifindex;
186
187         rcu_read_lock();
188
189         local = ovs_vport_rcu(dp, OVSP_LOCAL);
190         if (local)
191                 ifindex = netdev_vport_priv(local)->dev->ifindex;
192         else
193                 ifindex = 0;
194
195         rcu_read_unlock();
196
197         return ifindex;
198 }
199
/* RCU callback that tears down a datapath once a grace period has
 * elapsed, so no RCU reader can still be traversing its tables/ports.
 */
static void destroy_dp_rcu(struct rcu_head *rcu)
{
	struct datapath *dp = container_of(rcu, struct datapath, rcu);

	ovs_flow_tbl_destroy(&dp->table);
	free_percpu(dp->stats_percpu);
	release_net(ovs_dp_get_net(dp));
	kfree(dp->ports);
	kfree(dp);
}
210
/* Map 'port_no' to its hash bucket in dp->ports.  The mask works
 * because DP_VPORT_HASH_BUCKETS is a power of two.
 */
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
					    u16 port_no)
{
	return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}
216
/* Called with ovs_mutex or RCU read lock.
 * Find the vport with number 'port_no' on 'dp', or NULL.  Walks only
 * the one hash bucket the port number maps to.
 */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
	struct vport *vport;
	struct hlist_head *head;

	head = vport_hash_bucket(dp, port_no);
	hlist_for_each_entry_rcu(vport, head, dp_hash_node) {
		if (vport->port_no == port_no)
			return vport;
	}
	return NULL;
}
230
231 /* Called with ovs_mutex. */
232 static struct vport *new_vport(const struct vport_parms *parms)
233 {
234         struct vport *vport;
235
236         vport = ovs_vport_add(parms);
237         if (!IS_ERR(vport)) {
238                 struct datapath *dp = parms->dp;
239                 struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);
240
241                 hlist_add_head_rcu(&vport->dp_hash_node, head);
242         }
243         return vport;
244 }
245
/* Unlink 'p' from its datapath and destroy it.  Caller holds ovs_mutex. */
void ovs_dp_detach_port(struct vport *p)
{
	ASSERT_OVSL();

	/* First drop references to device. */
	hlist_del_rcu(&p->dp_hash_node);

	/* Then destroy it. */
	ovs_vport_del(p);
}
256
/* Must be called with rcu_read_lock.
 * Main receive path: look up the flow for 'skb'/'key' and execute its
 * actions, or send the packet up to userspace as a MISS upcall when no
 * flow matches.  Consumes 'skb' on all paths.
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct vport *p = OVS_CB(skb)->input_vport;
	struct datapath *dp = p->dp;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct dp_stats_percpu *stats;
	u64 *stats_counter;
	u32 n_mask_hit;

	stats = this_cpu_ptr(dp->stats_percpu);

	/* Look up flow. */
	flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
					 &n_mask_hit);
	if (unlikely(!flow)) {
		/* Miss: hand the packet to userspace via netlink upcall. */
		struct dp_upcall_info upcall;
		int error;

		upcall.cmd = OVS_PACKET_CMD_MISS;
		upcall.userdata = NULL;
		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
		upcall.egress_tun_info = NULL;
		error = ovs_dp_upcall(dp, skb, key, &upcall);
		/* The upcall copied the skb; free the original either way. */
		if (unlikely(error))
			kfree_skb(skb);
		else
			consume_skb(skb);
		stats_counter = &stats->n_missed;
		goto out;
	}

	ovs_flow_stats_update(flow, key->tp.flags, skb);
	sf_acts = rcu_dereference(flow->sf_acts);
	ovs_execute_actions(dp, skb, sf_acts, key);

	stats_counter = &stats->n_hit;

out:
	/* Update datapath statistics. */
	u64_stats_update_begin(&stats->syncp);
	(*stats_counter)++;
	stats->n_mask_hit += n_mask_hit;
	u64_stats_update_end(&stats->syncp);
}
303
/* Queue 'skb' to the userspace portid chosen in 'upcall_info'.
 * GSO packets are segmented first; any failure (including no listener,
 * portid == 0) bumps the per-cpu n_lost counter.  Does not free 'skb';
 * the caller owns it.  Returns 0 or a negative errno.
 */
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
		  const struct sw_flow_key *key,
		  const struct dp_upcall_info *upcall_info)
{
	struct dp_stats_percpu *stats;
	int err;

	if (upcall_info->portid == 0) {
		/* No userspace consumer registered for this packet. */
		err = -ENOTCONN;
		goto err;
	}

	if (!skb_is_gso(skb))
		err = queue_userspace_packet(dp, skb, key, upcall_info);
	else
		err = queue_gso_packets(dp, skb, key, upcall_info);
	if (err)
		goto err;

	return 0;

err:
	stats = this_cpu_ptr(dp->stats_percpu);

	u64_stats_update_begin(&stats->syncp);
	stats->n_lost++;
	u64_stats_update_end(&stats->syncp);

	return err;
}
334
/* Software-segment a GSO 'skb' and queue each resulting segment to
 * userspace individually.  'skb' itself is not freed; the segment list
 * produced by __skb_gso_segment() is always freed here.  Returns 0 or
 * the first error hit while queueing.
 */
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
			     const struct sw_flow_key *key,
			     const struct dp_upcall_info *upcall_info)
{
	unsigned short gso_type = skb_shinfo(skb)->gso_type;
	struct sw_flow_key later_key;
	struct sk_buff *segs, *nskb;
	struct ovs_skb_cb ovs_cb;
	int err;

	/* Segmentation scribbles over skb->cb; save and restore the OVS
	 * control block around it, and copy it into every segment below.
	 */
	ovs_cb = *OVS_CB(skb);
	segs = __skb_gso_segment(skb, NETIF_F_SG, false);
	*OVS_CB(skb) = ovs_cb;
	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (segs == NULL)
		return -EINVAL;

	if (gso_type & SKB_GSO_UDP) {
		/* The initial flow key extracted by ovs_flow_key_extract()
		 * in this case is for a first fragment, so we need to
		 * properly mark later fragments.
		 */
		later_key = *key;
		later_key.ip.frag = OVS_FRAG_TYPE_LATER;
	}

	/* Queue all of the segments. */
	skb = segs;
	do {
		*OVS_CB(skb) = ovs_cb;
		/* All but the first UDP segment are "later" fragments. */
		if (gso_type & SKB_GSO_UDP && skb != segs)
			key = &later_key;

		err = queue_userspace_packet(dp, skb, key, upcall_info);
		if (err)
			break;

	} while ((skb = skb->next));

	/* Free all of the segments. */
	skb = segs;
	do {
		nskb = skb->next;
		if (err)
			kfree_skb(skb);
		else
			consume_skb(skb);
	} while ((skb = nskb));
	return err;
}
386
387 static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
388                               unsigned int hdrlen)
389 {
390         size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
391                 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
392                 + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */
393
394         /* OVS_PACKET_ATTR_USERDATA */
395         if (upcall_info->userdata)
396                 size += NLA_ALIGN(upcall_info->userdata->nla_len);
397
398         /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
399         if (upcall_info->egress_tun_info)
400                 size += nla_total_size(ovs_tun_key_attr_size());
401
402         return size;
403 }
404
405 static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
406                                   const struct sw_flow_key *key,
407                                   const struct dp_upcall_info *upcall_info)
408 {
409         struct ovs_header *upcall;
410         struct sk_buff *nskb = NULL;
411         struct sk_buff *user_skb = NULL; /* to be queued to userspace */
412         struct nlattr *nla;
413         struct genl_info info = {
414 #ifdef HAVE_GENLMSG_NEW_UNICAST
415                 .dst_sk = ovs_dp_get_net(dp)->genl_sock,
416 #endif
417                 .snd_portid = upcall_info->portid,
418         };
419         size_t len;
420         unsigned int hlen;
421         int err, dp_ifindex;
422
423         dp_ifindex = get_dpifindex(dp);
424         if (!dp_ifindex)
425                 return -ENODEV;
426
427         if (skb_vlan_tag_present(skb)) {
428                 nskb = skb_clone(skb, GFP_ATOMIC);
429                 if (!nskb)
430                         return -ENOMEM;
431
432                 nskb = vlan_insert_tag_set_proto(nskb, nskb->vlan_proto, skb_vlan_tag_get(nskb));
433                 if (!nskb)
434                         return -ENOMEM;
435
436                 vlan_set_tci(nskb, 0);
437
438                 skb = nskb;
439         }
440
441         if (nla_attr_size(skb->len) > USHRT_MAX) {
442                 err = -EFBIG;
443                 goto out;
444         }
445
446         /* Complete checksum if needed */
447         if (skb->ip_summed == CHECKSUM_PARTIAL &&
448             (err = skb_checksum_help(skb)))
449                 goto out;
450
451         /* Older versions of OVS user space enforce alignment of the last
452          * Netlink attribute to NLA_ALIGNTO which would require extensive
453          * padding logic. Only perform zerocopy if padding is not required.
454          */
455         if (dp->user_features & OVS_DP_F_UNALIGNED)
456                 hlen = skb_zerocopy_headlen(skb);
457         else
458                 hlen = skb->len;
459
460         len = upcall_msg_size(upcall_info, hlen);
461         user_skb = genlmsg_new_unicast(len, &info, GFP_ATOMIC);
462         if (!user_skb) {
463                 err = -ENOMEM;
464                 goto out;
465         }
466
467         upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
468                              0, upcall_info->cmd);
469         upcall->dp_ifindex = dp_ifindex;
470
471         err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
472         BUG_ON(err);
473
474         if (upcall_info->userdata)
475                 __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
476                           nla_len(upcall_info->userdata),
477                           nla_data(upcall_info->userdata));
478
479         if (upcall_info->egress_tun_info) {
480                 nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
481                 err = ovs_nla_put_egress_tunnel_key(user_skb,
482                                                     upcall_info->egress_tun_info);
483                 BUG_ON(err);
484                 nla_nest_end(user_skb, nla);
485         }
486
487         /* Only reserve room for attribute header, packet data is added
488          * in skb_zerocopy()
489          */
490         if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
491                 err = -ENOBUFS;
492                 goto out;
493         }
494         nla->nla_len = nla_attr_size(skb->len);
495
496         err = skb_zerocopy(user_skb, skb, skb->len, hlen);
497         if (err)
498                 goto out;
499
500         /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
501         if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
502                 size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
503
504                 if (plen > 0)
505                         memset(skb_put(user_skb, plen), 0, plen);
506         }
507
508         ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
509
510         err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
511         user_skb = NULL;
512 out:
513         if (err)
514                 skb_tx_error(skb);
515         kfree_skb(user_skb);
516         kfree_skb(nskb);
517         return err;
518 }
519
/* OVS_PACKET_CMD_EXECUTE handler: userspace injects a packet plus a key
 * and an action list; rebuild the packet, validate the actions against
 * the key, and run them as if the packet had been received on the
 * indicated input port.  Returns 0 or a negative errno.
 */
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
	struct ovs_header *ovs_header = info->userhdr;
	struct nlattr **a = info->attrs;
	struct sw_flow_actions *acts;
	struct sk_buff *packet;
	struct sw_flow *flow;
	struct sw_flow_actions *sf_acts;
	struct datapath *dp;
	struct ethhdr *eth;
	struct vport *input_vport;
	int len;
	int err;
	/* Probe requests suppress error logging. */
	bool log = !a[OVS_PACKET_ATTR_PROBE];

	err = -EINVAL;
	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
	    !a[OVS_PACKET_ATTR_ACTIONS])
		goto err;

	/* Copy the packet bytes out of the attribute into a fresh skb. */
	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
	err = -ENOMEM;
	if (!packet)
		goto err;
	skb_reserve(packet, NET_IP_ALIGN);

	nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

	skb_reset_mac_header(packet);
	eth = eth_hdr(packet);

	/* Normally, setting the skb 'protocol' field would be handled by a
	 * call to eth_type_trans(), but it assumes there's a sending
	 * device, which we may not have.
	 */
	if (ntohs(eth->h_proto) >= ETH_P_802_3_MIN)
		packet->protocol = eth->h_proto;
	else
		packet->protocol = htons(ETH_P_802_2);

	/* Build an sw_flow for sending this packet. */
	flow = ovs_flow_alloc();
	err = PTR_ERR(flow);
	if (IS_ERR(flow))
		goto err_kfree_skb;

	err = ovs_flow_key_extract_userspace(a[OVS_PACKET_ATTR_KEY], packet,
					     &flow->key, log);
	if (err)
		goto err_flow_free;

	err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
				   &flow->key, &acts, log);
	if (err)
		goto err_flow_free;

	rcu_assign_pointer(flow->sf_acts, acts);
	OVS_CB(packet)->egress_tun_info = NULL;
	packet->priority = flow->key.phy.priority;
	packet->mark = flow->key.phy.skb_mark;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto err_unlock;

	/* Fall back to the local port if the key names an unknown port. */
	input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
	if (!input_vport)
		input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

	if (!input_vport)
		goto err_unlock;

	OVS_CB(packet)->input_vport = input_vport;
	sf_acts = rcu_dereference(flow->sf_acts);

	/* Actions expect softirq context, as on the receive path. */
	local_bh_disable();
	err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
	local_bh_enable();
	rcu_read_unlock();

	ovs_flow_free(flow, false);
	return err;

err_unlock:
	rcu_read_unlock();
err_flow_free:
	ovs_flow_free(flow, false);
err_kfree_skb:
	kfree_skb(packet);
err:
	return err;
}
615
/* Netlink attribute policy for OVS_PACKET_* requests; the packet
 * attribute must hold at least an Ethernet header.
 */
static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
	[OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
};
622
/* Operations of the packet genl family; EXECUTE is its only command. */
static struct genl_ops dp_packet_genl_ops[] = {
	{ .cmd = OVS_PACKET_CMD_EXECUTE,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = packet_policy,
	  .doit = ovs_packet_cmd_execute
	}
};
630
/* Generic netlink family for OVS_PACKET messages (per-netns, and
 * parallel_ops so EXECUTE requests are not serialized by genl).
 */
static struct genl_family dp_packet_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_PACKET_FAMILY,
	.version = OVS_PACKET_VERSION,
	.maxattr = OVS_PACKET_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_packet_genl_ops,
	.n_ops = ARRAY_SIZE(dp_packet_genl_ops),
};
642
/* Aggregate datapath statistics: flow/mask counts from the flow table,
 * plus per-cpu hit/miss/lost counters summed under the u64_stats
 * seqcount so each CPU's snapshot is internally consistent.
 */
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
			 struct ovs_dp_megaflow_stats *mega_stats)
{
	int i;

	memset(mega_stats, 0, sizeof(*mega_stats));

	stats->n_flows = ovs_flow_tbl_count(&dp->table);
	mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

	stats->n_hit = stats->n_missed = stats->n_lost = 0;

	for_each_possible_cpu(i) {
		const struct dp_stats_percpu *percpu_stats;
		struct dp_stats_percpu local_stats;
		unsigned int start;

		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

		/* Retry the copy if the CPU updated its counters meanwhile. */
		do {
			start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
			local_stats = *percpu_stats;
		} while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

		stats->n_hit += local_stats.n_hit;
		stats->n_missed += local_stats.n_missed;
		stats->n_lost += local_stats.n_lost;
		mega_stats->n_mask_hit += local_stats.n_mask_hit;
	}
}
673
674 static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
675 {
676         return ovs_identifier_is_ufid(sfid) &&
677                !(ufid_flags & OVS_UFID_F_OMIT_KEY);
678 }
679
680 static bool should_fill_mask(uint32_t ufid_flags)
681 {
682         return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
683 }
684
685 static bool should_fill_actions(uint32_t ufid_flags)
686 {
687         return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
688 }
689
690 static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
691                                     const struct sw_flow_id *sfid,
692                                     uint32_t ufid_flags)
693 {
694         size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
695
696         /* OVS_FLOW_ATTR_UFID */
697         if (sfid && ovs_identifier_is_ufid(sfid))
698                 len += nla_total_size(sfid->ufid_len);
699
700         /* OVS_FLOW_ATTR_KEY */
701         if (!sfid || should_fill_key(sfid, ufid_flags))
702                 len += nla_total_size(ovs_key_attr_size());
703
704         /* OVS_FLOW_ATTR_MASK */
705         if (should_fill_mask(ufid_flags))
706                 len += nla_total_size(ovs_key_attr_size());
707
708         /* OVS_FLOW_ATTR_ACTIONS */
709         if (should_fill_actions(ufid_flags))
710                 len += nla_total_size(acts->actions_len);
711
712         return len
713                 + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
714                 + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
715                 + nla_total_size(8); /* OVS_FLOW_ATTR_USED */
716 }
717
/* Called with ovs_mutex or RCU read lock.
 * Append USED/STATS/TCP_FLAGS attributes for 'flow' to 'skb'.  Each is
 * emitted only when non-zero, so idle flows produce smaller messages.
 * Returns 0 or -EMSGSIZE.
 */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
				   struct sk_buff *skb)
{
	struct ovs_flow_stats stats;
	__be16 tcp_flags;
	unsigned long used;

	ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

	if (used &&
	    nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
		return -EMSGSIZE;

	if (stats.n_packets &&
	    nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats))
		return -EMSGSIZE;

	/* Only the low 8 bits of the TCP flags are reported. */
	if ((u8)ntohs(tcp_flags) &&
	     nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
		return -EMSGSIZE;

	return 0;
}
742
/* Called with ovs_mutex or RCU read lock.
 * Append the flow's action list to 'skb' as a nested
 * OVS_FLOW_ATTR_ACTIONS attribute.  Returns 0 or a negative errno.
 */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
				     struct sk_buff *skb, int skb_orig_len)
{
	struct nlattr *start;
	int err;

	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
	 * this is the first flow to be dumped into 'skb'.  This is unusual for
	 * Netlink but individual action lists can be longer than
	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
	 * The userspace caller can always fetch the actions separately if it
	 * really wants them.  (Most userspace callers in fact don't care.)
	 *
	 * This can only fail for dump operations because the skb is always
	 * properly sized for single flows.
	 */
	start = nla_nest_start(skb, OVS_FLOW_ATTR_ACTIONS);
	if (start) {
		const struct sw_flow_actions *sf_acts;

		sf_acts = rcu_dereference_ovsl(flow->sf_acts);
		err = ovs_nla_put_actions(sf_acts->actions,
					  sf_acts->actions_len, skb);

		if (!err)
			nla_nest_end(skb, start);
		else {
			/* skb_orig_len != 0 means mid-dump: propagate the
			 * error; otherwise drop the actions silently.
			 */
			if (skb_orig_len)
				return err;

			nla_nest_cancel(skb, start);
		}
	} else if (skb_orig_len) {
		return -EMSGSIZE;
	}

	return 0;
}
782
/* Called with ovs_mutex or RCU read lock.
 * Serialize 'flow' into 'skb' as a complete OVS_FLOW genl message:
 * header, identifier, then (subject to 'ufid_flags') key, mask, stats
 * and actions.  On error the partially built message is cancelled.
 */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
				  struct sk_buff *skb, u32 portid,
				  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
	/* Non-zero only during dumps; used by fill_actions to decide
	 * whether a too-big action list is fatal.
	 */
	const int skb_orig_len = skb->len;
	struct ovs_header *ovs_header;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = dp_ifindex;

	err = ovs_nla_put_identifier(flow, skb);
	if (err)
		goto error;

	if (should_fill_key(&flow->id, ufid_flags)) {
		err = ovs_nla_put_masked_key(flow, skb);
		if (err)
			goto error;
	}

	if (should_fill_mask(ufid_flags)) {
		err = ovs_nla_put_mask(flow, skb);
		if (err)
			goto error;
	}

	err = ovs_flow_cmd_fill_stats(flow, skb);
	if (err)
		goto error;

	if (should_fill_actions(ufid_flags)) {
		err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
		if (err)
			goto error;
	}

	genlmsg_end(skb, ovs_header);
	return 0;

error:
	genlmsg_cancel(skb, ovs_header);
	return err;
}
832
/* May not be called with RCU read lock.
 * Allocate an skb sized for a flow reply.  Returns NULL when no reply
 * is needed ('always' false and nobody listening / no NLM_F_ECHO),
 * ERR_PTR(-ENOMEM) on allocation failure, or the fresh skb.
 */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
					       const struct sw_flow_id *sfid,
					       struct genl_info *info,
					       bool always,
					       uint32_t ufid_flags)
{
	struct sk_buff *skb;
	size_t len;

	if (!always && !ovs_must_notify(&dp_flow_genl_family, info,
					GROUP_ID(&ovs_dp_flow_multicast_group)))
		return NULL;

	len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
	skb = genlmsg_new_unicast(len, info, GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOMEM);

	return skb;
}
854
/* Called with ovs_mutex.
 * Allocate and fully serialize a reply message for 'flow'.  Propagates
 * NULL/ERR_PTR from ovs_flow_cmd_alloc_info(); filling cannot fail
 * because the skb was sized for this exact flow (hence the BUG_ON).
 */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
					       int dp_ifindex,
					       struct genl_info *info, u8 cmd,
					       bool always, u32 ufid_flags)
{
	struct sk_buff *skb;
	int retval;

	skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
				      &flow->id, info, always, ufid_flags);
	if (IS_ERR_OR_NULL(skb))
		return skb;

	retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
					info->snd_portid, info->snd_seq, 0,
					cmd, ufid_flags);
	BUG_ON(retval < 0);
	return skb;
}
875
/* Handler for OVS_FLOW_CMD_NEW.
 *
 * Creates a new flow from the netlink request or, when the request does
 * not carry NLM_F_CREATE/NLM_F_EXCL, updates the actions of an existing
 * flow with the same identifier.  The flow, the actions copy and the
 * reply skb are all allocated before ovs_mutex is taken so the locked
 * section stays short.  Returns 0 on success or a negative errno.
 */
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow *flow = NULL, *new_flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply;
	struct datapath *dp;
	struct sw_flow_key key;
	struct sw_flow_actions *acts;
	struct sw_flow_match match;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];	/* probe requests suppress error logging */

	/* Must have key and actions. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attr not present in new flow.");
		goto error;
	}
	if (!a[OVS_FLOW_ATTR_ACTIONS]) {
		OVS_NLERR(log, "Flow actions attr not present in new flow.");
		goto error;
	}

	/* Most of the time we need to allocate a new flow, do it before
	 * locking.
	 */
	new_flow = ovs_flow_alloc();
	if (IS_ERR(new_flow)) {
		error = PTR_ERR(new_flow);
		goto error;
	}

	/* Extract key. */
	ovs_match_init(&match, &key, &mask);
	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto err_kfree_flow;

	/* The flow's stored key is the userspace key ANDed with the mask. */
	ovs_flow_mask_key(&new_flow->key, &key, &mask);

	/* Extract flow identifier. */
	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
				       &key, log);
	if (error)
		goto err_kfree_flow;

	/* Validate actions. */
	error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
				     &acts, log);
	if (error) {
		OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
		goto err_kfree_flow;
	}

	/* Pre-allocate the reply too; a NULL return means no reply is
	 * wanted and is not an error.
	 */
	reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
					ufid_flags);
	if (IS_ERR(reply)) {
		error = PTR_ERR(reply);
		goto err_kfree_acts;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}

	/* Check if this is a duplicate flow */
	if (ovs_identifier_is_ufid(&new_flow->id))
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
	if (!flow)
		flow = ovs_flow_tbl_lookup(&dp->table, &key);
	if (likely(!flow)) {
		rcu_assign_pointer(new_flow->sf_acts, acts);

		/* Put flow in bucket. */
		error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
		if (unlikely(error)) {
			/* 'acts' is now owned by new_flow; clear the local
			 * so the error path doesn't free it a second time.
			 */
			acts = NULL;
			goto err_unlock_ovs;
		}

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(new_flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();
	} else {
		struct sw_flow_actions *old_acts;

		/* Bail out if we're not allowed to modify an existing flow.
		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
		 * because Generic Netlink treats the latter as a dump
		 * request.  We also accept NLM_F_EXCL in case that bug ever
		 * gets fixed.
		 */
		if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
							 | NLM_F_EXCL))) {
			error = -EEXIST;
			goto err_unlock_ovs;
		}
		/* The flow identifier has to be the same for flow updates.
		 * Look for any overlapping flow.
		 */
		if (unlikely(!ovs_flow_cmp(flow, &match))) {
			if (ovs_identifier_is_key(&flow->id))
				flow = ovs_flow_tbl_lookup_exact(&dp->table,
								 &match);
			else /* UFID matches but key is different */
				flow = NULL;
			if (!flow) {
				error = -ENOENT;
				goto err_unlock_ovs;
			}
		}
		/* Update actions. */
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
		ovs_unlock();

		/* Release the replaced actions and the unused
		 * pre-allocated flow.
		 */
		ovs_nla_free_flow_actions(old_acts);
		ovs_flow_free(new_flow, false);
	}

	if (reply)
		ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	kfree(acts);
err_kfree_flow:
	ovs_flow_free(new_flow, false);
error:
	return error;
}
1034
1035 /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
1036 static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
1037                                                 const struct sw_flow_key *key,
1038                                                 const struct sw_flow_mask *mask,
1039                                                 bool log)
1040 {
1041         struct sw_flow_actions *acts;
1042         struct sw_flow_key masked_key;
1043         int error;
1044
1045         ovs_flow_mask_key(&masked_key, key, mask);
1046         error = ovs_nla_copy_actions(a, &masked_key, &acts, log);
1047         if (error) {
1048                 OVS_NLERR(log,
1049                           "Actions may not be safe on all matching packets");
1050                 return ERR_PTR(error);
1051         }
1052
1053         return acts;
1054 }
1055
/* Handler for OVS_FLOW_CMD_SET.
 *
 * Locates an existing flow — by UFID when the request carries one,
 * otherwise by exact key match — then replaces its actions if
 * OVS_FLOW_ATTR_ACTIONS is present and clears its statistics if
 * OVS_FLOW_ATTR_CLEAR is set.  Returns 0 on success or a negative errno.
 */
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sw_flow *flow;
	struct sw_flow_mask mask;
	struct sk_buff *reply = NULL;
	struct datapath *dp;
	struct sw_flow_actions *old_acts = NULL, *acts = NULL;
	struct sw_flow_match match;
	struct sw_flow_id sfid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int error;
	bool log = !a[OVS_FLOW_ATTR_PROBE];	/* probe requests suppress error logging */
	bool ufid_present;

	/* Extract key. */
	error = -EINVAL;
	if (!a[OVS_FLOW_ATTR_KEY]) {
		OVS_NLERR(log, "Flow key attribute not present in set flow.");
		goto error;
	}

	ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
	ovs_match_init(&match, &key, &mask);
	error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
				  a[OVS_FLOW_ATTR_MASK], log);
	if (error)
		goto error;

	/* Validate actions. */
	if (a[OVS_FLOW_ATTR_ACTIONS]) {
		acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
					log);
		if (IS_ERR(acts)) {
			error = PTR_ERR(acts);
			goto error;
		}

		/* Can allocate before locking if have acts. */
		reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
						ufid_flags);
		if (IS_ERR(reply)) {
			error = PTR_ERR(reply);
			goto err_kfree_acts;
		}
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		error = -ENODEV;
		goto err_unlock_ovs;
	}
	/* Check that the flow exists. */
	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		error = -ENOENT;
		goto err_unlock_ovs;
	}

	/* Update actions, if present. */
	if (likely(acts)) {
		old_acts = ovsl_dereference(flow->sf_acts);
		rcu_assign_pointer(flow->sf_acts, acts);

		if (unlikely(reply)) {
			error = ovs_flow_cmd_fill_info(flow,
						       ovs_header->dp_ifindex,
						       reply, info->snd_portid,
						       info->snd_seq, 0,
						       OVS_FLOW_CMD_NEW,
						       ufid_flags);
			BUG_ON(error < 0);
		}
	} else {
		/* Could not alloc without acts before locking. */
		reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
						info, OVS_FLOW_CMD_NEW, false,
						ufid_flags);

		if (unlikely(IS_ERR(reply))) {
			error = PTR_ERR(reply);
			goto err_unlock_ovs;
		}
	}

	/* Clear stats. */
	if (a[OVS_FLOW_ATTR_CLEAR])
		ovs_flow_stats_clear(flow);
	ovs_unlock();

	if (reply)
		ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
	if (old_acts)
		ovs_nla_free_flow_actions(old_acts);

	return 0;

err_unlock_ovs:
	ovs_unlock();
	kfree_skb(reply);
err_kfree_acts:
	kfree(acts);
error:
	return error;
}
1167
/* Handler for a non-dump OVS_FLOW_CMD_GET: look up one flow by UFID or by
 * exact key and reply with its full description.  At least one of
 * OVS_FLOW_ATTR_UFID and OVS_FLOW_ATTR_KEY must be supplied.
 */
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err = 0;
	bool log = !a[OVS_FLOW_ATTR_PROBE];	/* probe requests suppress error logging */
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		/* No mask: an exact key match is wanted. */
		ovs_match_init(&match, &key, NULL);
		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
	} else if (!ufid_present) {
		OVS_NLERR(log,
			  "Flow get message rejected, Key attribute missing.");
		err = -EINVAL;
	}
	if (err)
		return err;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		err = -ENODEV;
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (!flow) {
		err = -ENOENT;
		goto unlock;
	}

	reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
					OVS_FLOW_CMD_NEW, true, ufid_flags);
	if (IS_ERR(reply)) {
		err = PTR_ERR(reply);
		goto unlock;
	}

	ovs_unlock();
	return genlmsg_reply(reply, info);
unlock:
	ovs_unlock();
	return err;
}
1225
/* Handler for OVS_FLOW_CMD_DEL.
 *
 * With neither a key nor a UFID, the entire flow table is flushed.
 * Otherwise the single matching flow is removed; the deletion
 * notification is best-effort — if the reply skb cannot be allocated the
 * flow is still deleted and only an error is signalled to the multicast
 * group.  Returns 0 on success or a negative errno.
 */
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct sw_flow_key key;
	struct sk_buff *reply;
	struct sw_flow *flow = NULL;
	struct datapath *dp;
	struct sw_flow_match match;
	struct sw_flow_id ufid;
	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
	int err;
	bool log = !a[OVS_FLOW_ATTR_PROBE];	/* probe requests suppress error logging */
	bool ufid_present;

	ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
	if (a[OVS_FLOW_ATTR_KEY]) {
		ovs_match_init(&match, &key, NULL);
		err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL,
					log);
		if (unlikely(err))
			return err;
	}

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (unlikely(!dp)) {
		err = -ENODEV;
		goto unlock;
	}

	/* No flow identifier at all: flush the whole table. */
	if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
		err = ovs_flow_tbl_flush(&dp->table);
		goto unlock;
	}

	if (ufid_present)
		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
	else
		flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
	if (unlikely(!flow)) {
		err = -ENOENT;
		goto unlock;
	}

	/* The flow is out of the table before the reply is built; it stays
	 * valid here because we still hold a reference until ovs_flow_free().
	 */
	ovs_flow_tbl_remove(&dp->table, flow);
	ovs_unlock();

	reply = ovs_flow_cmd_alloc_info(rcu_dereference_raw(flow->sf_acts),
					&flow->id, info, false, ufid_flags);

	if (likely(reply)) {
		if (likely(!IS_ERR(reply))) {
			rcu_read_lock();	/*To keep RCU checker happy. */
			err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
						     reply, info->snd_portid,
						     info->snd_seq, 0,
						     OVS_FLOW_CMD_DEL,
						     ufid_flags);
			rcu_read_unlock();
			BUG_ON(err < 0);
			ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
		} else {
			/* Could not allocate a reply: report the error to
			 * listeners instead of failing the deletion.
			 */
			genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
				     GROUP_ID(&ovs_dp_flow_multicast_group), PTR_ERR(reply));

		}
	}

	ovs_flow_free(flow, true);
	return 0;
unlock:
	ovs_unlock();
	return err;
}
1301
/* Netlink dump callback for the flow family.
 *
 * cb->args[0] and cb->args[1] carry the table bucket and in-bucket offset
 * of the last flow written, so successive invocations resume where the
 * previous skb filled up.  Runs under rcu_read_lock rather than ovs_mutex.
 */
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr *a[__OVS_FLOW_ATTR_MAX];
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct table_instance *ti;
	struct datapath *dp;
	u32 ufid_flags;
	int err;

	/* Re-parse the request attributes to recover the UFID flags. */
	err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
			    OVS_FLOW_ATTR_MAX, flow_policy);
	if (err)
		return err;
	ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}

	ti = rcu_dereference(dp->table.ti);
	for (;;) {
		struct sw_flow *flow;
		u32 bucket, obj;

		bucket = cb->args[0];
		obj = cb->args[1];
		flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
		if (!flow)
			break;

		if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
					   NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   OVS_FLOW_CMD_NEW, ufid_flags) < 0)
			break;

		/* Only advance the resume point once the flow fit in skb. */
		cb->args[0] = bucket;
		cb->args[1] = obj;
	}
	rcu_read_unlock();
	return skb->len;
}
1347
/* Netlink attribute validation policy for the flow family. */
static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
	[OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
	/* For NLA_UNSPEC, .len is the minimum payload length: reject
	 * empty UFIDs but accept any non-empty size here.
	 */
	[OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
	[OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};
1357
/* Command table for the flow genetlink family; per-command policy,
 * handler and required privilege.
 */
static struct genl_ops dp_flow_genl_ops[] = {
	{ .cmd = OVS_FLOW_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_new
	},
	{ .cmd = OVS_FLOW_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_del
	},
	{ .cmd = OVS_FLOW_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_get,
	  .dumpit = ovs_flow_cmd_dump
	},
	{ .cmd = OVS_FLOW_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = flow_policy,
	  .doit = ovs_flow_cmd_set,
	},
};
1381
/* Generic netlink family definition for flow commands; per-netns aware
 * and allows parallel handler execution.
 */
static struct genl_family dp_flow_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_FLOW_FAMILY,
	.version = OVS_FLOW_VERSION,
	.maxattr = OVS_FLOW_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_flow_genl_ops,
	.n_ops = ARRAY_SIZE(dp_flow_genl_ops),
	.mcgrps = &ovs_dp_flow_multicast_group,
	.n_mcgrps = 1,
};
1395
1396 static size_t ovs_dp_cmd_msg_size(void)
1397 {
1398         size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1399
1400         msgsize += nla_total_size(IFNAMSIZ);
1401         msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
1402         msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
1403         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1404
1405         return msgsize;
1406 }
1407
/* Fill 'skb' with a datapath message for 'dp': ifindex, name, flow and
 * megaflow statistics, and the announced user features.  Called with
 * ovs_mutex.  Returns 0 on success or -EMSGSIZE when 'skb' runs out of
 * room (the partially written message is cancelled).
 */
static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
				u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_dp_stats dp_stats;
	struct ovs_dp_megaflow_stats dp_megaflow_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
				   flags, cmd);
	if (!ovs_header)
		goto error;

	ovs_header->dp_ifindex = get_dpifindex(dp);

	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
	if (err)
		goto nla_put_failure;

	get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
	if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
			&dp_stats))
		goto nla_put_failure;

	if (nla_put(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
			sizeof(struct ovs_dp_megaflow_stats),
			&dp_megaflow_stats))
		goto nla_put_failure;

	if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
		goto nla_put_failure;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	/* Roll back the half-written message before reporting failure. */
	genlmsg_cancel(skb, ovs_header);
error:
	return -EMSGSIZE;
}
1449
/* Allocate a unicast reply skb sized for a full datapath message.
 * Returns NULL on allocation failure.
 */
static struct sk_buff *ovs_dp_cmd_alloc_info(struct genl_info *info)
{
	return genlmsg_new_unicast(ovs_dp_cmd_msg_size(), info, GFP_KERNEL);
}
1454
1455 /* Called with rcu_read_lock or ovs_mutex. */
1456 static struct datapath *lookup_datapath(struct net *net,
1457                                         const struct ovs_header *ovs_header,
1458                                         struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1459 {
1460         struct datapath *dp;
1461
1462         if (!a[OVS_DP_ATTR_NAME])
1463                 dp = get_dp(net, ovs_header->dp_ifindex);
1464         else {
1465                 struct vport *vport;
1466
1467                 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1468                 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1469         }
1470         return dp ? dp : ERR_PTR(-ENODEV);
1471 }
1472
1473 static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *info)
1474 {
1475         struct datapath *dp;
1476
1477         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1478         if (IS_ERR(dp))
1479                 return;
1480
1481         WARN(dp->user_features, "Dropping previously announced user features\n");
1482         dp->user_features = 0;
1483 }
1484
1485 static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1486 {
1487         if (a[OVS_DP_ATTR_USER_FEATURES])
1488                 dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1489 }
1490
/* Handler for OVS_DP_CMD_NEW: allocate a datapath, its flow table,
 * per-CPU stats and vport hash buckets, then create the local internal
 * port (OVSP_LOCAL) under ovs_mutex.  OVS_DP_ATTR_NAME and
 * OVS_DP_ATTR_UPCALL_PID are mandatory.  Returns 0 or a negative errno.
 */
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct datapath *dp;
	struct vport *vport;
	struct ovs_net *ovs_net;
	int err, i;

	err = -EINVAL;
	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
		goto err;

	/* Allocate the reply before any other resource so later error
	 * paths don't have to cope with reply allocation failure.
	 */
	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	err = -ENOMEM;
	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
	if (dp == NULL)
		goto err_free_reply;

	ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

	/* Allocate table. */
	err = ovs_flow_tbl_init(&dp->table);
	if (err)
		goto err_free_dp;

	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
	if (!dp->stats_percpu) {
		err = -ENOMEM;
		goto err_destroy_table;
	}

	dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
			    GFP_KERNEL);
	if (!dp->ports) {
		err = -ENOMEM;
		goto err_destroy_percpu;
	}

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
		INIT_HLIST_HEAD(&dp->ports[i]);

	/* Set up our datapath device. */
	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
	parms.type = OVS_VPORT_TYPE_INTERNAL;
	parms.options = NULL;
	parms.dp = dp;
	parms.port_no = OVSP_LOCAL;
	parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];

	ovs_dp_change(dp, a);

	/* So far only local changes have been made, now need the lock. */
	ovs_lock();

	vport = new_vport(&parms);
	if (IS_ERR(vport)) {
		err = PTR_ERR(vport);
		if (err == -EBUSY)
			err = -EEXIST;

		if (err == -EEXIST) {
			/* An outdated user space instance that does not understand
			 * the concept of user_features has attempted to create a new
			 * datapath and is likely to reuse it. Drop all user features.
			 */
			if (info->genlhdr->version < OVS_DP_VER_FEATURES)
				ovs_dp_reset_user_features(skb, info);
		}

		goto err_destroy_ports_array;
	}

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);

	/* Publish the datapath on the per-netns list. */
	ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
	list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
	return 0;

	/* Error unwind: release resources in reverse order of acquisition. */
err_destroy_ports_array:
	ovs_unlock();
	kfree(dp->ports);
err_destroy_percpu:
	free_percpu(dp->stats_percpu);
err_destroy_table:
	ovs_flow_tbl_destroy(&dp->table);
err_free_dp:
	release_net(ovs_dp_get_net(dp));
	kfree(dp);
err_free_reply:
	kfree_skb(reply);
err:
	return err;
}
1594
/* Tear down 'dp': detach every vport (OVSP_LOCAL last), unlink the
 * datapath from the per-netns list and defer the rest of the destruction
 * to an RCU callback.  Called with ovs_mutex.
 */
static void __dp_destroy(struct datapath *dp)
{
	int i;

	for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;
		struct hlist_node *n;

		/* _safe variant: ovs_dp_detach_port() unlinks the entry. */
		hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
			if (vport->port_no != OVSP_LOCAL)
				ovs_dp_detach_port(vport);
	}

	list_del_rcu(&dp->list_node);

	/* OVSP_LOCAL is datapath internal port. We need to make sure that
	 * all ports in datapath are destroyed first before freeing datapath.
	 */
	ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));

	/* RCU destroy the flow table */
	call_rcu(&dp->rcu, destroy_dp_rcu);
}
1619
/* Handler for OVS_DP_CMD_DEL: look up the datapath named by the request,
 * snapshot it into the reply before destruction, then destroy it and
 * notify listeners.  Returns 0 or a negative errno.
 */
static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	/* Fill the reply while 'dp' is still alive. */
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_DEL);
	BUG_ON(err < 0);

	__dp_destroy(dp);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
1651
/* Handler for OVS_DP_CMD_SET: apply user-feature changes to an existing
 * datapath and notify listeners of its new state.  Returns 0 or a
 * negative errno.
 */
static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	err = PTR_ERR(dp);
	if (IS_ERR(dp))
		goto err_unlock_free;

	ovs_dp_change(dp, info->attrs);

	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
	ovs_unlock();

	ovs_notify(&dp_datapath_genl_family, &ovs_dp_datapath_multicast_group, reply, info);
	return 0;

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
1683
/* Handler for a non-dump OVS_DP_CMD_GET: reply to the requester with a
 * description of one datapath.  Returns 0 or a negative errno.
 */
static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *reply;
	struct datapath *dp;
	int err;

	reply = ovs_dp_cmd_alloc_info(info);
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
	if (IS_ERR(dp)) {
		err = PTR_ERR(dp);
		goto err_unlock_free;
	}
	err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
				   info->snd_seq, 0, OVS_DP_CMD_NEW);
	BUG_ON(err < 0);
	ovs_unlock();

	return genlmsg_reply(reply, info);

err_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
1712
/* Netlink dump callback for the datapath family.  cb->args[0] counts how
 * many datapaths have already been dumped, so a resumed dump skips them.
 */
static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
	struct datapath *dp;
	int skip = cb->args[0];
	int i = 0;

	ovs_lock();
	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		/* Stop when the skb fills up; resume here next call. */
		if (i >= skip &&
		    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
					 OVS_DP_CMD_NEW) < 0)
			break;
		i++;
	}
	ovs_unlock();

	cb->args[0] = i;

	return skb->len;
}
1735
/* Attribute validation policy for OVS_DP_CMD_* requests. */
static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
};
1741
/* Generic netlink operations for the datapath family.  Mutating commands
 * require CAP_NET_ADMIN; GET is open to unprivileged users. */
static struct genl_ops dp_datapath_genl_ops[] = {
	{ .cmd = OVS_DP_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_new
	},
	{ .cmd = OVS_DP_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_del
	},
	{ .cmd = OVS_DP_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_get,
	  .dumpit = ovs_dp_cmd_dump
	},
	{ .cmd = OVS_DP_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = datapath_policy,
	  .doit = ovs_dp_cmd_set,
	},
};
1765
/* Datapath generic netlink family definition.  netnsok allows use from
 * non-initial network namespaces; parallel_ops disables the genl mutex
 * (the handlers serialize via ovs_mutex themselves). */
static struct genl_family dp_datapath_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_DATAPATH_FAMILY,
	.version = OVS_DATAPATH_VERSION,
	.maxattr = OVS_DP_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_datapath_genl_ops,
	.n_ops = ARRAY_SIZE(dp_datapath_genl_ops),
	.mcgrps = &ovs_dp_datapath_multicast_group,
	.n_mcgrps = 1,
};
1779
/* Called with ovs_mutex or RCU read lock.
 *
 * Serializes one vport into 'skb' as a generic netlink message with the
 * given portid/seq/flags/cmd.  Returns 0 on success, -EMSGSIZE if the
 * message does not fit, or another negative errno from the options
 * callback. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
				   u32 portid, u32 seq, u32 flags, u8 cmd)
{
	struct ovs_header *ovs_header;
	struct ovs_vport_stats vport_stats;
	int err;

	ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
				 flags, cmd);
	if (!ovs_header)
		return -EMSGSIZE;

	ovs_header->dp_ifindex = get_dpifindex(vport->dp);

	if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
	    nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
	    nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)))
		goto nla_put_failure;

	ovs_vport_get_stats(vport, &vport_stats);
	if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
		    &vport_stats))
		goto nla_put_failure;

	if (ovs_vport_get_upcall_portids(vport, skb))
		goto nla_put_failure;

	/* Only -EMSGSIZE aborts the message here; any other error from the
	 * options callback is returned after cancelling the message. */
	err = ovs_vport_get_options(vport, skb);
	if (err == -EMSGSIZE)
		goto error;

	genlmsg_end(skb, ovs_header);
	return 0;

nla_put_failure:
	err = -EMSGSIZE;
error:
	/* Roll back the partially-built message. */
	genlmsg_cancel(skb, ovs_header);
	return err;
}
1821
1822 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
1823 {
1824         return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1825 }
1826
1827 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
1828 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1829                                          u32 seq, u8 cmd)
1830 {
1831         struct sk_buff *skb;
1832         int retval;
1833
1834         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1835         if (!skb)
1836                 return ERR_PTR(-ENOMEM);
1837
1838         retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1839         BUG_ON(retval < 0);
1840
1841         return skb;
1842 }
1843
1844 /* Called with ovs_mutex or RCU read lock. */
1845 static struct vport *lookup_vport(struct net *net,
1846                                   const struct ovs_header *ovs_header,
1847                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1848 {
1849         struct datapath *dp;
1850         struct vport *vport;
1851
1852         if (a[OVS_VPORT_ATTR_NAME]) {
1853                 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1854                 if (!vport)
1855                         return ERR_PTR(-ENODEV);
1856                 if (ovs_header->dp_ifindex &&
1857                     ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1858                         return ERR_PTR(-ENODEV);
1859                 return vport;
1860         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1861                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1862
1863                 if (port_no >= DP_MAX_PORTS)
1864                         return ERR_PTR(-EFBIG);
1865
1866                 dp = get_dp(net, ovs_header->dp_ifindex);
1867                 if (!dp)
1868                         return ERR_PTR(-ENODEV);
1869
1870                 vport = ovs_vport_ovsl_rcu(dp, port_no);
1871                 if (!vport)
1872                         return ERR_PTR(-ENODEV);
1873                 return vport;
1874         } else
1875                 return ERR_PTR(-EINVAL);
1876 }
1877
/* OVS_VPORT_CMD_NEW handler: create a vport on an existing datapath and
 * notify listeners.  Name, type and upcall PID(s) are mandatory; the port
 * number is optional (0 or absent means auto-assign). */
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct ovs_header *ovs_header = info->userhdr;
	struct vport_parms parms;
	struct sk_buff *reply;
	struct vport *vport;
	struct datapath *dp;
	u32 port_no;
	int err;

	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
	    !a[OVS_VPORT_ATTR_UPCALL_PID])
		return -EINVAL;

	port_no = a[OVS_VPORT_ATTR_PORT_NO]
		? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
	if (port_no >= DP_MAX_PORTS)
		return -EFBIG;

	/* Allocate the reply before taking ovs_mutex so an allocation
	 * failure cannot happen after the port has been created. */
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
	err = -ENODEV;
	if (!dp)
		goto exit_unlock_free;

	if (port_no) {
		/* Explicit port number: it must be free. */
		vport = ovs_vport_ovsl(dp, port_no);
		err = -EBUSY;
		if (vport)
			goto exit_unlock_free;
	} else {
		/* Auto-assign: scan upward from 1 for the first free slot
		 * (port 0 is never auto-assigned). */
		for (port_no = 1; ; port_no++) {
			if (port_no >= DP_MAX_PORTS) {
				err = -EFBIG;
				goto exit_unlock_free;
			}
			vport = ovs_vport_ovsl(dp, port_no);
			if (!vport)
				break;
		}
	}

	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
	parms.dp = dp;
	parms.port_no = port_no;
	parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];

	vport = new_vport(&parms);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);	/* preallocated reply is sized to fit. */
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
1950
/* OVS_VPORT_CMD_SET handler: update an existing vport's options and/or
 * upcall PIDs.  Changing the vport type is rejected. */
static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	/* Allocate the reply before taking ovs_mutex. */
	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	/* A type attribute is only accepted if it matches the current
	 * type; the type of an existing vport cannot change. */
	if (a[OVS_VPORT_ATTR_TYPE] &&
	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_OPTIONS]) {
		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
		if (err)
			goto exit_unlock_free;
	}

	if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
		struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];

		err = ovs_vport_set_upcall_portids(vport, ids);
		if (err)
			goto exit_unlock_free;
	}

	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_NEW);
	BUG_ON(err < 0);	/* preallocated reply is sized to fit. */
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2001
/* OVS_VPORT_CMD_DEL handler: detach a vport and notify listeners.
 * The datapath's local port (OVSP_LOCAL) cannot be deleted this way. */
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **a = info->attrs;
	struct sk_buff *reply;
	struct vport *vport;
	int err;

	reply = ovs_vport_cmd_alloc_info();
	if (!reply)
		return -ENOMEM;

	ovs_lock();
	vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
	err = PTR_ERR(vport);
	if (IS_ERR(vport))
		goto exit_unlock_free;

	if (vport->port_no == OVSP_LOCAL) {
		err = -EINVAL;
		goto exit_unlock_free;
	}

	/* Fill the reply before detaching, while the vport's state is
	 * still valid. */
	err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
				      info->snd_seq, 0, OVS_VPORT_CMD_DEL);
	BUG_ON(err < 0);	/* preallocated reply is sized to fit. */
	ovs_dp_detach_port(vport);
	ovs_unlock();

	ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
	return 0;

exit_unlock_free:
	ovs_unlock();
	kfree_skb(reply);
	return err;
}
2038
2039 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2040 {
2041         struct nlattr **a = info->attrs;
2042         struct ovs_header *ovs_header = info->userhdr;
2043         struct sk_buff *reply;
2044         struct vport *vport;
2045         int err;
2046
2047         reply = ovs_vport_cmd_alloc_info();
2048         if (!reply)
2049                 return -ENOMEM;
2050
2051         rcu_read_lock();
2052         vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2053         err = PTR_ERR(vport);
2054         if (IS_ERR(vport))
2055                 goto exit_unlock_free;
2056         err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
2057                                       info->snd_seq, 0, OVS_VPORT_CMD_NEW);
2058         BUG_ON(err < 0);
2059         rcu_read_unlock();
2060
2061         return genlmsg_reply(reply, info);
2062
2063 exit_unlock_free:
2064         rcu_read_unlock();
2065         kfree_skb(reply);
2066         return err;
2067 }
2068
/* Netlink dump handler: emits one message per vport of the requested
 * datapath.  Resume state lives in cb->args: args[0] is the hash bucket
 * to continue from and args[1] how many entries in that bucket were
 * already sent. */
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
	struct datapath *dp;
	int bucket = cb->args[0], skip = cb->args[1];
	int i, j = 0;

	rcu_read_lock();
	dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
	if (!dp) {
		rcu_read_unlock();
		return -ENODEV;
	}
	for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
		struct vport *vport;

		j = 0;
		hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
			/* Stop when the skb fills; i/j are saved below so
			 * the next call resumes at this exact position. */
			if (j >= skip &&
			    ovs_vport_cmd_fill_info(vport, skb,
						    NETLINK_CB(cb->skb).portid,
						    cb->nlh->nlmsg_seq,
						    NLM_F_MULTI,
						    OVS_VPORT_CMD_NEW) < 0)
				goto out;

			j++;
		}
		/* 'skip' only applies to the bucket we resumed in. */
		skip = 0;
	}
out:
	rcu_read_unlock();

	cb->args[0] = i;
	cb->args[1] = j;

	return skb->len;
}
2107
/* Attribute validation policy for OVS_VPORT_CMD_* requests. */
static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};
2116
/* Generic netlink operations for the vport family.  Mutating commands
 * require CAP_NET_ADMIN; GET is open to unprivileged users. */
static struct genl_ops dp_vport_genl_ops[] = {
	{ .cmd = OVS_VPORT_CMD_NEW,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_new
	},
	{ .cmd = OVS_VPORT_CMD_DEL,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_del
	},
	{ .cmd = OVS_VPORT_CMD_GET,
	  .flags = 0,               /* OK for unprivileged users. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_get,
	  .dumpit = ovs_vport_cmd_dump
	},
	{ .cmd = OVS_VPORT_CMD_SET,
	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
	  .policy = vport_policy,
	  .doit = ovs_vport_cmd_set,
	},
};
2140
/* Vport generic netlink family definition (non-static: also referenced
 * from other datapath files, e.g. for filling vport messages). */
struct genl_family dp_vport_genl_family = {
	.id = GENL_ID_GENERATE,
	.hdrsize = sizeof(struct ovs_header),
	.name = OVS_VPORT_FAMILY,
	.version = OVS_VPORT_VERSION,
	.maxattr = OVS_VPORT_ATTR_MAX,
	.netnsok = true,
	.parallel_ops = true,
	.ops = dp_vport_genl_ops,
	.n_ops = ARRAY_SIZE(dp_vport_genl_ops),
	.mcgrps = &ovs_dp_vport_multicast_group,
	.n_mcgrps = 1,
};
2154
/* All generic netlink families this module registers, in registration
 * order; dp_register_genl()/dp_unregister_genl() walk this table. */
static struct genl_family *dp_genl_families[] = {
	&dp_datapath_genl_family,
	&dp_vport_genl_family,
	&dp_flow_genl_family,
	&dp_packet_genl_family,
};
2161
2162 static void dp_unregister_genl(int n_families)
2163 {
2164         int i;
2165
2166         for (i = 0; i < n_families; i++)
2167                 genl_unregister_family(dp_genl_families[i]);
2168 }
2169
2170 static int dp_register_genl(void)
2171 {
2172         int err;
2173         int i;
2174
2175         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2176
2177                 err = genl_register_family(dp_genl_families[i]);
2178                 if (err)
2179                         goto error;
2180         }
2181
2182         return 0;
2183
2184 error:
2185         dp_unregister_genl(i);
2186         return err;
2187 }
2188
/* Per-network-namespace init: set up the namespace's datapath list and
 * the deferred device-notification work item. */
static int __net_init ovs_init_net(struct net *net)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);

	INIT_LIST_HEAD(&ovs_net->dps);
	INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
	return 0;
}
2197
/* Collect, onto 'head', every internal vport in namespace 'net' whose
 * backing net device lives in the dying namespace 'dnet'.  Helper for
 * ovs_exit_net(); uses each vport's detach_list linkage. */
static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
					    struct list_head *head)
{
	struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
	struct datapath *dp;

	list_for_each_entry(dp, &ovs_net->dps, list_node) {
		int i;

		for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
			struct vport *vport;

			hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
				struct netdev_vport *netdev_vport;

				/* Only internal ports have a net device
				 * that can belong to another namespace. */
				if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
					continue;

				netdev_vport = netdev_vport_priv(vport);
				if (dev_net(netdev_vport->dev) == dnet)
					list_add(&vport->detach_list, head);
			}
		}
	}
}
2223
/* Per-network-namespace teardown: destroy every datapath owned by the
 * dying namespace, then detach internal vports in *other* namespaces
 * whose backing device lived in this one. */
static void __net_exit ovs_exit_net(struct net *dnet)
{
	struct datapath *dp, *dp_next;
	struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
	struct vport *vport, *vport_next;
	struct net *net;
	LIST_HEAD(head);

	ovs_lock();
	list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
		__dp_destroy(dp);

	/* for_each_net() requires the RTNL; collect first, detach after
	 * dropping it (still under ovs_mutex). */
	rtnl_lock();
	for_each_net(net)
		list_vports_from_net(net, dnet, &head);
	rtnl_unlock();

	/* Detach all vports from given namespace. */
	list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
		list_del(&vport->detach_list);
		ovs_dp_detach_port(vport);
	}

	ovs_unlock();

	/* Make sure no notification work is still running for this net. */
	cancel_work_sync(&ovs_net->dp_notify_work);
}
2251
/* Pernet operations: per-namespace ovs_net state allocated/freed by the
 * net core, initialized and torn down by the callbacks above. */
static struct pernet_operations ovs_net_ops = {
	.init = ovs_init_net,
	.exit = ovs_exit_net,
	.id   = &ovs_net_id,
	.size = sizeof(struct ovs_net),
};
2258
/* Out-of-tree compat shim for pernet device registration helpers. */
DEFINE_COMPAT_PNET_REG_FUNC(device);
2260
2261 static int __init dp_init(void)
2262 {
2263         int err;
2264
2265         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
2266
2267         pr_info("Open vSwitch switching datapath %s, built "__DATE__" "__TIME__"\n",
2268                 VERSION);
2269
2270         err = action_fifos_init();
2271         if (err)
2272                 goto error;
2273
2274         err = ovs_internal_dev_rtnl_link_register();
2275         if (err)
2276                 goto error_action_fifos_exit;
2277
2278         err = ovs_flow_init();
2279         if (err)
2280                 goto error_unreg_rtnl_link;
2281
2282         err = ovs_vport_init();
2283         if (err)
2284                 goto error_flow_exit;
2285
2286         err = register_pernet_device(&ovs_net_ops);
2287         if (err)
2288                 goto error_vport_exit;
2289
2290         err = register_netdevice_notifier(&ovs_dp_device_notifier);
2291         if (err)
2292                 goto error_netns_exit;
2293
2294         err = dp_register_genl();
2295         if (err < 0)
2296                 goto error_unreg_notifier;
2297
2298         return 0;
2299
2300 error_unreg_notifier:
2301         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2302 error_netns_exit:
2303         unregister_pernet_device(&ovs_net_ops);
2304 error_vport_exit:
2305         ovs_vport_exit();
2306 error_flow_exit:
2307         ovs_flow_exit();
2308 error_unreg_rtnl_link:
2309         ovs_internal_dev_rtnl_link_unregister();
2310 error_action_fifos_exit:
2311         action_fifos_exit();
2312 error:
2313         return err;
2314 }
2315
/* Module exit: tear down in the reverse order of dp_init().  Netlink
 * families go first so no new requests arrive during teardown; the
 * rcu_barrier() waits for outstanding RCU callbacks to run before the
 * subsystems they touch are dismantled. */
static void dp_cleanup(void)
{
	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
	unregister_netdevice_notifier(&ovs_dp_device_notifier);
	unregister_pernet_device(&ovs_net_ops);
	rcu_barrier();
	ovs_vport_exit();
	ovs_flow_exit();
	ovs_internal_dev_rtnl_link_unregister();
	action_fifos_exit();
}
2327
/* Module entry/exit points and metadata. */
module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");
MODULE_VERSION(VERSION);