Merge remote-tracking branches 'spi/fix/gqspi', 'spi/fix/imx', 'spi/fix/mg-spfi'...
[cascardo/linux.git] / drivers / net / geneve.c
1 /*
2  * GENEVE: Generic Network Virtualization Encapsulation
3  *
4  * Copyright (c) 2015 Red Hat, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/etherdevice.h>
17 #include <linux/hash.h>
18 #include <net/rtnetlink.h>
19 #include <net/geneve.h>
20
21 #define GENEVE_NETDEV_VER       "0.6"
22
23 #define GENEVE_UDP_PORT         6081
24
25 #define GENEVE_N_VID            (1u << 24)
26 #define GENEVE_VID_MASK         (GENEVE_N_VID - 1)
27
28 #define VNI_HASH_BITS           10
29 #define VNI_HASH_SIZE           (1<<VNI_HASH_BITS)
30
31 static bool log_ecn_error = true;
32 module_param(log_ecn_error, bool, 0644);
33 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
34
35 /* per-network namespace private data for this module */
36 struct geneve_net {
37         struct list_head  geneve_list;
38         struct hlist_head vni_list[VNI_HASH_SIZE];
39 };
40
41 /* Pseudo network device */
42 struct geneve_dev {
43         struct hlist_node  hlist;       /* vni hash table */
44         struct net         *net;        /* netns for packet i/o */
45         struct net_device  *dev;        /* netdev for geneve tunnel */
46         struct geneve_sock *sock;       /* socket used for geneve tunnel */
47         u8                 vni[3];      /* virtual network ID for tunnel */
48         u8                 ttl;         /* TTL override */
49         u8                 tos;         /* TOS override */
50         struct sockaddr_in remote;      /* IPv4 address for link partner */
51         struct list_head   next;        /* geneve's per namespace list */
52 };
53
54 static int geneve_net_id;
55
56 static inline __u32 geneve_net_vni_hash(u8 vni[3])
57 {
58         __u32 vnid;
59
60         vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
61         return hash_32(vnid, VNI_HASH_BITS);
62 }
63
64 /* geneve receive/decap routine */
65 static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
66 {
67         struct genevehdr *gnvh = geneve_hdr(skb);
68         struct geneve_dev *dummy, *geneve = NULL;
69         struct geneve_net *gn;
70         struct iphdr *iph = NULL;
71         struct pcpu_sw_netstats *stats;
72         struct hlist_head *vni_list_head;
73         int err = 0;
74         __u32 hash;
75
76         iph = ip_hdr(skb); /* Still outer IP header... */
77
78         gn = gs->rcv_data;
79
80         /* Find the device for this VNI */
81         hash = geneve_net_vni_hash(gnvh->vni);
82         vni_list_head = &gn->vni_list[hash];
83         hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) {
84                 if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) &&
85                     iph->saddr == dummy->remote.sin_addr.s_addr) {
86                         geneve = dummy;
87                         break;
88                 }
89         }
90         if (!geneve)
91                 goto drop;
92
93         /* Drop packets w/ critical options,
94          * since we don't support any...
95          */
96         if (gnvh->critical)
97                 goto drop;
98
99         skb_reset_mac_header(skb);
100         skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
101         skb->protocol = eth_type_trans(skb, geneve->dev);
102         skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
103
104         /* Ignore packet loops (and multicast echo) */
105         if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
106                 goto drop;
107
108         skb_reset_network_header(skb);
109
110         iph = ip_hdr(skb); /* Now inner IP header... */
111         err = IP_ECN_decapsulate(iph, skb);
112
113         if (unlikely(err)) {
114                 if (log_ecn_error)
115                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
116                                              &iph->saddr, iph->tos);
117                 if (err > 1) {
118                         ++geneve->dev->stats.rx_frame_errors;
119                         ++geneve->dev->stats.rx_errors;
120                         goto drop;
121                 }
122         }
123
124         stats = this_cpu_ptr(geneve->dev->tstats);
125         u64_stats_update_begin(&stats->syncp);
126         stats->rx_packets++;
127         stats->rx_bytes += skb->len;
128         u64_stats_update_end(&stats->syncp);
129
130         netif_rx(skb);
131
132         return;
133 drop:
134         /* Consume bad packet */
135         kfree_skb(skb);
136 }
137
138 /* Setup stats when device is created */
139 static int geneve_init(struct net_device *dev)
140 {
141         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
142         if (!dev->tstats)
143                 return -ENOMEM;
144
145         return 0;
146 }
147
148 static void geneve_uninit(struct net_device *dev)
149 {
150         free_percpu(dev->tstats);
151 }
152
153 static int geneve_open(struct net_device *dev)
154 {
155         struct geneve_dev *geneve = netdev_priv(dev);
156         struct net *net = geneve->net;
157         struct geneve_net *gn = net_generic(geneve->net, geneve_net_id);
158         struct geneve_sock *gs;
159
160         gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn,
161                              false, false);
162         if (IS_ERR(gs))
163                 return PTR_ERR(gs);
164
165         geneve->sock = gs;
166
167         return 0;
168 }
169
170 static int geneve_stop(struct net_device *dev)
171 {
172         struct geneve_dev *geneve = netdev_priv(dev);
173         struct geneve_sock *gs = geneve->sock;
174
175         geneve_sock_release(gs);
176
177         return 0;
178 }
179
180 static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
181 {
182         struct geneve_dev *geneve = netdev_priv(dev);
183         struct geneve_sock *gs = geneve->sock;
184         struct rtable *rt = NULL;
185         const struct iphdr *iip; /* interior IP header */
186         struct flowi4 fl4;
187         int err;
188         __be16 sport;
189         __u8 tos, ttl;
190
191         iip = ip_hdr(skb);
192
193         skb_reset_mac_header(skb);
194
195         /* TODO: port min/max limits should be configurable */
196         sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true);
197
198         tos = geneve->tos;
199         if (tos == 1)
200                 tos = ip_tunnel_get_dsfield(iip, skb);
201
202         memset(&fl4, 0, sizeof(fl4));
203         fl4.flowi4_tos = RT_TOS(tos);
204         fl4.daddr = geneve->remote.sin_addr.s_addr;
205         rt = ip_route_output_key(geneve->net, &fl4);
206         if (IS_ERR(rt)) {
207                 netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
208                 dev->stats.tx_carrier_errors++;
209                 goto tx_error;
210         }
211         if (rt->dst.dev == dev) { /* is this necessary? */
212                 netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
213                 dev->stats.collisions++;
214                 goto rt_tx_error;
215         }
216
217         tos = ip_tunnel_ecn_encap(tos, iip, skb);
218
219         ttl = geneve->ttl;
220         if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
221                 ttl = 1;
222
223         ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
224
225         /* no need to handle local destination and encap bypass...yet... */
226
227         err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr,
228                               tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0,
229                               geneve->vni, 0, NULL, false,
230                               !net_eq(geneve->net, dev_net(geneve->dev)));
231         if (err < 0)
232                 ip_rt_put(rt);
233
234         iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
235
236         return NETDEV_TX_OK;
237
238 rt_tx_error:
239         ip_rt_put(rt);
240 tx_error:
241         dev->stats.tx_errors++;
242         dev_kfree_skb(skb);
243         return NETDEV_TX_OK;
244 }
245
246 static const struct net_device_ops geneve_netdev_ops = {
247         .ndo_init               = geneve_init,
248         .ndo_uninit             = geneve_uninit,
249         .ndo_open               = geneve_open,
250         .ndo_stop               = geneve_stop,
251         .ndo_start_xmit         = geneve_xmit,
252         .ndo_get_stats64        = ip_tunnel_get_stats64,
253         .ndo_change_mtu         = eth_change_mtu,
254         .ndo_validate_addr      = eth_validate_addr,
255         .ndo_set_mac_address    = eth_mac_addr,
256 };
257
258 static void geneve_get_drvinfo(struct net_device *dev,
259                                struct ethtool_drvinfo *drvinfo)
260 {
261         strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
262         strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
263 }
264
265 static const struct ethtool_ops geneve_ethtool_ops = {
266         .get_drvinfo    = geneve_get_drvinfo,
267         .get_link       = ethtool_op_get_link,
268 };
269
270 /* Info for udev, that this is a virtual tunnel endpoint */
271 static struct device_type geneve_type = {
272         .name = "geneve",
273 };
274
275 /* Initialize the device structure. */
276 static void geneve_setup(struct net_device *dev)
277 {
278         ether_setup(dev);
279
280         dev->netdev_ops = &geneve_netdev_ops;
281         dev->ethtool_ops = &geneve_ethtool_ops;
282         dev->destructor = free_netdev;
283
284         SET_NETDEV_DEVTYPE(dev, &geneve_type);
285
286         dev->tx_queue_len = 0;
287         dev->features    |= NETIF_F_LLTX;
288         dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM;
289         dev->features    |= NETIF_F_RXCSUM;
290         dev->features    |= NETIF_F_GSO_SOFTWARE;
291
292         dev->vlan_features = dev->features;
293         dev->features    |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
294
295         dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
296         dev->hw_features |= NETIF_F_GSO_SOFTWARE;
297         dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
298
299         netif_keep_dst(dev);
300         dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
301 }
302
303 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
304         [IFLA_GENEVE_ID]                = { .type = NLA_U32 },
305         [IFLA_GENEVE_REMOTE]            = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
306         [IFLA_GENEVE_TTL]               = { .type = NLA_U8 },
307         [IFLA_GENEVE_TOS]               = { .type = NLA_U8 },
308 };
309
310 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
311 {
312         if (tb[IFLA_ADDRESS]) {
313                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
314                         return -EINVAL;
315
316                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
317                         return -EADDRNOTAVAIL;
318         }
319
320         if (!data)
321                 return -EINVAL;
322
323         if (data[IFLA_GENEVE_ID]) {
324                 __u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
325
326                 if (vni >= GENEVE_VID_MASK)
327                         return -ERANGE;
328         }
329
330         return 0;
331 }
332
333 static int geneve_newlink(struct net *net, struct net_device *dev,
334                          struct nlattr *tb[], struct nlattr *data[])
335 {
336         struct geneve_net *gn = net_generic(net, geneve_net_id);
337         struct geneve_dev *dummy, *geneve = netdev_priv(dev);
338         struct hlist_head *vni_list_head;
339         struct sockaddr_in remote;      /* IPv4 address for link partner */
340         __u32 vni, hash;
341         int err;
342
343         if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE])
344                 return -EINVAL;
345
346         geneve->net = net;
347         geneve->dev = dev;
348
349         vni = nla_get_u32(data[IFLA_GENEVE_ID]);
350         geneve->vni[0] = (vni & 0x00ff0000) >> 16;
351         geneve->vni[1] = (vni & 0x0000ff00) >> 8;
352         geneve->vni[2] =  vni & 0x000000ff;
353
354         geneve->remote.sin_addr.s_addr =
355                 nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
356         if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr)))
357                 return -EINVAL;
358
359         remote = geneve->remote;
360         hash = geneve_net_vni_hash(geneve->vni);
361         vni_list_head = &gn->vni_list[hash];
362         hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) {
363                 if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) &&
364                     !memcmp(&remote, &dummy->remote, sizeof(dummy->remote)))
365                         return -EBUSY;
366         }
367
368         if (tb[IFLA_ADDRESS] == NULL)
369                 eth_hw_addr_random(dev);
370
371         err = register_netdevice(dev);
372         if (err)
373                 return err;
374
375         if (data[IFLA_GENEVE_TTL])
376                 geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
377
378         if (data[IFLA_GENEVE_TOS])
379                 geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
380
381         list_add(&geneve->next, &gn->geneve_list);
382
383         hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]);
384
385         return 0;
386 }
387
388 static void geneve_dellink(struct net_device *dev, struct list_head *head)
389 {
390         struct geneve_dev *geneve = netdev_priv(dev);
391
392         if (!hlist_unhashed(&geneve->hlist))
393                 hlist_del_rcu(&geneve->hlist);
394
395         list_del(&geneve->next);
396         unregister_netdevice_queue(dev, head);
397 }
398
399 static size_t geneve_get_size(const struct net_device *dev)
400 {
401         return nla_total_size(sizeof(__u32)) +  /* IFLA_GENEVE_ID */
402                 nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */
403                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
404                 nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TOS */
405                 0;
406 }
407
408 static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
409 {
410         struct geneve_dev *geneve = netdev_priv(dev);
411         __u32 vni;
412
413         vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2];
414         if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
415                 goto nla_put_failure;
416
417         if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
418                             geneve->remote.sin_addr.s_addr))
419                 goto nla_put_failure;
420
421         if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
422             nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos))
423                 goto nla_put_failure;
424
425         return 0;
426
427 nla_put_failure:
428         return -EMSGSIZE;
429 }
430
431 static struct rtnl_link_ops geneve_link_ops __read_mostly = {
432         .kind           = "geneve",
433         .maxtype        = IFLA_GENEVE_MAX,
434         .policy         = geneve_policy,
435         .priv_size      = sizeof(struct geneve_dev),
436         .setup          = geneve_setup,
437         .validate       = geneve_validate,
438         .newlink        = geneve_newlink,
439         .dellink        = geneve_dellink,
440         .get_size       = geneve_get_size,
441         .fill_info      = geneve_fill_info,
442 };
443
444 static __net_init int geneve_init_net(struct net *net)
445 {
446         struct geneve_net *gn = net_generic(net, geneve_net_id);
447         unsigned int h;
448
449         INIT_LIST_HEAD(&gn->geneve_list);
450
451         for (h = 0; h < VNI_HASH_SIZE; ++h)
452                 INIT_HLIST_HEAD(&gn->vni_list[h]);
453
454         return 0;
455 }
456
457 static void __net_exit geneve_exit_net(struct net *net)
458 {
459         struct geneve_net *gn = net_generic(net, geneve_net_id);
460         struct geneve_dev *geneve, *next;
461         struct net_device *dev, *aux;
462         LIST_HEAD(list);
463
464         rtnl_lock();
465
466         /* gather any geneve devices that were moved into this ns */
467         for_each_netdev_safe(net, dev, aux)
468                 if (dev->rtnl_link_ops == &geneve_link_ops)
469                         unregister_netdevice_queue(dev, &list);
470
471         /* now gather any other geneve devices that were created in this ns */
472         list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
473                 /* If geneve->dev is in the same netns, it was already added
474                  * to the list by the previous loop.
475                  */
476                 if (!net_eq(dev_net(geneve->dev), net))
477                         unregister_netdevice_queue(geneve->dev, &list);
478         }
479
480         /* unregister the devices gathered above */
481         unregister_netdevice_many(&list);
482         rtnl_unlock();
483 }
484
485 static struct pernet_operations geneve_net_ops = {
486         .init = geneve_init_net,
487         .exit = geneve_exit_net,
488         .id   = &geneve_net_id,
489         .size = sizeof(struct geneve_net),
490 };
491
492 static int __init geneve_init_module(void)
493 {
494         int rc;
495
496         rc = register_pernet_subsys(&geneve_net_ops);
497         if (rc)
498                 goto out1;
499
500         rc = rtnl_link_register(&geneve_link_ops);
501         if (rc)
502                 goto out2;
503
504         return 0;
505 out2:
506         unregister_pernet_subsys(&geneve_net_ops);
507 out1:
508         return rc;
509 }
510 late_initcall(geneve_init_module);
511
512 static void __exit geneve_cleanup_module(void)
513 {
514         rtnl_link_unregister(&geneve_link_ops);
515         unregister_pernet_subsys(&geneve_net_ops);
516 }
517 module_exit(geneve_cleanup_module);
518
519 MODULE_LICENSE("GPL");
520 MODULE_VERSION(GENEVE_NETDEV_VER);
521 MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
522 MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
523 MODULE_ALIAS_RTNL_LINK("geneve");