2 * Copyright (c) 2014 Nicira, Inc.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
/* Prepend the module name to every format string passed through pr_fmt(). */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21 #include <linux/version.h>
25 #include <linux/net.h>
26 #include <linux/rculist.h>
27 #include <linux/udp.h>
29 #include <net/geneve.h>
32 #include <net/route.h>
34 #include <net/vxlan.h>
43 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
44 * |Ver| Opt Len |O|C| Rsvd. | Protocol Type |
45 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
46 * | Virtual Network Identifier (VNI) | Reserved |
47 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
48 * | Variable Length Options |
49 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
52 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
53 * | Option Class | Type |R|R|R| Length |
54 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
55 * | Variable Option Data |
56 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* NOTE(review): only two lines of this structure definition are visible here.
 * Presumably it is struct genevehdr (other code in this file dereferences
 * geneveh->options) -- confirm against the complete source.
 */
60 #ifdef __LITTLE_ENDIAN_BITFIELD
/* Trailing flexible array member holding the variable-length options. */
76 struct geneve_opt options[];
/* Fixed encapsulation overhead: UDP header plus the Geneve base header.
 * Option bytes are not included; callers add the options length separately.
 */
#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
84 * struct geneve_port - Keeps track of open UDP ports
85 * @sock: The socket created for this port number.
/* NOTE(review): no user of this list is visible in this file; confirm that it
 * is still needed.
 */
static LIST_HEAD(geneve_ports);
/* Return the Geneve private data (struct geneve_port) attached to @vport. */
static inline struct geneve_port *geneve_vport(const struct vport *vport)
{
	return vport_priv(vport);
}
/* Return the Geneve header of @skb, which starts immediately after the UDP
 * header returned by udp_hdr().
 */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	return (struct genevehdr *)(udp_hdr(skb) + 1);
}
105 /* Convert 64 bit tunnel ID to 24 bit VNI. */
106 static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
109 vni[0] = (__force __u8)(tun_id >> 16);
110 vni[1] = (__force __u8)(tun_id >> 8);
111 vni[2] = (__force __u8)tun_id;
113 vni[0] = (__force __u8)((__force u64)tun_id >> 40);
114 vni[1] = (__force __u8)((__force u64)tun_id >> 48);
115 vni[2] = (__force __u8)((__force u64)tun_id >> 56);
119 /* Convert 24 bit VNI to 64 bit tunnel ID. */
120 static __be64 vni_to_tunnel_id(const __u8 *vni)
123 return (vni[0] << 16) | (vni[1] << 8) | vni[2];
125 return (__force __be64)(((__force u64)vni[0] << 40) |
126 ((__force u64)vni[1] << 48) |
127 ((__force u64)vni[2] << 56));
/*
 * geneve_build_header - fill in the outer UDP header and the Geneve header.
 *
 * Both headers are written at the transport-header position of the skb, using
 * the egress tunnel info attached to it through OVS_CB().
 *
 * NOTE(review): some lines of this function (the rest of the parameter list
 * and possibly a few field assignments) are not visible here; the comments
 * below cover only the visible statements.
 */
131 static void geneve_build_header(const struct vport *vport,
134 struct geneve_port *geneve_port = geneve_vport(vport);
135 struct net *net = ovs_dp_get_net(vport->dp);
136 struct udphdr *udph = udp_hdr(skb);
/* The Geneve header directly follows the UDP header. */
137 struct genevehdr *geneveh = (struct genevehdr *)(udph + 1);
138 const struct ovs_tunnel_info *tun_info = OVS_CB(skb)->egress_tun_info;
/* Destination port comes from this vport's own socket (inet_sport()). */
140 udph->dest = inet_sport(geneve_port->sock->sk);
/* Source port is chosen by udp_flow_src_port() for this skb. */
141 udph->source = udp_flow_src_port(net, skb, 0, 0, true);
/* UDP length covers everything from the transport header to the end. */
143 udph->len = htons(skb->len - skb_transport_offset(skb));
145 geneveh->ver = GENEVE_VER;
/* opt_len is expressed in 4-byte units. */
146 geneveh->opt_len = tun_info->options_len / 4;
/* Normalise the flag tests to 0/1 for the single-bit header fields. */
147 geneveh->oam = !!(tun_info->tunnel.tun_flags & TUNNEL_OAM);
148 geneveh->critical = !!(tun_info->tunnel.tun_flags & TUNNEL_CRIT_OPT);
/* The payload is always an Ethernet frame (ETH_P_TEB). */
150 geneveh->proto_type = htons(ETH_P_TEB);
151 tunnel_id_to_vni(tun_info->tunnel.tun_id, geneveh->vni);
/* Copy the options verbatim after the base header. */
154 memcpy(geneveh->options, tun_info->options, tun_info->options_len);
/*
 * geneve_rcv - receive handler; geneve_socket_init() installs it as the UDP
 * socket's encap_rcv callback.
 * @sk: socket the packet arrived on; its sk_user_data holds the geneve_port.
 * @skb: received packet.
 *
 * Checks the Geneve base header, pulls the outer headers and passes the
 * packet to ovs_vport_receive() together with the tunnel metadata.
 *
 * NOTE(review): several lines of this function (remaining declarations, the
 * actions taken when a check fails, the return statements) are not visible
 * here; the comments below cover only the visible statements.
 */
157 static int geneve_rcv(struct sock *sk, struct sk_buff *skb)
159 struct geneve_port *geneve_port;
160 struct genevehdr *geneveh;
162 struct ovs_tunnel_info tun_info;
/* Only on kernels older than 3.16: check the UDP checksum here. */
166 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,16,0)
167 if (unlikely(udp_lib_checksum_complete(skb)))
/* The UDP header and the Geneve base header must both be present. */
171 if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
174 geneveh = geneve_hdr(skb);
/* Only the supported version and Ethernet (ETH_P_TEB) payloads pass. */
176 if (unlikely(geneveh->ver != GENEVE_VER))
179 if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
/* The owning port was stored in sk_user_data by geneve_socket_init(). */
182 geneve_port = rcu_dereference_sk_user_data(sk);
183 if (unlikely(!geneve_port))
/* opt_len counts 4-byte units; convert to bytes. */
186 opts_len = geneveh->opt_len * 4;
187 if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
/* Re-fetch the header pointer after the pull (presumably because the skb
 * data may have been moved -- confirm).
 */
191 geneveh = geneve_hdr(skb);
/* Translate header bits into tunnel flags; a non-zero UDP checksum field
 * sets TUNNEL_CSUM.
 */
193 flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
194 (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
195 (geneveh->oam ? TUNNEL_OAM : 0) |
196 (geneveh->critical ? TUNNEL_CRIT_OPT : 0);
198 key = vni_to_tunnel_id(geneveh->vni);
199 ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
200 udp_hdr(skb)->source, udp_hdr(skb)->dest,
202 geneveh->options, opts_len);
204 ovs_vport_receive(vport_from_priv(geneve_port), skb, &tun_info);
/* Value stored in the UDP socket's encap_type. It is arbitrary and only has
 * to be non-zero, because we install our own encap_rcv handler.
 */
#define UDP_ENCAP_GENEVE 1
/*
 * geneve_socket_init - create and bind the kernel UDP socket for a port and
 * register geneve_rcv() as its encapsulation receive handler.
 * @geneve_port: port whose ->sock is set up.
 * @net: network namespace the socket is moved into.
 *
 * The port is assigned to sin_port without conversion, so it is expected in
 * network byte order (the caller in this file passes htons(dst_port)).
 *
 * NOTE(review): the rest of the parameter list, the error checks after each
 * call and the return statements are not visible here. Whether 'sin' is
 * zeroed before use cannot be told from the visible lines -- confirm.
 */
215 static int geneve_socket_init(struct geneve_port *geneve_port, struct net *net,
218 struct sockaddr_in sin;
221 err = sock_create_kern(AF_INET, SOCK_DGRAM, 0,
226 /* release net ref. */
227 sk_change_net(geneve_port->sock->sk, net);
/* Bind to the wildcard IPv4 address on the requested port. */
229 sin.sin_family = AF_INET;
230 sin.sin_addr.s_addr = htonl(INADDR_ANY);
231 sin.sin_port = dst_port;
233 err = kernel_bind(geneve_port->sock,
234 (struct sockaddr *)&sin, sizeof(struct sockaddr_in));
/* Let geneve_rcv() find the port again through sk_user_data. */
238 rcu_assign_sk_user_data(geneve_port->sock->sk, geneve_port);
/* Mark the socket as an encapsulation socket handled by geneve_rcv(). */
239 udp_sk(geneve_port->sock->sk)->encap_type = UDP_ENCAP_GENEVE;
240 udp_sk(geneve_port->sock->sk)->encap_rcv = geneve_rcv;
/* Presumably part of the error path: release the socket and log -- confirm. */
247 sk_release_kernel(geneve_port->sock->sk);
249 pr_warn("cannot register geneve protocol handler: %d\n", err);
/*
 * geneve_get_options - report the port's options as netlink attributes.
 *
 * Puts OVS_TUNNEL_ATTR_DST_PORT, in host byte order, taken from the port's
 * socket via inet_sport().
 *
 * NOTE(review): the rest of the parameter list and the return statements are
 * not visible here.
 */
253 static int geneve_get_options(const struct vport *vport,
256 struct geneve_port *geneve_port = geneve_vport(vport);
258 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT,
259 ntohs(inet_sport(geneve_port->sock->sk))))
/*
 * geneve_tnl_destroy - tear down a Geneve vport.
 *
 * Clears the socket's sk_user_data (geneve_rcv() checks it for NULL),
 * releases the socket, then frees the vport through
 * ovs_vport_deferred_free().
 *
 * NOTE(review): at least one line of this function is not visible here.
 */
264 static void geneve_tnl_destroy(struct vport *vport)
266 struct geneve_port *geneve_port = geneve_vport(vport);
269 rcu_assign_sk_user_data(geneve_port->sock->sk, NULL);
270 sk_release_kernel(geneve_port->sock->sk);
272 ovs_vport_deferred_free(vport);
/*
 * geneve_tnl_create - create a Geneve vport from netlink parameters.
 * @parms: vport parameters; ->options must carry OVS_TUNNEL_ATTR_DST_PORT as
 *         a u16, ->name is copied into the port's private data.
 *
 * Allocates the vport with room for a struct geneve_port and sets up its UDP
 * socket on the requested port.
 *
 * NOTE(review): declarations, error handling and return statements are not
 * visible here; the comments below cover only the visible statements.
 */
275 static struct vport *geneve_tnl_create(const struct vport_parms *parms)
277 struct net *net = ovs_dp_get_net(parms->dp);
278 struct nlattr *options = parms->options;
279 struct geneve_port *geneve_port;
/* The attribute must be present and exactly u16-sized. */
290 a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
291 if (a && nla_len(a) == sizeof(u16)) {
292 dst_port = nla_get_u16(a);
294 /* Require destination port from userspace. */
299 vport = ovs_vport_alloc(sizeof(struct geneve_port),
300 &ovs_geneve_vport_ops, parms);
304 geneve_port = geneve_vport(vport);
/* NOTE(review): strncpy() does not NUL-terminate when the source is IFNAMSIZ
 * bytes or longer. This is only safe if the destination was zeroed and the
 * name is guaranteed shorter than IFNAMSIZ -- confirm, or use a terminating
 * copy.
 */
305 strncpy(geneve_port->name, parms->name, IFNAMSIZ);
/* geneve_socket_init() takes the port in network byte order. */
307 err = geneve_socket_init(geneve_port, net, htons(dst_port));
/* Presumably reached only when socket setup failed -- confirm. */
314 ovs_vport_free(vport);
319 #if LINUX_VERSION_CODE < KERNEL_VERSION(3,12,0)
321 static void geneve_fix_segment(struct sk_buff *skb)
323 struct udphdr *udph = udp_hdr(skb);
325 udph->len = htons(skb->len - skb_transport_offset(skb));
/*
 * handle_offloads - variant built for kernels older than 3.12.
 *
 * Registers geneve_fix_segment() as the per-segment fix-up callback. In the
 * other branch, any ip_summed value other than CHECKSUM_PARTIAL is
 * downgraded to CHECKSUM_NONE.
 *
 * NOTE(review): the condition that selects the first branch and the return
 * statement are not visible here.
 */
328 static int handle_offloads(struct sk_buff *skb)
331 OVS_GSO_CB(skb)->fix_segment = geneve_fix_segment;
332 else if (skb->ip_summed != CHECKSUM_PARTIAL)
333 skb->ip_summed = CHECKSUM_NONE;
/*
 * handle_offloads - variant for kernels where the earlier version check does
 * not apply.
 *
 * Prepares offload state before transmission: GSO packets are uncloned and
 * tagged SKB_GSO_UDP_TUNNEL, non-GSO packets without CHECKSUM_PARTIAL get
 * CHECKSUM_NONE, and the skb is marked as encapsulated.
 *
 * NOTE(review): the body of the first branch, the error check after
 * skb_unclone() and the return statements are not visible here.
 */
337 static int handle_offloads(struct sk_buff *skb)
/* An skb that is already encapsulated and GSO gets special treatment. */
339 if (skb->encapsulation && skb_is_gso(skb)) {
344 if (skb_is_gso(skb)) {
/* gso_type in the shared info is modified below, so unclone first. */
345 int err = skb_unclone(skb, GFP_ATOMIC);
349 skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL;
350 } else if (skb->ip_summed != CHECKSUM_PARTIAL)
351 skb->ip_summed = CHECKSUM_NONE;
353 skb->encapsulation = 1;
/*
 * geneve_send - encapsulate @skb in Geneve/UDP/IPv4 and transmit it.
 * @vport: Geneve vport the packet is sent on.
 * @skb: packet to send; must carry egress tunnel info in OVS_CB().
 *
 * Returns the value of iptunnel_xmit() plus the packet's original network
 * offset when that value is positive, otherwise the value unchanged.
 *
 * NOTE(review): declarations, error paths and several argument lines are not
 * visible here; the comments below cover only the visible statements.
 */
358 static int geneve_send(struct vport *vport, struct sk_buff *skb)
360 struct ovs_key_ipv4_tunnel *tun_key;
/* Remembered now because the final return value adds it back. */
361 int network_offset = skb_network_offset(skb);
/* Tunnel metadata must have been attached to the skb. */
369 if (unlikely(!OVS_CB(skb)->egress_tun_info))
372 tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
/* Route lookup for the outer packet. saddr is passed by pointer and then
 * reused as the outer source address in iptunnel_xmit().
 */
375 saddr = tun_key->ipv4_src;
376 rt = find_route(ovs_dp_get_net(vport->dp),
377 &saddr, tun_key->ipv4_dst,
378 IPPROTO_UDP, tun_key->ipv4_tos,
/* Headroom needed: link layer, the route's header_len, Geneve options, the
 * outer IPv4 header and an optional VLAN tag (one term of this sum is not
 * visible here).
 */
385 min_headroom = LL_RESERVED_SPACE(rt_dst(rt).dev) + rt_dst(rt).header_len
387 + OVS_CB(skb)->egress_tun_info->options_len
388 + sizeof(struct iphdr)
389 + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
/* Grow the head when there is too little room or the header is cloned. */
391 if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
392 int head_delta = SKB_DATA_ALIGN(min_headroom -
396 err = pskb_expand_head(skb, max_t(int, head_delta, 0),
/* Insert a pending VLAN tag with __vlan_put_tag() and then clear the tag
 * kept in the skb.
 */
402 if (vlan_tx_tag_present(skb)) {
403 if (unlikely(!__vlan_put_tag(skb,
405 vlan_tx_tag_get(skb)))) {
409 vlan_set_tci(skb, 0);
/* The headers seen so far become the inner headers. */
412 skb_reset_inner_headers(skb);
/* Make room for UDP + Geneve base header + options; that position becomes
 * the transport header.
 */
414 __skb_push(skb, GENEVE_BASE_HLEN +
415 OVS_CB(skb)->egress_tun_info->options_len);
416 skb_reset_transport_header(skb);
418 geneve_build_header(vport, skb);
421 err = handle_offloads(skb);
/* Set DF on the outer IP header when the tunnel flags request it. */
425 df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
427 sent_len = iptunnel_xmit(skb->sk, rt, skb,
428 saddr, tun_key->ipv4_dst,
429 IPPROTO_UDP, tun_key->ipv4_tos,
433 return sent_len > 0 ? sent_len + network_offset : sent_len;
441 static const char *geneve_get_name(const struct vport *vport)
443 struct geneve_port *geneve_port = geneve_vport(vport);
444 return geneve_port->name;
447 static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
448 struct ovs_tunnel_info *egress_tun_info)
450 struct geneve_port *geneve_port = geneve_vport(vport);
451 struct net *net = ovs_dp_get_net(vport->dp);
454 * Get tp_src and tp_dst, refert to geneve_build_header().
456 return ovs_tunnel_get_egress_info(egress_tun_info,
457 ovs_dp_get_net(vport->dp),
458 OVS_CB(skb)->egress_tun_info,
459 IPPROTO_UDP, skb->mark,
460 udp_flow_src_port(net, skb, 0, 0, true),
461 inet_sport(geneve_port->sock->sk));
/*
 * Operations table for Geneve vports; geneve_tnl_create() passes it to
 * ovs_vport_alloc().
 *
 * NOTE(review): no .send member and no closing brace are visible here, even
 * though geneve_send() is defined in this file -- confirm against the
 * complete source.
 */
465 const struct vport_ops ovs_geneve_vport_ops = {
466 .type = OVS_VPORT_TYPE_GENEVE,
467 .create = geneve_tnl_create,
468 .destroy = geneve_tnl_destroy,
469 .get_name = geneve_get_name,
470 .get_options = geneve_get_options,
472 .get_egress_tun_info = geneve_get_egress_tun_info,