2 * Copyright (c) 2016 Nicira, Inc.
3 * Copyright (c) 2016 Red Hat, Inc.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
20 #include "netdev-native-tnl.h"
24 #include <sys/socket.h>
26 #include <netinet/in.h>
27 #include <netinet/ip.h>
28 #include <netinet/ip6.h>
29 #include <sys/ioctl.h>
35 #include "byte-order.h"
37 #include "dp-packet.h"
39 #include "netdev-vport.h"
40 #include "netdev-vport-private.h"
41 #include "odp-netlink.h"
44 #include "unaligned.h"
46 #include "openvswitch/vlog.h"
/* Log module name used by the VLOG_* calls in this file. */
48 VLOG_DEFINE_THIS_MODULE(native_tnl);
/* Rate limiter shared by every VLOG_WARN_RL() call in this file. */
49 static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
/* Size of the UDP header plus the VXLAN header. */
51 #define VXLAN_HLEN (sizeof(struct udp_header) + \
52 sizeof(struct vxlanhdr))
/* Size of the UDP header plus 'struct genevehdr'.  The length of the Geneve
 * options is added on top of this by netdev_geneve_pop_header(). */
54 #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
55 sizeof(struct genevehdr))
/* Bounds of the tunnel UDP source port range.  They are reported and updated
 * by netdev_tnl_egress_port_range(). */
57 uint16_t tnl_udp_port_min = 32768;
58 uint16_t tnl_udp_port_max = 61000;
/* Copies the outer IP addresses, TOS and TTL of the tunnel packet 'packet'
 * into 'tnl' and stores the combined length of the Ethernet and IP headers in
 * '*hlen'.
 *
 * For IPv4 the header checksum is verified, the total length is compared
 * with the bytes actually present and headers carrying IP options are
 * rejected; each failed check logs a rate-limited warning.  An IP version
 * other than 4 or 6 is logged as well.
 *
 * NOTE(review): the return type, most local declarations, the 'return'
 * statements and the closing braces are not visible here.  The callers in
 * this file use the result as a pointer to the L4 header; presumably NULL is
 * returned on failure -- confirm against the full source. */
61 netdev_tnl_ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
66 struct ovs_16aligned_ip6_hdr *ip6;
70 nh = dp_packet_l3(packet);
73 l4 = dp_packet_l4(packet);
/* Start with the Ethernet header; the IP header length is added below. */
79 *hlen = sizeof(struct eth_header);
/* Number of packet bytes from the start of the L3 header to the end. */
81 l3_size = dp_packet_size(packet) -
82 ((char *)nh - (char *)dp_packet_data(packet));
84 if (IP_VER(ip->ip_ihl_ver) == 4) {
86 ovs_be32 ip_src, ip_dst;
/* A nonzero checksum over the IPv4 header means the header is corrupt. */
88 if (csum(ip, IP_IHL(ip->ip_ihl_ver) * 4)) {
89 VLOG_WARN_RL(&err_rl, "ip packet has invalid checksum");
/* The length claimed by the header must fit in the bytes present. */
93 if (ntohs(ip->ip_tot_len) > l3_size) {
94 VLOG_WARN_RL(&err_rl, "ip packet is truncated (IP length %d, actual %d)",
95 ntohs(ip->ip_tot_len), l3_size);
/* A header longer than 'struct ip_header' carries IP options. */
98 if (IP_IHL(ip->ip_ihl_ver) * 4 > sizeof(struct ip_header)) {
99 VLOG_WARN_RL(&err_rl, "ip options not supported on tunnel packets "
100 "(%d bytes)", IP_IHL(ip->ip_ihl_ver) * 4);
104 ip_src = get_16aligned_be32(&ip->ip_src);
105 ip_dst = get_16aligned_be32(&ip->ip_dst);
107 tnl->ip_src = ip_src;
108 tnl->ip_dst = ip_dst;
109 tnl->ip_tos = ip->ip_tos;
110 tnl->ip_ttl = ip->ip_ttl;
112 *hlen += IP_HEADER_LEN;
114 } else if (IP_VER(ip->ip_ihl_ver) == 6) {
115 ovs_be32 tc_flow = get_16aligned_be32(&ip6->ip6_flow);
117 memcpy(tnl->ipv6_src.s6_addr, ip6->ip6_src.be16, sizeof ip6->ip6_src);
118 memcpy(tnl->ipv6_dst.s6_addr, ip6->ip6_dst.be16, sizeof ip6->ip6_dst);
/* NOTE(review): the shift leaves the version nibble above the traffic
 * class; presumably 'ip_tos' is 8 bits wide so the assignment drops it --
 * confirm. */
120 tnl->ip_tos = ntohl(tc_flow) >> 20;
121 tnl->ip_ttl = ip6->ip6_hlim;
123 *hlen += IPV6_HEADER_LEN;
/* Neither IPv4 nor IPv6. */
126 VLOG_WARN_RL(&err_rl, "ipv4 packet has invalid version (%d)",
127 IP_VER(ip->ip_ihl_ver));
134 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
135 * reallocating the packet if necessary. 'header' should contain an Ethernet
136 * header, followed by an IPv4 header (without options), and an L4 header.
138 * This function sets the IP header's ip_tot_len field (which should be zeroed
139 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
140 * updates IP header checksum.
142 * Return pointer to the L4 header added to 'packet'. */
/* NOTE(review): the comment above describes only the IPv4 case.  The visible
 * code also handles an IPv6 'header', in which case it sets ip6_plen instead
 * and touches no checksum.  In both branches the IP header length is
 * subtracted from '*ip_tot_size', so the value left there is the number of
 * bytes that follow the IP header.  The 'return' statements are not visible
 * here. */
144 netdev_tnl_push_ip_header(struct dp_packet *packet,
145 const void *header, int size, int *ip_tot_size)
147 struct eth_header *eth;
148 struct ip_header *ip;
149 struct ovs_16aligned_ip6_hdr *ip6;
/* Reserve room at the front of the packet; the header is copied in below. */
151 eth = dp_packet_push_uninit(packet, size);
/* Everything after the new Ethernet header. */
152 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
154 memcpy(eth, header, size);
156 if (netdev_tnl_is_header_ipv6(header)) {
157 ip6 = netdev_tnl_ipv6_hdr(eth);
/* The IPv6 payload length excludes the IPv6 header itself. */
158 *ip_tot_size -= IPV6_HEADER_LEN;
159 ip6->ip6_plen = htons(*ip_tot_size);
162 ip = netdev_tnl_ip_hdr(eth);
/* The IPv4 total length includes the IPv4 header. */
163 ip->ip_tot_len = htons(*ip_tot_size);
/* Update the checksum for the change of ip_tot_len from 0 to its new
 * value. */
164 ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
165 *ip_tot_size -= IP_HEADER_LEN;
/* Extracts the outer IP and UDP tunnel metadata of 'packet' into 'tnl'.
 *
 * The IP part is handled by netdev_tnl_ip_extract_tnl_md(), which also sets
 * '*hlen'.  The visible code then checksums the IPv4 or IPv6 pseudo-header
 * plus everything from the UDP header to the end of the packet, sets
 * FLOW_TNL_F_CSUM in 'tnl->flags' and records the UDP source and destination
 * ports in 'tnl'.
 *
 * NOTE(review): the condition guarding the checksum verification, the
 * failure handling and the return value are not visible here. */
171 udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
174 struct udp_header *udp;
176 udp = netdev_tnl_ip_extract_tnl_md(packet, tnl, hlen);
/* Seed the checksum with the pseudo-header of the outer IP version. */
183 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
184 csum = packet_csum_pseudoheader6(dp_packet_l3(packet));
186 csum = packet_csum_pseudoheader(dp_packet_l3(packet));
/* Continue over the UDP header and all bytes that follow it. */
189 csum = csum_continue(csum, udp, dp_packet_size(packet) -
190 ((const unsigned char *)udp -
191 (const unsigned char *)dp_packet_l2(packet)));
/* A nonzero folded sum means the checksum does not verify. */
192 if (csum_finish(csum)) {
195 tnl->flags |= FLOW_TNL_F_CSUM;
198 tnl->tp_src = udp->udp_src;
199 tnl->tp_dst = udp->udp_dst;
/* Pushes the prebuilt tunnel header 'data->header' onto 'packet' and
 * completes its UDP header: the source port comes from
 * netdev_tnl_get_src_port() and the UDP length is the byte count that
 * netdev_tnl_push_ip_header() leaves in 'ip_tot_size'.
 *
 * The visible code also computes the UDP checksum over the pseudo-header and
 * those bytes, substituting 0xffff for a computed value of zero.
 *
 * NOTE(review): the condition guarding the checksum computation is not
 * visible here. */
206 netdev_tnl_push_udp_header(struct dp_packet *packet,
207 const struct ovs_action_push_tnl *data)
209 struct udp_header *udp;
212 udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
214 /* set udp src port */
215 udp->udp_src = netdev_tnl_get_src_port(packet);
216 udp->udp_len = htons(ip_tot_size);
/* Seed the checksum with the pseudo-header of the outer IP version. */
220 if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
221 csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet)));
223 csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
226 csum = csum_continue(csum, udp, ip_tot_size);
227 udp->udp_csum = csum_finish(csum);
/* Never leave a computed checksum of zero in the header. */
229 if (!udp->udp_csum) {
230 udp->udp_csum = htons(0xffff);
/* Starts a tunnel header in 'data': zeroes the whole 'data->header' buffer,
 * writes an Ethernet header with the MAC addresses from 'params' and an
 * EtherType chosen by 'params->is_ipv6', and sets 'data->header_len' to the
 * size of that Ethernet header.
 *
 * NOTE(review): the return statement is not visible here;
 * netdev_tnl_ip_build_header() uses the result as the L3 header pointer. */
236 eth_build_header(struct ovs_action_push_tnl *data,
237 const struct netdev_tnl_build_header_params *params)
239 uint16_t eth_proto = params->is_ipv6 ? ETH_TYPE_IPV6 : ETH_TYPE_IP;
240 struct eth_header *eth;
/* Zeroing everything up front lets later code rely on zeroed fields. */
242 memset(data->header, 0, sizeof data->header);
244 eth = (struct eth_header *)data->header;
245 eth->eth_dst = params->dmac;
246 eth->eth_src = params->smac;
247 eth->eth_type = htons(eth_proto);
248 data->header_len = sizeof(struct eth_header);
/* Builds the outer Ethernet and IP headers of a tunnel into 'data->header'
 * and adds the IP header length to 'data->header_len'.
 *
 * The Ethernet part is written by eth_build_header().  For IPv4 the header
 * has no options (IHL 5); TOS, TTL and destination come from
 * 'params->flow->tunnel', the source is the IPv4 address mapped in
 * 'params->s_ip', and the header checksum is computed here.  For IPv6 the
 * version and traffic class, hop limit, next header and both addresses are
 * filled in.  'next_proto' is stored as the IP protocol / next header.
 *
 * NOTE(review): the return type, the declaration of 'l3', the second half of
 * the ip_frag_off expression and the 'return' statements are not visible
 * here; callers in this file use the result as the header that follows the
 * IP header. */
253 netdev_tnl_ip_build_header(struct ovs_action_push_tnl *data,
254 const struct netdev_tnl_build_header_params *params,
259 l3 = eth_build_header(data, params);
260 if (!params->is_ipv6) {
261 ovs_be32 ip_src = in6_addr_get_mapped_ipv4(params->s_ip);
262 struct ip_header *ip;
264 ip = (struct ip_header *) l3;
/* Version 4 with a 5-word header, i.e. no IP options. */
266 ip->ip_ihl_ver = IP_IHL_VER(5, 4);
267 ip->ip_tos = params->flow->tunnel.ip_tos;
268 ip->ip_ttl = params->flow->tunnel.ip_ttl;
269 ip->ip_proto = next_proto;
270 put_16aligned_be32(&ip->ip_src, ip_src);
271 put_16aligned_be32(&ip->ip_dst, params->flow->tunnel.ip_dst);
273 ip->ip_frag_off = (params->flow->tunnel.flags & FLOW_TNL_F_DONT_FRAGMENT) ?
276 /* Checksum has already been zeroed by eth_build_header. */
277 ip->ip_csum = csum(ip, sizeof *ip);
279 data->header_len += IP_HEADER_LEN;
282 struct ovs_16aligned_ip6_hdr *ip6;
284 ip6 = (struct ovs_16aligned_ip6_hdr *) l3;
/* Version 6 in the top nibble, followed by the traffic class. */
286 put_16aligned_be32(&ip6->ip6_flow, htonl(6 << 28) |
287 htonl(params->flow->tunnel.ip_tos << 20));
288 ip6->ip6_hlim = params->flow->tunnel.ip_ttl;
289 ip6->ip6_nxt = next_proto;
290 memcpy(&ip6->ip6_src, params->s_ip, sizeof(ovs_be32[4]));
291 memcpy(&ip6->ip6_dst, &params->flow->tunnel.ipv6_dst, sizeof(ovs_be32[4]));
293 data->header_len += IPV6_HEADER_LEN;
/* Builds the outer Ethernet, IP and UDP headers of a UDP-based tunnel into
 * 'data' and adds the UDP header size to 'data->header_len'.
 *
 * The UDP destination port is 'tnl_cfg->dst_port'.  When the tunnel is IPv6
 * or the flow has FLOW_TNL_F_CSUM set, the checksum field is preset to
 * 0xffff as a marker.
 *
 * NOTE(review): the return statement is not visible here; the VXLAN and
 * Geneve builders use the result as the header that follows the UDP header. */
299 udp_build_header(struct netdev_tunnel_config *tnl_cfg,
300 struct ovs_action_push_tnl *data,
301 const struct netdev_tnl_build_header_params *params)
303 struct udp_header *udp;
305 udp = netdev_tnl_ip_build_header(data, params, IPPROTO_UDP);
306 udp->udp_dst = tnl_cfg->dst_port;
308 if (params->is_ipv6 || params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
309 /* Write a value in now to mark that we should compute the checksum
310 * later. 0xffff is handy because it is transparent to the
 * checksum calculation. */
312 udp->udp_csum = htons(0xffff);
314 data->header_len += sizeof *udp;
/* Tests 'flags', in network byte order, for the GRE_CSUM, GRE_KEY and
 * GRE_SEQ bits.
 *
 * NOTE(review): the bodies of the three tests and the return statement are
 * not visible here.  parse_gre_header() adds the result to the outer header
 * length, so this presumably returns the GRE header length implied by
 * 'flags' -- confirm against the full source. */
319 gre_header_len(ovs_be16 flags)
323 if (flags & htons(GRE_CSUM)) {
326 if (flags & htons(GRE_KEY)) {
329 if (flags & htons(GRE_SEQ)) {
/* Parses the outer IP and GRE headers of 'packet' and records the tunnel
 * metadata in 'tnl'.
 *
 * The visible checks are: flags other than GRE_CSUM, GRE_KEY and GRE_SEQ,
 * an inner protocol other than ETH_TYPE_TEB, and a computed header length
 * larger than the packet.  With GRE_CSUM a checksum is computed from the
 * GRE header to the end of the packet; with GRE_KEY the key becomes
 * 'tnl->tun_id'.
 *
 * NOTE(review): the failure handling, the stepping of 'options' between
 * fields and the return statement are not visible here.
 * netdev_gre_pop_header() stores the result in its header length variable. */
336 parse_gre_header(struct dp_packet *packet,
337 struct flow_tnl *tnl)
339 const struct gre_base_hdr *greh;
340 ovs_16aligned_be32 *options;
344 greh = netdev_tnl_ip_extract_tnl_md(packet, tnl, &ulen);
/* Any flag bit outside the three handled ones. */
349 if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
353 if (greh->protocol != htons(ETH_TYPE_TEB)) {
/* Outer Ethernet + IP length plus the GRE header length. */
357 hlen = ulen + gre_header_len(greh->flags);
358 if (hlen > dp_packet_size(packet)) {
/* Optional GRE fields start right after the base header. */
362 options = (ovs_16aligned_be32 *)(greh + 1);
363 if (greh->flags & htons(GRE_CSUM)) {
366 pkt_csum = csum(greh, dp_packet_size(packet) -
367 ((const unsigned char *)greh -
368 (const unsigned char *)dp_packet_l2(packet)));
372 tnl->flags = FLOW_TNL_F_CSUM;
376 if (greh->flags & htons(GRE_KEY)) {
377 tnl->tun_id = be32_to_be64(get_16aligned_be32(options));
378 tnl->flags |= FLOW_TNL_F_KEY;
382 if (greh->flags & htons(GRE_SEQ)) {
/* Pops the GRE tunnel header of 'packet': initializes the packet's tunnel
 * metadata, parses the outer headers with parse_gre_header() and resets the
 * packet past the parsed header length.
 *
 * NOTE(review): the return type, the branch structure and the 'return'
 * statements are not visible here; the dp_packet_delete() call is presumably
 * the error path -- confirm against the full source. */
390 netdev_gre_pop_header(struct dp_packet *packet)
392 struct pkt_metadata *md = &packet->md;
393 struct flow_tnl *tnl = &md->tunnel;
/* Minimum length: Ethernet header plus 4 bytes (presumably the GRE base
 * header), plus the outer IP header added just below. */
394 int hlen = sizeof(struct eth_header) + 4;
396 hlen += netdev_tnl_is_header_ipv6(dp_packet_data(packet)) ?
397 IPV6_HEADER_LEN : IP_HEADER_LEN;
399 pkt_metadata_init_tnl(md);
400 if (hlen > dp_packet_size(packet)) {
404 hlen = parse_gre_header(packet, tnl);
409 dp_packet_reset_packet(packet, hlen);
413 dp_packet_delete(packet);
/* Pushes the prebuilt GRE tunnel header 'data->header' onto 'packet'.  When
 * the GRE header has GRE_CSUM set, the 16-bit field right after the base
 * header is filled with the checksum of the 'ip_tot_size' bytes starting at
 * the GRE header.
 *
 * NOTE(review): the return type and the declaration of 'ip_tot_size' are not
 * visible here. */
418 netdev_gre_push_header(struct dp_packet *packet,
419 const struct ovs_action_push_tnl *data)
421 struct gre_base_hdr *greh;
424 greh = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
426 if (greh->flags & htons(GRE_CSUM)) {
/* The checksum field immediately follows the base header. */
427 ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
428 *csum_opt = csum(greh, ip_tot_size);
/* Builds the GRE tunnel header for 'netdev' into 'data'.
 *
 * The outer Ethernet and IP headers come from netdev_tnl_ip_build_header().
 * The GRE protocol is ETH_TYPE_TEB.  GRE_CSUM is set, with a zeroed
 * placeholder field, when the flow has FLOW_TNL_F_CSUM; GRE_KEY is set, with
 * the key taken from the flow's tun_id, when the tunnel configuration has
 * 'out_key_present'.  The device mutex is held while the configuration is
 * read.
 *
 * NOTE(review): the return type, the stepping of 'options' after each
 * optional field and the return statement are not visible here. */
433 netdev_gre_build_header(const struct netdev *netdev,
434 struct ovs_action_push_tnl *data,
435 const struct netdev_tnl_build_header_params *params)
437 struct netdev_vport *dev = netdev_vport_cast(netdev);
438 struct netdev_tunnel_config *tnl_cfg;
439 struct gre_base_hdr *greh;
440 ovs_16aligned_be32 *options;
443 /* XXX: RCUfy tnl_cfg. */
444 ovs_mutex_lock(&dev->mutex);
445 tnl_cfg = &dev->tnl_cfg;
447 greh = netdev_tnl_ip_build_header(data, params, IPPROTO_GRE);
449 greh->protocol = htons(ETH_TYPE_TEB);
/* Optional GRE fields start right after the base header. */
452 options = (ovs_16aligned_be32 *) (greh + 1);
453 if (params->flow->tunnel.flags & FLOW_TNL_F_CSUM) {
454 greh->flags |= htons(GRE_CSUM);
455 put_16aligned_be32(options, 0);
459 if (tnl_cfg->out_key_present) {
460 greh->flags |= htons(GRE_KEY);
461 put_16aligned_be32(options, be64_to_be32(params->flow->tunnel.tun_id));
465 ovs_mutex_unlock(&dev->mutex);
/* GRE header length: distance from its start to the end of the options. */
467 hlen = (uint8_t *) options - (uint8_t *) greh;
469 data->header_len += hlen;
470 data->tnl_type = OVS_VPORT_TYPE_GRE;
/* Pops the VXLAN tunnel header of 'packet': initializes the packet's tunnel
 * metadata, extracts the outer IP and UDP metadata, validates the VXLAN
 * header, stores the VNI as the tunnel ID and resets the packet past the
 * outer headers.
 *
 * NOTE(review): the return type, the declaration of 'hlen', the branch
 * structure and the 'return' statements are not visible here; the
 * dp_packet_delete() call is presumably the error path -- confirm against
 * the full source. */
475 netdev_vxlan_pop_header(struct dp_packet *packet)
477 struct pkt_metadata *md = &packet->md;
478 struct flow_tnl *tnl = &md->tunnel;
479 struct vxlanhdr *vxh;
482 pkt_metadata_init_tnl(md);
/* The L4 part must hold at least the UDP and VXLAN headers. */
483 if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
487 vxh = udp_extract_tnl_md(packet, tnl, &hlen);
/* The flags word must equal VXLAN_FLAGS and the low byte of the VNI word
 * must be zero. */
492 if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
493 (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
494 VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
495 ntohl(get_16aligned_be32(&vxh->vx_flags)),
496 ntohl(get_16aligned_be32(&vxh->vx_vni)));
/* The VNI sits above the low 8 bits of the word. */
499 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
500 tnl->flags |= FLOW_TNL_F_KEY;
502 dp_packet_reset_packet(packet, hlen + VXLAN_HLEN);
506 dp_packet_delete(packet);
/* Builds the VXLAN tunnel header for 'netdev' into 'data'.
 *
 * The outer Ethernet, IP and UDP headers come from udp_build_header().  The
 * VXLAN flags word is set to VXLAN_FLAGS and the flow's tun_id is written,
 * shifted left by 8 bits, into the VNI word.  The device mutex is held while
 * the tunnel configuration is in use.
 *
 * NOTE(review): the return type and the return statement are not visible
 * here. */
511 netdev_vxlan_build_header(const struct netdev *netdev,
512 struct ovs_action_push_tnl *data,
513 const struct netdev_tnl_build_header_params *params)
515 struct netdev_vport *dev = netdev_vport_cast(netdev);
516 struct netdev_tunnel_config *tnl_cfg;
517 struct vxlanhdr *vxh;
519 /* XXX: RCUfy tnl_cfg. */
520 ovs_mutex_lock(&dev->mutex);
521 tnl_cfg = &dev->tnl_cfg;
523 vxh = udp_build_header(tnl_cfg, data, params);
525 put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
/* The VNI sits above the low 8 bits of the word. */
526 put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(params->flow->tunnel.tun_id) << 8));
528 ovs_mutex_unlock(&dev->mutex);
529 data->header_len += sizeof *vxh;
530 data->tnl_type = OVS_VPORT_TYPE_VXLAN;
/* Pops the Geneve tunnel header of 'packet': initializes the packet's tunnel
 * metadata, extracts the outer IP and UDP metadata, validates the Geneve
 * header, copies the VNI, the OAM flag and the raw options into the tunnel
 * metadata and resets the packet past the whole header.
 *
 * NOTE(review): the return type, the condition of the version check, the
 * branch structure and the 'return' statements are not visible here; the
 * dp_packet_delete() call is presumably the error path -- confirm against
 * the full source. */
535 netdev_geneve_pop_header(struct dp_packet *packet)
537 struct pkt_metadata *md = &packet->md;
538 struct flow_tnl *tnl = &md->tunnel;
539 struct genevehdr *gnh;
540 unsigned int hlen, opts_len, ulen;
542 pkt_metadata_init_tnl(md);
/* The L4 part must hold at least the UDP and base Geneve headers. */
543 if (GENEVE_BASE_HLEN > dp_packet_l4_size(packet)) {
544 VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%"PRIuSIZE"\n",
545 (unsigned int)GENEVE_BASE_HLEN, dp_packet_l4_size(packet));
549 gnh = udp_extract_tnl_md(packet, tnl, &ulen);
/* 'opt_len' counts 4-byte units. */
554 opts_len = gnh->opt_len * 4;
/* Outer Ethernet + IP, then UDP + base Geneve header, then the options. */
555 hlen = ulen + GENEVE_BASE_HLEN + opts_len;
556 if (hlen > dp_packet_size(packet)) {
557 VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
558 hlen, dp_packet_size(packet));
563 VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
567 if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
568 VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
569 ntohs(gnh->proto_type));
573 tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
/* The VNI sits above the low 8 bits of the word. */
574 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
575 tnl->flags |= FLOW_TNL_F_KEY;
/* Keep the options in their on-the-wire form and record their length. */
577 memcpy(tnl->metadata.opts.gnv, gnh->options, opts_len);
578 tnl->metadata.present.len = opts_len;
579 tnl->flags |= FLOW_TNL_F_UDPIF;
581 dp_packet_reset_packet(packet, hlen);
585 dp_packet_delete(packet);
/* Builds the Geneve tunnel header for 'netdev' into 'data'.
 *
 * The outer Ethernet, IP and UDP headers come from udp_build_header().  The
 * flow's tun_id is written, shifted left by 8 bits, into the VNI word, and
 * the options are produced by tun_metadata_to_geneve_header().  'opt_len' is
 * stored in 4-byte units, the OAM and critical bits are set from the flow
 * flags and from 'crit_opt', and the inner protocol is ETH_TYPE_TEB.  The
 * device mutex is held only while the outer headers and VNI are written.
 *
 * NOTE(review): the return type, the declarations of 'opt_len' and
 * 'crit_opt' and the return statement are not visible here. */
590 netdev_geneve_build_header(const struct netdev *netdev,
591 struct ovs_action_push_tnl *data,
592 const struct netdev_tnl_build_header_params *params)
594 struct netdev_vport *dev = netdev_vport_cast(netdev);
595 struct netdev_tunnel_config *tnl_cfg;
596 struct genevehdr *gnh;
600 /* XXX: RCUfy tnl_cfg. */
601 ovs_mutex_lock(&dev->mutex);
602 tnl_cfg = &dev->tnl_cfg;
604 gnh = udp_build_header(tnl_cfg, data, params);
/* The VNI sits above the low 8 bits of the word. */
606 put_16aligned_be32(&gnh->vni, htonl(ntohll(params->flow->tunnel.tun_id) << 8));
608 ovs_mutex_unlock(&dev->mutex);
610 opt_len = tun_metadata_to_geneve_header(&params->flow->tunnel,
611 gnh->options, &crit_opt);
/* The header field counts 4-byte units. */
613 gnh->opt_len = opt_len / 4;
614 gnh->oam = !!(params->flow->tunnel.flags & FLOW_TNL_F_OAM);
615 gnh->critical = crit_opt ? 1 : 0;
616 gnh->proto_type = htons(ETH_TYPE_TEB);
618 data->header_len += sizeof *gnh + opt_len;
619 data->tnl_type = OVS_VPORT_TYPE_GENEVE;
/* unixctl handler that reports or changes the tunnel UDP source port range
 * held in 'tnl_udp_port_min' and 'tnl_udp_port_max'.
 *
 * The visible code replies with the current range in one branch.  Otherwise
 * it parses 'argv[1]' and 'argv[2]', rejects values outside 1..UINT16_MAX
 * with "Invalid min." or "Invalid max.", stores the pair in either order,
 * signals 'tnl_conf_seq' and replies "OK".
 *
 * NOTE(review): the return type, the argument-count checks, the condition
 * choosing between the two store orders (presumably so that min <= max) and
 * the cleanup of 'ds' are not visible here.  atoi() does not report trailing
 * garbage, so an argument such as "80x" is taken as 80. */
625 netdev_tnl_egress_port_range(struct unixctl_conn *conn, int argc,
626 const char *argv[], void *aux OVS_UNUSED)
631 struct ds ds = DS_EMPTY_INITIALIZER;
633 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
634 tnl_udp_port_min, tnl_udp_port_max);
636 unixctl_command_reply(conn, ds_cstr(&ds));
645 val1 = atoi(argv[1]);
646 if (val1 <= 0 || val1 > UINT16_MAX) {
647 unixctl_command_reply(conn, "Invalid min.");
650 val2 = atoi(argv[2]);
651 if (val2 <= 0 || val2 > UINT16_MAX) {
652 unixctl_command_reply(conn, "Invalid max.");
/* Arguments given in descending order are stored swapped. */
657 tnl_udp_port_min = val2;
658 tnl_udp_port_max = val1;
660 tnl_udp_port_min = val1;
661 tnl_udp_port_max = val2;
/* Signal the change of tunnel configuration. */
663 seq_change(tnl_conf_seq);
665 unixctl_command_reply(conn, "OK");