/*
 * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19 #include "netdev-vport.h"
23 #include <sys/socket.h>
25 #include <sys/ioctl.h>
27 #include "byte-order.h"
32 #include "dp-packet.h"
33 #include "dynamic-string.h"
38 #include "netdev-provider.h"
39 #include "odp-netlink.h"
40 #include "dp-packet.h"
41 #include "ovs-router.h"
43 #include "poll-loop.h"
44 #include "route-table.h"
46 #include "socket-util.h"
47 #include "openvswitch/vlog.h"
48 #include "unaligned.h"
52 VLOG_DEFINE_THIS_MODULE(netdev_vport);
53 static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
55 #define GENEVE_DST_PORT 6081
56 #define VXLAN_DST_PORT 4789
57 #define LISP_DST_PORT 4341
59 #define VXLAN_HLEN (sizeof(struct eth_header) + \
60 sizeof(struct ip_header) + \
61 sizeof(struct udp_header) + \
62 sizeof(struct vxlanhdr))
64 #define DEFAULT_TTL 64
69 /* Protects all members below. */
70 struct ovs_mutex mutex;
72 uint8_t etheraddr[ETH_ADDR_LEN];
73 struct netdev_stats stats;
76 struct netdev_tunnel_config tnl_cfg;
77 char egress_iface[IFNAMSIZ];
85 const char *dpif_port;
86 struct netdev_class netdev_class;
89 /* Last read of the route-table's change number. */
90 static uint64_t rt_change_seqno;
92 static int netdev_vport_construct(struct netdev *);
93 static int get_patch_config(const struct netdev *netdev, struct smap *args);
94 static int get_tunnel_config(const struct netdev *, struct smap *args);
95 static bool tunnel_check_status_change__(struct netdev_vport *);
97 static uint16_t tnl_udp_port_min = 32768;
98 static uint16_t tnl_udp_port_max = 61000;
101 is_vport_class(const struct netdev_class *class)
103 return class->construct == netdev_vport_construct;
/* Public wrapper for is_vport_class(). */
bool
netdev_vport_is_vport_class(const struct netdev_class *class)
{
    return is_vport_class(class);
}
112 static const struct vport_class *
113 vport_class_cast(const struct netdev_class *class)
115 ovs_assert(is_vport_class(class));
116 return CONTAINER_OF(class, struct vport_class, netdev_class);
119 static struct netdev_vport *
120 netdev_vport_cast(const struct netdev *netdev)
122 ovs_assert(is_vport_class(netdev_get_class(netdev)));
123 return CONTAINER_OF(netdev, struct netdev_vport, up);
126 static const struct netdev_tunnel_config *
127 get_netdev_tunnel_config(const struct netdev *netdev)
129 return &netdev_vport_cast(netdev)->tnl_cfg;
133 netdev_vport_is_patch(const struct netdev *netdev)
135 const struct netdev_class *class = netdev_get_class(netdev);
137 return class->get_config == get_patch_config;
/* Returns true if 'dev' carries L3 packets without an Ethernet header.
 * Among the types here, only LISP does. */
bool
netdev_vport_is_layer3(const struct netdev *dev)
{
    const char *type = netdev_get_type(dev);

    return (!strcmp("lisp", type));
}
149 netdev_vport_needs_dst_port(const struct netdev *dev)
151 const struct netdev_class *class = netdev_get_class(dev);
152 const char *type = netdev_get_type(dev);
154 return (class->get_config == get_tunnel_config &&
155 (!strcmp("geneve", type) || !strcmp("vxlan", type) ||
156 !strcmp("lisp", type)));
160 netdev_vport_class_get_dpif_port(const struct netdev_class *class)
162 return is_vport_class(class) ? vport_class_cast(class)->dpif_port : NULL;
166 netdev_vport_get_dpif_port(const struct netdev *netdev,
167 char namebuf[], size_t bufsize)
169 const struct netdev_class *class = netdev_get_class(netdev);
170 const char *dpif_port = netdev_vport_class_get_dpif_port(class);
173 return netdev_get_name(netdev);
176 if (netdev_vport_needs_dst_port(netdev)) {
177 const struct netdev_vport *vport = netdev_vport_cast(netdev);
180 * Note: IFNAMSIZ is 16 bytes long. Implementations should choose
181 * a dpif port name that is short enough to fit including any
182 * port numbers but assert just in case.
184 BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE >= IFNAMSIZ);
185 ovs_assert(strlen(dpif_port) + 6 < IFNAMSIZ);
186 snprintf(namebuf, bufsize, "%s_%d", dpif_port,
187 ntohs(vport->tnl_cfg.dst_port));
195 netdev_vport_get_dpif_port_strdup(const struct netdev *netdev)
197 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
199 return xstrdup(netdev_vport_get_dpif_port(netdev, namebuf,
203 /* Whenever the route-table change number is incremented,
204 * netdev_vport_route_changed() should be called to update
205 * the corresponding tunnel interface status. */
207 netdev_vport_route_changed(void)
209 struct netdev **vports;
212 vports = netdev_get_vports(&n_vports);
213 for (i = 0; i < n_vports; i++) {
214 struct netdev *netdev_ = vports[i];
215 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
217 ovs_mutex_lock(&netdev->mutex);
218 /* Finds all tunnel vports. */
219 if (netdev->tnl_cfg.ip_dst) {
220 if (tunnel_check_status_change__(netdev)) {
221 netdev_change_seq_changed(netdev_);
224 ovs_mutex_unlock(&netdev->mutex);
226 netdev_close(netdev_);
232 static struct netdev *
233 netdev_vport_alloc(void)
235 struct netdev_vport *netdev = xzalloc(sizeof *netdev);
240 netdev_vport_construct(struct netdev *netdev_)
242 struct netdev_vport *dev = netdev_vport_cast(netdev_);
243 const char *type = netdev_get_type(netdev_);
245 ovs_mutex_init(&dev->mutex);
246 eth_addr_random(dev->etheraddr);
248 /* Add a default destination port for tunnel ports if none specified. */
249 if (!strcmp(type, "geneve")) {
250 dev->tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
251 } else if (!strcmp(type, "vxlan")) {
252 dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
253 } else if (!strcmp(type, "lisp")) {
254 dev->tnl_cfg.dst_port = htons(LISP_DST_PORT);
261 netdev_vport_destruct(struct netdev *netdev_)
263 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
266 ovs_mutex_destroy(&netdev->mutex);
/* netdev provider 'dealloc' hook: frees the storage from
 * netdev_vport_alloc(). */
static void
netdev_vport_dealloc(struct netdev *netdev_)
{
    struct netdev_vport *netdev = netdev_vport_cast(netdev_);
    free(netdev);
}
277 netdev_vport_set_etheraddr(struct netdev *netdev_,
278 const uint8_t mac[ETH_ADDR_LEN])
280 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
282 ovs_mutex_lock(&netdev->mutex);
283 memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
284 ovs_mutex_unlock(&netdev->mutex);
285 netdev_change_seq_changed(netdev_);
291 netdev_vport_get_etheraddr(const struct netdev *netdev_,
292 uint8_t mac[ETH_ADDR_LEN])
294 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
296 ovs_mutex_lock(&netdev->mutex);
297 memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
298 ovs_mutex_unlock(&netdev->mutex);
303 /* Checks if the tunnel status has changed and returns a boolean.
304 * Updates the tunnel status if it has changed. */
306 tunnel_check_status_change__(struct netdev_vport *netdev)
307 OVS_REQUIRES(netdev->mutex)
309 char iface[IFNAMSIZ];
315 route = netdev->tnl_cfg.ip_dst;
316 if (ovs_router_lookup(route, iface, &gw)) {
317 struct netdev *egress_netdev;
319 if (!netdev_open(iface, "system", &egress_netdev)) {
320 status = netdev_get_carrier(egress_netdev);
321 netdev_close(egress_netdev);
325 if (strcmp(netdev->egress_iface, iface)
326 || netdev->carrier_status != status) {
327 ovs_strlcpy(netdev->egress_iface, iface, IFNAMSIZ);
328 netdev->carrier_status = status;
337 tunnel_get_status(const struct netdev *netdev_, struct smap *smap)
339 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
341 if (netdev->egress_iface[0]) {
342 smap_add(smap, "tunnel_egress_iface", netdev->egress_iface);
344 smap_add(smap, "tunnel_egress_iface_carrier",
345 netdev->carrier_status ? "up" : "down");
352 netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
353 enum netdev_flags off,
354 enum netdev_flags on OVS_UNUSED,
355 enum netdev_flags *old_flagsp)
357 if (off & (NETDEV_UP | NETDEV_PROMISC)) {
361 *old_flagsp = NETDEV_UP | NETDEV_PROMISC;
366 netdev_vport_run(void)
371 seq = route_table_get_change_seq();
372 if (rt_change_seqno != seq) {
373 rt_change_seqno = seq;
374 netdev_vport_route_changed();
379 netdev_vport_wait(void)
384 seq = route_table_get_change_seq();
385 if (rt_change_seqno != seq) {
386 poll_immediate_wake();
390 /* Code specific to tunnel types. */
393 parse_key(const struct smap *args, const char *name,
394 bool *present, bool *flow)
401 s = smap_get(args, name);
403 s = smap_get(args, "key");
411 if (!strcmp(s, "flow")) {
415 return htonll(strtoull(s, NULL, 0));
420 set_tunnel_config(struct netdev *dev_, const struct smap *args)
422 struct netdev_vport *dev = netdev_vport_cast(dev_);
423 const char *name = netdev_get_name(dev_);
424 const char *type = netdev_get_type(dev_);
425 bool ipsec_mech_set, needs_dst_port, has_csum;
426 struct netdev_tunnel_config tnl_cfg;
427 struct smap_node *node;
429 has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
430 strstr(type, "vxlan");
431 ipsec_mech_set = false;
432 memset(&tnl_cfg, 0, sizeof tnl_cfg);
434 /* Add a default destination port for tunnel ports if none specified. */
435 if (!strcmp(type, "geneve")) {
436 tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
439 if (!strcmp(type, "vxlan")) {
440 tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
443 if (!strcmp(type, "lisp")) {
444 tnl_cfg.dst_port = htons(LISP_DST_PORT);
447 needs_dst_port = netdev_vport_needs_dst_port(dev_);
448 tnl_cfg.ipsec = strstr(type, "ipsec");
449 tnl_cfg.dont_fragment = true;
451 SMAP_FOR_EACH (node, args) {
452 if (!strcmp(node->key, "remote_ip")) {
453 struct in_addr in_addr;
454 if (!strcmp(node->value, "flow")) {
455 tnl_cfg.ip_dst_flow = true;
456 tnl_cfg.ip_dst = htonl(0);
457 } else if (lookup_ip(node->value, &in_addr)) {
458 VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
459 } else if (ip_is_multicast(in_addr.s_addr)) {
460 VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed",
461 name, IP_ARGS(in_addr.s_addr));
464 tnl_cfg.ip_dst = in_addr.s_addr;
466 } else if (!strcmp(node->key, "local_ip")) {
467 struct in_addr in_addr;
468 if (!strcmp(node->value, "flow")) {
469 tnl_cfg.ip_src_flow = true;
470 tnl_cfg.ip_src = htonl(0);
471 } else if (lookup_ip(node->value, &in_addr)) {
472 VLOG_WARN("%s: bad %s 'local_ip'", name, type);
474 tnl_cfg.ip_src = in_addr.s_addr;
476 } else if (!strcmp(node->key, "tos")) {
477 if (!strcmp(node->value, "inherit")) {
478 tnl_cfg.tos_inherit = true;
482 tos = strtol(node->value, &endptr, 0);
483 if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) {
486 VLOG_WARN("%s: invalid TOS %s", name, node->value);
489 } else if (!strcmp(node->key, "ttl")) {
490 if (!strcmp(node->value, "inherit")) {
491 tnl_cfg.ttl_inherit = true;
493 tnl_cfg.ttl = atoi(node->value);
495 } else if (!strcmp(node->key, "dst_port") && needs_dst_port) {
496 tnl_cfg.dst_port = htons(atoi(node->value));
497 } else if (!strcmp(node->key, "csum") && has_csum) {
498 if (!strcmp(node->value, "true")) {
501 } else if (!strcmp(node->key, "df_default")) {
502 if (!strcmp(node->value, "false")) {
503 tnl_cfg.dont_fragment = false;
505 } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) {
506 if (smap_get(args, "certificate")) {
507 ipsec_mech_set = true;
509 const char *use_ssl_cert;
511 /* If the "use_ssl_cert" is true, then "certificate" and
512 * "private_key" will be pulled from the SSL table. The
513 * use of this option is strongly discouraged, since it
514 * will like be removed when multiple SSL configurations
515 * are supported by OVS.
517 use_ssl_cert = smap_get(args, "use_ssl_cert");
518 if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
519 VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
523 ipsec_mech_set = true;
525 } else if (!strcmp(node->key, "psk") && tnl_cfg.ipsec) {
526 ipsec_mech_set = true;
527 } else if (tnl_cfg.ipsec
528 && (!strcmp(node->key, "certificate")
529 || !strcmp(node->key, "private_key")
530 || !strcmp(node->key, "use_ssl_cert"))) {
531 /* Ignore options not used by the netdev. */
532 } else if (!strcmp(node->key, "key") ||
533 !strcmp(node->key, "in_key") ||
534 !strcmp(node->key, "out_key")) {
535 /* Handled separately below. */
536 } else if (!strcmp(node->key, "exts")) {
537 char *str = xstrdup(node->value);
538 char *ext, *save_ptr = NULL;
542 ext = strtok_r(str, ",", &save_ptr);
544 if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) {
545 tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP);
547 VLOG_WARN("%s: unknown extension '%s'", name, ext);
550 ext = strtok_r(NULL, ",", &save_ptr);
555 VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
560 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
561 static pid_t pid = 0;
564 ovs_mutex_lock(&mutex);
566 char *file_name = xasprintf("%s/%s", ovs_rundir(),
567 "ovs-monitor-ipsec.pid");
568 pid = read_pidfile(file_name);
571 ovs_mutex_unlock(&mutex);
575 VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
580 if (smap_get(args, "peer_cert") && smap_get(args, "psk")) {
581 VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
585 if (!ipsec_mech_set) {
586 VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
592 if (!tnl_cfg.ip_dst && !tnl_cfg.ip_dst_flow) {
593 VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
597 if (tnl_cfg.ip_src_flow && !tnl_cfg.ip_dst_flow) {
598 VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'",
603 tnl_cfg.ttl = DEFAULT_TTL;
606 tnl_cfg.in_key = parse_key(args, "in_key",
607 &tnl_cfg.in_key_present,
608 &tnl_cfg.in_key_flow);
610 tnl_cfg.out_key = parse_key(args, "out_key",
611 &tnl_cfg.out_key_present,
612 &tnl_cfg.out_key_flow);
614 ovs_mutex_lock(&dev->mutex);
615 if (memcmp(&dev->tnl_cfg, &tnl_cfg, sizeof tnl_cfg)) {
616 dev->tnl_cfg = tnl_cfg;
617 tunnel_check_status_change__(dev);
618 netdev_change_seq_changed(dev_);
620 ovs_mutex_unlock(&dev->mutex);
626 get_tunnel_config(const struct netdev *dev, struct smap *args)
628 struct netdev_vport *netdev = netdev_vport_cast(dev);
629 struct netdev_tunnel_config tnl_cfg;
631 ovs_mutex_lock(&netdev->mutex);
632 tnl_cfg = netdev->tnl_cfg;
633 ovs_mutex_unlock(&netdev->mutex);
635 if (tnl_cfg.ip_dst) {
636 smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_dst));
637 } else if (tnl_cfg.ip_dst_flow) {
638 smap_add(args, "remote_ip", "flow");
641 if (tnl_cfg.ip_src) {
642 smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_src));
643 } else if (tnl_cfg.ip_src_flow) {
644 smap_add(args, "local_ip", "flow");
647 if (tnl_cfg.in_key_flow && tnl_cfg.out_key_flow) {
648 smap_add(args, "key", "flow");
649 } else if (tnl_cfg.in_key_present && tnl_cfg.out_key_present
650 && tnl_cfg.in_key == tnl_cfg.out_key) {
651 smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg.in_key));
653 if (tnl_cfg.in_key_flow) {
654 smap_add(args, "in_key", "flow");
655 } else if (tnl_cfg.in_key_present) {
656 smap_add_format(args, "in_key", "%"PRIu64,
657 ntohll(tnl_cfg.in_key));
660 if (tnl_cfg.out_key_flow) {
661 smap_add(args, "out_key", "flow");
662 } else if (tnl_cfg.out_key_present) {
663 smap_add_format(args, "out_key", "%"PRIu64,
664 ntohll(tnl_cfg.out_key));
668 if (tnl_cfg.ttl_inherit) {
669 smap_add(args, "ttl", "inherit");
670 } else if (tnl_cfg.ttl != DEFAULT_TTL) {
671 smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg.ttl);
674 if (tnl_cfg.tos_inherit) {
675 smap_add(args, "tos", "inherit");
676 } else if (tnl_cfg.tos) {
677 smap_add_format(args, "tos", "0x%x", tnl_cfg.tos);
680 if (tnl_cfg.dst_port) {
681 uint16_t dst_port = ntohs(tnl_cfg.dst_port);
682 const char *type = netdev_get_type(dev);
684 if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
685 (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
686 (!strcmp("lisp", type) && dst_port != LISP_DST_PORT)) {
687 smap_add_format(args, "dst_port", "%d", dst_port);
692 smap_add(args, "csum", "true");
695 if (!tnl_cfg.dont_fragment) {
696 smap_add(args, "df_default", "false");
702 /* Code specific to patch ports. */
704 /* If 'netdev' is a patch port, returns the name of its peer as a malloc()'d
705 * string that the caller must free.
707 * If 'netdev' is not a patch port, returns NULL. */
709 netdev_vport_patch_peer(const struct netdev *netdev_)
713 if (netdev_vport_is_patch(netdev_)) {
714 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
716 ovs_mutex_lock(&netdev->mutex);
718 peer = xstrdup(netdev->peer);
720 ovs_mutex_unlock(&netdev->mutex);
727 netdev_vport_inc_rx(const struct netdev *netdev,
728 const struct dpif_flow_stats *stats)
730 if (is_vport_class(netdev_get_class(netdev))) {
731 struct netdev_vport *dev = netdev_vport_cast(netdev);
733 ovs_mutex_lock(&dev->mutex);
734 dev->stats.rx_packets += stats->n_packets;
735 dev->stats.rx_bytes += stats->n_bytes;
736 ovs_mutex_unlock(&dev->mutex);
741 netdev_vport_inc_tx(const struct netdev *netdev,
742 const struct dpif_flow_stats *stats)
744 if (is_vport_class(netdev_get_class(netdev))) {
745 struct netdev_vport *dev = netdev_vport_cast(netdev);
747 ovs_mutex_lock(&dev->mutex);
748 dev->stats.tx_packets += stats->n_packets;
749 dev->stats.tx_bytes += stats->n_bytes;
750 ovs_mutex_unlock(&dev->mutex);
755 get_patch_config(const struct netdev *dev_, struct smap *args)
757 struct netdev_vport *dev = netdev_vport_cast(dev_);
759 ovs_mutex_lock(&dev->mutex);
761 smap_add(args, "peer", dev->peer);
763 ovs_mutex_unlock(&dev->mutex);
769 set_patch_config(struct netdev *dev_, const struct smap *args)
771 struct netdev_vport *dev = netdev_vport_cast(dev_);
772 const char *name = netdev_get_name(dev_);
775 peer = smap_get(args, "peer");
777 VLOG_ERR("%s: patch type requires valid 'peer' argument", name);
781 if (smap_count(args) > 1) {
782 VLOG_ERR("%s: patch type takes only a 'peer' argument", name);
786 if (!strcmp(name, peer)) {
787 VLOG_ERR("%s: patch peer must not be self", name);
791 ovs_mutex_lock(&dev->mutex);
792 if (!dev->peer || strcmp(dev->peer, peer)) {
794 dev->peer = xstrdup(peer);
795 netdev_change_seq_changed(dev_);
797 ovs_mutex_unlock(&dev->mutex);
803 get_stats(const struct netdev *netdev, struct netdev_stats *stats)
805 struct netdev_vport *dev = netdev_vport_cast(netdev);
807 ovs_mutex_lock(&dev->mutex);
809 ovs_mutex_unlock(&dev->mutex);
815 /* Tunnel push pop ops. */
817 static struct ip_header *
820 return (void *)((char *)eth + sizeof (struct eth_header));
823 static struct gre_base_hdr *
824 gre_hdr(struct ip_header *ip)
826 return (void *)((char *)ip + sizeof (struct ip_header));
830 ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
832 struct ip_header *nh;
835 nh = dp_packet_l3(packet);
836 l4 = dp_packet_l4(packet);
842 tnl->ip_src = get_16aligned_be32(&nh->ip_src);
843 tnl->ip_dst = get_16aligned_be32(&nh->ip_dst);
844 tnl->ip_tos = nh->ip_tos;
845 tnl->ip_ttl = nh->ip_ttl;
850 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
851 * reallocating the packet if necessary. 'header' should contain an Ethernet
852 * header, followed by an IPv4 header (without options), and an L4 header.
854 * This function sets the IP header's ip_tot_len field (which should be zeroed
855 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
856 * updates IP header checksum.
858 * Return pointer to the L4 header added to 'packet'. */
860 push_ip_header(struct dp_packet *packet,
861 const void *header, int size, int *ip_tot_size)
863 struct eth_header *eth;
864 struct ip_header *ip;
866 eth = dp_packet_push_uninit(packet, size);
867 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
869 memcpy(eth, header, size);
871 ip->ip_tot_len = htons(*ip_tot_size);
874 ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
880 gre_header_len(ovs_be16 flags)
882 int hlen = sizeof(struct eth_header) +
883 sizeof(struct ip_header) + 4;
885 if (flags & htons(GRE_CSUM)) {
888 if (flags & htons(GRE_KEY)) {
891 if (flags & htons(GRE_SEQ)) {
898 parse_gre_header(struct dp_packet *packet,
899 struct flow_tnl *tnl)
901 const struct gre_base_hdr *greh;
902 ovs_16aligned_be32 *options;
905 greh = ip_extract_tnl_md(packet, tnl);
910 if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
914 if (greh->protocol != htons(ETH_TYPE_TEB)) {
918 hlen = gre_header_len(greh->flags);
919 if (hlen > dp_packet_size(packet)) {
923 options = (ovs_16aligned_be32 *)(greh + 1);
924 if (greh->flags & htons(GRE_CSUM)) {
927 pkt_csum = csum(greh, dp_packet_size(packet) -
928 ((const unsigned char *)greh -
929 (const unsigned char *)dp_packet_l2(packet)));
933 tnl->flags = FLOW_TNL_F_CSUM;
937 if (greh->flags & htons(GRE_KEY)) {
938 tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
939 tnl->flags |= FLOW_TNL_F_KEY;
943 if (greh->flags & htons(GRE_SEQ)) {
951 reset_tnl_md(struct pkt_metadata *md)
953 memset(&md->tunnel, 0, sizeof(md->tunnel));
957 gre_extract_md(struct dp_packet *packet)
959 struct pkt_metadata *md = &packet->md;
960 struct flow_tnl *tnl = &md->tunnel;
961 int hlen = sizeof(struct eth_header) +
962 sizeof(struct ip_header) + 4;
964 memset(md, 0, sizeof *md);
965 if (hlen > dp_packet_size(packet)) {
969 hlen = parse_gre_header(packet, tnl);
974 dp_packet_reset_packet(packet, hlen);
978 netdev_gre_pop_header(struct netdev *netdev_ OVS_UNUSED,
979 struct dp_packet **pkt, int cnt)
983 for (i = 0; i < cnt; i++) {
984 gre_extract_md(pkt[i]);
990 netdev_gre_push_header__(struct dp_packet *packet,
991 const void *header, int size)
993 struct gre_base_hdr *greh;
996 greh = push_ip_header(packet, header, size, &ip_tot_size);
998 if (greh->flags & htons(GRE_CSUM)) {
999 ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
1000 *csum_opt = csum(greh, ip_tot_size - sizeof (struct ip_header));
1005 netdev_gre_push_header(const struct netdev *netdev OVS_UNUSED,
1006 struct dp_packet **packets, int cnt,
1007 const struct ovs_action_push_tnl *data)
1011 for (i = 0; i < cnt; i++) {
1012 netdev_gre_push_header__(packets[i], data->header, data->header_len);
1013 packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
1020 netdev_gre_build_header(const struct netdev *netdev,
1021 struct ovs_action_push_tnl *data,
1022 const struct flow *tnl_flow)
1024 struct netdev_vport *dev = netdev_vport_cast(netdev);
1025 struct netdev_tunnel_config *tnl_cfg;
1026 struct ip_header *ip;
1027 struct gre_base_hdr *greh;
1028 ovs_16aligned_be32 *options;
1031 /* XXX: RCUfy tnl_cfg. */
1032 ovs_mutex_lock(&dev->mutex);
1033 tnl_cfg = &dev->tnl_cfg;
1035 ip = ip_hdr(data->header);
1036 ip->ip_proto = IPPROTO_GRE;
1039 greh->protocol = htons(ETH_TYPE_TEB);
1042 options = (ovs_16aligned_be32 *) (greh + 1);
1043 if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
1044 greh->flags |= htons(GRE_CSUM);
1045 put_16aligned_be32(options, 0);
1049 if (tnl_cfg->out_key_present) {
1050 greh->flags |= htons(GRE_KEY);
1051 put_16aligned_be32(options, (OVS_FORCE ovs_be32)
1052 ((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
1056 ovs_mutex_unlock(&dev->mutex);
1058 hlen = (uint8_t *) options - (uint8_t *) greh;
1060 data->header_len = sizeof(struct eth_header) +
1061 sizeof(struct ip_header) + hlen;
1062 data->tnl_type = OVS_VPORT_TYPE_GRE;
1067 vxlan_extract_md(struct dp_packet *packet)
1069 struct pkt_metadata *md = &packet->md;
1070 struct flow_tnl *tnl = &md->tunnel;
1071 struct udp_header *udp;
1072 struct vxlanhdr *vxh;
1074 memset(md, 0, sizeof *md);
1075 if (VXLAN_HLEN > dp_packet_size(packet)) {
1079 udp = ip_extract_tnl_md(packet, tnl);
1083 vxh = (struct vxlanhdr *) (udp + 1);
1085 if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
1086 (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
1087 VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
1088 ntohl(get_16aligned_be32(&vxh->vx_flags)),
1089 ntohl(get_16aligned_be32(&vxh->vx_vni)));
1093 tnl->tp_src = udp->udp_src;
1094 tnl->tp_dst = udp->udp_dst;
1095 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
1096 tnl->flags |= FLOW_TNL_F_KEY;
1098 dp_packet_reset_packet(packet, VXLAN_HLEN);
1102 netdev_vxlan_pop_header(struct netdev *netdev_ OVS_UNUSED,
1103 struct dp_packet **pkt, int cnt)
1107 for (i = 0; i < cnt; i++) {
1108 vxlan_extract_md(pkt[i]);
1114 netdev_vxlan_build_header(const struct netdev *netdev,
1115 struct ovs_action_push_tnl *data,
1116 const struct flow *tnl_flow)
1118 struct netdev_vport *dev = netdev_vport_cast(netdev);
1119 struct netdev_tunnel_config *tnl_cfg;
1120 struct ip_header *ip;
1121 struct udp_header *udp;
1122 struct vxlanhdr *vxh;
1124 /* XXX: RCUfy tnl_cfg. */
1125 ovs_mutex_lock(&dev->mutex);
1126 tnl_cfg = &dev->tnl_cfg;
1128 ip = ip_hdr(data->header);
1129 ip->ip_proto = IPPROTO_UDP;
1131 udp = (struct udp_header *) (ip + 1);
1132 udp->udp_dst = tnl_cfg->dst_port;
1134 vxh = (struct vxlanhdr *) (udp + 1);
1135 put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
1136 put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
1138 ovs_mutex_unlock(&dev->mutex);
1139 data->header_len = VXLAN_HLEN;
1140 data->tnl_type = OVS_VPORT_TYPE_VXLAN;
1145 get_src_port(struct dp_packet *packet)
1149 hash = dp_packet_get_dp_hash(packet);
1151 return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
1156 netdev_vxlan_push_header__(struct dp_packet *packet,
1157 const void *header, int size)
1159 struct udp_header *udp;
1162 udp = push_ip_header(packet, header, size, &ip_tot_size);
1164 /* set udp src port */
1165 udp->udp_src = get_src_port(packet);
1166 udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
1167 /* udp_csum is zero */
1171 netdev_vxlan_push_header(const struct netdev *netdev OVS_UNUSED,
1172 struct dp_packet **packets, int cnt,
1173 const struct ovs_action_push_tnl *data)
1177 for (i = 0; i < cnt; i++) {
1178 netdev_vxlan_push_header__(packets[i],
1179 data->header, VXLAN_HLEN);
1180 packets[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
1186 netdev_vport_range(struct unixctl_conn *conn, int argc,
1187 const char *argv[], void *aux OVS_UNUSED)
1192 struct ds ds = DS_EMPTY_INITIALIZER;
1194 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
1195 tnl_udp_port_min, tnl_udp_port_max);
1197 unixctl_command_reply(conn, ds_cstr(&ds));
1206 val1 = atoi(argv[1]);
1207 if (val1 <= 0 || val1 > UINT16_MAX) {
1208 unixctl_command_reply(conn, "Invalid min.");
1211 val2 = atoi(argv[2]);
1212 if (val2 <= 0 || val2 > UINT16_MAX) {
1213 unixctl_command_reply(conn, "Invalid max.");
1218 tnl_udp_port_min = val2;
1219 tnl_udp_port_max = val1;
1221 tnl_udp_port_min = val1;
1222 tnl_udp_port_max = val2;
1224 seq_change(tnl_conf_seq);
1226 unixctl_command_reply(conn, "OK");
1230 #define VPORT_FUNCTIONS(GET_CONFIG, SET_CONFIG, \
1231 GET_TUNNEL_CONFIG, GET_STATUS, \
1233 PUSH_HEADER, POP_HEADER) \
1236 netdev_vport_wait, \
1238 netdev_vport_alloc, \
1239 netdev_vport_construct, \
1240 netdev_vport_destruct, \
1241 netdev_vport_dealloc, \
1244 GET_TUNNEL_CONFIG, \
1248 NULL, /* get_numa_id */ \
1249 NULL, /* set_multiq */ \
1252 NULL, /* send_wait */ \
1254 netdev_vport_set_etheraddr, \
1255 netdev_vport_get_etheraddr, \
1256 NULL, /* get_mtu */ \
1257 NULL, /* set_mtu */ \
1258 NULL, /* get_ifindex */ \
1259 NULL, /* get_carrier */ \
1260 NULL, /* get_carrier_resets */ \
1261 NULL, /* get_miimon */ \
1264 NULL, /* get_features */ \
1265 NULL, /* set_advertisements */ \
1267 NULL, /* set_policing */ \
1268 NULL, /* get_qos_types */ \
1269 NULL, /* get_qos_capabilities */ \
1270 NULL, /* get_qos */ \
1271 NULL, /* set_qos */ \
1272 NULL, /* get_queue */ \
1273 NULL, /* set_queue */ \
1274 NULL, /* delete_queue */ \
1275 NULL, /* get_queue_stats */ \
1276 NULL, /* queue_dump_start */ \
1277 NULL, /* queue_dump_next */ \
1278 NULL, /* queue_dump_done */ \
1279 NULL, /* dump_queue_stats */ \
1281 NULL, /* get_in4 */ \
1282 NULL, /* set_in4 */ \
1283 NULL, /* get_in6 */ \
1284 NULL, /* add_router */ \
1285 NULL, /* get_next_hop */ \
1287 NULL, /* arp_lookup */ \
1289 netdev_vport_update_flags, \
1291 NULL, /* rx_alloc */ \
1292 NULL, /* rx_construct */ \
1293 NULL, /* rx_destruct */ \
1294 NULL, /* rx_dealloc */ \
1295 NULL, /* rx_recv */ \
1296 NULL, /* rx_wait */ \
1297 NULL, /* rx_drain */
/* Builds a vport_class initializer for a tunnel type: the datapath port
 * prefix plus a netdev_class wired to the shared tunnel hooks and the
 * type-specific header build/push/pop functions. */
#define TUNNEL_CLASS(NAME, DPIF_PORT, BUILD_HEADER, PUSH_HEADER, POP_HEADER) \
    { DPIF_PORT,                                                             \
        { NAME, VPORT_FUNCTIONS(get_tunnel_config,                           \
                                set_tunnel_config,                           \
                                get_netdev_tunnel_config,                    \
                                tunnel_get_status,                           \
                                BUILD_HEADER, PUSH_HEADER, POP_HEADER) }}
1309 netdev_vport_tunnel_register(void)
1311 /* The name of the dpif_port should be short enough to accomodate adding
1312 * a port number to the end if one is necessary. */
1313 static const struct vport_class vport_classes[] = {
1314 TUNNEL_CLASS("geneve", "genev_sys", NULL, NULL, NULL),
1315 TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
1316 netdev_gre_push_header,
1317 netdev_gre_pop_header),
1318 TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
1319 TUNNEL_CLASS("gre64", "gre64_sys", NULL, NULL, NULL),
1320 TUNNEL_CLASS("ipsec_gre64", "gre64_sys", NULL, NULL, NULL),
1321 TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
1322 netdev_vxlan_push_header,
1323 netdev_vxlan_pop_header),
1324 TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL)
1326 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
1328 if (ovsthread_once_start(&once)) {
1331 for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
1332 netdev_register_provider(&vport_classes[i].netdev_class);
1335 unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
1336 netdev_vport_range, NULL);
1338 ovsthread_once_done(&once);
1343 netdev_vport_patch_register(void)
1345 static const struct vport_class patch_class =
1347 { "patch", VPORT_FUNCTIONS(get_patch_config,
1350 NULL, NULL, NULL, NULL) }};
1351 netdev_register_provider(&patch_class.netdev_class);