/*
 * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "netdev-vport.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/ioctl.h>

#include "byte-order.h"
#include "dp-packet.h"
#include "dynamic-string.h"
#include "netdev-provider.h"
#include "odp-netlink.h"
#include "ovs-router.h"
#include "poll-loop.h"
#include "route-table.h"
#include "socket-util.h"
#include "unaligned.h"
#include "openvswitch/vlog.h"
52 VLOG_DEFINE_THIS_MODULE(netdev_vport);
53 static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
55 #define GENEVE_DST_PORT 6081
56 #define VXLAN_DST_PORT 4789
57 #define LISP_DST_PORT 4341
58 #define STT_DST_PORT 7471
60 #define VXLAN_HLEN (sizeof(struct eth_header) + \
61 sizeof(struct ip_header) + \
62 sizeof(struct udp_header) + \
63 sizeof(struct vxlanhdr))
65 #define GENEVE_BASE_HLEN (sizeof(struct eth_header) + \
66 sizeof(struct ip_header) + \
67 sizeof(struct udp_header) + \
68 sizeof(struct genevehdr))
70 #define DEFAULT_TTL 64
75 /* Protects all members below. */
76 struct ovs_mutex mutex;
78 uint8_t etheraddr[ETH_ADDR_LEN];
79 struct netdev_stats stats;
82 struct netdev_tunnel_config tnl_cfg;
83 char egress_iface[IFNAMSIZ];
91 const char *dpif_port;
92 struct netdev_class netdev_class;
95 /* Last read of the route-table's change number. */
96 static uint64_t rt_change_seqno;
98 static int netdev_vport_construct(struct netdev *);
99 static int get_patch_config(const struct netdev *netdev, struct smap *args);
100 static int get_tunnel_config(const struct netdev *, struct smap *args);
101 static bool tunnel_check_status_change__(struct netdev_vport *);
103 static uint16_t tnl_udp_port_min = 32768;
104 static uint16_t tnl_udp_port_max = 61000;
107 is_vport_class(const struct netdev_class *class)
109 return class->construct == netdev_vport_construct;
/* Public wrapper around is_vport_class() for use outside this file. */
bool
netdev_vport_is_vport_class(const struct netdev_class *class)
{
    return is_vport_class(class);
}
118 static const struct vport_class *
119 vport_class_cast(const struct netdev_class *class)
121 ovs_assert(is_vport_class(class));
122 return CONTAINER_OF(class, struct vport_class, netdev_class);
125 static struct netdev_vport *
126 netdev_vport_cast(const struct netdev *netdev)
128 ovs_assert(is_vport_class(netdev_get_class(netdev)));
129 return CONTAINER_OF(netdev, struct netdev_vport, up);
132 static const struct netdev_tunnel_config *
133 get_netdev_tunnel_config(const struct netdev *netdev)
135 return &netdev_vport_cast(netdev)->tnl_cfg;
139 netdev_vport_is_patch(const struct netdev *netdev)
141 const struct netdev_class *class = netdev_get_class(netdev);
143 return class->get_config == get_patch_config;
/* Returns true if 'dev' is a layer-3 (non-Ethernet) port.  Only LISP
 * qualifies here. */
bool
netdev_vport_is_layer3(const struct netdev *dev)
{
    const char *type = netdev_get_type(dev);

    return (!strcmp("lisp", type));
}
155 netdev_vport_needs_dst_port(const struct netdev *dev)
157 const struct netdev_class *class = netdev_get_class(dev);
158 const char *type = netdev_get_type(dev);
160 return (class->get_config == get_tunnel_config &&
161 (!strcmp("geneve", type) || !strcmp("vxlan", type) ||
162 !strcmp("lisp", type) || !strcmp("stt", type)) );
166 netdev_vport_class_get_dpif_port(const struct netdev_class *class)
168 return is_vport_class(class) ? vport_class_cast(class)->dpif_port : NULL;
172 netdev_vport_get_dpif_port(const struct netdev *netdev,
173 char namebuf[], size_t bufsize)
175 const struct netdev_class *class = netdev_get_class(netdev);
176 const char *dpif_port = netdev_vport_class_get_dpif_port(class);
179 return netdev_get_name(netdev);
182 if (netdev_vport_needs_dst_port(netdev)) {
183 const struct netdev_vport *vport = netdev_vport_cast(netdev);
186 * Note: IFNAMSIZ is 16 bytes long. Implementations should choose
187 * a dpif port name that is short enough to fit including any
188 * port numbers but assert just in case.
190 BUILD_ASSERT(NETDEV_VPORT_NAME_BUFSIZE >= IFNAMSIZ);
191 ovs_assert(strlen(dpif_port) + 6 < IFNAMSIZ);
192 snprintf(namebuf, bufsize, "%s_%d", dpif_port,
193 ntohs(vport->tnl_cfg.dst_port));
201 netdev_vport_get_dpif_port_strdup(const struct netdev *netdev)
203 char namebuf[NETDEV_VPORT_NAME_BUFSIZE];
205 return xstrdup(netdev_vport_get_dpif_port(netdev, namebuf,
209 /* Whenever the route-table change number is incremented,
210 * netdev_vport_route_changed() should be called to update
211 * the corresponding tunnel interface status. */
213 netdev_vport_route_changed(void)
215 struct netdev **vports;
218 vports = netdev_get_vports(&n_vports);
219 for (i = 0; i < n_vports; i++) {
220 struct netdev *netdev_ = vports[i];
221 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
223 ovs_mutex_lock(&netdev->mutex);
224 /* Finds all tunnel vports. */
225 if (netdev->tnl_cfg.ip_dst) {
226 if (tunnel_check_status_change__(netdev)) {
227 netdev_change_seq_changed(netdev_);
230 ovs_mutex_unlock(&netdev->mutex);
232 netdev_close(netdev_);
238 static struct netdev *
239 netdev_vport_alloc(void)
241 struct netdev_vport *netdev = xzalloc(sizeof *netdev);
/* Allocator for the deprecated gre64 tunnel classes; warns once and
 * delegates to the normal vport allocator. */
static struct netdev *
netdev_gre64_vport_alloc(void)
{
    VLOG_WARN_ONCE("GRE64 tunnel protocol is deprecated. It will be removed from OVS release 2.5.");
    return netdev_vport_alloc();
}
253 netdev_vport_construct(struct netdev *netdev_)
255 struct netdev_vport *dev = netdev_vport_cast(netdev_);
256 const char *type = netdev_get_type(netdev_);
258 ovs_mutex_init(&dev->mutex);
259 eth_addr_random(dev->etheraddr);
261 /* Add a default destination port for tunnel ports if none specified. */
262 if (!strcmp(type, "geneve")) {
263 dev->tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
264 } else if (!strcmp(type, "vxlan")) {
265 dev->tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
266 } else if (!strcmp(type, "lisp")) {
267 dev->tnl_cfg.dst_port = htons(LISP_DST_PORT);
268 } else if (!strcmp(type, "stt")) {
269 dev->tnl_cfg.dst_port = htons(STT_DST_PORT);
272 dev->tnl_cfg.dont_fragment = true;
273 dev->tnl_cfg.ttl = DEFAULT_TTL;
278 netdev_vport_destruct(struct netdev *netdev_)
280 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
283 ovs_mutex_destroy(&netdev->mutex);
/* Frees the netdev_vport allocated by netdev_vport_alloc(). */
static void
netdev_vport_dealloc(struct netdev *netdev_)
{
    struct netdev_vport *netdev = netdev_vport_cast(netdev_);
    free(netdev);
}
294 netdev_vport_set_etheraddr(struct netdev *netdev_,
295 const uint8_t mac[ETH_ADDR_LEN])
297 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
299 ovs_mutex_lock(&netdev->mutex);
300 memcpy(netdev->etheraddr, mac, ETH_ADDR_LEN);
301 ovs_mutex_unlock(&netdev->mutex);
302 netdev_change_seq_changed(netdev_);
308 netdev_vport_get_etheraddr(const struct netdev *netdev_,
309 uint8_t mac[ETH_ADDR_LEN])
311 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
313 ovs_mutex_lock(&netdev->mutex);
314 memcpy(mac, netdev->etheraddr, ETH_ADDR_LEN);
315 ovs_mutex_unlock(&netdev->mutex);
320 /* Checks if the tunnel status has changed and returns a boolean.
321 * Updates the tunnel status if it has changed. */
323 tunnel_check_status_change__(struct netdev_vport *netdev)
324 OVS_REQUIRES(netdev->mutex)
326 char iface[IFNAMSIZ];
332 route = netdev->tnl_cfg.ip_dst;
333 if (ovs_router_lookup(route, iface, &gw)) {
334 struct netdev *egress_netdev;
336 if (!netdev_open(iface, "system", &egress_netdev)) {
337 status = netdev_get_carrier(egress_netdev);
338 netdev_close(egress_netdev);
342 if (strcmp(netdev->egress_iface, iface)
343 || netdev->carrier_status != status) {
344 ovs_strlcpy(netdev->egress_iface, iface, IFNAMSIZ);
345 netdev->carrier_status = status;
354 tunnel_get_status(const struct netdev *netdev_, struct smap *smap)
356 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
358 if (netdev->egress_iface[0]) {
359 smap_add(smap, "tunnel_egress_iface", netdev->egress_iface);
361 smap_add(smap, "tunnel_egress_iface_carrier",
362 netdev->carrier_status ? "up" : "down");
369 netdev_vport_update_flags(struct netdev *netdev OVS_UNUSED,
370 enum netdev_flags off,
371 enum netdev_flags on OVS_UNUSED,
372 enum netdev_flags *old_flagsp)
374 if (off & (NETDEV_UP | NETDEV_PROMISC)) {
378 *old_flagsp = NETDEV_UP | NETDEV_PROMISC;
383 netdev_vport_run(void)
388 seq = route_table_get_change_seq();
389 if (rt_change_seqno != seq) {
390 rt_change_seqno = seq;
391 netdev_vport_route_changed();
396 netdev_vport_wait(void)
401 seq = route_table_get_change_seq();
402 if (rt_change_seqno != seq) {
403 poll_immediate_wake();
407 /* Code specific to tunnel types. */
410 parse_key(const struct smap *args, const char *name,
411 bool *present, bool *flow)
418 s = smap_get(args, name);
420 s = smap_get(args, "key");
428 if (!strcmp(s, "flow")) {
432 return htonll(strtoull(s, NULL, 0));
437 set_tunnel_config(struct netdev *dev_, const struct smap *args)
439 struct netdev_vport *dev = netdev_vport_cast(dev_);
440 const char *name = netdev_get_name(dev_);
441 const char *type = netdev_get_type(dev_);
442 bool ipsec_mech_set, needs_dst_port, has_csum;
443 struct netdev_tunnel_config tnl_cfg;
444 struct smap_node *node;
446 has_csum = strstr(type, "gre") || strstr(type, "geneve") ||
447 strstr(type, "stt") || strstr(type, "vxlan");
448 ipsec_mech_set = false;
449 memset(&tnl_cfg, 0, sizeof tnl_cfg);
451 /* Add a default destination port for tunnel ports if none specified. */
452 if (!strcmp(type, "geneve")) {
453 tnl_cfg.dst_port = htons(GENEVE_DST_PORT);
456 if (!strcmp(type, "vxlan")) {
457 tnl_cfg.dst_port = htons(VXLAN_DST_PORT);
460 if (!strcmp(type, "lisp")) {
461 tnl_cfg.dst_port = htons(LISP_DST_PORT);
464 if (!strcmp(type, "stt")) {
465 tnl_cfg.dst_port = htons(STT_DST_PORT);
468 needs_dst_port = netdev_vport_needs_dst_port(dev_);
469 tnl_cfg.ipsec = strstr(type, "ipsec");
470 tnl_cfg.dont_fragment = true;
472 SMAP_FOR_EACH (node, args) {
473 if (!strcmp(node->key, "remote_ip")) {
474 struct in_addr in_addr;
475 if (!strcmp(node->value, "flow")) {
476 tnl_cfg.ip_dst_flow = true;
477 tnl_cfg.ip_dst = htonl(0);
478 } else if (lookup_ip(node->value, &in_addr)) {
479 VLOG_WARN("%s: bad %s 'remote_ip'", name, type);
480 } else if (ip_is_multicast(in_addr.s_addr)) {
481 VLOG_WARN("%s: multicast remote_ip="IP_FMT" not allowed",
482 name, IP_ARGS(in_addr.s_addr));
485 tnl_cfg.ip_dst = in_addr.s_addr;
487 } else if (!strcmp(node->key, "local_ip")) {
488 struct in_addr in_addr;
489 if (!strcmp(node->value, "flow")) {
490 tnl_cfg.ip_src_flow = true;
491 tnl_cfg.ip_src = htonl(0);
492 } else if (lookup_ip(node->value, &in_addr)) {
493 VLOG_WARN("%s: bad %s 'local_ip'", name, type);
495 tnl_cfg.ip_src = in_addr.s_addr;
497 } else if (!strcmp(node->key, "tos")) {
498 if (!strcmp(node->value, "inherit")) {
499 tnl_cfg.tos_inherit = true;
503 tos = strtol(node->value, &endptr, 0);
504 if (*endptr == '\0' && tos == (tos & IP_DSCP_MASK)) {
507 VLOG_WARN("%s: invalid TOS %s", name, node->value);
510 } else if (!strcmp(node->key, "ttl")) {
511 if (!strcmp(node->value, "inherit")) {
512 tnl_cfg.ttl_inherit = true;
514 tnl_cfg.ttl = atoi(node->value);
516 } else if (!strcmp(node->key, "dst_port") && needs_dst_port) {
517 tnl_cfg.dst_port = htons(atoi(node->value));
518 } else if (!strcmp(node->key, "csum") && has_csum) {
519 if (!strcmp(node->value, "true")) {
522 } else if (!strcmp(node->key, "df_default")) {
523 if (!strcmp(node->value, "false")) {
524 tnl_cfg.dont_fragment = false;
526 } else if (!strcmp(node->key, "peer_cert") && tnl_cfg.ipsec) {
527 if (smap_get(args, "certificate")) {
528 ipsec_mech_set = true;
530 const char *use_ssl_cert;
532 /* If the "use_ssl_cert" is true, then "certificate" and
533 * "private_key" will be pulled from the SSL table. The
534 * use of this option is strongly discouraged, since it
535 * will like be removed when multiple SSL configurations
536 * are supported by OVS.
538 use_ssl_cert = smap_get(args, "use_ssl_cert");
539 if (!use_ssl_cert || strcmp(use_ssl_cert, "true")) {
540 VLOG_ERR("%s: 'peer_cert' requires 'certificate' argument",
544 ipsec_mech_set = true;
546 } else if (!strcmp(node->key, "psk") && tnl_cfg.ipsec) {
547 ipsec_mech_set = true;
548 } else if (tnl_cfg.ipsec
549 && (!strcmp(node->key, "certificate")
550 || !strcmp(node->key, "private_key")
551 || !strcmp(node->key, "use_ssl_cert"))) {
552 /* Ignore options not used by the netdev. */
553 } else if (!strcmp(node->key, "key") ||
554 !strcmp(node->key, "in_key") ||
555 !strcmp(node->key, "out_key")) {
556 /* Handled separately below. */
557 } else if (!strcmp(node->key, "exts")) {
558 char *str = xstrdup(node->value);
559 char *ext, *save_ptr = NULL;
563 ext = strtok_r(str, ",", &save_ptr);
565 if (!strcmp(type, "vxlan") && !strcmp(ext, "gbp")) {
566 tnl_cfg.exts |= (1 << OVS_VXLAN_EXT_GBP);
568 VLOG_WARN("%s: unknown extension '%s'", name, ext);
571 ext = strtok_r(NULL, ",", &save_ptr);
576 VLOG_WARN("%s: unknown %s argument '%s'", name, type, node->key);
581 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
582 static pid_t pid = 0;
585 ovs_mutex_lock(&mutex);
587 char *file_name = xasprintf("%s/%s", ovs_rundir(),
588 "ovs-monitor-ipsec.pid");
589 pid = read_pidfile(file_name);
592 ovs_mutex_unlock(&mutex);
596 VLOG_ERR("%s: IPsec requires the ovs-monitor-ipsec daemon",
601 if (smap_get(args, "peer_cert") && smap_get(args, "psk")) {
602 VLOG_ERR("%s: cannot define both 'peer_cert' and 'psk'", name);
606 if (!ipsec_mech_set) {
607 VLOG_ERR("%s: IPsec requires an 'peer_cert' or psk' argument",
613 if (!tnl_cfg.ip_dst && !tnl_cfg.ip_dst_flow) {
614 VLOG_ERR("%s: %s type requires valid 'remote_ip' argument",
618 if (tnl_cfg.ip_src_flow && !tnl_cfg.ip_dst_flow) {
619 VLOG_ERR("%s: %s type requires 'remote_ip=flow' with 'local_ip=flow'",
624 tnl_cfg.ttl = DEFAULT_TTL;
627 tnl_cfg.in_key = parse_key(args, "in_key",
628 &tnl_cfg.in_key_present,
629 &tnl_cfg.in_key_flow);
631 tnl_cfg.out_key = parse_key(args, "out_key",
632 &tnl_cfg.out_key_present,
633 &tnl_cfg.out_key_flow);
635 ovs_mutex_lock(&dev->mutex);
636 if (memcmp(&dev->tnl_cfg, &tnl_cfg, sizeof tnl_cfg)) {
637 dev->tnl_cfg = tnl_cfg;
638 tunnel_check_status_change__(dev);
639 netdev_change_seq_changed(dev_);
641 ovs_mutex_unlock(&dev->mutex);
647 get_tunnel_config(const struct netdev *dev, struct smap *args)
649 struct netdev_vport *netdev = netdev_vport_cast(dev);
650 struct netdev_tunnel_config tnl_cfg;
652 ovs_mutex_lock(&netdev->mutex);
653 tnl_cfg = netdev->tnl_cfg;
654 ovs_mutex_unlock(&netdev->mutex);
656 if (tnl_cfg.ip_dst) {
657 smap_add_format(args, "remote_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_dst));
658 } else if (tnl_cfg.ip_dst_flow) {
659 smap_add(args, "remote_ip", "flow");
662 if (tnl_cfg.ip_src) {
663 smap_add_format(args, "local_ip", IP_FMT, IP_ARGS(tnl_cfg.ip_src));
664 } else if (tnl_cfg.ip_src_flow) {
665 smap_add(args, "local_ip", "flow");
668 if (tnl_cfg.in_key_flow && tnl_cfg.out_key_flow) {
669 smap_add(args, "key", "flow");
670 } else if (tnl_cfg.in_key_present && tnl_cfg.out_key_present
671 && tnl_cfg.in_key == tnl_cfg.out_key) {
672 smap_add_format(args, "key", "%"PRIu64, ntohll(tnl_cfg.in_key));
674 if (tnl_cfg.in_key_flow) {
675 smap_add(args, "in_key", "flow");
676 } else if (tnl_cfg.in_key_present) {
677 smap_add_format(args, "in_key", "%"PRIu64,
678 ntohll(tnl_cfg.in_key));
681 if (tnl_cfg.out_key_flow) {
682 smap_add(args, "out_key", "flow");
683 } else if (tnl_cfg.out_key_present) {
684 smap_add_format(args, "out_key", "%"PRIu64,
685 ntohll(tnl_cfg.out_key));
689 if (tnl_cfg.ttl_inherit) {
690 smap_add(args, "ttl", "inherit");
691 } else if (tnl_cfg.ttl != DEFAULT_TTL) {
692 smap_add_format(args, "ttl", "%"PRIu8, tnl_cfg.ttl);
695 if (tnl_cfg.tos_inherit) {
696 smap_add(args, "tos", "inherit");
697 } else if (tnl_cfg.tos) {
698 smap_add_format(args, "tos", "0x%x", tnl_cfg.tos);
701 if (tnl_cfg.dst_port) {
702 uint16_t dst_port = ntohs(tnl_cfg.dst_port);
703 const char *type = netdev_get_type(dev);
705 if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
706 (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
707 (!strcmp("lisp", type) && dst_port != LISP_DST_PORT) ||
708 (!strcmp("stt", type) && dst_port != STT_DST_PORT)) {
709 smap_add_format(args, "dst_port", "%d", dst_port);
714 smap_add(args, "csum", "true");
717 if (!tnl_cfg.dont_fragment) {
718 smap_add(args, "df_default", "false");
724 /* Code specific to patch ports. */
726 /* If 'netdev' is a patch port, returns the name of its peer as a malloc()'d
727 * string that the caller must free.
729 * If 'netdev' is not a patch port, returns NULL. */
731 netdev_vport_patch_peer(const struct netdev *netdev_)
735 if (netdev_vport_is_patch(netdev_)) {
736 struct netdev_vport *netdev = netdev_vport_cast(netdev_);
738 ovs_mutex_lock(&netdev->mutex);
740 peer = xstrdup(netdev->peer);
742 ovs_mutex_unlock(&netdev->mutex);
749 netdev_vport_inc_rx(const struct netdev *netdev,
750 const struct dpif_flow_stats *stats)
752 if (is_vport_class(netdev_get_class(netdev))) {
753 struct netdev_vport *dev = netdev_vport_cast(netdev);
755 ovs_mutex_lock(&dev->mutex);
756 dev->stats.rx_packets += stats->n_packets;
757 dev->stats.rx_bytes += stats->n_bytes;
758 ovs_mutex_unlock(&dev->mutex);
763 netdev_vport_inc_tx(const struct netdev *netdev,
764 const struct dpif_flow_stats *stats)
766 if (is_vport_class(netdev_get_class(netdev))) {
767 struct netdev_vport *dev = netdev_vport_cast(netdev);
769 ovs_mutex_lock(&dev->mutex);
770 dev->stats.tx_packets += stats->n_packets;
771 dev->stats.tx_bytes += stats->n_bytes;
772 ovs_mutex_unlock(&dev->mutex);
777 get_patch_config(const struct netdev *dev_, struct smap *args)
779 struct netdev_vport *dev = netdev_vport_cast(dev_);
781 ovs_mutex_lock(&dev->mutex);
783 smap_add(args, "peer", dev->peer);
785 ovs_mutex_unlock(&dev->mutex);
791 set_patch_config(struct netdev *dev_, const struct smap *args)
793 struct netdev_vport *dev = netdev_vport_cast(dev_);
794 const char *name = netdev_get_name(dev_);
797 peer = smap_get(args, "peer");
799 VLOG_ERR("%s: patch type requires valid 'peer' argument", name);
803 if (smap_count(args) > 1) {
804 VLOG_ERR("%s: patch type takes only a 'peer' argument", name);
808 if (!strcmp(name, peer)) {
809 VLOG_ERR("%s: patch peer must not be self", name);
813 ovs_mutex_lock(&dev->mutex);
814 if (!dev->peer || strcmp(dev->peer, peer)) {
816 dev->peer = xstrdup(peer);
817 netdev_change_seq_changed(dev_);
819 ovs_mutex_unlock(&dev->mutex);
825 get_stats(const struct netdev *netdev, struct netdev_stats *stats)
827 struct netdev_vport *dev = netdev_vport_cast(netdev);
829 ovs_mutex_lock(&dev->mutex);
831 ovs_mutex_unlock(&dev->mutex);
837 /* Tunnel push pop ops. */
839 static struct ip_header *
842 return (void *)((char *)eth + sizeof (struct eth_header));
845 static struct gre_base_hdr *
846 gre_hdr(struct ip_header *ip)
848 return (void *)((char *)ip + sizeof (struct ip_header));
852 ip_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
854 struct ip_header *nh;
857 nh = dp_packet_l3(packet);
858 l4 = dp_packet_l4(packet);
864 tnl->ip_src = get_16aligned_be32(&nh->ip_src);
865 tnl->ip_dst = get_16aligned_be32(&nh->ip_dst);
866 tnl->ip_tos = nh->ip_tos;
867 tnl->ip_ttl = nh->ip_ttl;
872 /* Pushes the 'size' bytes of 'header' into the headroom of 'packet',
873 * reallocating the packet if necessary. 'header' should contain an Ethernet
874 * header, followed by an IPv4 header (without options), and an L4 header.
876 * This function sets the IP header's ip_tot_len field (which should be zeroed
877 * as part of 'header') and puts its value into '*ip_tot_size' as well. Also
878 * updates IP header checksum.
880 * Return pointer to the L4 header added to 'packet'. */
882 push_ip_header(struct dp_packet *packet,
883 const void *header, int size, int *ip_tot_size)
885 struct eth_header *eth;
886 struct ip_header *ip;
888 eth = dp_packet_push_uninit(packet, size);
889 *ip_tot_size = dp_packet_size(packet) - sizeof (struct eth_header);
891 memcpy(eth, header, size);
893 ip->ip_tot_len = htons(*ip_tot_size);
896 ip->ip_csum = recalc_csum16(ip->ip_csum, 0, ip->ip_tot_len);
902 udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl)
904 struct udp_header *udp;
906 udp = ip_extract_tnl_md(packet, tnl);
912 uint32_t csum = packet_csum_pseudoheader(dp_packet_l3(packet));
914 csum = csum_continue(csum, udp, dp_packet_size(packet) -
915 ((const unsigned char *)udp -
916 (const unsigned char *)dp_packet_l2(packet)));
917 if (csum_finish(csum)) {
920 tnl->flags |= FLOW_TNL_F_CSUM;
923 tnl->tp_src = udp->udp_src;
924 tnl->tp_dst = udp->udp_dst;
930 get_src_port(struct dp_packet *packet)
934 hash = dp_packet_get_rss_hash(packet);
936 return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
941 push_udp_header(struct dp_packet *packet,
942 const struct ovs_action_push_tnl *data)
944 struct udp_header *udp;
947 udp = push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
949 /* set udp src port */
950 udp->udp_src = get_src_port(packet);
951 udp->udp_len = htons(ip_tot_size - sizeof (struct ip_header));
954 uint32_t csum = packet_csum_pseudoheader(ip_hdr(dp_packet_data(packet)));
956 csum = csum_continue(csum, udp,
957 ip_tot_size - sizeof (struct ip_header));
958 udp->udp_csum = csum_finish(csum);
960 if (!udp->udp_csum) {
961 udp->udp_csum = htons(0xffff);
967 udp_build_header(struct netdev_tunnel_config *tnl_cfg,
968 const struct flow *tnl_flow,
969 struct ovs_action_push_tnl *data)
971 struct ip_header *ip;
972 struct udp_header *udp;
974 ip = ip_hdr(data->header);
975 ip->ip_proto = IPPROTO_UDP;
977 udp = (struct udp_header *) (ip + 1);
978 udp->udp_dst = tnl_cfg->dst_port;
980 if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
981 /* Write a value in now to mark that we should compute the checksum
982 * later. 0xffff is handy because it is transparent to the
984 udp->udp_csum = htons(0xffff);
991 gre_header_len(ovs_be16 flags)
993 int hlen = sizeof(struct eth_header) +
994 sizeof(struct ip_header) + 4;
996 if (flags & htons(GRE_CSUM)) {
999 if (flags & htons(GRE_KEY)) {
1002 if (flags & htons(GRE_SEQ)) {
1009 parse_gre_header(struct dp_packet *packet,
1010 struct flow_tnl *tnl)
1012 const struct gre_base_hdr *greh;
1013 ovs_16aligned_be32 *options;
1016 greh = ip_extract_tnl_md(packet, tnl);
1021 if (greh->flags & ~(htons(GRE_CSUM | GRE_KEY | GRE_SEQ))) {
1025 if (greh->protocol != htons(ETH_TYPE_TEB)) {
1029 hlen = gre_header_len(greh->flags);
1030 if (hlen > dp_packet_size(packet)) {
1034 options = (ovs_16aligned_be32 *)(greh + 1);
1035 if (greh->flags & htons(GRE_CSUM)) {
1038 pkt_csum = csum(greh, dp_packet_size(packet) -
1039 ((const unsigned char *)greh -
1040 (const unsigned char *)dp_packet_l2(packet)));
1044 tnl->flags = FLOW_TNL_F_CSUM;
1048 if (greh->flags & htons(GRE_KEY)) {
1049 tnl->tun_id = (OVS_FORCE ovs_be64) ((OVS_FORCE uint64_t)(get_16aligned_be32(options)) << 32);
1050 tnl->flags |= FLOW_TNL_F_KEY;
1054 if (greh->flags & htons(GRE_SEQ)) {
1062 netdev_gre_pop_header(struct dp_packet *packet)
1064 struct pkt_metadata *md = &packet->md;
1065 struct flow_tnl *tnl = &md->tunnel;
1066 int hlen = sizeof(struct eth_header) +
1067 sizeof(struct ip_header) + 4;
1069 memset(md, 0, sizeof *md);
1070 if (hlen > dp_packet_size(packet)) {
1074 hlen = parse_gre_header(packet, tnl);
1079 dp_packet_reset_packet(packet, hlen);
1085 netdev_gre_push_header(struct dp_packet *packet,
1086 const struct ovs_action_push_tnl *data)
1088 struct gre_base_hdr *greh;
1091 greh = push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
1093 if (greh->flags & htons(GRE_CSUM)) {
1094 ovs_be16 *csum_opt = (ovs_be16 *) (greh + 1);
1095 *csum_opt = csum(greh, ip_tot_size - sizeof (struct ip_header));
1100 netdev_gre_build_header(const struct netdev *netdev,
1101 struct ovs_action_push_tnl *data,
1102 const struct flow *tnl_flow)
1104 struct netdev_vport *dev = netdev_vport_cast(netdev);
1105 struct netdev_tunnel_config *tnl_cfg;
1106 struct ip_header *ip;
1107 struct gre_base_hdr *greh;
1108 ovs_16aligned_be32 *options;
1111 /* XXX: RCUfy tnl_cfg. */
1112 ovs_mutex_lock(&dev->mutex);
1113 tnl_cfg = &dev->tnl_cfg;
1115 ip = ip_hdr(data->header);
1116 ip->ip_proto = IPPROTO_GRE;
1119 greh->protocol = htons(ETH_TYPE_TEB);
1122 options = (ovs_16aligned_be32 *) (greh + 1);
1123 if (tnl_flow->tunnel.flags & FLOW_TNL_F_CSUM) {
1124 greh->flags |= htons(GRE_CSUM);
1125 put_16aligned_be32(options, 0);
1129 if (tnl_cfg->out_key_present) {
1130 greh->flags |= htons(GRE_KEY);
1131 put_16aligned_be32(options, (OVS_FORCE ovs_be32)
1132 ((OVS_FORCE uint64_t) tnl_flow->tunnel.tun_id >> 32));
1136 ovs_mutex_unlock(&dev->mutex);
1138 hlen = (uint8_t *) options - (uint8_t *) greh;
1140 data->header_len = sizeof(struct eth_header) +
1141 sizeof(struct ip_header) + hlen;
1142 data->tnl_type = OVS_VPORT_TYPE_GRE;
1147 netdev_vxlan_pop_header(struct dp_packet *packet)
1149 struct pkt_metadata *md = &packet->md;
1150 struct flow_tnl *tnl = &md->tunnel;
1151 struct vxlanhdr *vxh;
1153 memset(md, 0, sizeof *md);
1154 if (VXLAN_HLEN > dp_packet_size(packet)) {
1158 vxh = udp_extract_tnl_md(packet, tnl);
1163 if (get_16aligned_be32(&vxh->vx_flags) != htonl(VXLAN_FLAGS) ||
1164 (get_16aligned_be32(&vxh->vx_vni) & htonl(0xff))) {
1165 VLOG_WARN_RL(&err_rl, "invalid vxlan flags=%#x vni=%#x\n",
1166 ntohl(get_16aligned_be32(&vxh->vx_flags)),
1167 ntohl(get_16aligned_be32(&vxh->vx_vni)));
1170 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8);
1171 tnl->flags |= FLOW_TNL_F_KEY;
1173 dp_packet_reset_packet(packet, VXLAN_HLEN);
1179 netdev_vxlan_build_header(const struct netdev *netdev,
1180 struct ovs_action_push_tnl *data,
1181 const struct flow *tnl_flow)
1183 struct netdev_vport *dev = netdev_vport_cast(netdev);
1184 struct netdev_tunnel_config *tnl_cfg;
1185 struct vxlanhdr *vxh;
1187 /* XXX: RCUfy tnl_cfg. */
1188 ovs_mutex_lock(&dev->mutex);
1189 tnl_cfg = &dev->tnl_cfg;
1191 vxh = udp_build_header(tnl_cfg, tnl_flow, data);
1193 put_16aligned_be32(&vxh->vx_flags, htonl(VXLAN_FLAGS));
1194 put_16aligned_be32(&vxh->vx_vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
1196 ovs_mutex_unlock(&dev->mutex);
1197 data->header_len = VXLAN_HLEN;
1198 data->tnl_type = OVS_VPORT_TYPE_VXLAN;
1203 netdev_geneve_pop_header(struct dp_packet *packet)
1205 struct pkt_metadata *md = &packet->md;
1206 struct flow_tnl *tnl = &md->tunnel;
1207 struct genevehdr *gnh;
1210 memset(md, 0, sizeof *md);
1211 if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
1212 VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
1213 (unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
1217 gnh = udp_extract_tnl_md(packet, tnl);
1222 hlen = GENEVE_BASE_HLEN + gnh->opt_len * 4;
1223 if (hlen > dp_packet_size(packet)) {
1224 VLOG_WARN_RL(&err_rl, "geneve packet too small: header len=%u packet size=%u\n",
1225 hlen, dp_packet_size(packet));
1229 if (gnh->ver != 0) {
1230 VLOG_WARN_RL(&err_rl, "unknown geneve version: %"PRIu8"\n", gnh->ver);
1234 if (gnh->opt_len && gnh->critical) {
1235 VLOG_WARN_RL(&err_rl, "unknown geneve critical options: %"PRIu8" bytes\n",
1240 if (gnh->proto_type != htons(ETH_TYPE_TEB)) {
1241 VLOG_WARN_RL(&err_rl, "unknown geneve encapsulated protocol: %#x\n",
1242 ntohs(gnh->proto_type));
1246 tnl->flags |= gnh->oam ? FLOW_TNL_F_OAM : 0;
1247 tnl->tun_id = htonll(ntohl(get_16aligned_be32(&gnh->vni)) >> 8);
1248 tnl->flags |= FLOW_TNL_F_KEY;
1250 dp_packet_reset_packet(packet, hlen);
1256 netdev_geneve_build_header(const struct netdev *netdev,
1257 struct ovs_action_push_tnl *data,
1258 const struct flow *tnl_flow)
1260 struct netdev_vport *dev = netdev_vport_cast(netdev);
1261 struct netdev_tunnel_config *tnl_cfg;
1262 struct genevehdr *gnh;
1264 /* XXX: RCUfy tnl_cfg. */
1265 ovs_mutex_lock(&dev->mutex);
1266 tnl_cfg = &dev->tnl_cfg;
1268 gnh = udp_build_header(tnl_cfg, tnl_flow, data);
1270 gnh->oam = !!(tnl_flow->tunnel.flags & FLOW_TNL_F_OAM);
1271 gnh->proto_type = htons(ETH_TYPE_TEB);
1272 put_16aligned_be32(&gnh->vni, htonl(ntohll(tnl_flow->tunnel.tun_id) << 8));
1274 ovs_mutex_unlock(&dev->mutex);
1275 data->header_len = GENEVE_BASE_HLEN;
1276 data->tnl_type = OVS_VPORT_TYPE_GENEVE;
1281 netdev_vport_range(struct unixctl_conn *conn, int argc,
1282 const char *argv[], void *aux OVS_UNUSED)
1287 struct ds ds = DS_EMPTY_INITIALIZER;
1289 ds_put_format(&ds, "Tunnel UDP source port range: %"PRIu16"-%"PRIu16"\n",
1290 tnl_udp_port_min, tnl_udp_port_max);
1292 unixctl_command_reply(conn, ds_cstr(&ds));
1301 val1 = atoi(argv[1]);
1302 if (val1 <= 0 || val1 > UINT16_MAX) {
1303 unixctl_command_reply(conn, "Invalid min.");
1306 val2 = atoi(argv[2]);
1307 if (val2 <= 0 || val2 > UINT16_MAX) {
1308 unixctl_command_reply(conn, "Invalid max.");
1313 tnl_udp_port_min = val2;
1314 tnl_udp_port_max = val1;
1316 tnl_udp_port_min = val1;
1317 tnl_udp_port_max = val2;
1319 seq_change(tnl_conf_seq);
1321 unixctl_command_reply(conn, "OK");
/* Expands to the common netdev_class initializer shared by all vport
 * classes.  ALLOC/GET_CONFIG/SET_CONFIG/etc. plug class-specific hooks
 * into the otherwise-NULL slots.  NOTE(review): this extraction has lost
 * interior lines of the initializer list; the entry order must be checked
 * against struct netdev_class in netdev-provider.h before editing. */
1325 #define VPORT_FUNCTIONS(ALLOC, GET_CONFIG, SET_CONFIG, \
1326 GET_TUNNEL_CONFIG, GET_STATUS, \
1328 PUSH_HEADER, POP_HEADER) \
1331 netdev_vport_wait, \
1334 netdev_vport_construct, \
1335 netdev_vport_destruct, \
1336 netdev_vport_dealloc, \
1339 GET_TUNNEL_CONFIG, \
1343 NULL, /* get_numa_id */ \
1344 NULL, /* set_multiq */ \
1347 NULL, /* send_wait */ \
1349 netdev_vport_set_etheraddr, \
1350 netdev_vport_get_etheraddr, \
1351 NULL, /* get_mtu */ \
1352 NULL, /* set_mtu */ \
1353 NULL, /* get_ifindex */ \
1354 NULL, /* get_carrier */ \
1355 NULL, /* get_carrier_resets */ \
1356 NULL, /* get_miimon */ \
1359 NULL, /* get_features */ \
1360 NULL, /* set_advertisements */ \
1362 NULL, /* set_policing */ \
1363 NULL, /* get_qos_types */ \
1364 NULL, /* get_qos_capabilities */ \
1365 NULL, /* get_qos */ \
1366 NULL, /* set_qos */ \
1367 NULL, /* get_queue */ \
1368 NULL, /* set_queue */ \
1369 NULL, /* delete_queue */ \
1370 NULL, /* get_queue_stats */ \
1371 NULL, /* queue_dump_start */ \
1372 NULL, /* queue_dump_next */ \
1373 NULL, /* queue_dump_done */ \
1374 NULL, /* dump_queue_stats */ \
1376 NULL, /* get_in4 */ \
1377 NULL, /* set_in4 */ \
1378 NULL, /* get_in6 */ \
1379 NULL, /* add_router */ \
1380 NULL, /* get_next_hop */ \
1382 NULL, /* arp_lookup */ \
1384 netdev_vport_update_flags, \
1386 NULL, /* rx_alloc */ \
1387 NULL, /* rx_construct */ \
1388 NULL, /* rx_destruct */ \
1389 NULL, /* rx_dealloc */ \
1390 NULL, /* rx_recv */ \
1391 NULL, /* rx_wait */ \
1392 NULL, /* rx_drain */
/* Expands to a vport_class initializer for a tunnel netdev class: the
 * dpif port-name prefix plus a netdev_class wired to the shared tunnel
 * get/set-config and status hooks and the class-specific header ops. */
1395 #define TUNNEL_CLASS(NAME, DPIF_PORT, ALLOC, BUILD_HEADER, PUSH_HEADER, POP_HEADER) \
1397 { NAME, VPORT_FUNCTIONS(ALLOC, \
1398 get_tunnel_config, \
1399 set_tunnel_config, \
1400 get_netdev_tunnel_config, \
1401 tunnel_get_status, \
1402 BUILD_HEADER, PUSH_HEADER, POP_HEADER) }}
1405 netdev_vport_tunnel_register(void)
1407 /* The name of the dpif_port should be short enough to accomodate adding
1408 * a port number to the end if one is necessary. */
1409 static const struct vport_class vport_classes[] = {
1410 TUNNEL_CLASS("geneve", "genev_sys", netdev_vport_alloc,
1411 netdev_geneve_build_header,
1413 netdev_geneve_pop_header),
1414 TUNNEL_CLASS("gre", "gre_sys", netdev_vport_alloc,
1415 netdev_gre_build_header,
1416 netdev_gre_push_header,
1417 netdev_gre_pop_header),
1418 TUNNEL_CLASS("ipsec_gre", "gre_sys", netdev_vport_alloc,
1420 TUNNEL_CLASS("gre64", "gre64_sys", netdev_gre64_vport_alloc,
1422 TUNNEL_CLASS("ipsec_gre64", "gre64_sys", netdev_gre64_vport_alloc,
1424 TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vport_alloc,
1425 netdev_vxlan_build_header,
1427 netdev_vxlan_pop_header),
1428 TUNNEL_CLASS("lisp", "lisp_sys", netdev_vport_alloc, NULL, NULL, NULL),
1429 TUNNEL_CLASS("stt", "stt_sys", netdev_vport_alloc, NULL, NULL, NULL),
1431 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
1433 if (ovsthread_once_start(&once)) {
1436 for (i = 0; i < ARRAY_SIZE(vport_classes); i++) {
1437 netdev_register_provider(&vport_classes[i].netdev_class);
1440 unixctl_command_register("tnl/egress_port_range", "min max", 0, 2,
1441 netdev_vport_range, NULL);
1443 ovsthread_once_done(&once);
1448 netdev_vport_patch_register(void)
1450 static const struct vport_class patch_class =
1452 { "patch", VPORT_FUNCTIONS(netdev_vport_alloc,
1456 NULL, NULL, NULL, NULL) }};
1457 netdev_register_provider(&patch_class.netdev_class);