X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=lib%2Fpackets.c;h=2915c7440787a07e7de07f5c0ab62730cabc19f5;hb=07659514c3c1e8998a4935a998b627d716c559f9;hp=bbf4934401b52c75ace8ac51d1f0865828333e94;hpb=2ea838acb2401df9e31074a1d1e78bf9bf739387;p=cascardo%2Fovs.git diff --git a/lib/packets.c b/lib/packets.c index bbf493440..2915c7440 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,18 +16,25 @@ #include #include "packets.h" -#include #include #include #include +#include +#include #include #include "byte-order.h" #include "csum.h" +#include "crc32c.h" #include "flow.h" +#include "hmap.h" #include "dynamic-string.h" -#include "ofpbuf.h" +#include "ovs-thread.h" +#include "odp-util.h" +#include "dp-packet.h" +#include "unaligned.h" const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT; +const struct in6_addr in6addr_all_hosts = IN6ADDR_ALL_HOSTS_INIT; /* Parses 's' as a 16-digit hexadecimal number representing a datapath ID. On * success stores the dpid into '*dpidp' and returns true, on failure stores 0 @@ -43,65 +50,78 @@ dpid_from_string(const char *s, uint64_t *dpidp) return *dpidp != 0; } -/* Returns true if 'ea' is a reserved multicast address, that a bridge must - * never forward, false otherwise. Includes some proprietary vendor protocols - * that shouldn't be forwarded as well. +/* Returns true if 'ea' is a reserved address, that a bridge must never + * forward, false otherwise. * * If you change this function's behavior, please update corresponding * documentation in vswitch.xml at the same time. 
*/ bool -eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) +eth_addr_is_reserved(const struct eth_addr ea) { - struct masked_eth_addr { - uint8_t ea[ETH_ADDR_LEN]; - uint8_t mask[ETH_ADDR_LEN]; + struct eth_addr_node { + struct hmap_node hmap_node; + const uint64_t ea64; }; - static struct masked_eth_addr mea[] = { - { /* STP, IEEE pause frames, and other reserved protocols. */ - {0x01, 0x08, 0xc2, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xf0}}, - - { /* VRRP IPv4. */ - {0x00, 0x00, 0x5e, 0x00, 0x01, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0x00}}, - - { /* VRRP IPv6. */ - {0x00, 0x00, 0x5e, 0x00, 0x02, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0x00}}, - - { /* HSRPv1. */ - {0x00, 0x00, 0x0c, 0x07, 0xac, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0x00}}, - - { /* HSRPv2. */ - {0x00, 0x00, 0x0c, 0x9f, 0xf0, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xf0, 0x00}}, - - { /* GLBP. */ - {0x00, 0x07, 0xb4, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0x00, 0x00, 0x00}}, - - { /* Extreme Discovery Protocol. */ - {0x00, 0xE0, 0x2B, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xf0, 0x00}}, - - { /* Cisco Inter Switch Link. */ - {0x01, 0x00, 0x0c, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, + static struct eth_addr_node nodes[] = { + /* STP, IEEE pause frames, and other reserved protocols. 
*/ + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000000ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000001ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000002ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000003ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000004ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000005ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000006ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000007ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000008ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c2000009ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000aULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000bULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000cULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000dULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000eULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x0180c200000fULL }, + + /* Extreme protocols. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000000ULL }, /* EDP. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000004ULL }, /* EAPS. */ + { HMAP_NODE_NULL_INITIALIZER, 0x00e02b000006ULL }, /* EAPS. */ + + /* Cisco protocols. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000c000000ULL }, /* ISL. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccccULL }, /* PAgP, UDLD, CDP, + * DTP, VTP. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000ccccccdULL }, /* PVST+. */ + { HMAP_NODE_NULL_INITIALIZER, 0x01000ccdcdcdULL }, /* STP Uplink Fast, + * FlexLink. */ + + /* Cisco CFM. 
*/ + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc0ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc1ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc2ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc3ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc4ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc5ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc6ULL }, + { HMAP_NODE_NULL_INITIALIZER, 0x01000cccccc7ULL }, + }; - { /* Cisco protocols plus others following the same pattern: - * - * CDP, VTP, DTP, PAgP (01-00-0c-cc-cc-cc) - * Spanning Tree PVSTP+ (01-00-0c-cc-cc-cd) - * STP Uplink Fast (01-00-0c-cd-cd-cd) */ - {0x01, 0x00, 0x0c, 0xcc, 0xcc, 0xcc}, - {0xff, 0xff, 0xff, 0xfe, 0xfe, 0xfe}}}; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; + struct eth_addr_node *node; + static struct hmap addrs; + uint64_t ea64; - size_t i; + if (ovsthread_once_start(&once)) { + hmap_init(&addrs); + for (node = nodes; node < &nodes[ARRAY_SIZE(nodes)]; node++) { + hmap_insert(&addrs, &node->hmap_node, hash_uint64(node->ea64)); + } + ovsthread_once_done(&once); + } - for (i = 0; i < ARRAY_SIZE(mea); i++) { - if (eth_addr_equal_except(ea, mea[i].ea, mea[i].mask)) { + ea64 = eth_addr_to_uint64(ea); + HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_uint64(ea64), &addrs) { + if (node->ea64 == ea64) { return true; } } @@ -109,13 +129,12 @@ eth_addr_is_reserved(const uint8_t ea[ETH_ADDR_LEN]) } bool -eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) +eth_addr_from_string(const char *s, struct eth_addr *ea) { - if (sscanf(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(ea)) - == ETH_ADDR_SCAN_COUNT) { + if (ovs_scan(s, ETH_ADDR_SCAN_FMT, ETH_ADDR_SCAN_ARGS(*ea))) { return true; } else { - memset(ea, 0, ETH_ADDR_LEN); + *ea = eth_addr_zero; return false; } } @@ -127,95 +146,223 @@ eth_addr_from_string(const char *s, uint8_t ea[ETH_ADDR_LEN]) * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. 
*/ void -compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN]) +compose_rarp(struct dp_packet *b, const struct eth_addr eth_src) { struct eth_header *eth; - struct rarp_header *rarp; - - ofpbuf_clear(b); - ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN - + RARP_HEADER_LEN); - ofpbuf_reserve(b, VLAN_HEADER_LEN); - eth = ofpbuf_put_uninit(b, sizeof *eth); - memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN); - memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); + struct arp_eth_header *arp; + + dp_packet_clear(b); + dp_packet_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + + ARP_ETH_HEADER_LEN); + dp_packet_reserve(b, 2 + VLAN_HEADER_LEN); + eth = dp_packet_put_uninit(b, sizeof *eth); + eth->eth_dst = eth_addr_broadcast; + eth->eth_src = eth_src; eth->eth_type = htons(ETH_TYPE_RARP); - rarp = ofpbuf_put_uninit(b, sizeof *rarp); - rarp->hw_addr_space = htons(ARP_HTYPE_ETH); - rarp->proto_addr_space = htons(ETH_TYPE_IP); - rarp->hw_addr_length = ETH_ADDR_LEN; - rarp->proto_addr_length = sizeof rarp->src_proto_addr; - rarp->opcode = htons(RARP_REQUEST_REVERSE); - memcpy(rarp->src_hw_addr, eth_src, ETH_ADDR_LEN); - rarp->src_proto_addr = htonl(0); - memcpy(rarp->target_hw_addr, eth_src, ETH_ADDR_LEN); - rarp->target_proto_addr = htonl(0); + arp = dp_packet_put_uninit(b, sizeof *arp); + arp->ar_hrd = htons(ARP_HRD_ETHERNET); + arp->ar_pro = htons(ARP_PRO_IP); + arp->ar_hln = sizeof arp->ar_sha; + arp->ar_pln = sizeof arp->ar_spa; + arp->ar_op = htons(ARP_OP_RARP); + arp->ar_sha = eth_src; + put_16aligned_be32(&arp->ar_spa, htonl(0)); + arp->ar_tha = eth_src; + put_16aligned_be32(&arp->ar_tpa, htonl(0)); + + dp_packet_reset_offsets(b); + dp_packet_set_l3(b, arp); } /* Insert VLAN header according to given TCI. Packet passed must be Ethernet * packet. Ignores the CFI bit of 'tci' using 0 instead. * - * Also sets 'packet->l2' to point to the new Ethernet header. */ + * Also adjusts the layer offsets accordingly. 
*/ void -eth_push_vlan(struct ofpbuf *packet, ovs_be16 tci) +eth_push_vlan(struct dp_packet *packet, ovs_be16 tpid, ovs_be16 tci) { - struct eth_header *eh = packet->data; struct vlan_eth_header *veh; /* Insert new 802.1Q header. */ - struct vlan_eth_header tmp; - memcpy(tmp.veth_dst, eh->eth_dst, ETH_ADDR_LEN); - memcpy(tmp.veth_src, eh->eth_src, ETH_ADDR_LEN); - tmp.veth_type = htons(ETH_TYPE_VLAN); - tmp.veth_tci = tci & htons(~VLAN_CFI); - tmp.veth_next_type = eh->eth_type; - - veh = ofpbuf_push_uninit(packet, VLAN_HEADER_LEN); - memcpy(veh, &tmp, sizeof tmp); - - packet->l2 = packet->data; + veh = dp_packet_resize_l2(packet, VLAN_HEADER_LEN); + memmove(veh, (char *)veh + VLAN_HEADER_LEN, 2 * ETH_ADDR_LEN); + veh->veth_type = tpid; + veh->veth_tci = tci & htons(~VLAN_CFI); } /* Removes outermost VLAN header (if any is present) from 'packet'. * - * 'packet->l2' must initially point to 'packet''s Ethernet header. */ + * 'packet->l2_5' should initially point to 'packet''s outer-most VLAN header + * or may be NULL if there are no VLAN headers. */ +void +eth_pop_vlan(struct dp_packet *packet) +{ + struct vlan_eth_header *veh = dp_packet_l2(packet); + + if (veh && dp_packet_size(packet) >= sizeof *veh + && eth_type_vlan(veh->veth_type)) { + + memmove((char *)veh + VLAN_HEADER_LEN, veh, 2 * ETH_ADDR_LEN); + dp_packet_resize_l2(packet, -VLAN_HEADER_LEN); + } +} + +/* Set ethertype of the packet. */ +static void +set_ethertype(struct dp_packet *packet, ovs_be16 eth_type) +{ + struct eth_header *eh = dp_packet_l2(packet); + + if (!eh) { + return; + } + + if (eth_type_vlan(eh->eth_type)) { + ovs_be16 *p; + char *l2_5 = dp_packet_l2_5(packet); + + p = ALIGNED_CAST(ovs_be16 *, + (l2_5 ? l2_5 : (char *)dp_packet_l3(packet)) - 2); + *p = eth_type; + } else { + eh->eth_type = eth_type; + } +} + +static bool is_mpls(struct dp_packet *packet) +{ + return packet->l2_5_ofs != UINT16_MAX; +} + +/* Set time to live (TTL) of an MPLS label stack entry (LSE). 
*/ +void +set_mpls_lse_ttl(ovs_be32 *lse, uint8_t ttl) +{ + *lse &= ~htonl(MPLS_TTL_MASK); + *lse |= htonl((ttl << MPLS_TTL_SHIFT) & MPLS_TTL_MASK); +} + +/* Set traffic class (TC) of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_tc(ovs_be32 *lse, uint8_t tc) +{ + *lse &= ~htonl(MPLS_TC_MASK); + *lse |= htonl((tc << MPLS_TC_SHIFT) & MPLS_TC_MASK); +} + +/* Set label of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_label(ovs_be32 *lse, ovs_be32 label) +{ + *lse &= ~htonl(MPLS_LABEL_MASK); + *lse |= htonl((ntohl(label) << MPLS_LABEL_SHIFT) & MPLS_LABEL_MASK); +} + +/* Set bottom of stack (BoS) bit of an MPLS label stack entry (LSE). */ +void +set_mpls_lse_bos(ovs_be32 *lse, uint8_t bos) +{ + *lse &= ~htonl(MPLS_BOS_MASK); + *lse |= htonl((bos << MPLS_BOS_SHIFT) & MPLS_BOS_MASK); +} + +/* Compose an MPLS label stack entry (LSE) from its components: + * label, traffic class (TC), time to live (TTL) and + * bottom of stack (BoS) bit. */ +ovs_be32 +set_mpls_lse_values(uint8_t ttl, uint8_t tc, uint8_t bos, ovs_be32 label) +{ + ovs_be32 lse = htonl(0); + set_mpls_lse_ttl(&lse, ttl); + set_mpls_lse_tc(&lse, tc); + set_mpls_lse_bos(&lse, bos); + set_mpls_lse_label(&lse, label); + return lse; +} + +/* Set MPLS label stack entry to outermost MPLS header.*/ void -eth_pop_vlan(struct ofpbuf *packet) +set_mpls_lse(struct dp_packet *packet, ovs_be32 mpls_lse) { - struct vlan_eth_header *veh = packet->l2; - if (packet->size >= sizeof *veh - && veh->veth_type == htons(ETH_TYPE_VLAN)) { - struct eth_header tmp; + /* Packet type should be MPLS to set label stack entry. */ + if (is_mpls(packet)) { + struct mpls_hdr *mh = dp_packet_l2_5(packet); - memcpy(tmp.eth_dst, veh->veth_dst, ETH_ADDR_LEN); - memcpy(tmp.eth_src, veh->veth_src, ETH_ADDR_LEN); - tmp.eth_type = veh->veth_next_type; + /* Update mpls label stack entry. 
*/ + put_16aligned_be32(&mh->mpls_lse, mpls_lse); + } +} - ofpbuf_pull(packet, VLAN_HEADER_LEN); - packet->l2 = (char*)packet->l2 + VLAN_HEADER_LEN; - memcpy(packet->data, &tmp, sizeof tmp); +/* Push MPLS label stack entry 'lse' onto 'packet' as the outermost MPLS + * header. If 'packet' does not already have any MPLS labels, then its + * Ethertype is changed to 'ethtype' (which must be an MPLS Ethertype). */ +void +push_mpls(struct dp_packet *packet, ovs_be16 ethtype, ovs_be32 lse) +{ + char * header; + size_t len; + + if (!eth_type_mpls(ethtype)) { + return; + } + + if (!is_mpls(packet)) { + /* Set MPLS label stack offset. */ + packet->l2_5_ofs = packet->l3_ofs; + } + + set_ethertype(packet, ethtype); + + /* Push new MPLS shim header onto packet. */ + len = packet->l2_5_ofs; + header = dp_packet_resize_l2_5(packet, MPLS_HLEN); + memmove(header, header + MPLS_HLEN, len); + memcpy(header + len, &lse, sizeof lse); +} + +/* If 'packet' is an MPLS packet, removes its outermost MPLS label stack entry. + * If the label that was removed was the only MPLS label, changes 'packet''s + * Ethertype to 'ethtype' (which ordinarily should not be an MPLS + * Ethertype). */ +void +pop_mpls(struct dp_packet *packet, ovs_be16 ethtype) +{ + if (is_mpls(packet)) { + struct mpls_hdr *mh = dp_packet_l2_5(packet); + size_t len = packet->l2_5_ofs; + + set_ethertype(packet, ethtype); + if (get_16aligned_be32(&mh->mpls_lse) & htonl(MPLS_BOS_MASK)) { + dp_packet_set_l2_5(packet, NULL); + } + /* Shift the l2 header forward. */ + memmove((char*)dp_packet_data(packet) + MPLS_HLEN, dp_packet_data(packet), len); + dp_packet_resize_l2_5(packet, -MPLS_HLEN); } } /* Converts hex digits in 'hex' to an Ethernet packet in '*packetp'. The * caller must free '*packetp'. On success, returns NULL. On failure, returns - * an error message and stores NULL in '*packetp'. */ + * an error message and stores NULL in '*packetp'. + * + * Aligns the L3 header of '*packetp' on a 32-bit boundary. 
*/ const char * -eth_from_hex(const char *hex, struct ofpbuf **packetp) +eth_from_hex(const char *hex, struct dp_packet **packetp) { - struct ofpbuf *packet; + struct dp_packet *packet; - packet = *packetp = ofpbuf_new(strlen(hex) / 2); + /* Use 2 bytes of headroom to 32-bit align the L3 header. */ + packet = *packetp = dp_packet_new_with_headroom(strlen(hex) / 2, 2); - if (ofpbuf_put_hex(packet, hex, NULL)[0] != '\0') { - ofpbuf_delete(packet); + if (dp_packet_put_hex(packet, hex, NULL)[0] != '\0') { + dp_packet_delete(packet); *packetp = NULL; return "Trailing garbage in packet data"; } - if (packet->size < ETH_HEADER_LEN) { - ofpbuf_delete(packet); + if (dp_packet_size(packet) < ETH_HEADER_LEN) { + dp_packet_delete(packet); *packetp = NULL; return "Packet data too short for Ethernet"; } @@ -224,46 +371,35 @@ eth_from_hex(const char *hex, struct ofpbuf **packetp) } void -eth_format_masked(const uint8_t eth[ETH_ADDR_LEN], - const uint8_t mask[ETH_ADDR_LEN], struct ds *s) +eth_format_masked(const struct eth_addr eth, + const struct eth_addr *mask, struct ds *s) { ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(eth)); - if (mask && !eth_mask_is_exact(mask)) { - ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(mask)); - } -} - -void -eth_addr_bitand(const uint8_t src[ETH_ADDR_LEN], - const uint8_t mask[ETH_ADDR_LEN], - uint8_t dst[ETH_ADDR_LEN]) -{ - int i; - - for (i = 0; i < ETH_ADDR_LEN; i++) { - dst[i] = src[i] & mask[i]; + if (mask && !eth_mask_is_exact(*mask)) { + ds_put_format(s, "/"ETH_ADDR_FMT, ETH_ADDR_ARGS(*mask)); } } /* Given the IP netmask 'netmask', returns the number of bits of the IP address - * that it specifies, that is, the number of 1-bits in 'netmask'. 'netmask' - * must be a CIDR netmask (see ip_is_cidr()). */ + * that it specifies, that is, the number of 1-bits in 'netmask'. + * + * If 'netmask' is not a CIDR netmask (see ip_is_cidr()), the return value will + * still be in the valid range but isn't otherwise meaningful. 
*/ int ip_count_cidr_bits(ovs_be32 netmask) { - assert(ip_is_cidr(netmask)); - return 32 - ctz(ntohl(netmask)); + return 32 - ctz32(ntohl(netmask)); } void ip_format_masked(ovs_be32 ip, ovs_be32 mask, struct ds *s) { - ds_put_format(s, IP_FMT, IP_ARGS(&ip)); - if (mask != htonl(UINT32_MAX)) { + ds_put_format(s, IP_FMT, IP_ARGS(ip)); + if (mask != OVS_BE32_MAX) { if (ip_is_cidr(mask)) { ds_put_format(s, "/%d", ip_count_cidr_bits(mask)); } else { - ds_put_format(s, "/"IP_FMT, IP_ARGS(&mask)); + ds_put_format(s, "/"IP_FMT, IP_ARGS(mask)); } } } @@ -290,6 +426,17 @@ print_ipv6_addr(struct ds *string, const struct in6_addr *addr) string->length += strlen(dst); } +void +print_ipv6_mapped(struct ds *s, const struct in6_addr *addr) +{ + if (IN6_IS_ADDR_V4MAPPED(addr)) { + ds_put_format(s, IP_FMT, addr->s6_addr[12], addr->s6_addr[13], + addr->s6_addr[14], addr->s6_addr[15]); + } else { + print_ipv6_addr(s, addr); + } +} + void print_ipv6_masked(struct ds *s, const struct in6_addr *addr, const struct in6_addr *mask) @@ -349,7 +496,10 @@ ipv6_create_mask(int mask) /* Given the IPv6 netmask 'netmask', returns the number of bits of the IPv6 * address that it specifies, that is, the number of 1-bits in 'netmask'. - * 'netmask' must be a CIDR netmask (see ipv6_is_cidr()). */ + * 'netmask' must be a CIDR netmask (see ipv6_is_cidr()). + * + * If 'netmask' is not a CIDR netmask (see ipv6_is_cidr()), the return value + * will still be in the valid range but isn't otherwise meaningful. */ int ipv6_count_cidr_bits(const struct in6_addr *netmask) { @@ -357,8 +507,6 @@ ipv6_count_cidr_bits(const struct in6_addr *netmask) int count = 0; const uint8_t *netmaskp = &netmask->s6_addr[0]; - assert(ipv6_is_cidr(netmask)); - for (i=0; i<16; i++) { if (netmaskp[i] == 0xff) { count += 8; @@ -404,57 +552,202 @@ ipv6_is_cidr(const struct in6_addr *netmask) /* Populates 'b' with an Ethernet II packet headed with the given 'eth_dst', * 'eth_src' and 'eth_type' parameters. 
A payload of 'size' bytes is allocated * in 'b' and returned. This payload may be populated with appropriate - * information by the caller. Sets 'b''s 'l2' and 'l3' pointers to the - * Ethernet header and payload respectively. + * information by the caller. Sets 'b''s 'frame' pointer and 'l3' offset to + * the Ethernet header and payload respectively. Aligns b->l3 on a 32-bit + * boundary. * * The returned packet has enough headroom to insert an 802.1Q VLAN header if * desired. */ void * -eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN], - const uint8_t eth_src[ETH_ADDR_LEN], uint16_t eth_type, +eth_compose(struct dp_packet *b, const struct eth_addr eth_dst, + const struct eth_addr eth_src, uint16_t eth_type, size_t size) { void *data; struct eth_header *eth; - ofpbuf_clear(b); + dp_packet_clear(b); - ofpbuf_prealloc_tailroom(b, ETH_HEADER_LEN + VLAN_HEADER_LEN + size); - ofpbuf_reserve(b, VLAN_HEADER_LEN); - eth = ofpbuf_put_uninit(b, ETH_HEADER_LEN); - data = ofpbuf_put_uninit(b, size); + /* The magic 2 here ensures that the L3 header (when it is added later) + * will be 32-bit aligned. 
*/ + dp_packet_prealloc_tailroom(b, 2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + size); + dp_packet_reserve(b, 2 + VLAN_HEADER_LEN); + eth = dp_packet_put_uninit(b, ETH_HEADER_LEN); + data = dp_packet_put_uninit(b, size); - memcpy(eth->eth_dst, eth_dst, ETH_ADDR_LEN); - memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN); + eth->eth_dst = eth_dst; + eth->eth_src = eth_src; eth->eth_type = htons(eth_type); - b->l2 = eth; - b->l3 = data; + dp_packet_reset_offsets(b); + dp_packet_set_l3(b, data); return data; } static void -packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) +packet_set_ipv4_addr(struct dp_packet *packet, + ovs_16aligned_be32 *addr, ovs_be32 new_addr) { - struct ip_header *nh = packet->l3; + struct ip_header *nh = dp_packet_l3(packet); + ovs_be32 old_addr = get_16aligned_be32(addr); + size_t l4_size = dp_packet_l4_size(packet); - if (nh->ip_proto == IPPROTO_TCP && packet->l7) { - struct tcp_header *th = packet->l4; + if (nh->ip_proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { + struct tcp_header *th = dp_packet_l4(packet); - th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr); - } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) { - struct udp_header *uh = packet->l4; + th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr); + } else if (nh->ip_proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN ) { + struct udp_header *uh = dp_packet_l4(packet); if (uh->udp_csum) { - uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr); + uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr); if (!uh->udp_csum) { uh->udp_csum = htons(0xffff); } } } - nh->ip_csum = recalc_csum32(nh->ip_csum, *addr, new_addr); - *addr = new_addr; + nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr); + put_16aligned_be32(addr, new_addr); +} + +/* Returns true if 'packet' contains at least one routing header where + * segments_left > 0. + * + * This function assumes that L3 and L4 offsets are set in the packet. 
*/ +static bool +packet_rh_present(struct dp_packet *packet) +{ + const struct ovs_16aligned_ip6_hdr *nh; + int nexthdr; + size_t len; + size_t remaining; + uint8_t *data = dp_packet_l3(packet); + + remaining = packet->l4_ofs - packet->l3_ofs; + + if (remaining < sizeof *nh) { + return false; + } + nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data); + data += sizeof *nh; + remaining -= sizeof *nh; + nexthdr = nh->ip6_nxt; + + while (1) { + if ((nexthdr != IPPROTO_HOPOPTS) + && (nexthdr != IPPROTO_ROUTING) + && (nexthdr != IPPROTO_DSTOPTS) + && (nexthdr != IPPROTO_AH) + && (nexthdr != IPPROTO_FRAGMENT)) { + /* It's either a terminal header (e.g., TCP, UDP) or one we + * don't understand. In either case, we're done with the + * packet, so stop scanning extension headers. */ + break; + } + + /* We only verify that at least 8 bytes of the next header are + * available, but many of these headers are longer. Ensure that + * accesses within the extension header are within those first 8 + * bytes. All extension headers are required to be at least 8 + * bytes. */ + if (remaining < 8) { + return false; + } + + if (nexthdr == IPPROTO_AH) { + /* A standard AH definition isn't available, but the fields + * we care about are in the same location as the generic + * option header--only the header length is calculated + * differently. 
*/ + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 2) * 4; + } else if (nexthdr == IPPROTO_FRAGMENT) { + const struct ovs_16aligned_ip6_frag *frag_hdr + = ALIGNED_CAST(struct ovs_16aligned_ip6_frag *, data); + + nexthdr = frag_hdr->ip6f_nxt; + len = sizeof *frag_hdr; + } else if (nexthdr == IPPROTO_ROUTING) { + const struct ip6_rthdr *rh = (struct ip6_rthdr *)data; + + if (rh->ip6r_segleft > 0) { + return true; + } + + nexthdr = rh->ip6r_nxt; + len = (rh->ip6r_len + 1) * 8; + } else { + const struct ip6_ext *ext_hdr = (struct ip6_ext *)data; + + nexthdr = ext_hdr->ip6e_nxt; + len = (ext_hdr->ip6e_len + 1) * 8; + } + + if (remaining < len) { + return false; + } + remaining -= len; + data += len; + } + + return false; +} + +static void +packet_update_csum128(struct dp_packet *packet, uint8_t proto, + ovs_16aligned_be32 addr[4], const ovs_be32 new_addr[4]) +{ + size_t l4_size = dp_packet_l4_size(packet); + + if (proto == IPPROTO_TCP && l4_size >= TCP_HEADER_LEN) { + struct tcp_header *th = dp_packet_l4(packet); + + th->tcp_csum = recalc_csum128(th->tcp_csum, addr, new_addr); + } else if (proto == IPPROTO_UDP && l4_size >= UDP_HEADER_LEN) { + struct udp_header *uh = dp_packet_l4(packet); + + if (uh->udp_csum) { + uh->udp_csum = recalc_csum128(uh->udp_csum, addr, new_addr); + if (!uh->udp_csum) { + uh->udp_csum = htons(0xffff); + } + } + } else if (proto == IPPROTO_ICMPV6 && + l4_size >= sizeof(struct icmp6_header)) { + struct icmp6_header *icmp = dp_packet_l4(packet); + + icmp->icmp6_cksum = recalc_csum128(icmp->icmp6_cksum, addr, new_addr); + } +} + +static void +packet_set_ipv6_addr(struct dp_packet *packet, uint8_t proto, + ovs_16aligned_be32 addr[4], const ovs_be32 new_addr[4], + bool recalculate_csum) +{ + if (recalculate_csum) { + packet_update_csum128(packet, proto, addr, new_addr); + } + memcpy(addr, new_addr, sizeof(ovs_be32[4])); +} + +static void 
+packet_set_ipv6_flow_label(ovs_16aligned_be32 *flow_label, ovs_be32 flow_key) +{ + ovs_be32 old_label = get_16aligned_be32(flow_label); + ovs_be32 new_label = (old_label & htonl(~IPV6_LABEL_MASK)) | flow_key; + put_16aligned_be32(flow_label, new_label); +} + +static void +packet_set_ipv6_tc(ovs_16aligned_be32 *flow_label, uint8_t tc) +{ + ovs_be32 old_label = get_16aligned_be32(flow_label); + ovs_be32 new_label = (old_label & htonl(0xF00FFFFF)) | htonl(tc << 20); + put_16aligned_be32(flow_label, new_label); } /* Modifies the IPv4 header fields of 'packet' to be consistent with 'src', @@ -462,16 +755,16 @@ packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr) * 'packet' must contain a valid IPv4 packet with correctly populated l[347] * markers. */ void -packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, +packet_set_ipv4(struct dp_packet *packet, ovs_be32 src, ovs_be32 dst, uint8_t tos, uint8_t ttl) { - struct ip_header *nh = packet->l3; + struct ip_header *nh = dp_packet_l3(packet); - if (nh->ip_src != src) { + if (get_16aligned_be32(&nh->ip_src) != src) { packet_set_ipv4_addr(packet, &nh->ip_src, src); } - if (nh->ip_dst != dst) { + if (get_16aligned_be32(&nh->ip_dst) != dst) { packet_set_ipv4_addr(packet, &nh->ip_dst, dst); } @@ -492,6 +785,33 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst, } } +/* Modifies the IPv6 header fields of 'packet' to be consistent with 'src', + * 'dst', 'key_tc', 'key_fl', and 'key_hl'. Updates 'packet''s L4 checksums as + * appropriate. 'packet' must contain a valid IPv6 packet with correctly + * populated l[34] offsets. 
*/ +void +packet_set_ipv6(struct dp_packet *packet, uint8_t proto, const ovs_be32 src[4], + const ovs_be32 dst[4], uint8_t key_tc, ovs_be32 key_fl, + uint8_t key_hl) +{ + struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet); + + if (memcmp(&nh->ip6_src, src, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, nh->ip6_src.be32, src, true); + } + + if (memcmp(&nh->ip6_dst, dst, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, proto, nh->ip6_dst.be32, dst, + !packet_rh_present(packet)); + } + + packet_set_ipv6_tc(&nh->ip6_flow, key_tc); + + packet_set_ipv6_flow_label(&nh->ip6_flow, key_fl); + + nh->ip6_hlim = key_hl; +} + static void packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) { @@ -503,11 +823,11 @@ packet_set_port(ovs_be16 *port, ovs_be16 new_port, ovs_be16 *csum) /* Sets the TCP source and destination port ('src' and 'dst' respectively) of * the TCP header contained in 'packet'. 'packet' must be a valid TCP packet - * with its l4 marker properly populated. */ + * with its l4 offset properly populated. */ void -packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) +packet_set_tcp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) { - struct tcp_header *th = packet->l4; + struct tcp_header *th = dp_packet_l4(packet); packet_set_port(&th->tcp_src, src, &th->tcp_csum); packet_set_port(&th->tcp_dst, dst, &th->tcp_csum); @@ -515,11 +835,11 @@ packet_set_tcp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) /* Sets the UDP source and destination port ('src' and 'dst' respectively) of * the UDP header contained in 'packet'. 'packet' must be a valid UDP packet - * with its l4 marker properly populated. */ + * with its l4 offset properly populated. 
*/ void -packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) +packet_set_udp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) { - struct udp_header *uh = packet->l4; + struct udp_header *uh = dp_packet_l4(packet); if (uh->udp_csum) { packet_set_port(&uh->udp_src, src, &uh->udp_csum); @@ -534,28 +854,116 @@ packet_set_udp_port(struct ofpbuf *packet, ovs_be16 src, ovs_be16 dst) } } -/* If 'packet' is a TCP packet, returns the TCP flags. Otherwise, returns 0. - * - * 'flow' must be the flow corresponding to 'packet' and 'packet''s header - * pointers must be properly initialized (e.g. with flow_extract()). */ -uint8_t -packet_get_tcp_flags(const struct ofpbuf *packet, const struct flow *flow) -{ - if ((flow->dl_type == htons(ETH_TYPE_IP) || - flow->dl_type == htons(ETH_TYPE_IPV6)) && - flow->nw_proto == IPPROTO_TCP && packet->l7) { - const struct tcp_header *tcp = packet->l4; - return TCP_FLAGS(tcp->tcp_ctl); - } else { - return 0; +/* Sets the SCTP source and destination port ('src' and 'dst' respectively) of + * the SCTP header contained in 'packet'. 'packet' must be a valid SCTP packet + * with its l4 offset properly populated. 
*/ +void +packet_set_sctp_port(struct dp_packet *packet, ovs_be16 src, ovs_be16 dst) +{ + struct sctp_header *sh = dp_packet_l4(packet); + ovs_be32 old_csum, old_correct_csum, new_csum; + uint16_t tp_len = dp_packet_l4_size(packet); + + old_csum = get_16aligned_be32(&sh->sctp_csum); + put_16aligned_be32(&sh->sctp_csum, 0); + old_correct_csum = crc32c((void *)sh, tp_len); + + sh->sctp_src = src; + sh->sctp_dst = dst; + + new_csum = crc32c((void *)sh, tp_len); + put_16aligned_be32(&sh->sctp_csum, old_csum ^ old_correct_csum ^ new_csum); +} + +void +packet_set_nd(struct dp_packet *packet, const ovs_be32 target[4], + const struct eth_addr sll, const struct eth_addr tll) { + struct ovs_nd_msg *ns; + struct ovs_nd_opt *nd_opt; + int bytes_remain = dp_packet_l4_size(packet); + + if (OVS_UNLIKELY(bytes_remain < sizeof(*ns))) { + return; + } + + ns = dp_packet_l4(packet); + nd_opt = &ns->options[0]; + bytes_remain -= sizeof(*ns); + + if (memcmp(&ns->target, target, sizeof(ovs_be32[4]))) { + packet_set_ipv6_addr(packet, IPPROTO_ICMPV6, + ns->target.be32, + target, true); + } + + while (bytes_remain >= ND_OPT_LEN && nd_opt->nd_opt_len != 0) { + if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LINKADDR + && nd_opt->nd_opt_len == 1) { + if (!eth_addr_equals(nd_opt->nd_opt_mac, sll)) { + ovs_be16 *csum = &(ns->icmph.icmp6_cksum); + + *csum = recalc_csum48(*csum, nd_opt->nd_opt_mac, sll); + nd_opt->nd_opt_mac = sll; + } + + /* A packet can only contain one SLL or TLL option */ + break; + } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LINKADDR + && nd_opt->nd_opt_len == 1) { + if (!eth_addr_equals(nd_opt->nd_opt_mac, tll)) { + ovs_be16 *csum = &(ns->icmph.icmp6_cksum); + + *csum = recalc_csum48(*csum, nd_opt->nd_opt_mac, tll); + nd_opt->nd_opt_mac = tll; + } + + /* A packet can only contain one SLL or TLL option */ + break; + } + /* Charge this option's own length before stepping past it. */ + bytes_remain -= nd_opt->nd_opt_len * ND_OPT_LEN; + nd_opt += nd_opt->nd_opt_len; + } +} + +const char * +packet_tcp_flag_to_string(uint32_t flag) +{ + switch 
(flag) { + case TCP_FIN: + return "fin"; + case TCP_SYN: + return "syn"; + case TCP_RST: + return "rst"; + case TCP_PSH: + return "psh"; + case TCP_ACK: + return "ack"; + case TCP_URG: + return "urg"; + case TCP_ECE: + return "ece"; + case TCP_CWR: + return "cwr"; + case TCP_NS: + return "ns"; + case 0x200: + return "[200]"; + case 0x400: + return "[400]"; + case 0x800: + return "[800]"; + default: + return NULL; } } /* Appends a string representation of the TCP flags value 'tcp_flags' - * (e.g. obtained via packet_get_tcp_flags() or TCP_FLAGS) to 's', in the + * (e.g. from struct flow.tcp_flags or obtained via TCP_FLAGS) to 's', in the * format used by tcpdump. */ void -packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) +packet_format_tcp_flags(struct ds *s, uint16_t tcp_flags) { if (!tcp_flags) { ds_put_cstr(s, "none"); @@ -580,10 +988,78 @@ packet_format_tcp_flags(struct ds *s, uint8_t tcp_flags) if (tcp_flags & TCP_ACK) { ds_put_char(s, '.'); } - if (tcp_flags & 0x40) { - ds_put_cstr(s, "[40]"); + if (tcp_flags & TCP_ECE) { + ds_put_cstr(s, "E"); + } + if (tcp_flags & TCP_CWR) { + ds_put_cstr(s, "C"); + } + if (tcp_flags & TCP_NS) { + ds_put_cstr(s, "N"); + } + if (tcp_flags & 0x200) { + ds_put_cstr(s, "[200]"); + } + if (tcp_flags & 0x400) { + ds_put_cstr(s, "[400]"); } - if (tcp_flags & 0x80) { - ds_put_cstr(s, "[80]"); + if (tcp_flags & 0x800) { + ds_put_cstr(s, "[800]"); } } + +#define ARP_PACKET_SIZE (2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + \ + ARP_ETH_HEADER_LEN) + +/* Clears 'b' and replaces its contents by an ARP frame with the specified + * 'arp_op', 'arp_sha', 'arp_tha', 'arp_spa', and 'arp_tpa'. The outer + * Ethernet frame is initialized with Ethernet source 'arp_sha' and destination + * 'arp_tha', except that destination ff:ff:ff:ff:ff:ff is used instead if + * 'broadcast' is true. 
*/ +void +compose_arp(struct dp_packet *b, uint16_t arp_op, + const struct eth_addr arp_sha, const struct eth_addr arp_tha, + bool broadcast, ovs_be32 arp_spa, ovs_be32 arp_tpa) +{ + struct eth_header *eth; + struct arp_eth_header *arp; + + dp_packet_clear(b); + dp_packet_prealloc_tailroom(b, ARP_PACKET_SIZE); + dp_packet_reserve(b, 2 + VLAN_HEADER_LEN); + + eth = dp_packet_put_uninit(b, sizeof *eth); + eth->eth_dst = broadcast ? eth_addr_broadcast : arp_tha; + eth->eth_src = arp_sha; + eth->eth_type = htons(ETH_TYPE_ARP); + + arp = dp_packet_put_uninit(b, sizeof *arp); + arp->ar_hrd = htons(ARP_HRD_ETHERNET); + arp->ar_pro = htons(ARP_PRO_IP); + arp->ar_hln = sizeof arp->ar_sha; + arp->ar_pln = sizeof arp->ar_spa; + arp->ar_op = htons(arp_op); + arp->ar_sha = arp_sha; + arp->ar_tha = arp_tha; + + put_16aligned_be32(&arp->ar_spa, arp_spa); + put_16aligned_be32(&arp->ar_tpa, arp_tpa); + + dp_packet_reset_offsets(b); + dp_packet_set_l3(b, arp); +} + +uint32_t +packet_csum_pseudoheader(const struct ip_header *ip) +{ + uint32_t partial = 0; + + partial = csum_add32(partial, get_16aligned_be32(&ip->ip_src)); + partial = csum_add32(partial, get_16aligned_be32(&ip->ip_dst)); + partial = csum_add16(partial, htons(ip->ip_proto)); + partial = csum_add16(partial, htons(ntohs(ip->ip_tot_len) - + IP_IHL(ip->ip_ihl_ver) * 4)); + + return partial; +} +