From 11ff30bdd5e003a26851f7aab43498842700dffd Mon Sep 17 00:00:00 2001
From: Ben Pfaff <blp@nicira.com>
Date: Thu, 15 Aug 2013 10:47:39 -0700
Subject: [PATCH] packets: Do not assume that IPv4, TCP, or ARP headers are
 32-bit aligned.

Ethernet headers are 14 bytes long, so when the beginning of such a header
is 32-bit aligned, the following data is misaligned.  The usual trick to
fix that is to start the Ethernet header on an odd-numbered 16-bit
boundary.  That trick works OK for Open vSwitch, but there are two
problems:

   - OVS doesn't use that trick everywhere.  Maybe it should, but it's
     difficult to make sure that it does consistently because the CPUs
     most commonly used with OVS don't care about misalignment, so we
     only find problems when porting.

   - Some protocols (GRE, VXLAN) don't use that trick, so in such a case
     one can properly align the inner or outer L3/L4/L7 but not both.  (OVS
     userspace doesn't directly deal with such protocols yet, so this is
     just future-proofing.)

   - OpenFlow uses the alignment trick in a few places but not all of them.

This commit starts the adoption of what I hope will be a more robust way
to avoid misalignment problems and the resulting bus errors on RISC
architectures.  Instead of trying to ensure that 32-bit quantities are
always aligned, we always read them as if they were misaligned.  To ensure
that they are read this way, we change their types from 32-bit types to
pairs of 16-bit types.  (I don't know of any protocols that offset the
next header by an odd number of bytes, so a 16-bit alignment assumption
seems OK.)

The same would be necessary for 64-bit types in protocol headers, but we
don't yet have any protocol definitions with 64-bit types.

IPv6 protocol headers need the same treatment, but for those we rely on
structs provided by system headers, so I'll leave them for an upcoming
patch.

Signed-off-by: Ben Pfaff <blp@nicira.com>
---
 include/openvswitch/types.h | 32 +++++++++++++++++++++++++++--
 lib/flow.c                  | 16 +++++++--------
 lib/packets.c               | 23 ++++++++++++---------
 lib/packets.h               | 18 ++++++++--------
 lib/unaligned.h             | 41 +++++++++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+), 29 deletions(-)

diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h
index 72caa5c38..d3d4318fa 100644
--- a/include/openvswitch/types.h
+++ b/include/openvswitch/types.h
@@ -39,11 +39,39 @@ typedef __be16 ovs_be16;
 typedef __be32 ovs_be32;
 typedef __be64 ovs_be64;
 
-/* Netlink and OpenFlow both contain 64-bit values that are only guaranteed to
- * be aligned on 32-bit boundaries.  These types help.
+/* These types help with a few funny situations:
+ *
+ *   - The Ethernet header is 14 bytes long, which misaligns everything after
+ *     that.  One can put 2 "shim" bytes before the Ethernet header, but this
+ *     helps only if there is exactly one Ethernet header.  If there are two,
+ *     as with GRE and VXLAN (and if the inner header doesn't use this
+ *     trick--GRE and VXLAN don't) then you have the choice of aligning the
+ *     inner data or the outer data.  So it seems better to treat 32-bit fields
+ *     in protocol headers as aligned only on 16-bit boundaries.
+ *
+ *   - ARP headers contain misaligned 32-bit fields.
+ *
+ *   - Netlink and OpenFlow contain 64-bit values that are only guaranteed to
+ *     be aligned on 32-bit boundaries.
  *
  * lib/unaligned.h has helper functions for accessing these. */
 
+/* A 32-bit value, in host byte order, that is only aligned on a 16-bit
+ * boundary.  */
+typedef struct {
+#ifdef WORDS_BIGENDIAN
+        uint16_t hi, lo;
+#else
+        uint16_t lo, hi;
+#endif
+} ovs_16aligned_u32;
+
+/* A 32-bit value, in network byte order, that is only aligned on a 16-bit
+ * boundary. */
+typedef struct {
+        ovs_be16 hi, lo;
+} ovs_16aligned_be32;
+
 /* A 64-bit value, in host byte order, that is only aligned on a 32-bit
  * boundary.  */
 typedef struct {
diff --git a/lib/flow.c b/lib/flow.c
index a77f4b3ba..549ab1c16 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -381,8 +381,8 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
         if (nh) {
             packet->l4 = b.data;
 
-            flow->nw_src = get_unaligned_be32(&nh->ip_src);
-            flow->nw_dst = get_unaligned_be32(&nh->ip_dst);
+            flow->nw_src = get_16aligned_be32(&nh->ip_src);
+            flow->nw_dst = get_16aligned_be32(&nh->ip_dst);
             flow->nw_proto = nh->ip_proto;
 
             flow->nw_tos = nh->ip_tos;
@@ -436,8 +436,8 @@ flow_extract(struct ofpbuf *packet, uint32_t skb_priority, uint32_t skb_mark,
                 flow->nw_proto = ntohs(arp->ar_op);
             }
 
-            flow->nw_src = arp->ar_spa;
-            flow->nw_dst = arp->ar_tpa;
+            flow->nw_src = get_16aligned_be32(&arp->ar_spa);
+            flow->nw_dst = get_16aligned_be32(&arp->ar_tpa);
             memcpy(flow->arp_sha, arp->ar_sha, ETH_ADDR_LEN);
             memcpy(flow->arp_tha, arp->ar_tha, ETH_ADDR_LEN);
         }
@@ -813,8 +813,8 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
         ip->ip_ihl_ver = IP_IHL_VER(5, 4);
         ip->ip_tos = flow->nw_tos;
         ip->ip_proto = flow->nw_proto;
-        ip->ip_src = flow->nw_src;
-        ip->ip_dst = flow->nw_dst;
+        put_16aligned_be32(&ip->ip_src, flow->nw_src);
+        put_16aligned_be32(&ip->ip_dst, flow->nw_dst);
 
         if (flow->nw_frag & FLOW_NW_FRAG_ANY) {
             ip->ip_frag_off |= htons(IP_MORE_FRAGMENTS);
@@ -866,8 +866,8 @@ flow_compose(struct ofpbuf *b, const struct flow *flow)
 
         if (flow->nw_proto == ARP_OP_REQUEST ||
             flow->nw_proto == ARP_OP_REPLY) {
-            arp->ar_spa = flow->nw_src;
-            arp->ar_tpa = flow->nw_dst;
+            put_16aligned_be32(&arp->ar_spa, flow->nw_src);
+            put_16aligned_be32(&arp->ar_tpa, flow->nw_dst);
             memcpy(arp->ar_sha, flow->arp_sha, ETH_ADDR_LEN);
             memcpy(arp->ar_tha, flow->arp_tha, ETH_ADDR_LEN);
         }
diff --git a/lib/packets.c b/lib/packets.c
index f922cc286..9198ff2db 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@
 #include "hmap.h"
 #include "dynamic-string.h"
 #include "ofpbuf.h"
+#include "unaligned.h"
 
 const struct in6_addr in6addr_exact = IN6ADDR_EXACT_INIT;
 
@@ -162,9 +163,9 @@ compose_rarp(struct ofpbuf *b, const uint8_t eth_src[ETH_ADDR_LEN])
     arp->ar_pln = sizeof arp->ar_spa;
     arp->ar_op = htons(ARP_OP_RARP);
     memcpy(arp->ar_sha, eth_src, ETH_ADDR_LEN);
-    arp->ar_spa = htonl(0);
+    put_16aligned_be32(&arp->ar_spa, htonl(0));
     memcpy(arp->ar_tha, eth_src, ETH_ADDR_LEN);
-    arp->ar_tpa = htonl(0);
+    put_16aligned_be32(&arp->ar_tpa, htonl(0));
 }
 
 /* Insert VLAN header according to given TCI. Packet passed must be Ethernet
@@ -451,26 +452,28 @@ eth_compose(struct ofpbuf *b, const uint8_t eth_dst[ETH_ADDR_LEN],
 }
 
 static void
-packet_set_ipv4_addr(struct ofpbuf *packet, ovs_be32 *addr, ovs_be32 new_addr)
+packet_set_ipv4_addr(struct ofpbuf *packet,
+                     ovs_16aligned_be32 *addr, ovs_be32 new_addr)
 {
     struct ip_header *nh = packet->l3;
+    ovs_be32 old_addr = get_16aligned_be32(addr);
 
     if (nh->ip_proto == IPPROTO_TCP && packet->l7) {
         struct tcp_header *th = packet->l4;
 
-        th->tcp_csum = recalc_csum32(th->tcp_csum, *addr, new_addr);
+        th->tcp_csum = recalc_csum32(th->tcp_csum, old_addr, new_addr);
     } else if (nh->ip_proto == IPPROTO_UDP && packet->l7) {
         struct udp_header *uh = packet->l4;
 
         if (uh->udp_csum) {
-            uh->udp_csum = recalc_csum32(uh->udp_csum, *addr, new_addr);
+            uh->udp_csum = recalc_csum32(uh->udp_csum, old_addr, new_addr);
             if (!uh->udp_csum) {
                 uh->udp_csum = htons(0xffff);
             }
         }
     }
-    nh->ip_csum = recalc_csum32(nh->ip_csum, *addr, new_addr);
-    *addr = new_addr;
+    nh->ip_csum = recalc_csum32(nh->ip_csum, old_addr, new_addr);
+    put_16aligned_be32(addr, new_addr);
 }
 
 /* Returns true, if packet contains at least one routing header where
@@ -610,11 +613,11 @@ packet_set_ipv4(struct ofpbuf *packet, ovs_be32 src, ovs_be32 dst,
 {
     struct ip_header *nh = packet->l3;
 
-    if (nh->ip_src != src) {
+    if (get_16aligned_be32(&nh->ip_src) != src) {
         packet_set_ipv4_addr(packet, &nh->ip_src, src);
     }
 
-    if (nh->ip_dst != dst) {
+    if (get_16aligned_be32(&nh->ip_dst) != dst) {
         packet_set_ipv4_addr(packet, &nh->ip_dst, dst);
     }
 
diff --git a/lib/packets.h b/lib/packets.h
index f88026f80..0dfe443a1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -350,8 +350,8 @@ struct ip_header {
     uint8_t ip_ttl;
     uint8_t ip_proto;
     ovs_be16 ip_csum;
-    ovs_be32 ip_src;
-    ovs_be32 ip_dst;
+    ovs_16aligned_be32 ip_src;
+    ovs_16aligned_be32 ip_dst;
 };
 BUILD_ASSERT_DECL(IP_HEADER_LEN == sizeof(struct ip_header));
 
@@ -369,7 +369,7 @@ struct icmp_header {
             ovs_be16 empty;
             ovs_be16 mtu;
         } frag;
-        ovs_be32 gateway;
+        ovs_16aligned_be32 gateway;
     } icmp_fields;
     uint8_t icmp_data[0];
 };
@@ -399,8 +399,8 @@ BUILD_ASSERT_DECL(UDP_HEADER_LEN == sizeof(struct udp_header));
 struct tcp_header {
     ovs_be16 tcp_src;
     ovs_be16 tcp_dst;
-    ovs_be32 tcp_seq;
-    ovs_be32 tcp_ack;
+    ovs_16aligned_be32 tcp_seq;
+    ovs_16aligned_be32 tcp_ack;
     ovs_be16 tcp_ctl;
     ovs_be16 tcp_winsz;
     ovs_be16 tcp_csum;
@@ -425,10 +425,10 @@ struct arp_eth_header {
 
     /* Ethernet+IPv4 specific members. */
     uint8_t ar_sha[ETH_ADDR_LEN]; /* Sender hardware address. */
-    ovs_be32 ar_spa;           /* Sender protocol address. */
+    ovs_16aligned_be32 ar_spa;           /* Sender protocol address. */
     uint8_t ar_tha[ETH_ADDR_LEN]; /* Target hardware address. */
-    ovs_be32 ar_tpa;           /* Target protocol address. */
-} __attribute__((packed));
+    ovs_16aligned_be32 ar_tpa;           /* Target protocol address. */
+};
 BUILD_ASSERT_DECL(ARP_ETH_HEADER_LEN == sizeof(struct arp_eth_header));
 
 /* The IPv6 flow label is in the lower 20 bits of the first 32-bit word. */
diff --git a/lib/unaligned.h b/lib/unaligned.h
index 2654a2763..154eb1338 100644
--- a/lib/unaligned.h
+++ b/lib/unaligned.h
@@ -174,6 +174,21 @@ put_unaligned_u64(uint64_t *p, uint64_t x)
     put_unaligned_u64__(p, x);
 }
 
+/* Returns the value in 'x'. */
+static inline uint32_t
+get_16aligned_u32(const ovs_16aligned_u32 *x)
+{
+    return ((uint32_t) x->hi << 16) | x->lo;
+}
+
+/* Stores 'value' in 'x'. */
+static inline void
+put_16aligned_u32(ovs_16aligned_u32 *x, uint32_t value)
+{
+    x->hi = value >> 16;
+    x->lo = value;
+}
+
 /* Returns the value in 'x'. */
 static inline uint64_t
 get_32aligned_u64(const ovs_32aligned_u64 *x)
@@ -190,6 +205,30 @@ put_32aligned_u64(ovs_32aligned_u64 *x, uint64_t value)
 }
 
 #ifndef __CHECKER__
+/* Returns the value of 'x'. */
+static inline ovs_be32
+get_16aligned_be32(const ovs_16aligned_be32 *x)
+{
+#ifdef WORDS_BIGENDIAN
+    return ((ovs_be32) x->hi << 16) | x->lo;
+#else
+    return ((ovs_be32) x->lo << 16) | x->hi;
+#endif
+}
+
+/* Stores network byte order 'value' into 'x'. */
+static inline void
+put_16aligned_be32(ovs_16aligned_be32 *x, ovs_be32 value)
+{
+#if WORDS_BIGENDIAN
+    x->hi = value >> 16;
+    x->lo = value;
+#else
+    x->hi = value;
+    x->lo = value >> 16;
+#endif
+}
+
 /* Returns the value of 'x'. */
 static inline ovs_be64
 get_32aligned_be64(const ovs_32aligned_be64 *x)
@@ -216,6 +255,8 @@ put_32aligned_be64(ovs_32aligned_be64 *x, ovs_be64 value)
 #else  /* __CHECKER__ */
 /* Making sparse happy with these functions also makes them unreadable, so
  * don't bother to show it their implementations. */
+ovs_be32 get_16aligned_be32(const ovs_16aligned_be32 *);
+void put_16aligned_be32(ovs_16aligned_be32 *, ovs_be32);
 ovs_be64 get_32aligned_be64(const ovs_32aligned_be64 *);
 void put_32aligned_be64(ovs_32aligned_be64 *, ovs_be64);
 #endif
-- 
2.20.1