OVS_GREP_IFELSE([$KSRC/include/linux/openvswitch.h], [openvswitch_handle_frame_hook],
[OVS_DEFINE([HAVE_RHEL_OVS_HOOK])])
OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [vxlan_xmit_skb])
- OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [bool xnet],
- [OVS_DEFINE([HAVE_VXLAN_XMIT_SKB_XNET_ARG])])
+ OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [struct vxlan_metadata],
+ [OVS_DEFINE([HAVE_VXLAN_METADATA])])
OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_flow_src_port],
[OVS_DEFINE([HAVE_UDP_FLOW_SRC_PORT])])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [ignore_df:1],
#ifndef VXLAN_HLEN
/* VXLAN header flags. */
#define VXLAN_HF_VNI 0x08000000
+#ifndef VXLAN_HF_GBP
+#define VXLAN_HF_GBP 0x80000000
+#endif
#define VXLAN_N_VID (1u << 24)
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
#endif
-#ifdef USE_KERNEL_TUNNEL_API
+#ifndef VXLAN_GBP_USED_BITS
+/*
+ * VXLAN Group Based Policy Extension [0]:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * D = Don't Learn bit. When set, this bit indicates that the egress
+ * VTEP MUST NOT learn the source address of the encapsulated frame.
+ *
+ * A = Indicates that the group policy has already been applied to
+ * this packet. Policies MUST NOT be applied by devices when the
+ * A bit is set.
+ *
+ * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
+ */
+/* View of a VXLAN header with the Group Based Policy fields broken out.
+ * The code obtains it by casting a struct vxlanhdr pointer, so it describes
+ * the same bytes as the plain header.
+ */
+struct vxlanhdr_gbp {
+ /* First header byte (flag bits). */
+ __u8 vx_flags;
+ /* Second header byte. The two branches list the same fields in opposite
+  * order; this is intended to make dont_learn and policy_applied name the
+  * same header bits under either bitfield ordering.
+  */
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ __u8 reserved_flags1:3,
+ policy_applied:1,
+ reserved_flags2:2,
+ dont_learn:1,
+ reserved_flags3:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 reserved_flags1:1,
+ dont_learn:1,
+ reserved_flags2:2,
+ policy_applied:1,
+ reserved_flags3:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ /* Group Policy ID; converted with ntohs()/htons() by the users. */
+ __be16 policy_id;
+ /* Second 32-bit word of the header (VNI plus reserved bits, see the
+  * layout diagram above).
+  */
+ __be32 vx_vni;
+};
+#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)
+
+/* skb->mark mapping
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16)
+#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
+#define VXLAN_GBP_ID_MASK (0xFFFF)
+
+#define VXLAN_F_GBP 0x800
+#endif
+
+#ifdef HAVE_VXLAN_METADATA
+/* Wrapper around vxlan_xmit_skb() for kernels whose <net/vxlan.h> provides
+ * struct vxlan_metadata (HAVE_VXLAN_METADATA).
+ *
+ * A GSO skb that is already encapsulated is freed and -ENOSYS is returned.
+ * Otherwise all arguments are handed on unchanged, followed by a trailing
+ * 'false', and the callee's return value is returned.
+ *
+ * NOTE(review): the trailing 'false' is presumably the 'xnet' argument that
+ * the removed HAVE_VXLAN_XMIT_SKB_XNET_ARG branch also passed - confirm
+ * against the target kernel's prototype.
+ */
static inline int rpl_vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni)
+ __be16 src_port, __be16 dst_port,
+ struct vxlan_metadata *md)
{
if (skb_is_gso(skb) && skb_is_encapsulated(skb)) {
kfree_skb(skb);
return -ENOSYS;
}
-#ifdef HAVE_VXLAN_XMIT_SKB_XNET_ARG
- return vxlan_xmit_skb(vs, rt, skb, src, dst, tos, ttl, df,
- src_port, dst_port, vni, false);
-#else
-#ifndef HAVE_IPTUNNEL_XMIT_NET
return vxlan_xmit_skb(vs, rt, skb, src, dst, tos, ttl, df,
- src_port, dst_port, vni);
-#else
- return vxlan_xmit_skb(NULL, vs, rt, skb, src, dst, tos, ttl, df,
- src_port, dst_port, vni);
-#endif
-
-#endif
+ src_port, dst_port, md, false);
}
#define vxlan_xmit_skb rpl_vxlan_xmit_skb
-#else
+#else /* HAVE_VXLAN_METADATA */
-struct vxlan_sock;
-typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key);
+/* Per-packet VXLAN metadata passed to the receive callback and to
+ * vxlan_xmit_skb(); defined here only when HAVE_VXLAN_METADATA is not set.
+ */
+struct vxlan_metadata {
+ /* Copied verbatim to/from the header's vx_vni field (big-endian). */
+ __be32 vni;
+ /* Host-order GBP value: Group Policy ID in the low 16 bits
+  * (VXLAN_GBP_ID_MASK), optionally OR'ed with VXLAN_GBP_DONT_LEARN and
+  * VXLAN_GBP_POLICY_APPLIED. Left at 0 when no GBP data was extracted.
+  */
+ u32 gbp;
+};
+
+#define vxlan_sock rpl_vxlan_sock
+struct rpl_vxlan_sock;
+
+#define vxlan_rcv_t rpl_vxlan_rcv_t
+typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
+ struct vxlan_metadata *md);
/* per UDP socket information */
struct vxlan_sock {
struct work_struct del_work;
struct socket *sock;
struct rcu_head rcu;
+ /* Flags supplied when the socket is created (e.g. VXLAN_F_GBP); the
+  * receive and transmit paths test them to decide whether the GBP
+  * extension is processed.
+  */
+ u32 flags;
};
+#define vxlan_sock_add rpl_vxlan_sock_add
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
bool no_share, u32 flags);
+#define vxlan_sock_release rpl_vxlan_sock_release
void vxlan_sock_release(struct vxlan_sock *vs);
+#define vxlan_xmit_skb rpl_vxlan_xmit_skb
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni);
+ __be16 src_port, __be16 dst_port,
+ struct vxlan_metadata *md);
+#define vxlan_src_port rpl_vxlan_src_port
__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb);
-#endif /* 3.12 */
+#endif /* !HAVE_VXLAN_METADATA */
#endif
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
u32 flags, vni;
+ struct vxlan_metadata md = {0};
/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
if (!vs)
goto drop;
+ /* For backwards compatibility, only allow reserved fields to be
+ * used by VXLAN extensions if explicitly requested.
+ */
+ if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
+ struct vxlanhdr_gbp *gbp;
+
+ gbp = (struct vxlanhdr_gbp *)vxh;
+ md.gbp = ntohs(gbp->policy_id);
+
+ if (gbp->dont_learn)
+ md.gbp |= VXLAN_GBP_DONT_LEARN;
+
+ if (gbp->policy_applied)
+ md.gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+ flags &= ~VXLAN_GBP_USED_BITS;
+ }
+
if (flags || (vni & 0xff)) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
goto bad_flags;
}
- vs->rcv(vs, skb, vxh->vx_vni);
+ md.vni = vxh->vx_vni;
+ vs->rcv(vs, skb, &md);
return 0;
drop:
return ovs_iptunnel_handle_offloads(skb, false, vxlan_gso);
}
+/* Encode Group Based Policy metadata into an outgoing VXLAN header.
+ *
+ * @vxh: header to modify; the caller has already written vx_flags and
+ *       vx_vni, so all GBP bits start out clear.
+ * @vs:  socket the packet is sent on (currently unused; kept so the
+ *       call site does not change).
+ * @md:  metadata whose 'gbp' member holds the policy ID in its low 16 bits
+ *       plus the optional VXLAN_GBP_DONT_LEARN / VXLAN_GBP_POLICY_APPLIED
+ *       flags.
+ *
+ * The GBP flag is only set when there is something to carry.  A header
+ * without it is a plain VXLAN header, which keeps a GBP-enabled socket
+ * interoperable with peers that reject unknown header flags.
+ */
+static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
+				struct vxlan_metadata *md)
+{
+	struct vxlanhdr_gbp *gbp;
+
+	/* No policy ID and no flags: leave the header untouched. */
+	if (!md->gbp)
+		return;
+
+	gbp = (struct vxlanhdr_gbp *)vxh;
+	vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+
+	if (md->gbp & VXLAN_GBP_DONT_LEARN)
+		gbp->dont_learn = 1;
+
+	if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
+		gbp->policy_applied = 1;
+
+	gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
+}
+
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni)
+ __be16 src_port, __be16 dst_port,
+ struct vxlan_metadata *md)
{
struct vxlanhdr *vxh;
struct udphdr *uh;
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
- vxh->vx_vni = vni;
+ vxh->vx_vni = md->vni;
+
+ if (vs->flags & VXLAN_F_GBP)
+ vxlan_build_gbp_hdr(vxh, vs, md);
__skb_push(skb, sizeof(*uh));
skb_reset_transport_header(skb);
}
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
- vxlan_rcv_t *rcv, void *data)
+ vxlan_rcv_t *rcv, void *data, u32 flags)
{
struct vxlan_sock *vs;
struct sock *sk;
}
vs->rcv = rcv;
vs->data = data;
+ vs->flags = flags;
/* Disable multicast loopback */
inet_sk(sk)->mc_loop = 0;
vxlan_rcv_t *rcv, void *data,
bool no_share, u32 flags)
{
- return vxlan_socket_create(net, port, rcv, data);
+ return vxlan_socket_create(net, port, rcv, data, flags);
}
void vxlan_sock_release(struct vxlan_sock *vs)
return vport_priv(vport);
}
-/* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
+static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
+ struct vxlan_metadata *md)
{
struct ovs_tunnel_info tun_info;
struct vport *vport = vs->data;
/* Save outer tunnel values */
iph = ip_hdr(skb);
- key = cpu_to_be64(ntohl(vx_vni) >> 8);
+ key = cpu_to_be64(ntohl(md->vni) >> 8);
ovs_flow_tun_info_init(&tun_info, iph,
udp_hdr(skb)->source, udp_hdr(skb)->dest,
key, TUNNEL_KEY, NULL, 0);
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sport(vxlan_port->vs->sock->sk);
+ struct vxlan_metadata md = {0};
struct rtable *rt;
__be16 src_port;
__be32 saddr;
skb->ignore_df = 1;
src_port = udp_flow_src_port(net, skb, 0, 0, true);
+ md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos,
tun_key->ipv4_ttl, df,
src_port, dst_port,
- htonl(be64_to_cpu(tun_key->tun_id) << 8));
+ &md);
if (err < 0)
ip_rt_put(rt);
return err;