From 3174a818a125e7a2b5c37cb60839d6ea5c5b8c8c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Feb 2015 21:53:35 +0100 Subject: [PATCH] datapath: Account for "vxlan: Group Policy extension" Upstream commit: vxlan: Group Policy extension Implements support for the Group Policy VXLAN extension [0] to provide a lightweight and simple security label mechanism across network peers based on VXLAN. The security context and associated metadata are mapped to/from skb->mark. This allows further mapping to a SELinux context using SECMARK, to implement ACLs directly with nftables, iptables, OVS, tc, etc. The group membership is defined by the lower 16 bits of skb->mark, the upper 16 bits are used for flags. SELinux allows managing labels to secure local resources. However, distributed applications require ACLs to be implemented across hosts. This is typically achieved by matching on L2-L4 fields to identify the original sending host and process on the receiver. On top of that, netlabel and specifically CIPSO [1] allow mapping security contexts to universal labels. However, netlabel and CIPSO are relatively complex. This patch provides a lightweight alternative for overlay network environments with a trusted underlay. No additional control protocol is required. Host 1: Host 2: Group A Group B Group B Group A +-----+ +-------------+ +-------+ +-----+ | lxc | | SELinux CTX | | httpd | | VM | +--+--+ +--+----------+ +---+---+ +--+--+ \---+---/ \----+---/ | | +---+---+ +---+---+ | vxlan | | vxlan | +---+---+ +---+---+ +------------------------------+ Backwards compatibility: A VXLAN-GBP socket can receive standard VXLAN frames and will assign the default group 0x0000 to such frames. A Linux VXLAN socket will drop VXLAN-GBP frames. The extension is therefore disabled by default and needs to be specifically enabled: ip link add [...] type vxlan [...] gbp In a mixed environment with VXLAN and VXLAN-GBP sockets, the GBP socket must run on a separate port number. 
Examples: iptables: host1# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200 host2# iptables -I INPUT -m mark --mark 0x200 -j DROP OVS: # ovs-ofctl add-flow br0 'in_port=1,actions=load:0x200->NXM_NX_TUN_GBP_ID[],NORMAL' # ovs-ofctl add-flow br0 'in_port=2,tun_gbp_id=0x200,actions=drop' [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy [1] http://lwn.net/Articles/204905/ Signed-off-by: Thomas Graf Signed-off-by: David S. Miller Upstream: 351149 ("vxlan: Group Policy extension") Signed-off-by: Thomas Graf Acked-by: Pravin B Shelar --- acinclude.m4 | 4 +- datapath/linux/compat/include/net/vxlan.h | 99 ++++++++++++++++++----- datapath/linux/compat/vxlan.c | 52 ++++++++++-- datapath/vport-vxlan.c | 10 ++- 4 files changed, 135 insertions(+), 30 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 10ede83ba..3d2dc63a8 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -387,8 +387,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/linux/openvswitch.h], [openvswitch_handle_frame_hook], [OVS_DEFINE([HAVE_RHEL_OVS_HOOK])]) OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [vxlan_xmit_skb]) - OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [bool xnet], - [OVS_DEFINE([HAVE_VXLAN_XMIT_SKB_XNET_ARG])]) + OVS_GREP_IFELSE([$KSRC/include/net/vxlan.h], [struct vxlan_metadata], + [OVS_DEFINE([HAVE_VXLAN_METADATA])]) OVS_GREP_IFELSE([$KSRC/include/net/udp.h], [udp_flow_src_port], [OVS_DEFINE([HAVE_UDP_FLOW_SRC_PORT])]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [ignore_df:1], diff --git a/datapath/linux/compat/include/net/vxlan.h b/datapath/linux/compat/include/net/vxlan.h index 33dbc286c..4f3e3b723 100644 --- a/datapath/linux/compat/include/net/vxlan.h +++ b/datapath/linux/compat/include/net/vxlan.h @@ -15,43 +15,98 @@ #ifndef VXLAN_HLEN /* VXLAN header flags. 
*/ #define VXLAN_HF_VNI 0x08000000 +#ifndef VXLAN_HF_GBP +#define VXLAN_HF_GBP 0x80000000 +#endif #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #endif -#ifdef USE_KERNEL_TUNNEL_API +#ifndef VXLAN_GBP_USED_BITS +/* + * VXLAN Group Based Policy Extension: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * D = Don't Learn bit. When set, this bit indicates that the egress + * VTEP MUST NOT learn the source address of the encapsulated frame. + * + * A = Indicates that the group policy has already been applied to + * this packet. Policies MUST NOT be applied by devices when the + * A bit is set. + * + * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy + */ +struct vxlanhdr_gbp { + __u8 vx_flags; +#ifdef __LITTLE_ENDIAN_BITFIELD + __u8 reserved_flags1:3, + policy_applied:1, + reserved_flags2:2, + dont_learn:1, + reserved_flags3:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 reserved_flags1:1, + dont_learn:1, + reserved_flags2:2, + policy_applied:1, + reserved_flags3:3; +#else +#error "Please fix " +#endif + __be16 policy_id; + __be32 vx_vni; +}; +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) + +/* skb->mark mapping + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) +#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) +#define VXLAN_GBP_ID_MASK (0xFFFF) + +#define VXLAN_F_GBP 0x800 +#endif + +#ifdef HAVE_VXLAN_METADATA static inline int rpl_vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, 
struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md) { if (skb_is_gso(skb) && skb_is_encapsulated(skb)) { kfree_skb(skb); return -ENOSYS; } -#ifdef HAVE_VXLAN_XMIT_SKB_XNET_ARG - return vxlan_xmit_skb(vs, rt, skb, src, dst, tos, ttl, df, - src_port, dst_port, vni, false); -#else -#ifndef HAVE_IPTUNNEL_XMIT_NET return vxlan_xmit_skb(vs, rt, skb, src, dst, tos, ttl, df, - src_port, dst_port, vni); -#else - return vxlan_xmit_skb(NULL, vs, rt, skb, src, dst, tos, ttl, df, - src_port, dst_port, vni); -#endif - -#endif + src_port, dst_port, md, false); } #define vxlan_xmit_skb rpl_vxlan_xmit_skb -#else +#else /* HAVE_VXLAN_METADATA */ -struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vs, struct sk_buff *skb, __be32 key); +struct vxlan_metadata { + __be32 vni; + u32 gbp; +}; + +#define vxlan_sock rpl_vxlan_sock +struct rpl_vxlan_sock; + +#define vxlan_rcv_t rpl_vxlan_rcv_t +typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, + struct vxlan_metadata *md); /* per UDP socket information */ struct vxlan_sock { @@ -61,20 +116,26 @@ struct vxlan_sock { struct work_struct del_work; struct socket *sock; struct rcu_head rcu; + u32 flags; }; +#define vxlan_sock_add rpl_vxlan_sock_add struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, bool no_share, u32 flags); +#define vxlan_sock_release rpl_vxlan_sock_release void vxlan_sock_release(struct vxlan_sock *vs); +#define vxlan_xmit_skb rpl_vxlan_xmit_skb int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni); + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md); +#define vxlan_src_port rpl_vxlan_src_port __be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb); -#endif 
/* 3.12 */ +#endif /* !HAVE_VXLAN_METADATA */ #endif diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 1946cac25..969e32c07 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -72,6 +72,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct vxlan_sock *vs; struct vxlanhdr *vxh; u32 flags, vni; + struct vxlan_metadata md = {0}; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -95,6 +96,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + md.gbp = ntohs(gbp->policy_id); + + if (gbp->dont_learn) + md.gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md.gbp |= VXLAN_GBP_POLICY_APPLIED; + + flags &= ~VXLAN_GBP_USED_BITS; + } + if (flags || (vni & 0xff)) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. 
This behavior diverges from @@ -108,7 +127,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vs->rcv(vs, skb, vxh->vx_vni); + md.vni = vxh->vx_vni; + vs->rcv(vs, skb, &md); return 0; drop: @@ -186,10 +206,28 @@ static struct sk_buff *handle_offloads(struct sk_buff *skb) return ovs_iptunnel_handle_offloads(skb, false, vxlan_gso); } +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, + struct vxlan_metadata *md) +{ + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + vxh->vx_flags |= htonl(VXLAN_HF_GBP); + + if (md->gbp & VXLAN_GBP_DONT_LEARN) + gbp->dont_learn = 1; + + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) + gbp->policy_applied = 1; + + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} + int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md) { struct vxlanhdr *vxh; struct udphdr *uh; @@ -220,7 +258,10 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; + + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -258,7 +299,7 @@ static void vxlan_del_work(struct work_struct *work) } static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, - vxlan_rcv_t *rcv, void *data) + vxlan_rcv_t *rcv, void *data, u32 flags) { struct vxlan_sock *vs; struct sock *sk; @@ -300,6 +341,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, } vs->rcv = rcv; vs->data = data; + vs->flags = flags; /* Disable multicast loopback */ inet_sk(sk)->mc_loop = 0; @@ -316,7 +358,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, 
void *data, bool no_share, u32 flags) { - return vxlan_socket_create(net, port, rcv, data); + return vxlan_socket_create(net, port, rcv, data, flags); } void vxlan_sock_release(struct vxlan_sock *vs) diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index c9e356e99..c4b4b999f 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -57,8 +57,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) return vport_priv(vport); } -/* Called with rcu_read_lock and BH disabled. */ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; struct vport *vport = vs->data; @@ -67,7 +67,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, key, TUNNEL_KEY, NULL, 0); @@ -145,6 +145,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sport(vxlan_port->vs->sock->sk); + struct vxlan_metadata md = {0}; struct rtable *rt; __be16 src_port; __be32 saddr; @@ -173,13 +174,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8)); + &md); if (err < 0) ip_rt_put(rt); return err; -- 2.20.1