From 85571a3daa67603ce4096c12ba0522acded4deb0 Mon Sep 17 00:00:00 2001 From: Alin Serdean Date: Fri, 11 Dec 2015 19:18:25 +0000 Subject: [PATCH] datapath-windows: Add GRE TEB support for windows datapath This patch introduces the support for GRE TEB (transparent ethernet bridging) for the windows datapath. The GRE support is based on http://tools.ietf.org/html/rfc2890, without taking into account the GRE sequence, and it supports only the GRE protocol type 0x6558 (transparent ethernet bridging) like its linux counterpart. Util.h: define the GRE pool tag Vport.c/h: sort the includes alphabetically add the function OvsFindTunnelVportByPortType which searches the tunnelVportsArray for a given port type Actions.c : sort the includes alphabetically call the GRE encapsulation / decapsulation functions when needed Gre.c/h : add GRE type defines add initialization/cleanup functions add encapsulation / decapsulation functions with software offloads (hardware offloads will be added in a separate patch) support Tested using: PSPING (https://technet.microsoft.com/en-us/sysinternals/psping.aspx) (ICMP, TCP, UDP) with various packet lengths IPERF3 (https://iperf.fr/iperf-download.php) (TCP, UDP) with various options Signed-off-by: Alin Gabriel Serdean Acked-by: Nithin Raju Acked-by: Sorin Vinturis Signed-off-by: Justin Pettit --- FAQ.md | 2 +- datapath-windows/automake.mk | 20 +- datapath-windows/ovsext/Actions.c | 71 ++-- datapath-windows/ovsext/Gre.c | 453 +++++++++++++++++++++++++ datapath-windows/ovsext/Gre.h | 100 ++++++ datapath-windows/ovsext/Util.h | 1 + datapath-windows/ovsext/Vport.c | 40 ++- datapath-windows/ovsext/Vport.h | 13 +- datapath-windows/ovsext/ovsext.vcxproj | 2 + 9 files changed, 657 insertions(+), 45 deletions(-) create mode 100644 datapath-windows/ovsext/Gre.c create mode 100644 datapath-windows/ovsext/Gre.h diff --git a/FAQ.md b/FAQ.md index 22c923ffe..29b2e19f9 100644 --- a/FAQ.md +++ b/FAQ.md @@ -197,7 +197,7 @@ Feature | Linux upstream | Linux OVS tree | 
Userspace | Hyper-V | Connection tracking | 4.3 | 3.10 | NO | NO | Tunnel - LISP | NO | YES | NO | NO | Tunnel - STT | NO | 3.5 | NO | YES | -Tunnel - GRE | 3.11 | YES | YES | NO | +Tunnel - GRE | 3.11 | YES | YES | YES | Tunnel - VXLAN | 3.12 | YES | YES | YES | Tunnel - Geneve | 3.18 | YES | YES | NO | QoS - Policing | YES | YES | NO | NO | diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index ed48c69b4..7f12d92d7 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -4,45 +4,49 @@ EXTRA_DIST += \ datapath-windows/Package/package.VcxProj \ datapath-windows/Package/package.VcxProj.user \ datapath-windows/include/OvsDpInterfaceExt.h \ + datapath-windows/misc/OVS.psm1 \ datapath-windows/misc/install.cmd \ datapath-windows/misc/uninstall.cmd \ - datapath-windows/misc/OVS.psm1 \ datapath-windows/ovsext.sln \ - datapath-windows/ovsext/Datapath.c \ - datapath-windows/ovsext/Datapath.h \ - datapath-windows/ovsext/DpInternal.h\ datapath-windows/ovsext/Actions.c \ datapath-windows/ovsext/Atomic.h \ datapath-windows/ovsext/BufferMgmt.c \ datapath-windows/ovsext/BufferMgmt.h \ datapath-windows/ovsext/Checksum.c \ datapath-windows/ovsext/Checksum.h \ + datapath-windows/ovsext/Datapath.c \ + datapath-windows/ovsext/Datapath.h \ datapath-windows/ovsext/Debug.c \ datapath-windows/ovsext/Debug.h \ + datapath-windows/ovsext/DpInternal.h\ datapath-windows/ovsext/Driver.c \ datapath-windows/ovsext/Ethernet.h \ datapath-windows/ovsext/Event.c \ datapath-windows/ovsext/Event.h \ datapath-windows/ovsext/Flow.c \ datapath-windows/ovsext/Flow.h \ + datapath-windows/ovsext/Gre.h \ + datapath-windows/ovsext/Gre.c \ datapath-windows/ovsext/IpHelper.c \ datapath-windows/ovsext/IpHelper.h \ datapath-windows/ovsext/Jhash.c \ datapath-windows/ovsext/Jhash.h \ + datapath-windows/ovsext/NetProto.h \ datapath-windows/ovsext/Netlink/Netlink.c \ datapath-windows/ovsext/Netlink/Netlink.h \ datapath-windows/ovsext/Netlink/NetlinkBuf.c \ 
datapath-windows/ovsext/Netlink/NetlinkBuf.h \ datapath-windows/ovsext/Netlink/NetlinkError.h \ datapath-windows/ovsext/Netlink/NetlinkProto.h \ - datapath-windows/ovsext/NetProto.h \ datapath-windows/ovsext/Oid.c \ datapath-windows/ovsext/Oid.h \ datapath-windows/ovsext/PacketIO.c \ datapath-windows/ovsext/PacketIO.h \ datapath-windows/ovsext/PacketParser.c \ datapath-windows/ovsext/PacketParser.h \ - datapath-windows/ovsext/Switch.c \ + datapath-windows/ovsext/Stt.c \ + datapath-windows/ovsext/Stt.h \ + datapath-windows/ovsext/Switch.c \ datapath-windows/ovsext/Switch.h \ datapath-windows/ovsext/Tunnel.c \ datapath-windows/ovsext/Tunnel.h \ @@ -51,13 +55,11 @@ EXTRA_DIST += \ datapath-windows/ovsext/Types.h \ datapath-windows/ovsext/User.c \ datapath-windows/ovsext/User.h \ - datapath-windows/ovsext/Util.c \ + datapath-windows/ovsext/Util.c \ datapath-windows/ovsext/Util.h \ datapath-windows/ovsext/Vport.c \ datapath-windows/ovsext/Vport.h \ datapath-windows/ovsext/Vxlan.c \ - datapath-windows/ovsext/Stt.h \ - datapath-windows/ovsext/Stt.c \ datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index f6c029b12..c113a84eb 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -16,16 +16,17 @@ #include "precomp.h" -#include "Switch.h" -#include "Vport.h" +#include "Checksum.h" #include "Event.h" -#include "User.h" -#include "NetProto.h" #include "Flow.h" -#include "Vxlan.h" -#include "Stt.h" -#include "Checksum.h" +#include "Gre.h" +#include "NetProto.h" #include "PacketIO.h" +#include "Stt.h" +#include "Switch.h" +#include "User.h" +#include "Vport.h" +#include "Vxlan.h" #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD @@ -34,6 +35,8 @@ #include "Debug.h" typedef struct _OVS_ACTION_STATS { + UINT64 rxGre; + UINT64 txGre; UINT64 rxVxlan; UINT64 txVxlan; UINT64 rxStt; @@ -205,27 +208,35 @@ 
OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, /* XXX: we should also check for the length of the UDP payload to pick * packets only if they are at least VXLAN header size. */ - if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_UDP) { - UINT16 dstPort = ntohs(flowKey->ipKey.l4.tpDst); - tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort, - OVS_VPORT_TYPE_VXLAN); - if (tunnelVport) { - ovsActionStats.rxVxlan++; - } - } else if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_TCP) { + if (!flowKey->ipKey.nwFrag) { UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); - tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort, - OVS_VPORT_TYPE_STT); - if (tunnelVport) { - ovsActionStats.rxStt++; + switch (flowKey->ipKey.nwProto) { + case IPPROTO_GRE: + tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext, + OVS_VPORT_TYPE_GRE); + if (tunnelVport) { + ovsActionStats.rxGre++; + } + break; + case IPPROTO_TCP: + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_STT); + if (tunnelVport) { + ovsActionStats.rxStt++; + } + break; + case IPPROTO_UDP: + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_VXLAN); + if (tunnelVport) { + ovsActionStats.rxVxlan++; + } + break; } } - // We might get tunnel packets even before the tunnel gets initialized. if (tunnelVport) { ASSERT(ovsFwdCtx->tunnelRxNic == NULL); @@ -306,6 +317,9 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, /* Tunnel the packet only if tunnel context is set. */ if (ovsFwdCtx->tunKey.dst != 0) { switch(dstVport->ovsType) { + case OVS_VPORT_TYPE_GRE: + ovsActionStats.txGre++; + break; case OVS_VPORT_TYPE_VXLAN: ovsActionStats.txVxlan++; break; @@ -652,6 +666,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) /* Do the encap. Encap function does not consume the NBL. 
*/ switch(ovsFwdCtx->tunnelTxNic->ovsType) { + case OVS_VPORT_TYPE_GRE: + status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, + &ovsFwdCtx->layers, &newNbl); + break; case OVS_VPORT_TYPE_VXLAN: status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, @@ -724,6 +743,10 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) */ switch(tunnelRxVport->ovsType) { + case OVS_VPORT_TYPE_GRE: + status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; case OVS_VPORT_TYPE_VXLAN: status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, &newNbl); diff --git a/datapath-windows/ovsext/Gre.c b/datapath-windows/ovsext/Gre.c new file mode 100644 index 000000000..3ebfda3f8 --- /dev/null +++ b/datapath-windows/ovsext/Gre.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2015 Cloudbase Solutions Srl + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#include "Atomic.h" +#include "Checksum.h" +#include "Flow.h" +#include "Gre.h" +#include "IpHelper.h" +#include "NetProto.h" +#include "PacketIO.h" +#include "PacketParser.h" +#include "Switch.h" +#include "User.h" +#include "Util.h" +#include "Vport.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_GRE +#include "Debug.h" + +static NDIS_STATUS +OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl); + +/* + * -------------------------------------------------------------------------- + * OvsInitGreTunnel -- + * Initialize GRE tunnel module. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsInitGreTunnel(POVS_VPORT_ENTRY vport) +{ + POVS_GRE_VPORT grePort; + + grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort), + OVS_GRE_POOL_TAG); + if (!grePort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(grePort, sizeof(*grePort)); + vport->priv = (PVOID)grePort; + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCleanupGreTunnel -- + * Cleanup GRE Tunnel module. + * -------------------------------------------------------------------------- + */ +void +OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVS_VPORT_TYPE_GRE || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG); + vport->priv = NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsEncapGre -- + * Encapsulates a packet with an GRE header. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + OVS_FWD_INFO fwdInfo; + NDIS_STATUS status; + + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + return NDIS_STATUS_FAILURE; + } + + status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers, + switchContext, newNbl); + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDoEncapGre -- + * Internal utility function which actually does the GRE encap. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status; + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + IPHdr *ipHdr; + PGREHdr greHdr; + POVS_GRE_VPORT vportGre; + UINT32 headRoom = GreTunHdrSize(tunKey->flags); +#if DBG + UINT32 counterHeadRoom; +#endif + UINT32 packetLength; + ULONG mss = 0; + ASSERT(*newNbl == NULL); + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + switch (tsoInfo.Transmit.Type) { + case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE: + mss = tsoInfo.LsoV1Transmit.MSS; + break; + case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE: + mss = tsoInfo.LsoV2Transmit.MSS; + break; + default: + OVS_LOG_ERROR("Unknown LSO transmit type:%d", + 
tsoInfo.Transmit.Type); + return NDIS_STATUS_FAILURE; + } + OVS_LOG_TRACE("MSS %u packet len %u", mss, + packetLength); + if (mss) { + OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); + *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, + mss, headRoom); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to segment NBL"); + return NDIS_STATUS_FAILURE; + } + /* Clear out LSO flags after this point */ + NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0; + } + } + + vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport); + ASSERT(vportGre); + + /* If we didn't split the packet above, make a copy now */ + if (*newNbl == NULL) { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + /* + * To this point we do not have GRE hardware offloading. + * Apply defined checksums + */ + curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + + if (layers->isIPv4) { + IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset); + + if (csumInfo.Transmit.IpHeaderChecksum) { + ip->check = 0; + ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0); + } + + if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); + tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_TCP, csumLength); + tcp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { + UINT16 
csumLength = (UINT16)(packetLength - layers->l4Offset); + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr, + IPPROTO_UDP, csumLength); + udp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } + } else if (layers->isIPv6) { + IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset); + + if (layers->isTcp && csumInfo.Transmit.TcpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset); + tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, + (UINT32 *) &ip->daddr, + IPPROTO_TCP, csumLength); + tcp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) { + UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset); + UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip); + udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr, + (UINT32 *) &ip->daddr, + IPPROTO_UDP, csumLength); + udp->check = CalculateChecksumNB(curNb, csumLength, + (UINT32)(layers->l4Offset)); + } + } + /* Clear out TcpIpChecksumNetBufferListInfo flag */ + NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0; + } + + curNbl = *newNbl; + for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; + curNb = curNb->Next) { +#if DBG + counterHeadRoom = headRoom; +#endif + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, + LowPagePriority); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nb length %u next %u", + NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr 
*)bufferStart; + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); + ethHdr->Type = htons(ETH_TYPE_IPV4); +#if DBG + counterHeadRoom -= sizeof *ethHdr; +#endif + + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPPROTO_IPV4; + ipHdr->tos = tunKey->tos; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = (uint16)atomic_add64(&vportGre->ipId, + NET_BUFFER_DATA_LENGTH(curNb)); + ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64; + ipHdr->protocol = IPPROTO_GRE; + ASSERT(tunKey->dst == fwdInfo->dstIpAddr); + ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); + ipHdr->saddr = fwdInfo->srcIpAddr; + ipHdr->daddr = fwdInfo->dstIpAddr; + + ipHdr->check = 0; + ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); +#if DBG + counterHeadRoom -= sizeof *ipHdr; +#endif + + /* GRE header */ + greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags); + greHdr->protocolType = GRE_NET_TEB; +#if DBG + counterHeadRoom -= sizeof *greHdr; +#endif + + PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr; + + if (tunKey->flags & OVS_TNL_F_CSUM) { + RtlZeroMemory(currentOffset, 4); + currentOffset += 4; +#if DBG + counterHeadRoom -= 4; +#endif + } + + if (tunKey->flags & OVS_TNL_F_KEY) { + RtlZeroMemory(currentOffset, 4); + UINT32 key = (tunKey->tunnelId >> 32); + RtlCopyMemory(currentOffset, &key, sizeof key); + currentOffset += 4; +#if DBG + counterHeadRoom -= 4; +#endif + } + +#if DBG + ASSERT(counterHeadRoom == 0); +#endif + + } + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +NDIS_STATUS 
+OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + PNET_BUFFER curNb; + PMDL curMdl; + EthHdr *ethHdr; + IPHdr *ipHdr; + GREHdr *greHdr; + UINT32 tunnelSize = 0, packetLength = 0; + UINT32 headRoom = 0; + PUINT8 bufferStart; + NDIS_STATUS status; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + tunnelSize = GreTunHdrSize(tunKey->flags); + if (packetLength <= tunnelSize) { + return NDIS_STATUS_INVALID_LENGTH; + } + + /* + * Create a copy of the NBL so that we have all the headers in one MDL. + */ + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, + tunnelSize + OVS_DEFAULT_COPY_SIZE, 0, + TRUE /*copy NBL info */); + + if (*newNbl == NULL) { + return NDIS_STATUS_RESOURCES; + } + + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto dropNbl; + } + + ethHdr = (EthHdr *)bufferStart; + headRoom += sizeof *ethHdr; + + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + tunKey->src = ipHdr->saddr; + tunKey->dst = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + headRoom += sizeof *ipHdr; + + greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + headRoom += sizeof *greHdr; + + /* Validate if GRE header protocol type. 
*/ + if (greHdr->protocolType != GRE_NET_TEB) { + status = STATUS_NDIS_INVALID_PACKET; + goto dropNbl; + } + + PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr; + + if (greHdr->flags & GRE_CSUM) { + tunKey->flags |= OVS_TNL_F_CSUM; + currentOffset += 4; + headRoom += 4; + } + + if (greHdr->flags & GRE_KEY) { + tunKey->flags |= OVS_TNL_F_KEY; + UINT32 key = 0; + RtlCopyMemory(&key, currentOffset, 4); + tunKey->tunnelId = (UINT64)key << 32; + currentOffset += 4; + headRoom += 4; + } + + /* Clear out the receive flag for the inner packet. */ + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE, + NULL); + ASSERT(headRoom == GreTunHdrSize(tunKey->flags)); + return NDIS_STATUS_SUCCESS; + +dropNbl: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} diff --git a/datapath-windows/ovsext/Gre.h b/datapath-windows/ovsext/Gre.h new file mode 100644 index 000000000..d2472d91a --- /dev/null +++ b/datapath-windows/ovsext/Gre.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2015 Cloudbase Solutions Srl + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __GRE_H_ +#define __GRE_H_ 1 + +#include "NetProto.h" +#include "Flow.h" + +typedef struct _OVS_GRE_VPORT { + UINT64 ipId; + /* + * To be filled + */ +} OVS_GRE_VPORT, *POVS_GRE_VPORT; + + +/* GRE RFC 2890 header based on http://tools.ietf.org/html/rfc2890 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |C| |K|S| Reserved0 | Ver | Protocol Type | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Checksum (optional) | Reserved1 (Optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Key (optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number (Optional) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +typedef struct GREHdr { + UINT16 flags; + UINT16 protocolType; +} GREHdr, *PGREHdr; + +/* Transparent Ethernet Bridging */ +#define GRE_NET_TEB 0x5865 +/* GRE Flags*/ +#define GRE_CSUM 0x0080 +#define GRE_KEY 0x0020 + +NTSTATUS OvsInitGreTunnel(POVS_VPORT_ENTRY vport); + +VOID OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport); + + +void OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport); + +NDIS_STATUS OvsEncapGre(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl); + +NDIS_STATUS OvsDecapGre(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); + +static __inline UINT16 +OvsTunnelFlagsToGreFlags(UINT16 tunnelflags) +{ + UINT16 flags = 0; + + if (tunnelflags & OVS_TNL_F_CSUM) + flags |= GRE_CSUM; + + if (tunnelflags & OVS_TNL_F_KEY) + flags |= GRE_KEY; + + return flags; +} + +static __inline UINT32 +GreTunHdrSize(UINT16 flags) +{ + UINT32 sum = sizeof(EthHdr) + sizeof(IPHdr) + sizeof(GREHdr); + sum += (flags & OVS_TNL_F_CSUM) ? 
+ 4 : 0; + sum += (flags & OVS_TNL_F_KEY) ? + 4 : 0; + + return sum; +} + +#endif /*__GRE_H_ */ diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h index e5ba72bd8..a81c723b0 100644 --- a/datapath-windows/ovsext/Util.h +++ b/datapath-windows/ovsext/Util.h @@ -34,6 +34,7 @@ #define OVS_USER_POOL_TAG 'USVO' #define OVS_VPORT_POOL_TAG 'PSVO' #define OVS_STT_POOL_TAG 'RSVO' +#define OVS_GRE_POOL_TAG 'GSVO' #define OVS_TUNFLT_POOL_TAG 'WSVO' VOID *OvsAllocateMemory(size_t size); diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index a7576d353..7b0103d6b 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -15,16 +15,18 @@ */ #include "precomp.h" + +#include "Datapath.h" +#include "Event.h" +#include "Gre.h" +#include "IpHelper.h" #include "Jhash.h" +#include "Oid.h" +#include "Stt.h" #include "Switch.h" -#include "Vport.h" -#include "Event.h" #include "User.h" +#include "Vport.h" #include "Vxlan.h" -#include "Stt.h" -#include "IpHelper.h" -#include "Oid.h" -#include "Datapath.h" #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD @@ -700,6 +702,24 @@ OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, return NULL; } +POVS_VPORT_ENTRY +OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext, + OVS_VPORT_TYPE ovsPortType) +{ + POVS_VPORT_ENTRY vport; + PLIST_ENTRY head, link; + UINT16 dstPort = 0; + UINT32 hash = OvsJhashBytes((const VOID *)&dstPort, sizeof(dstPort), + OVS_HASH_BASIS); + head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]); + LIST_FORALL(head, link) { + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink); + if (vport->ovsType == ovsPortType) { + return vport; + } + } + return NULL; +} POVS_VPORT_ENTRY OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, @@ -983,6 +1003,7 @@ OvsInitTunnelVport(PVOID userContext, vport->ovsState = OVS_STATE_PORT_CREATED; switch (ovsType) { case OVS_VPORT_TYPE_GRE: + status = OvsInitGreTunnel(vport); 
break; case OVS_VPORT_TYPE_VXLAN: { @@ -1153,6 +1174,7 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, UINT32 hash; switch(vport->ovsType) { + case OVS_VPORT_TYPE_GRE: case OVS_VPORT_TYPE_VXLAN: case OVS_VPORT_TYPE_STT: { @@ -1242,6 +1264,7 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, OvsCleanupSttTunnel(vport); break; case OVS_VPORT_TYPE_GRE: + OvsCleanupGreTunnel(vport); break; case OVS_VPORT_TYPE_NETDEV: if (vport->isExternal) { @@ -1299,7 +1322,8 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, RemoveEntryList(&vport->portNoLink); InitializeListHead(&vport->portNoLink); if (OVS_VPORT_TYPE_VXLAN == vport->ovsType || - OVS_VPORT_TYPE_STT == vport->ovsType) { + OVS_VPORT_TYPE_STT == vport->ovsType || + OVS_VPORT_TYPE_GRE == vport->ovsType) { RemoveEntryList(&vport->tunnelVportLink); InitializeListHead(&vport->tunnelVportLink); } @@ -2190,6 +2214,8 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT16 transportPortDest = 0; switch (portType) { + case OVS_VPORT_TYPE_GRE: + break; case OVS_VPORT_TYPE_VXLAN: transportPortDest = VXLAN_UDP_PORT; break; diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index e9f3b0389..373896ddd 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -17,9 +17,10 @@ #ifndef __VPORT_H_ #define __VPORT_H_ 1 +#include "Gre.h" +#include "Stt.h" #include "Switch.h" #include "VxLan.h" -#include "Stt.h" #define OVS_MAX_DPPORTS MAXUINT16 #define OVS_DPPORT_NUMBER_INVALID OVS_MAX_DPPORTS @@ -147,6 +148,8 @@ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchConte POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, UINT16 dstPort, OVS_VPORT_TYPE ovsVportType); +POVS_VPORT_ENTRY OvsFindTunnelVportByPortType(POVS_SWITCH_CONTEXT switchContext, + OVS_VPORT_TYPE ovsPortType); NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); NDIS_STATUS OvsInitConfiguredSwitchNics(struct 
_OVS_SWITCH_CONTEXT *switchContext); @@ -256,16 +259,18 @@ GetPortFromPriv(POVS_VPORT_ENTRY vport) /* XXX would better to have a commom tunnel "parent" structure */ ASSERT(vportPriv); switch(vport->ovsType) { - case OVS_VPORT_TYPE_VXLAN: - dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + case OVS_VPORT_TYPE_GRE: break; case OVS_VPORT_TYPE_STT: dstPort = ((POVS_STT_VPORT)vportPriv)->dstPort; break; + case OVS_VPORT_TYPE_VXLAN: + dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + break; default: ASSERT(! "Port is not a tunnel port"); } - ASSERT(dstPort); + ASSERT(dstPort || vport->ovsType == OVS_VPORT_TYPE_GRE); return dstPort; } diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 616f68860..231ac83b7 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -80,6 +80,7 @@ + @@ -172,6 +173,7 @@ + -- 2.20.1