From: Eitan Eliahu Date: Thu, 11 Jun 2015 13:35:54 +0000 (-0700) Subject: datapath-windows: Stateless TCP Tunnelling protocol - Initial implementation X-Git-Tag: v2.4.0~109 X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=022c20408192a6c35f8f629411b07c13250e9682 datapath-windows: Stateless TCP Tunnelling protocol - Initial implementation This change includes an initial implementation of STT. The following should be added: [1] Checksum offload (SW and HW) [2] LSO (SW and HW) [3] IP layer WFP callout for IP segments Added support for multiple (per TCP port) STT ports Testing: link layer connection through ping works. File transfer. Signed-off-by: Eitan Eliahu Co-authored-by: Saurabh Shah Signed-off-by: Saurabh Shah Acked-by: Nithin Raju Signed-off-by: Ben Pfaff --- diff --git a/INSTALL.Windows.md b/INSTALL.Windows.md index 3171e47d7..6d870edd6 100644 --- a/INSTALL.Windows.md +++ b/INSTALL.Windows.md @@ -386,29 +386,31 @@ Hyper-Vs. The following examples demonstrate how it can be done: % ovs-vsctl add-port br-int ovs-port-a tag=900 % ovs-vsctl add-port br-int ovs-port-b tag=900 -Steps to add VXLAN tunnels +Steps to add tunnels -------------------------- -The Windows Open vSwitch implementation support VXLAN tunnels. To add VXLAN +The Windows Open vSwitch implementation support VXLAN and STT tunnels. To add tunnels, the following steps serve as examples. Note that, any patch ports created between br-int and br-pif MUST be beleted -prior to adding VXLAN tunnels. 
- -01> Add the vxlan port between 172.168.201.101 <-> 172.168.201.102 - % ovs-vsctl add-port br-int vxlan-1 - % ovs-vsctl set Interface vxlan-1 type=vxlan - % ovs-vsctl set Interface vxlan-1 options:local_ip=172.168.201.101 - % ovs-vsctl set Interface vxlan-1 options:remote_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-1 options:in_key=flow - % ovs-vsctl set Interface vxlan-1 options:out_key=flow - -02> Add the vxlan port between 172.168.201.101 <-> 172.168.201.105 - % ovs-vsctl add-port br-int vxlan-2 - % ovs-vsctl set Interface vxlan-2 type=vxlan - % ovs-vsctl set Interface vxlan-2 options:local_ip=172.168.201.102 - % ovs-vsctl set Interface vxlan-2 options:remote_ip=172.168.201.105 - % ovs-vsctl set Interface vxlan-2 options:in_key=flow - % ovs-vsctl set Interface vxlan-2 options:out_key=flow +prior to adding tunnels. + +01> Add the tunnel port between 172.168.201.101 <-> 172.168.201.102 + % ovs-vsctl add-port br-int tun-1 + % ovs-vsctl set Interface tun-1 type=port-type + % ovs-vsctl set Interface tun-1 options:local_ip=172.168.201.101 + % ovs-vsctl set Interface tun-1 options:remote_ip=172.168.201.102 + % ovs-vsctl set Interface tun-1 options:in_key=flow + % ovs-vsctl set Interface tun-1 options:out_key=flow + +02> Add the tunnel port between 172.168.201.101 <-> 172.168.201.105 + % ovs-vsctl add-port br-int tun-2 + % ovs-vsctl set Interface tun-2 type=port-type + % ovs-vsctl set Interface tun-2 options:local_ip=172.168.201.102 + % ovs-vsctl set Interface tun-2 options:remote_ip=172.168.201.105 + % ovs-vsctl set Interface tun-2 options:in_key=flow + % ovs-vsctl set Interface tun-2 options:out_key=flow + + Where port-type is the string stt or vxlan Requirements diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 9324b3c27..a4f5a576d 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -56,6 +56,8 @@ EXTRA_DIST += \ datapath-windows/ovsext/Vport.c \ datapath-windows/ovsext/Vport.h \ 
datapath-windows/ovsext/Vxlan.c \ + datapath-windows/ovsext/Stt.h \ + datapath-windows/ovsext/Stt.c \ datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 79e464c80..d75949ca4 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -23,6 +23,7 @@ #include "NetProto.h" #include "Flow.h" #include "Vxlan.h" +#include "Stt.h" #include "Checksum.h" #include "PacketIO.h" @@ -35,6 +36,8 @@ typedef struct _OVS_ACTION_STATS { UINT64 rxVxlan; UINT64 txVxlan; + UINT64 rxStt; + UINT64 txStt; UINT64 flowMiss; UINT64 flowUserspace; UINT64 txTcp; @@ -184,9 +187,6 @@ OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx, } /* - * XXX: When we search for the tunnelVport we also need to specify the - * tunnelling protocol or the L4 protocol as key as well, because there are - * different protocols that can use the same destination port. * -------------------------------------------------------------------------- * OvsDetectTunnelRxPkt -- * Utility function for an RX packet to detect its tunnel type. @@ -209,14 +209,27 @@ OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, flowKey->ipKey.nwProto == IPPROTO_UDP) { UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, - dstPort); + dstPort, + OVS_VPORT_TYPE_VXLAN); + if (tunnelVport) { + ovsActionStats.rxVxlan++; + } + } else if (!flowKey->ipKey.nwFrag && + flowKey->ipKey.nwProto == IPPROTO_TCP) { + UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst); + tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext, + dstPort, + OVS_VPORT_TYPE_STT); + if (tunnelVport) { + ovsActionStats.rxStt++; + } } + // We might get tunnel packets even before the tunnel gets initialized. 
if (tunnelVport) { ASSERT(ovsFwdCtx->tunnelRxNic == NULL); ovsFwdCtx->tunnelRxNic = tunnelVport; - ovsActionStats.rxVxlan++; return TRUE; } @@ -292,6 +305,14 @@ OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, /* Tunnel the packet only if tunnel context is set. */ if (ovsFwdCtx->tunKey.dst != 0) { + switch(dstVport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + ovsActionStats.txVxlan++; + break; + case OVS_VPORT_TYPE_STT: + ovsActionStats.txStt++; + break; + } ovsActionStats.txVxlan++; ovsFwdCtx->tunnelTxNic = dstVport; } @@ -616,10 +637,11 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) * Setup the source port to be the internal port to as to facilitate the * second OvsLookupFlow. */ - if (ovsFwdCtx->switchContext->internalVport == NULL) { + if (ovsFwdCtx->switchContext->internalVport == NULL || + ovsFwdCtx->switchContext->virtualExternalVport == NULL) { OvsClearTunTxCtx(ovsFwdCtx); OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped since internal port is absent"); + L"OVS-Dropped since either internal or external port is absent"); return NDIS_STATUS_FAILURE; } ovsFwdCtx->srcVportNo = @@ -634,9 +656,14 @@ OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) case OVS_VPORT_TYPE_VXLAN: status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext, - (VOID *)ovsFwdCtx->completionList, &ovsFwdCtx->layers, &newNbl); break; + case OVS_VPORT_TYPE_STT: + status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, + ovsFwdCtx->switchContext, + &ovsFwdCtx->layers, &newNbl); + break; default: ASSERT(! "Tx: Unhandled tunnel type"); } @@ -692,14 +719,19 @@ OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) goto dropNbl; } + /* + * Decap port functions should return a new NBL if it was copied, and + * this new NBL should be setup as the ovsFwdCtx->curNbl. 
+ */ + switch(tunnelRxVport->ovsType) { case OVS_VPORT_TYPE_VXLAN: - /* - * OvsDoDecapVxlan should return a new NBL if it was copied, and - * this new NBL should be setup as the ovsFwdCtx->curNbl. - */ - status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - &ovsFwdCtx->tunKey, &newNbl); + status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; + case OVS_VPORT_TYPE_STT: + status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); break; default: OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", @@ -1318,13 +1350,11 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, case OVS_KEY_ATTR_TUNNEL: { OvsIPv4TunnelKey tunKey; - - status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); + status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); ASSERT(status == NDIS_STATUS_SUCCESS); tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); tunKey.dst_port = key->ipKey.l4.tpDst; RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); - break; } case OVS_KEY_ATTR_SKB_MARK: diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h index a0da5eba3..4b7b52651 100644 --- a/datapath-windows/ovsext/Debug.h +++ b/datapath-windows/ovsext/Debug.h @@ -40,6 +40,7 @@ #define OVS_DBG_OTHERS BIT32(21) #define OVS_DBG_NETLINK BIT32(22) #define OVS_DBG_TUNFLT BIT32(23) +#define OVS_DBG_STT BIT32(24) #define OVS_DBG_RESERVED BIT32(31) //Please add above OVS_DBG_RESERVED. diff --git a/datapath-windows/ovsext/Stt.c b/datapath-windows/ovsext/Stt.c new file mode 100644 index 000000000..1f708c843 --- /dev/null +++ b/datapath-windows/ovsext/Stt.c @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "NetProto.h" +#include "Switch.h" +#include "Vport.h" +#include "Flow.h" +#include "Stt.h" +#include "IpHelper.h" +#include "Checksum.h" +#include "User.h" +#include "PacketIO.h" +#include "Flow.h" +#include "PacketParser.h" +#include "Atomic.h" +#include "Util.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_STT +#include "Debug.h" + +static NDIS_STATUS +OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl); + +/* + * -------------------------------------------------------------------------- + * OvsInitSttTunnel -- + * Initialize STT tunnel module. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 tcpDestPort) +{ + POVS_STT_VPORT sttPort; + + sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort), + OVS_STT_POOL_TAG); + if (!sttPort) { + OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT"); + return STATUS_INSUFFICIENT_RESOURCES; + } + + RtlZeroMemory(sttPort, sizeof(*sttPort)); + sttPort->dstPort = tcpDestPort; + vport->priv = (PVOID) sttPort; + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCleanupSttTunnel -- + * Cleanup STT Tunnel module. 
+ * -------------------------------------------------------------------------- + */ +void +OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVS_VPORT_TYPE_STT || + vport->priv == NULL) { + return; + } + + OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG); + vport->priv = NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsEncapStt -- + * Encapsulates a packet with an STT header. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + OVS_FWD_INFO fwdInfo; + NDIS_STATUS status; + + UNREFERENCED_PARAMETER(switchContext); + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + /* + * XXX This case where the ARP table is not populated is + * currently not handled + */ + return NDIS_STATUS_FAILURE; + } + + status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers, switchContext, + newNbl); + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDoEncapStt -- + * Internal utility function which actually does the STT encap. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + const OvsIPv4TunnelKey *tunKey, + const POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PMDL curMdl = NULL; + PNET_BUFFER curNb; + PUINT8 buf = NULL; + EthHdr *outerEthHdr; + IPHdr *outerIpHdr; + TCPHdr *outerTcpHdr; + SttHdr *sttHdr; + UINT32 innerFrameLen, ipTotalLen; + POVS_STT_VPORT vportStt; + UINT32 headRoom = OvsGetSttTunHdrSize(); + UINT32 tcpChksumLen; + + UNREFERENCED_PARAMETER(layers); + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo; + + lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + if (lsoInfo.LsoV1Transmit.MSS) { + /* XXX We don't handle LSO yet */ + OVS_LOG_ERROR("LSO on STT is not supported"); + return NDIS_STATUS_FAILURE; + } + } + + vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport); + ASSERT(vportStt); + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*copy NblInfo*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + /* NB Chain should be split before */ + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb); + /* + * External port can't be removed as we hold the dispatch lock + * We also check if the external port was removed beforecalling + * port encapsulation functions + */ + if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) { + OVS_LOG_ERROR("Packet too large (size %d, mtu %d). 
Can't encapsulate", + innerFrameLen, OvsGetExternalMtu(switchContext)); + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)"); + OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)"); + goto ret_error; + } + + /* + * Make sure that the headroom for the tunnel header is continguous in + * memory. + */ + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb)) + >= (int) headRoom); + + buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!buf) { + ASSERT(!"MmGetSystemAddressForMdlSafe failed"); + OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed"); + status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + outerEthHdr = (EthHdr *)buf; + outerIpHdr = (IPHdr *) (outerEthHdr + 1); + outerTcpHdr = (TCPHdr *) (outerIpHdr + 1); + sttHdr = (SttHdr *) (outerTcpHdr + 1); + + /* L2 header */ + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr, + sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source); + outerEthHdr->Type = htons(ETH_TYPE_IPV4); + + /* L3 header */ + outerIpHdr->ihl = sizeof(IPHdr) >> 2; + outerIpHdr->version = IPPROTO_IPV4; + outerIpHdr->tos = tunKey->tos; + + ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + outerIpHdr->tot_len = htons(ipTotalLen); + ASSERT(ipTotalLen < 65536); + + outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen); + outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ? + IP_DF_NBO : 0; + outerIpHdr->ttl = tunKey->ttl? 
tunKey->ttl : 64; + outerIpHdr->protocol = IPPROTO_TCP; + outerIpHdr->check = 0; + outerIpHdr->saddr = fwdInfo->srcIpAddr; + outerIpHdr->daddr = tunKey->dst; + outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0); + + /* L4 header */ + RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr); + outerTcpHdr->source = htons(tunKey->flow_hash | 32768); + outerTcpHdr->dest = htons(vportStt->dstPort); + outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) << + STT_SEQ_LEN_SHIFT); + outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo)); + outerTcpHdr->doff = sizeof(TCPHdr) >> 2; + outerTcpHdr->psh = 1; + outerTcpHdr->ack = 1; + outerTcpHdr->window = (uint16) ~0; + + /* Calculate pseudo header chksum */ + tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen; + ASSERT(tcpChksumLen < 65535); + outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst, + IPPROTO_TCP, (uint16) tcpChksumLen); + sttHdr->version = 0; + + /* XXX need to peek into the inner packet, hard code for now */ + sttHdr->flags = STT_PROTO_IPV4; + sttHdr->l4Offset = 0; + + sttHdr->reserved = 0; + /* XXX Used for large TCP packets.Not sure how it is used, clarify */ + sttHdr->mss = 0; + sttHdr->vlanTCI = 0; + sttHdr->key = tunKey->tunnelId; + /* Zero out stt padding */ + *(uint16 *)(sttHdr + 1) = 0; + + /* Calculate software tcp checksum */ + outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen, + sizeof(EthHdr) + sizeof(IPHdr)); + if (outerTcpHdr->check == 0) { + status = NDIS_STATUS_FAILURE; + goto ret_error; + } + + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsDecapStt -- + * Decapsulates an STT packet. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + PNET_BUFFER curNb; + IPHdr *ipHdr; + char *ipBuf[sizeof(IPHdr)]; + SttHdr *sttHdr; + char *sttBuf[STT_HDR_LEN]; + UINT32 advanceCnt, hdrLen; + + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL); + + if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) { + OVS_LOG_ERROR("Packet length received is less than the tunnel header:" + " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize()); + return NDIS_STATUS_INVALID_LENGTH; + } + + /* Skip Eth header */ + hdrLen = sizeof(EthHdr); + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt = hdrLen; + + ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf, + 1 /*no align*/, 0); + ASSERT(ipHdr); + + /* Skip IP & TCP headers */ + hdrLen = sizeof(IPHdr) + sizeof(TCPHdr), + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + /* STT Header */ + sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf, + 1 /*no align*/, 0); + ASSERT(sttHdr); + + /* Initialize the tunnel key */ + tunKey->dst = ipHdr->daddr; + tunKey->src = ipHdr->saddr; + tunKey->tunnelId = sttHdr->key; + tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY); + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + + /* Skip stt header, DataOffset points to inner pkt now. 
*/ + hdrLen = STT_HDR_LEN; + NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL); + advanceCnt += hdrLen; + + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE, + 0, FALSE /*copy NBL info*/); + + ASSERT(advanceCnt == OvsGetSttTunHdrSize()); + status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL); + + if (*newNbl == NULL) { + OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL"); + status = NDIS_STATUS_RESOURCES; + } + + return status; +} diff --git a/datapath-windows/ovsext/Stt.h b/datapath-windows/ovsext/Stt.h new file mode 100644 index 000000000..38d721c49 --- /dev/null +++ b/datapath-windows/ovsext/Stt.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2015 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OVS_STT_H_ +#define __OVS_STT_H_ 1 + +#define STT_TCP_PORT 7471 +#define STT_TCP_PORT_NBO 0x2f1d + +#define MAX_IP_TOTAL_LEN 65535 + +// STT defines. 
+#define STT_SEQ_LEN_SHIFT 16 +#define STT_SEQ_OFFSET_MASK ((1 << STT_SEQ_LEN_SHIFT) - 1) +#define STT_FRAME_LEN(seq) ((seq) >> STT_SEQ_LEN_SHIFT) +#define STT_SEGMENT_OFF(seq) ((seq) & STT_SEQ_OFFSET_MASK) + +#define STT_CSUM_VERIFIED (1 << 0) +#define STT_CSUM_PARTIAL (1 << 1) +#define STT_PROTO_IPV4 (1 << 2) +#define STT_PROTO_TCP (1 << 3) +#define STT_PROTO_TYPES (STT_PROTO_IPV4 | STT_PROTO_TCP) + +#define STT_ETH_PAD 2 +typedef struct SttHdr { + UINT8 version; + UINT8 flags; + UINT8 l4Offset; + UINT8 reserved; + UINT16 mss; + UINT16 vlanTCI; + UINT64 key; +} SttHdr, *PSttHdr; + +#define STT_HDR_LEN (sizeof(SttHdr) + STT_ETH_PAD) + +typedef struct _OVS_STT_VPORT { + UINT16 dstPort; + UINT64 ackNo; + UINT64 ipId; + + UINT64 inPkts; + UINT64 outPkts; + UINT64 slowInPkts; + UINT64 slowOutPkts; +} OVS_STT_VPORT, *POVS_STT_VPORT; + +NTSTATUS OvsInitSttTunnel(POVS_VPORT_ENTRY vport, + UINT16 udpDestPort); + +VOID OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + + +void OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport); + +NDIS_STATUS OvsEncapStt(POVS_VPORT_ENTRY vport, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl); + + +NDIS_STATUS OvsDecapStt(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); + +static __inline UINT32 +OvsGetSttTunHdrSize(VOID) +{ + return sizeof (EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr) + + STT_HDR_LEN; +} + +#endif /*__OVS_STT_H_ */ diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h index 8e1eb5f2c..3bc20eee4 100644 --- a/datapath-windows/ovsext/Switch.h +++ b/datapath-windows/ovsext/Switch.h @@ -218,6 +218,4 @@ OvsAcquireSwitchContext(VOID); VOID OvsReleaseSwitchContext(POVS_SWITCH_CONTEXT switchContext); -PVOID OvsGetExternalVport(); - #endif /* __SWITCH_H_ */ diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index 
002f18024..ffb9c011d 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -286,7 +286,8 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; vport = OvsFindTunnelVportByDstPort(gOvsSwitchContext, - htons(tunnelKey.dst_port)); + htons(tunnelKey.dst_port), + OVS_VPORT_TYPE_VXLAN); if (vport == NULL){ status = STATUS_UNSUCCESSFUL; diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h index 9a0124245..ee676fa71 100644 --- a/datapath-windows/ovsext/Util.h +++ b/datapath-windows/ovsext/Util.h @@ -33,6 +33,7 @@ #define OVS_SWITCH_POOL_TAG 'SSVO' #define OVS_USER_POOL_TAG 'USVO' #define OVS_VPORT_POOL_TAG 'PSVO' +#define OVS_STT_POOL_TAG 'TSVO' VOID *OvsAllocateMemory(size_t size); VOID *OvsAllocateMemoryWithTag(size_t size, ULONG tag); diff --git a/datapath-windows/ovsext/Vport.c b/datapath-windows/ovsext/Vport.c index 5a1b64f18..913954561 100644 --- a/datapath-windows/ovsext/Vport.c +++ b/datapath-windows/ovsext/Vport.c @@ -21,6 +21,7 @@ #include "Event.h" #include "User.h" #include "Vxlan.h" +#include "Stt.h" #include "IpHelper.h" #include "Oid.h" #include "Datapath.h" @@ -602,7 +603,8 @@ OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, - UINT16 dstPort) + UINT16 dstPort, + OVS_VPORT_TYPE ovsPortType) { POVS_VPORT_ENTRY vport; PLIST_ENTRY head, link; @@ -611,7 +613,8 @@ OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, head = &(switchContext->tunnelVportsArray[hash & OVS_VPORT_MASK]); LIST_FORALL(head, link) { vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, tunnelVportLink); - if (((POVS_VXLAN_VPORT)vport->priv)->dstPort == dstPort) { + if (GetPortFromPriv(vport) == dstPort && + vport->ovsType == ovsPortType) { return vport; } } @@ -934,6 +937,9 @@ OvsInitTunnelVport(PVOID userContext, (PVOID)tunnelContext); break; } + case OVS_VPORT_TYPE_STT: + status = 
OvsInitSttTunnel(vport, dstPort); + break; default: ASSERT(0); } @@ -1079,10 +1085,11 @@ InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, switch(vport->ovsType) { case OVS_VPORT_TYPE_VXLAN: + case OVS_VPORT_TYPE_STT: { - POVS_VXLAN_VPORT vxlanVport = (POVS_VXLAN_VPORT)vport->priv; - hash = OvsJhashBytes(&vxlanVport->dstPort, - sizeof(vxlanVport->dstPort), + UINT16 dstPort = GetPortFromPriv(vport); + hash = OvsJhashBytes(&dstPort, + sizeof(dstPort), OVS_HASH_BASIS); InsertHeadList( &gOvsSwitchContext->tunnelVportsArray[hash & OVS_VPORT_MASK], @@ -1158,7 +1165,8 @@ OvsCleanupVportCommon(POVS_SWITCH_CONTEXT switchContext, InitializeListHead(&vport->ovsNameLink); RemoveEntryList(&vport->portNoLink); InitializeListHead(&vport->portNoLink); - if (OVS_VPORT_TYPE_VXLAN == vport->ovsType) { + if (OVS_VPORT_TYPE_VXLAN == vport->ovsType || + OVS_VPORT_TYPE_STT == vport->ovsType) { RemoveEntryList(&vport->tunnelVportLink); InitializeListHead(&vport->tunnelVportLink); } @@ -1258,6 +1266,9 @@ OvsRemoveAndDeleteVport(PVOID usrParamsContext, tunnelContext); break; } + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport); + break; case OVS_VPORT_TYPE_GRE: case OVS_VPORT_TYPE_GRE64: break; @@ -2147,17 +2158,29 @@ OvsNewVportCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx, vportAllocated = TRUE; if (OvsIsTunnelVportType(portType)) { - UINT16 udpPortDest = VXLAN_UDP_PORT; + UINT16 transportPortDest = 0; + + switch (vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + transportPortDest = VXLAN_UDP_PORT; + break; + case OVS_VPORT_TYPE_STT: + transportPortDest = STT_TCP_PORT; + break; + default: + break; + } + PNL_ATTR attr = NlAttrFindNested(vportAttrs[OVS_VPORT_ATTR_OPTIONS], OVS_TUNNEL_ATTR_DST_PORT); if (attr) { - udpPortDest = NlAttrGetU16(attr); + transportPortDest = NlAttrGetU16(attr); } status = OvsInitTunnelVport(usrParamsCtx, vport, portType, - udpPortDest); + transportPortDest); nlError = NlMapStatusToNlErr(status); } else { @@ -2243,7 +2266,16 @@ Cleanup: if (vport && 
vportAllocated == TRUE) { if (vportInitialized == TRUE) { if (OvsIsTunnelVportType(portType)) { - OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); + switch (vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + OvsCleanupVxlanTunnel(NULL, vport, NULL, NULL); + break; + case OVS_VPORT_TYPE_STT: + OvsCleanupSttTunnel(vport);; + break; + default: + ASSERT(!"Invalid tunnel port type"); + } } } OvsFreeMemoryWithTag(vport, OVS_VPORT_POOL_TAG); diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h index 84ac3d3fa..3ea3d0303 100644 --- a/datapath-windows/ovsext/Vport.h +++ b/datapath-windows/ovsext/Vport.h @@ -18,6 +18,8 @@ #define __VPORT_H_ 1 #include "Switch.h" +#include "VxLan.h" +#include "Stt.h" #define OVS_MAX_DPPORTS MAXUINT16 #define OVS_DPPORT_NUMBER_INVALID OVS_MAX_DPPORTS @@ -147,7 +149,8 @@ POVS_VPORT_ENTRY OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchConte NDIS_SWITCH_PORT_ID portId, NDIS_SWITCH_NIC_INDEX index); POVS_VPORT_ENTRY OvsFindTunnelVportByDstPort(POVS_SWITCH_CONTEXT switchContext, - UINT16 dstPort); + UINT16 dstPort, + OVS_VPORT_TYPE ovsVportType); NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); @@ -177,10 +180,18 @@ static __inline BOOLEAN OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) { return ovsType == OVS_VPORT_TYPE_VXLAN || + ovsType == OVS_VPORT_TYPE_STT || ovsType == OVS_VPORT_TYPE_GRE || ovsType == OVS_VPORT_TYPE_GRE64; } + +static __inline PVOID +GetOvsVportPriv(POVS_VPORT_ENTRY ovsVport) +{ + return ovsVport->priv; +} + static __inline BOOLEAN OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) { @@ -200,6 +211,40 @@ NTSTATUS OvsRemoveAndDeleteVport(PVOID usrParamsCtx, POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport, BOOLEAN hvDelete, BOOLEAN ovsDelete); +static __inline POVS_VPORT_ENTRY +OvsGetExternalVport(POVS_SWITCH_CONTEXT switchContext) +{ + return 
switchContext->virtualExternalVport; +} + +static __inline UINT32 +OvsGetExternalMtu(POVS_SWITCH_CONTEXT switchContext) +{ + ASSERT(OvsGetExternalVport(switchContext)); + return ((POVS_VPORT_ENTRY) OvsGetExternalVport(switchContext))->mtu; +} + +static __inline UINT16 +GetPortFromPriv(POVS_VPORT_ENTRY vport) +{ + UINT16 dstPort = 0; + PVOID vportPriv = GetOvsVportPriv(vport); + + /* XXX would better to have a commom tunnel "parent" structure */ + ASSERT(vportPriv); + switch(vport->ovsType) { + case OVS_VPORT_TYPE_VXLAN: + dstPort = ((POVS_VXLAN_VPORT)vportPriv)->dstPort; + break; + case OVS_VPORT_TYPE_STT: + dstPort = ((POVS_STT_VPORT)vportPriv)->dstPort; + break; + default: + ASSERT(! "Port is not a tunnel port"); + } + ASSERT(dstPort); + return dstPort; +} NDIS_STATUS InitOvsVportCommon(POVS_SWITCH_CONTEXT switchContext, POVS_VPORT_ENTRY vport); diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 9935bdff0..fa6be666b 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -244,10 +244,10 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, /* L2 header */ ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination + sizeof ethHdr->Source); ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == (PCHAR)&fwdInfo->srcMacAddr); + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); ethHdr->Type = htons(ETH_TYPE_IPV4); // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, @@ -311,13 +311,11 @@ NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl) { NTSTATUS status; OVS_FWD_INFO fwdInfo; - UNREFERENCED_PARAMETER(completionList); status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); if (status != STATUS_SUCCESS) { @@ -420,15 +418,15 
@@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, /* *---------------------------------------------------------------------------- - * OvsDoDecapVxlan + * OvsDecapVxlan * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. *---------------------------------------------------------------------------- */ NDIS_STATUS -OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl) +OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) { PNET_BUFFER curNb; PMDL curMdl; diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h index 0e2830496..448ee9722 100644 --- a/datapath-windows/ovsext/Vxlan.h +++ b/datapath-windows/ovsext/Vxlan.h @@ -65,14 +65,13 @@ NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, OvsIPv4TunnelKey *tunKey, POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, POVS_PACKET_HDR_INFO layers, PNET_BUFFER_LIST *newNbl); -NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl); +NDIS_STATUS OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl); static __inline UINT32 OvsGetVxlanTunHdrSize(VOID) diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 693bc50d1..705001592 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -90,6 +90,7 @@ + @@ -183,6 +184,7 @@ Create $(IntDir)\precomp.h.pch + @@ -202,4 +204,4 @@ - + \ No newline at end of file