datapath-windows: Add LSOv2 support for VXLAN
[cascardo/ovs.git] / datapath-windows / ovsext / Vxlan.c
index 9d42665..2516ece 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include "precomp.h"
+#include "Atomic.h"
 #include "NetProto.h"
 #include "Switch.h"
 #include "Vport.h"
@@ -113,11 +114,11 @@ OvsInitVxlanTunnel(PIRP irp,
     vport->priv = (PVOID)vxlanPort;
 
     if (!OvsIsTunnelFilterCreated(gOvsSwitchContext, udpDestPort)) {
-        status = OvsTunelFilterCreate(irp,
-                                      udpDestPort,
-                                      &vxlanPort->filterID,
-                                      callback,
-                                      tunnelContext);
+        status = OvsTunnelFilterCreate(irp,
+                                       udpDestPort,
+                                       &vxlanPort->filterID,
+                                       callback,
+                                       tunnelContext);
     } else {
         status = STATUS_OBJECT_NAME_EXISTS;
     }
@@ -150,15 +151,15 @@ OvsCleanupVxlanTunnel(PIRP irp,
     vxlanPort = (POVS_VXLAN_VPORT)vport->priv;
 
     if (vxlanPort->filterID != 0) {
-        status = OvsTunelFilterDelete(irp,
-                                      vxlanPort->filterID,
-                                      callback,
-                                      tunnelContext);
+        status = OvsTunnelFilterDelete(irp,
+                                       vxlanPort->filterID,
+                                       callback,
+                                       tunnelContext);
+    } else {
+        OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG);
+        vport->priv = NULL;
     }
 
-    OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG);
-    vport->priv = NULL;
-
     return status;
 }
 
@@ -170,7 +171,8 @@ OvsCleanupVxlanTunnel(PIRP irp,
  *----------------------------------------------------------------------------
  */
 static __inline NDIS_STATUS
-OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
+OvsDoEncapVxlan(POVS_VPORT_ENTRY vport,
+                PNET_BUFFER_LIST curNbl,
                 OvsIPv4TunnelKey *tunKey,
                 POVS_FWD_INFO fwdInfo,
                 POVS_PACKET_HDR_INFO layers,
@@ -185,8 +187,10 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
     IPHdr *ipHdr;
     UDPHdr *udpHdr;
     VXLANHdr *vxlanHdr;
+    POVS_VXLAN_VPORT vportVxlan;
     UINT32 headRoom = OvsGetVxlanTunHdrSize();
     UINT32 packetLength;
+    ULONG mss = 0;
 
     /*
      * XXX: the assumption currently is that the NBL is owned by OVS, and
@@ -195,22 +199,41 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
      */
     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
+
     if (layers->isTcp) {
         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
 
         tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
-                TcpLargeSendNetBufferListInfo);
-        OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
-        if (tsoInfo.LsoV1Transmit.MSS) {
+                                             TcpLargeSendNetBufferListInfo);
+        switch (tsoInfo.Transmit.Type) {
+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
+                mss = tsoInfo.LsoV1Transmit.MSS;
+                break;
+            case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
+                mss = tsoInfo.LsoV2Transmit.MSS;
+                break;
+            default:
+                OVS_LOG_ERROR("Unknown LSO transmit type:%d",
+                              tsoInfo.Transmit.Type);
+        }
+        OVS_LOG_TRACE("MSS %u packet len %u", mss,
+                      packetLength);
+        if (mss) {
             OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
             *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
-                        tsoInfo.LsoV1Transmit.MSS, headRoom);
+                                       mss, headRoom);
             if (*newNbl == NULL) {
                 OVS_LOG_ERROR("Unable to segment NBL");
                 return NDIS_STATUS_FAILURE;
             }
+            /* Clear out LSO flags after this point */
+            NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
         }
     }
+
+    vportVxlan = (POVS_VXLAN_VPORT) GetOvsVportPriv(vport);
+    ASSERT(vportVxlan);
+
     /* If we didn't split the packet above, make a copy now */
     if (*newNbl == NULL) {
         *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
@@ -219,6 +242,70 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
             OVS_LOG_ERROR("Unable to copy NBL");
             return NDIS_STATUS_FAILURE;
         }
+        /*
+         * To this point we do not have VXLAN offloading.
+         * Apply defined checksums
+         */
+        curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
+        curMdl = NET_BUFFER_CURRENT_MDL(curNb);
+        bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
+        if (!bufferStart) {
+            status = NDIS_STATUS_RESOURCES;
+            goto ret_error;
+        }
+
+        NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
+        csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
+                                              TcpIpChecksumNetBufferListInfo);
+
+        bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
+
+        if (layers->isIPv4) {
+            IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
+
+            if (csumInfo.Transmit.IpHeaderChecksum) {
+                ip->check = 0;
+                ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
+            }
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+                udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
+                                              IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+        } else if (layers->isIPv6) {
+            IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
+
+            if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
+                tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+                                                (UINT32 *) &ip->daddr,
+                                                IPPROTO_TCP, csumLength);
+                tcp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
+                UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
+                UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
+                udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
+                                                (UINT32 *) &ip->daddr,
+                                                IPPROTO_UDP, csumLength);
+                udp->check = CalculateChecksumNB(curNb, csumLength,
+                                                 (UINT32)(layers->l4Offset));
+            }
+        }
+        /* Clear out TcpIpChecksumNetBufferListInfo flag */
+        NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
     }
 
     curNbl = *newNbl;
@@ -244,37 +331,37 @@ OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
 
         /* L2 header */
         ethHdr = (EthHdr *)bufferStart;
-        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
-                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
         ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
                (PCHAR)&fwdInfo->srcMacAddr);
+        NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
+                       sizeof ethHdr->Destination + sizeof ethHdr->Source);
         ethHdr->Type = htons(ETH_TYPE_IPV4);
 
-        // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
-        // should we use those values instead? or will they end up being
-        // uninitialized;
         /* IP header */
         ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
 
         ipHdr->ihl = sizeof *ipHdr / 4;
-        ipHdr->version = IPV4;
-        ipHdr->tos = 0;
+        ipHdr->version = IPPROTO_IPV4;
+        ipHdr->tos = tunKey->tos;
         ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
-        ipHdr->id = 0;
-        ipHdr->frag_off = IP_DF_NBO;
+        ipHdr->id = (uint16)atomic_add64(&vportVxlan->ipId,
+                                         NET_BUFFER_DATA_LENGTH(curNb));
+        ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
+                          IP_DF_NBO : 0;
         ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
         ipHdr->protocol = IPPROTO_UDP;
         ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
         ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
         ipHdr->saddr = fwdInfo->srcIpAddr;
         ipHdr->daddr = fwdInfo->dstIpAddr;
+
         ipHdr->check = 0;
         ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
 
         /* UDP header */
         udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
-        udpHdr->source = htons(tunKey->flow_hash | 32768);
-        udpHdr->dest = VXLAN_UDP_PORT_NBO;
+        udpHdr->source = htons(tunKey->flow_hash | MAXINT16);
+        udpHdr->dest = htons(vportVxlan->dstPort);
         udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
                             sizeof *udpHdr + sizeof *vxlanHdr);
         udpHdr->check = 0;
@@ -308,16 +395,15 @@ ret_error:
  *----------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
+OvsEncapVxlan(POVS_VPORT_ENTRY vport,
+              PNET_BUFFER_LIST curNbl,
               OvsIPv4TunnelKey *tunKey,
               POVS_SWITCH_CONTEXT switchContext,
-              VOID *completionList,
               POVS_PACKET_HDR_INFO layers,
               PNET_BUFFER_LIST *newNbl)
 {
     NTSTATUS status;
     OVS_FWD_INFO fwdInfo;
-    UNREFERENCED_PARAMETER(completionList);
 
     status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
     if (status != STATUS_SUCCESS) {
@@ -333,48 +419,10 @@ OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
         return NDIS_STATUS_FAILURE;
     }
 
-    return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers,
+    return OvsDoEncapVxlan(vport, curNbl, tunKey, &fwdInfo, layers,
                            switchContext, newNbl);
 }
 
-
-/*
- *----------------------------------------------------------------------------
- * OvsIpHlprCbVxlan --
- *     Callback function for IP helper.
- *     XXX: not used currently
- *----------------------------------------------------------------------------
- */
-static VOID
-OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
-                 UINT32 inPort,
-                 OvsIPv4TunnelKey *tunKey,
-                 PVOID cbData1,
-                 PVOID cbData2,
-                 NTSTATUS result,
-                 POVS_FWD_INFO fwdInfo)
-{
-    OVS_PACKET_HDR_INFO layers;
-    OvsFlowKey key;
-    NDIS_STATUS status;
-    UNREFERENCED_PARAMETER(inPort);
-
-    status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
-    if (result == STATUS_SUCCESS) {
-        status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
-                (POVS_SWITCH_CONTEXT)cbData1, NULL);
-    } else {
-        status = NDIS_STATUS_FAILURE;
-    }
-
-    if (status != NDIS_STATUS_SUCCESS) {
-        // XXX: Free up the NBL;
-        return;
-    }
-
-    OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl);
-}
-
 /*
  *----------------------------------------------------------------------------
  * OvsCalculateUDPChecksum
@@ -420,15 +468,15 @@ OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl,
 
 /*
  *----------------------------------------------------------------------------
- * OvsDoDecapVxlan
+ * OvsDecapVxlan
  *     Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'.
  *----------------------------------------------------------------------------
  */
 NDIS_STATUS
-OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
-                PNET_BUFFER_LIST curNbl,
-                OvsIPv4TunnelKey *tunKey,
-                PNET_BUFFER_LIST *newNbl)
+OvsDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
+              PNET_BUFFER_LIST curNbl,
+              OvsIPv4TunnelKey *tunKey,
+              PNET_BUFFER_LIST *newNbl)
 {
     PNET_BUFFER curNb;
     PMDL curMdl;
@@ -440,7 +488,7 @@ OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
     PUINT8 bufferStart;
     NDIS_STATUS status;
 
-    /* Check the the length of the UDP payload */
+    /* Check the length of the UDP payload */
     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
     tunnelSize = OvsGetVxlanTunHdrSize();