2 * Copyright (c) 2015 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include "PacketParser.h"
/* Debug-module selector for this file; its consumer is not visible in this
 * excerpt (presumably the OVS_LOG_* macros -- confirm). */
35 #define OVS_DBG_MOD OVS_DBG_STT
/* Thread routine that expires stale fragment entries; defined further down. */
39 KSTART_ROUTINE OvsSttDefragCleaner;
/* Fragment table: an array of STT_HASH_TABLE_SIZE list heads, allocated in
 * OvsInitSttDefragmentation and indexed with (hash & STT_HASH_TABLE_MASK). */
40 static PLIST_ENTRY OvsSttPktFragHash;
/* Spin lock held around the visible accesses to the fragment table and around
 * the cleaner thread's exit flag. */
41 static NDIS_SPIN_LOCK OvsSttSpinLock;
/* State shared with the cleaner thread: its wake-up event, exit flag and
 * referenced thread object. */
42 static OVS_STT_THREAD_CTX sttDefragThreadCtx;
/* Forward declaration of the encapsulation worker called by OvsEncapStt.
 * NOTE(review): the return-type line is not part of this excerpt. */
45 OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
46 const OvsIPv4TunnelKey *tunKey,
47 const POVS_FWD_INFO fwdInfo,
48 POVS_PACKET_HDR_INFO layers,
49 POVS_SWITCH_CONTEXT switchContext,
50 PNET_BUFFER_LIST *newNbl);
53 * --------------------------------------------------------------------------
55 * Initialize STT tunnel module.
56 * --------------------------------------------------------------------------
/*
 * Allocates the per-vport STT state, zeroes it, records the TCP destination
 * port in it and attaches it to 'vport' through vport->priv.
 *
 * Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES (after logging)
 * on the allocation-failure path.
 *
 * NOTE(review): this excerpt omits several lines (the return type, the
 * remaining parameter(s) -- 'tcpDestPort' is used below but its declaration
 * is not shown -- the allocation tag, the failure test and the braces).
 */
59 OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
62 POVS_STT_VPORT sttPort;
64 sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort),
67 OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT");
68 return STATUS_INSUFFICIENT_RESOURCES;
/* Start from an all-zero state so every field not set below is 0. */
71 RtlZeroMemory(sttPort, sizeof(*sttPort));
72 sttPort->dstPort = tcpDestPort;
73 vport->priv = (PVOID) sttPort;
74 return STATUS_SUCCESS;
78 * --------------------------------------------------------------------------
79 * OvsCleanupSttTunnel --
80 * Cleanup STT Tunnel module.
81 * --------------------------------------------------------------------------
/*
 * Releases the STT private state attached to 'vport'.
 *
 * The guard below matches vports that are not of type OVS_VPORT_TYPE_STT or
 * that have no private state; the statement it guards is omitted from this
 * excerpt (presumably an early return -- confirm). Otherwise vport->priv is
 * freed with the OVS_STT_POOL_TAG pool tag.
 */
84 OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport)
86 if (vport->ovsType != OVS_VPORT_TYPE_STT ||
87 vport->priv == NULL) {
91 OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG);
96 * --------------------------------------------------------------------------
98 * Encapsulates a packet with an STT header.
99 * --------------------------------------------------------------------------
/*
 * Entry point for STT encapsulation: looks up forwarding information for the
 * tunnel destination and hands the packet to OvsDoEncapStt.
 *
 *   vport, curNbl, layers, switchContext, newNbl - passed through unchanged
 *       to OvsDoEncapStt.
 *   tunKey - tunnel key; tunKey->dst is the address used for the lookup.
 *
 * When the lookup does not return STATUS_SUCCESS, an IP helper request is
 * issued for the tunnel key and NDIS_STATUS_FAILURE is returned.
 *
 * NOTE(review): the return type, the declaration of 'status', the braces and
 * the final return statement are omitted from this excerpt.
 */
102 OvsEncapStt(POVS_VPORT_ENTRY vport,
103 PNET_BUFFER_LIST curNbl,
104 OvsIPv4TunnelKey *tunKey,
105 POVS_SWITCH_CONTEXT switchContext,
106 POVS_PACKET_HDR_INFO layers,
107 PNET_BUFFER_LIST *newNbl)
109 OVS_FWD_INFO fwdInfo;
/* NOTE(review): switchContext is marked unreferenced here but is passed to
 * OvsDoEncapStt below, so this annotation looks stale. */
112 UNREFERENCED_PARAMETER(switchContext);
113 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
114 if (status != STATUS_SUCCESS) {
/* No forwarding info yet: issue a helper request for this tunnel key and
 * fail this packet. */
115 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
117 * XXX This case where the ARP table is not populated is
118 * currently not handled
120 return NDIS_STATUS_FAILURE;
123 status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers,
124 switchContext, newNbl);
129 * --------------------------------------------------------------------------
131 * Internal utility function which actually does the STT encap.
132 * --------------------------------------------------------------------------
/*
 * Builds the outer Ethernet / IPv4 / TCP / STT headers for a frame.
 *
 * Visible steps, in order:
 *   1. Read the NBL's large-send info to obtain the MSS.
 *   2. Partial-copy 'curNbl' into *newNbl, reserving 'headRoom' bytes.
 *   3. For an IPv4 frame, rewrite the inner IP total length and checksum.
 *   4. Classify the inner L4 checksum as already valid or still partial,
 *      from the NBL's transmit checksum-offload flags.
 *   5. Retreat the data start by 'headRoom' and fill the outer headers.
 *   6. Request IPv4 and TCP checksum offload for the outer headers, and
 *      LSOv2 when the outer IP length exceeds the encapsulated MSS.
 *
 * Returns STATUS_SUCCESS on the success path and NDIS_STATUS_FAILURE when
 * the partial copy fails; other failure paths set 'status' (their returns are
 * not visible here).
 *
 * NOTE(review): many lines of this function are omitted from this excerpt
 * (local declarations, braces, some conditions, labels and return
 * statements). The comments below describe only the visible statements.
 */
135 OvsDoEncapStt(POVS_VPORT_ENTRY vport,
136 PNET_BUFFER_LIST curNbl,
137 const OvsIPv4TunnelKey *tunKey,
138 const POVS_FWD_INFO fwdInfo,
139 POVS_PACKET_HDR_INFO layers,
140 POVS_SWITCH_CONTEXT switchContext,
141 PNET_BUFFER_LIST *newNbl)
143 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
151 UINT32 innerFrameLen, ipTotalLen;
152 POVS_STT_VPORT vportStt;
/* Bytes reserved in front of the frame for the tunnel headers. */
153 UINT32 headRoom = OvsGetSttTunHdrSize();
157 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
159 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
161 /* Verify if inner checksum is verified */
162 BOOLEAN innerChecksumVerified = FALSE;
163 BOOLEAN innerPartialChecksum = FALSE;
/* Pick the MSS from whichever LSO descriptor version the NBL carries. */
166 lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
167 TcpLargeSendNetBufferListInfo);
169 switch (lsoInfo.Transmit.Type) {
170 case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
171 mss = lsoInfo.LsoV1Transmit.MSS;
173 case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
174 mss = lsoInfo.LsoV2Transmit.MSS;
177 OVS_LOG_ERROR("Unknown LSO transmit type:%d",
178 lsoInfo.Transmit.Type);
182 vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport);
/* Capture the inner frame's checksum-offload request before copying. */
185 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
186 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
187 TcpIpChecksumNetBufferListInfo);
188 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
189 FALSE /*copy NblInfo*/);
190 if (*newNbl == NULL) {
191 OVS_LOG_ERROR("Unable to copy NBL");
192 return NDIS_STATUS_FAILURE;
/* NOTE(review): presumably curNbl is re-pointed at *newNbl on an omitted
 * line before this point -- confirm against the full file. */
196 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
197 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
198 /* NB Chain should be split before */
199 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
200 innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
202 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
204 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/* Inner IPv4 header: total length becomes the frame length minus the
 * Ethernet header, and the header checksum is recomputed. */
206 if (layers->isIPv4) {
207 IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
209 ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
212 ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
218 innerPartialChecksum = TRUE;
/* L4 checksum offload not requested => treated as already verified;
 * requested => still partial. Same rule for the UDP branch below. */
220 if (!csumInfo.Transmit.TcpChecksum) {
221 innerChecksumVerified = TRUE;
223 innerPartialChecksum = TRUE;
226 } else if (layers->isUdp) {
227 if(!csumInfo.Transmit.UdpChecksum) {
228 innerChecksumVerified = TRUE;
230 innerPartialChecksum = TRUE;
/* Expose the reserved head room so the tunnel headers can be written. */
234 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
235 if (status != NDIS_STATUS_SUCCESS) {
236 ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
237 OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)");
242 * Make sure that the headroom for the tunnel header is continguous in
245 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
246 ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
249 buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
251 ASSERT(!"MmGetSystemAddressForMdlSafe failed");
252 OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed");
253 status = NDIS_STATUS_RESOURCES;
/* The four outer headers are laid out back to back from the data start. */
257 buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
258 outerEthHdr = (EthHdr *)buf;
259 outerIpHdr = (IPHdr *) (outerEthHdr + 1);
260 outerTcpHdr = (TCPHdr *) (outerIpHdr + 1);
261 sttHdr = (SttHdr *) (outerTcpHdr + 1);
/* A single copy fills both MAC addresses; this relies on srcMacAddr
 * directly following dstMacAddr in fwdInfo, which the ASSERT checks. */
264 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
265 (PCHAR)&fwdInfo->srcMacAddr);
266 NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr,
267 sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source);
268 outerEthHdr->Type = htons(ETH_TYPE_IPV4);
/* Outer IPv4 header. */
271 outerIpHdr->ihl = sizeof(IPHdr) >> 2;
272 outerIpHdr->version = IPPROTO_IPV4;
273 outerIpHdr->tos = tunKey->tos;
275 ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
276 outerIpHdr->tot_len = htons(ipTotalLen);
277 ASSERT(ipTotalLen < 65536);
/* The per-vport IP id counter is advanced by the inner frame length. */
279 outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen);
280 outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
/* A TTL of 0 in the tunnel key falls back to 64. */
282 outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64;
283 outerIpHdr->protocol = IPPROTO_TCP;
284 outerIpHdr->check = 0;
285 outerIpHdr->saddr = fwdInfo->srcIpAddr;
286 outerIpHdr->daddr = tunKey->dst;
/* Outer TCP header: the source port is the flow hash with the top bit
 * forced on; the sequence field carries the STT frame length (shifted) and
 * the ack field takes the per-vport ackNo counter. */
289 RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
290 outerTcpHdr->source = htons(tunKey->flow_hash | 32768);
291 outerTcpHdr->dest = htons(vportStt->dstPort);
292 outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) <<
294 outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo));
295 outerTcpHdr->doff = sizeof(TCPHdr) >> 2;
296 outerTcpHdr->psh = 1;
297 outerTcpHdr->ack = 1;
298 outerTcpHdr->window = (uint16) ~0;
300 /* Calculate pseudo header chksum */
301 tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
302 ASSERT(tcpChksumLen < 65535);
303 outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
304 IPPROTO_TCP, (uint16) tcpChksumLen);
/* STT header: the flags describe the inner checksum state classified
 * above. */
309 if (innerPartialChecksum) {
310 sttHdr->flags |= STT_CSUM_PARTIAL;
311 if (layers->isIPv4) {
312 sttHdr->flags |= STT_PROTO_IPV4;
315 sttHdr->flags |= STT_PROTO_TCP;
317 sttHdr->l4Offset = (UINT8) layers->l4Offset;
318 sttHdr->mss = (UINT16) htons(mss);
319 } else if (innerChecksumVerified) {
320 sttHdr->flags = STT_CSUM_VERIFIED;
321 sttHdr->l4Offset = 0;
325 sttHdr->reserved = 0;
327 sttHdr->key = tunKey->tunnelId;
328 /* Zero out stt padding */
329 *(uint16 *)(sttHdr + 1) = 0;
331 /* Offload IP and TCP checksum */
332 ULONG tcpHeaderOffset = sizeof *outerEthHdr +
335 csumInfo.Transmit.IpHeaderChecksum = 1;
336 csumInfo.Transmit.TcpChecksum = 1;
337 csumInfo.Transmit.IsIPv4 = 1;
338 csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
339 NET_BUFFER_LIST_INFO(curNbl,
340 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
/* Request LSOv2 when the outer IP length exceeds the external MTU minus the
 * outer IP and TCP header sizes. */
342 UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr);
343 if (ipTotalLen > encapMss) {
345 lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
346 lsoInfo.LsoV2Transmit.MSS = encapMss;
347 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
348 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
349 NET_BUFFER_LIST_INFO(curNbl,
350 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
353 return STATUS_SUCCESS;
/* Follows the success return, so presumably reached only through an error
 * label that is not visible here: completes the copied NBL. */
356 OvsCompleteNBL(switchContext, *newNbl, TRUE);
362 *----------------------------------------------------------------------------
363 * OvsValidateTCPChecksum
364 * Validate TCP checksum
365 *----------------------------------------------------------------------------
367 static __inline NDIS_STATUS
368 OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
370 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
371 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
372 TcpIpChecksumNetBufferListInfo);
374 /* Check if NIC has indicated TCP checksum failure */
375 if (csumInfo.Receive.TcpChecksumFailed) {
376 return NDIS_STATUS_INVALID_PACKET;
381 /* Check if TCP Checksum has been calculated by NIC */
382 if (csumInfo.Receive.TcpChecksumSucceeded) {
383 return NDIS_STATUS_SUCCESS;
386 EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
389 if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) {
390 IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth);
391 UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4;
392 TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + ip->ihl * 4);
393 checkSum = tcp->check;
396 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
397 IPPROTO_TCP, (UINT16)l4Payload);
398 tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload),
399 sizeof(EthHdr) + ip->ihl * 4);
400 if (checkSum != tcp->check) {
401 return NDIS_STATUS_INVALID_PACKET;
404 OVS_LOG_ERROR("IPv6 on STT is not supported");
405 return NDIS_STATUS_INVALID_PACKET;
408 csumInfo.Receive.TcpChecksumSucceeded = 1;
409 NET_BUFFER_LIST_INFO(curNbl,
410 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
411 return NDIS_STATUS_SUCCESS;
415 *----------------------------------------------------------------------------
416 * OvsInitSttDefragmentation
417 * Initialize the components used by the stt lso defragmentation
418 *----------------------------------------------------------------------------
421 OvsInitSttDefragmentation()
424 HANDLE threadHandle = NULL;
426 /* Init the sync-lock */
427 NdisAllocateSpinLock(&OvsSttSpinLock);
429 /* Init the Hash Buffer */
430 OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
431 * STT_HASH_TABLE_SIZE,
433 if (OvsSttPktFragHash == NULL) {
434 NdisFreeSpinLock(&OvsSttSpinLock);
435 return STATUS_INSUFFICIENT_RESOURCES;
438 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
439 InitializeListHead(&OvsSttPktFragHash[i]);
442 /* Init Defrag Cleanup Thread */
443 KeInitializeEvent(&sttDefragThreadCtx.event, NotificationEvent, FALSE);
444 status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
445 NULL, OvsSttDefragCleaner,
446 &sttDefragThreadCtx);
448 if (status != STATUS_SUCCESS) {
449 OvsCleanupSttDefragmentation();
453 ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
454 &sttDefragThreadCtx.threadObject, NULL);
455 ZwClose(threadHandle);
457 return STATUS_SUCCESS;
461 *----------------------------------------------------------------------------
462 * OvsCleanupSttDefragmentation
463 * Cleanup memory and thread that were spawned for STT LSO defragmentation
464 *----------------------------------------------------------------------------
467 OvsCleanupSttDefragmentation(VOID)
469 NdisAcquireSpinLock(&OvsSttSpinLock);
470 sttDefragThreadCtx.exit = 1;
471 KeSetEvent(&sttDefragThreadCtx.event, 0, FALSE);
472 NdisReleaseSpinLock(&OvsSttSpinLock);
474 KeWaitForSingleObject(sttDefragThreadCtx.threadObject, Executive,
475 KernelMode, FALSE, NULL);
476 ObDereferenceObject(sttDefragThreadCtx.threadObject);
478 if (OvsSttPktFragHash) {
479 OvsFreeMemoryWithTag(OvsSttPktFragHash, OVS_STT_POOL_TAG);
480 OvsSttPktFragHash = NULL;
483 NdisFreeSpinLock(&OvsSttSpinLock);
487 *----------------------------------------------------------------------------
488 * OvsSttDefragCleaner
489 * Runs periodically and cleans up the buffer to remove expired segments
490 *----------------------------------------------------------------------------
493 OvsSttDefragCleaner(PVOID data)
495 POVS_STT_THREAD_CTX context = (POVS_STT_THREAD_CTX)data;
496 PLIST_ENTRY link, next;
497 POVS_STT_PKT_ENTRY entry;
498 BOOLEAN success = TRUE;
501 NdisAcquireSpinLock(&OvsSttSpinLock);
503 NdisReleaseSpinLock(&OvsSttSpinLock);
507 /* Set the timeout for the thread and cleanup */
508 UINT64 currentTime, threadSleepTimeout;
509 NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
510 threadSleepTimeout = currentTime + STT_CLEANUP_INTERVAL;
512 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
513 LIST_FORALL_SAFE(&OvsSttPktFragHash[i], link, next) {
514 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
515 if (entry->timeout < currentTime) {
516 RemoveEntryList(&entry->link);
517 OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
518 OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
523 NdisReleaseSpinLock(&OvsSttSpinLock);
524 KeWaitForSingleObject(&context->event, Executive, KernelMode,
525 FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
528 PsTerminateSystemThread(STATUS_SUCCESS);
/*
 * Builds the key that identifies the fragments of one STT frame: the outer
 * IPv4 source and destination addresses (copied as stored in the header) and
 * the TCP acknowledgement number converted to host byte order.
 *
 * NOTE(review): the declaration of 'key', the braces and the return
 * statement are omitted from this excerpt.
 */
531 static OVS_STT_PKT_KEY
532 OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
535 key.sAddr = ipHdr->saddr;
536 key.dAddr = ipHdr->daddr;
537 key.ackSeq = ntohl(tcpHdr->ack_seq);
/*
 * Hashes a fragment key: the three key fields are packed into a 3-element
 * array and hashed with OvsJhashWords, seeded with OVS_HASH_BASIS.
 *
 * NOTE(review): the return type and the declaration of 'arr' are omitted
 * from this excerpt.
 */
542 OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
545 arr[0] = pktKey->ackSeq;
546 arr[1] = pktKey->dAddr;
547 arr[2] = pktKey->sAddr;
548 return OvsJhashWords(arr, 3, OVS_HASH_BASIS);
/*
 * Searches the bucket selected by (hash & STT_HASH_TABLE_MASK) for an entry
 * whose key matches 'pktKey' on all three fields (ackSeq, dAddr, sAddr).
 *
 * No lock is taken here; the visible caller, OvsSttReassemble, calls this
 * with OvsSttSpinLock held.
 *
 * NOTE(review): the return type, the declaration of 'link', the braces and
 * both return statements are omitted from this excerpt; presumably the
 * matching entry is returned, or NULL when none matches -- confirm.
 */
552 OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
555 POVS_STT_PKT_ENTRY entry;
557 LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) {
558 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
559 if (entry->ovsPktKey.ackSeq == pktKey->ackSeq &&
560 entry->ovsPktKey.dAddr == pktKey->dAddr &&
561 entry->ovsPktKey.sAddr == pktKey->sAddr) {
570 --------------------------------------------------------------------------
571 * OvsSttReassemble --
572 * Reassemble an LSO packet from multiple STT-Fragments.
574 --------------------------------------------------------------------------
577 OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
578 PNET_BUFFER_LIST curNbl,
584 UINT32 seq = ntohl(tcp->seq);
585 UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN;
586 UINT32 segOffset = STT_SEGMENT_OFF(seq);
587 UINT32 offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
588 UINT32 startOffset = 0;
589 OVS_STT_PKT_ENTRY *pktFragEntry;
590 PNET_BUFFER_LIST targetPNbl = NULL;
591 BOOLEAN lastPacket = FALSE;
592 PNET_BUFFER sourceNb;
593 UINT32 fragmentLength = payloadLen;
595 SttHdr *sttHdr = NULL;
596 sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
598 /* XXX optimize this lock */
599 NdisAcquireSpinLock(&OvsSttSpinLock);
601 /* If this is the first fragment, copy the STT header */
602 if (segOffset == 0) {
603 sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
604 if (sttHdr == NULL) {
605 OVS_LOG_ERROR("Unable to retrieve STT header");
608 fragmentLength = fragmentLength - STT_HDR_LEN;
609 startOffset = startOffset + STT_HDR_LEN;
612 /* Lookup fragment */
613 OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
614 UINT32 hash = OvsSttGetPktHash(&pktKey);
615 pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
617 if (pktFragEntry == NULL) {
618 /* Create a new Packet Entry */
619 POVS_STT_PKT_ENTRY entry;
620 entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
622 RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
624 /* Update Key, timestamp and recvdLen */
625 NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
627 entry->recvdLen = fragmentLength;
630 NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime);
631 entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
633 if (segOffset == 0) {
634 entry->sttHdr = *sttHdr;
637 /* Copy the data from Source to new buffer */
638 entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
640 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
641 entry->packetBuf + offset) == NULL) {
642 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
646 /* Insert the entry in the Static Buffer */
647 InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
650 /* Add to recieved length to identify if this is the last fragment */
651 pktFragEntry->recvdLen += fragmentLength;
652 lastPacket = (pktFragEntry->recvdLen == innerPacketLen);
654 if (segOffset == 0) {
655 pktFragEntry->sttHdr = *sttHdr;
658 /* Copy the fragment data from Source to existing buffer */
659 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
660 pktFragEntry->packetBuf + offset) == NULL) {
661 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
668 /* Retrieve the original STT header */
669 NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
670 targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry->packetBuf,
673 /* Delete this entry and free up the memory/ */
674 RemoveEntryList(&pktFragEntry->link);
675 OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
676 OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
679 NdisReleaseSpinLock(&OvsSttSpinLock);
680 return lastPacket ? targetPNbl : NULL;
/*
 * Translates the offload hints of a received STT header into NDIS
 * out-of-band information on 'curNbl': the transmit checksum request
 * (TcpIpChecksumNetBufferListInfo) and an LSOv2 request
 * (TcpLargeSendNetBufferListInfo).
 *
 * NOTE(review): this excerpt omits several lines (the return type, braces,
 * the body of the first 'if', the 'switch' statement with three of its four
 * case labels, any initialisation of csumInfo/lsoInfo, the remainder of the
 * MSS expression and whatever condition guards the LSO part). The comments
 * below describe only the visible statements.
 */
684 OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr)
/* Checksum already verified, or not marked partial; the statements guarded
 * by this test are not visible here. */
686 if ((sttHdr->flags & STT_CSUM_VERIFIED)
687 || !(sttHdr->flags & STT_CSUM_PARTIAL)) {
692 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
694 csumInfo.Transmit.IpHeaderChecksum = 0;
/* The STT header's l4Offset is used directly as the TCP header offset. */
695 csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset;
696 protoType = sttHdr->flags & STT_PROTO_TYPES;
/* One IP-version flag and one L4 checksum flag are set per combination. */
698 case (STT_PROTO_IPV4 | STT_PROTO_TCP):
700 csumInfo.Transmit.IsIPv4 = 1;
701 csumInfo.Transmit.TcpChecksum = 1;
705 csumInfo.Transmit.IsIPv6 = 1;
706 csumInfo.Transmit.TcpChecksum = 1;
710 csumInfo.Transmit.IsIPv4 = 1;
711 csumInfo.Transmit.UdpChecksum = 1;
715 csumInfo.Transmit.IsIPv6 = 1;
716 csumInfo.Transmit.UdpChecksum = 1;
718 NET_BUFFER_LIST_INFO(curNbl,
719 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
/* LSOv2 request: the MSS is derived from ETH_DEFAULT_MTU (the rest of the
 * expression is not visible) and the IP version follows STT_PROTO_IPV4. */
722 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
724 lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
725 lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU
728 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
729 if (sttHdr->flags & STT_PROTO_IPV4) {
730 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
732 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
734 NET_BUFFER_LIST_INFO(curNbl,
735 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
740 * --------------------------------------------------------------------------
742 * Decapsulates an STT packet.
743 * --------------------------------------------------------------------------
/*
 * Decapsulates one received STT packet.
 *
 * Visible steps, in order:
 *   1. Validate the outer TCP checksum.
 *   2. Advance the data start past the Ethernet header, read the outer IPv4
 *      header and locate the TCP header, then advance past both.
 *   3. Compare the frame length encoded in the TCP sequence number with this
 *      packet's payload length; when they differ the packet is a fragment
 *      and is handed to OvsSttReassemble.
 *   4. Otherwise read the STT header, advance past it and partial-copy the
 *      NBL into *newNbl.
 *   5. Retreat the data start of 'curNbl' by the total advanced amount.
 *   6. Fill 'tunKey' from the outer headers and set offload information on
 *      the new NBL.
 *
 * Returns NDIS_STATUS_SUCCESS on the success path (and on a visible path in
 * the reassembly branch), NDIS_STATUS_RESOURCES when the copy fails, and
 * NDIS_STATUS_FAILURE when the retreat fails.
 *
 * NOTE(review): many lines of this function are omitted from this excerpt
 * (the return type, some declarations, braces, returns, the end of a few
 * expressions and parts of the reassembly branch). The comments below
 * describe only the visible statements.
 */
746 OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
747 PNET_BUFFER_LIST curNbl,
748 OvsIPv4TunnelKey *tunKey,
749 PNET_BUFFER_LIST *newNbl)
751 NDIS_STATUS status = NDIS_STATUS_FAILURE;
752 PNET_BUFFER curNb, newNb;
/* NOTE(review): ipBuf and sttBuf are arrays of 'char *', not of 'char', so
 * each is pointer-size times larger than the header it backs; 'char' or
 * 'UINT8' elements look intended. */
754 char *ipBuf[sizeof(IPHdr)];
757 char *sttBuf[STT_HDR_LEN];
/* advanceCnt accumulates how far the data start has been advanced so that
 * it can be retreated by the same amount later. NOTE(review): its initial
 * assignment is not visible in this excerpt -- confirm that it is set before
 * the first '+=' below. */
758 UINT32 advanceCnt, hdrLen;
759 BOOLEAN isLsoPacket = FALSE;
761 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
762 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
764 /* Validate the TCP Checksum */
765 status = OvsValidateTCPChecksum(curNbl, curNb);
766 if (status != NDIS_STATUS_SUCCESS) {
770 /* Skip Eth header */
771 hdrLen = sizeof(EthHdr);
772 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
/* ipBuf is the fallback storage handed to NdisGetDataBuffer.
 * NOTE(review): no NULL check of the result is visible before ipHdr is
 * dereferenced below. */
775 ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
779 TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
781 /* Skip IP & TCP headers */
/* NOTE(review): the next line ends in ',' rather than ';', which chains the
 * following call through the comma operator. The effect is the same, but
 * ';' looks intended. Also, the amount skipped assumes an IPv4 header
 * without options, while 'tcp' above is located using ipHdr->ihl. */
782 hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
783 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
784 advanceCnt += hdrLen;
/* The frame length is taken from the bits of the sequence number above
 * STT_SEQ_LEN_SHIFT. */
786 UINT32 seq = ntohl(tcp->seq);
787 UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
788 UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
792 /* Check if incoming packet requires reassembly */
793 if (totalLen != payloadLen) {
795 PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
799 return NDIS_STATUS_SUCCESS;
/* NOTE(review): no NULL check of sttHdr is visible before it is used
 * below. */
806 sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
807 (PVOID) &sttBuf, 1 /*no align*/, 0);
808 /* Skip stt header, DataOffset points to inner pkt now. */
809 hdrLen = STT_HDR_LEN;
810 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
811 advanceCnt += hdrLen;
813 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
814 0, FALSE /*copy NBL info*/);
817 if (*newNbl == NULL) {
818 OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
819 return NDIS_STATUS_RESOURCES;
/* Restore the data start of the original NBL to where it was on entry. */
822 status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
823 if (status != NDIS_STATUS_SUCCESS) {
824 OvsCompleteNBL(switchContext, *newNbl, TRUE);
825 return NDIS_STATUS_FAILURE;
827 newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
831 /* Initialize the tunnel key */
832 tunKey->dst = ipHdr->daddr;
833 tunKey->src = ipHdr->saddr;
834 tunKey->tunnelId = sttHdr->key;
835 tunKey->flags = OVS_TNL_F_KEY;
836 tunKey->tos = ipHdr->tos;
837 tunKey->ttl = ipHdr->ttl;
840 /* Set Checksum and LSO offload flags */
841 OvsDecapSetOffloads(*newNbl, sttHdr);
843 return NDIS_STATUS_SUCCESS;