2 * Copyright (c) 2015 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
28 #include "PacketParser.h"
/* Debug module selector for this file: OVS_DBG_MOD is defined as OVS_DBG_STT. */
35 #define OVS_DBG_MOD OVS_DBG_STT
/* Thread routine of the defragmentation cleaner; its body appears later in this file. */
39 KSTART_ROUTINE OvsSttDefragCleaner;
/* Array of list heads forming the fragment hash table; allocated in
 * OvsInitSttDefragmentation and freed in OvsCleanupSttDefragmentation. */
40 static PLIST_ENTRY OvsSttPktFragHash;
/* Spin lock acquired around fragment hash table accesses and around setting
 * the cleaner thread's exit flag. */
41 static NDIS_SPIN_LOCK OvsSttSpinLock;
/* Context shared with the cleaner thread; the fields used in this file are
 * 'event', 'exit' and 'threadObject'. */
42 static OVS_STT_THREAD_CTX sttDefragThreadCtx;
/* Forward declaration of the helper that builds the outer Ethernet/IP/TCP/STT
 * headers. NOTE(review): the return-type line is not visible in this view. */
45 OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
46 const OvsIPv4TunnelKey *tunKey,
47 const POVS_FWD_INFO fwdInfo,
48 POVS_PACKET_HDR_INFO layers,
49 POVS_SWITCH_CONTEXT switchContext,
50 PNET_BUFFER_LIST *newNbl);
53 * --------------------------------------------------------------------------
55 * Initialize STT tunnel module.
56 * --------------------------------------------------------------------------
/*
 * Allocates the per-vport STT state, zeroes it, records the TCP destination
 * port in it and attaches it to vport->priv.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES (after logging) on the allocation
 * failure path, otherwise STATUS_SUCCESS.
 *
 * NOTE(review): 'tcpDestPort' is presumably the second parameter; that part
 * of the signature is not visible in this view -- confirm.
 */
59 OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
62 POVS_STT_VPORT sttPort;
64 sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort),
67 OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT");
68 return STATUS_INSUFFICIENT_RESOURCES;
/* Start from an all-zero state, then fill in the destination port. */
71 RtlZeroMemory(sttPort, sizeof(*sttPort));
72 sttPort->dstPort = tcpDestPort;
/* The vport stores the STT state as an opaque private pointer. */
73 vport->priv = (PVOID) sttPort;
74 return STATUS_SUCCESS;
78 * --------------------------------------------------------------------------
79 * OvsCleanupSttTunnel --
80 * Cleanup STT Tunnel module.
81 * --------------------------------------------------------------------------
/*
 * Frees the STT private state attached to 'vport' with OVS_STT_POOL_TAG.
 * The guard below tests for a vport that is not of type OVS_VPORT_TYPE_STT
 * or that has no private state.
 *
 * NOTE(review): the body of the guard and any reset of vport->priv after
 * the free are not visible in this view -- confirm that priv is cleared.
 */
84 OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport)
86 if (vport->ovsType != OVS_VPORT_TYPE_STT ||
87 vport->priv == NULL) {
91 OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG);
96 * --------------------------------------------------------------------------
98 * Encapsulates a packet with an STT header.
99 * --------------------------------------------------------------------------
/*
 * Entry point for STT encapsulation: resolves forwarding information for the
 * tunnel destination (tunKey->dst) and, on success, hands the packet to
 * OvsDoEncapStt.
 *
 * When the lookup fails, an IP helper request is issued for the tunnel key
 * and NDIS_STATUS_FAILURE is returned.
 */
102 OvsEncapStt(POVS_VPORT_ENTRY vport,
103 PNET_BUFFER_LIST curNbl,
104 OvsIPv4TunnelKey *tunKey,
105 POVS_SWITCH_CONTEXT switchContext,
106 POVS_PACKET_HDR_INFO layers,
107 PNET_BUFFER_LIST *newNbl)
109 OVS_FWD_INFO fwdInfo;
/* NOTE(review): switchContext is marked unreferenced here but is passed to
 * OvsDoEncapStt below; the macro looks stale. */
112 UNREFERENCED_PARAMETER(switchContext);
113 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
114 if (status != STATUS_SUCCESS) {
/* No forwarding info available: issue a helper request with no NBL and no
 * callback arguments, then fail this packet. */
115 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
117 * XXX This case where the ARP table is not populated is
118 * currently not handled
120 return NDIS_STATUS_FAILURE;
/* Forwarding info resolved: build the encapsulated packet. */
123 status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers,
124 switchContext, newNbl);
129 * --------------------------------------------------------------------------
131 * Internal utility function which actually does the STT encap.
132 * --------------------------------------------------------------------------
/*
 * Builds the STT encapsulation for a single-NB packet.
 *
 * Visible steps: read the LSO metadata to obtain the MSS, make a partial
 * copy of the NBL with headroom for the tunnel headers, fix up the inner
 * IPv4 header, retreat the data start by the headroom, fill in the outer
 * Ethernet, IPv4, TCP and STT headers, and finally request IP/TCP checksum
 * offload (and LSO v2 when the packet exceeds the computed encapsulation
 * MSS) through the NBL info fields.
 *
 * Returns NDIS_STATUS_FAILURE for an unknown LSO type or a failed NBL copy,
 * and STATUS_SUCCESS at the end of the normal path.
 *
 * NOTE(review): several lines of this function (braces, else branches,
 * labels, gotos) are not visible in this view; the comments below describe
 * only what the visible statements do.
 */
135 OvsDoEncapStt(POVS_VPORT_ENTRY vport,
136 PNET_BUFFER_LIST curNbl,
137 const OvsIPv4TunnelKey *tunKey,
138 const POVS_FWD_INFO fwdInfo,
139 POVS_PACKET_HDR_INFO layers,
140 POVS_SWITCH_CONTEXT switchContext,
141 PNET_BUFFER_LIST *newNbl)
143 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
151 UINT32 innerFrameLen, ipTotalLen;
152 POVS_STT_VPORT vportStt;
/* Headroom to reserve in front of the inner frame for the tunnel headers. */
153 UINT32 headRoom = OvsGetSttTunHdrSize();
157 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
159 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
161 /* Verify if inner checksum is verified */
162 BOOLEAN innerChecksumVerified = FALSE;
163 BOOLEAN innerPartialChecksum = FALSE;
/* Read the LSO request attached to the NBL and take the MSS from whichever
 * LSO version it uses; any other type is rejected. */
166 lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
167 TcpLargeSendNetBufferListInfo);
169 switch (lsoInfo.Transmit.Type) {
170 case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
171 mss = lsoInfo.LsoV1Transmit.MSS;
173 case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
174 mss = lsoInfo.LsoV2Transmit.MSS;
177 OVS_LOG_ERROR("Unknown LSO transmit type:%d",
178 lsoInfo.Transmit.Type);
179 return NDIS_STATUS_FAILURE;
183 vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport);
/* Capture the checksum offload request of the original NBL, then make a
 * partial copy with 'headRoom' bytes of headroom. */
186 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
187 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
188 TcpIpChecksumNetBufferListInfo);
189 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
190 FALSE /*copy NblInfo*/);
191 if (*newNbl == NULL) {
192 OVS_LOG_ERROR("Unable to copy NBL");
193 return NDIS_STATUS_FAILURE;
/* NOTE(review): presumably curNbl is re-pointed at *newNbl on a line not
 * visible here, so the statements below operate on the copy -- confirm. */
197 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
198 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
199 /* NB Chain should be split before */
200 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
201 innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
/* Map the current MDL and step to the start of the inner frame.
 * NOTE(review): no NULL check of the mapping is visible before the offset
 * is added. */
203 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
205 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/* Inner IPv4: set tot_len to the frame length minus the Ethernet header and
 * recompute the IP header checksum over ihl * 4 bytes. */
207 if (layers->isIPv4) {
208 IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
210 ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
213 ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
/* Classify the inner L4 checksum state from the captured offload request:
 * no TCP/UDP checksum requested -> treated as verified, otherwise partial. */
219 innerPartialChecksum = TRUE;
221 if (!csumInfo.Transmit.TcpChecksum) {
222 innerChecksumVerified = TRUE;
224 innerPartialChecksum = TRUE;
227 } else if (layers->isUdp) {
228 if(!csumInfo.Transmit.UdpChecksum) {
229 innerChecksumVerified = TRUE;
231 innerPartialChecksum = TRUE;
/* Expose the reserved headroom so the tunnel headers can be written in
 * front of the inner frame. */
235 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
236 if (status != NDIS_STATUS_SUCCESS) {
237 ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
238 OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)");
243 * Make sure that the headroom for the tunnel header is continguous in
246 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
247 ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
250 buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
252 ASSERT(!"MmGetSystemAddressForMdlSafe failed");
253 OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed");
254 status = NDIS_STATUS_RESOURCES;
/* Lay the outer headers out back to back: Ethernet, IPv4, TCP, STT. */
258 buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
259 outerEthHdr = (EthHdr *)buf;
260 outerIpHdr = (IPHdr *) (outerEthHdr + 1);
261 outerTcpHdr = (TCPHdr *) (outerIpHdr + 1);
262 sttHdr = (SttHdr *) (outerTcpHdr + 1);
/* Destination and source MAC are copied with a single move; the ASSERT
 * checks that srcMacAddr immediately follows dstMacAddr in fwdInfo. */
265 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
266 (PCHAR)&fwdInfo->srcMacAddr);
267 NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr,
268 sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source);
269 outerEthHdr->Type = htons(ETH_TYPE_IPV4);
/* Outer IPv4 header. */
272 outerIpHdr->ihl = sizeof(IPHdr) >> 2;
273 outerIpHdr->version = IPPROTO_IPV4;
274 outerIpHdr->tos = tunKey->tos;
276 ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
277 outerIpHdr->tot_len = htons(ipTotalLen);
/* NOTE(review): the range check is debug-only and runs after tot_len has
 * already been written with a 16-bit conversion. */
278 ASSERT(ipTotalLen < 65536);
/* The IP id is taken from a per-vport counter advanced by the frame length. */
280 outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen);
281 outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
/* A zero TTL in the tunnel key falls back to 64. */
283 outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64;
284 outerIpHdr->protocol = IPPROTO_TCP;
285 outerIpHdr->check = 0;
286 outerIpHdr->saddr = fwdInfo->srcIpAddr;
287 outerIpHdr->daddr = tunKey->dst;
/* Outer TCP header: source port derived from the flow hash with the top bit
 * set, destination port from the vport, PSH and ACK set, maximum window. */
290 RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
291 outerTcpHdr->source = htons(tunKey->flow_hash | 32768);
292 outerTcpHdr->dest = htons(vportStt->dstPort);
/* The sequence number carries the STT frame length (header + inner frame)
 * in its shifted upper part; the shift amount is on a line not visible. */
293 outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) <<
/* The acknowledgement number comes from a per-vport counter. */
295 outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo));
296 outerTcpHdr->doff = sizeof(TCPHdr) >> 2;
297 outerTcpHdr->psh = 1;
298 outerTcpHdr->ack = 1;
299 outerTcpHdr->window = (uint16) ~0;
301 /* Calculate pseudo header chksum */
302 tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
303 ASSERT(tcpChksumLen < 65535);
304 outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
305 IPPROTO_TCP, (uint16) tcpChksumLen);
/* STT header flags: partial-checksum packets also record protocol bits, the
 * L4 offset and the MSS; verified packets only carry STT_CSUM_VERIFIED. */
310 if (innerPartialChecksum) {
311 sttHdr->flags |= STT_CSUM_PARTIAL;
312 if (layers->isIPv4) {
313 sttHdr->flags |= STT_PROTO_IPV4;
316 sttHdr->flags |= STT_PROTO_TCP;
318 sttHdr->l4Offset = (UINT8) layers->l4Offset;
319 sttHdr->mss = (UINT16) htons(mss);
320 } else if (innerChecksumVerified) {
321 sttHdr->flags = STT_CSUM_VERIFIED;
322 sttHdr->l4Offset = 0;
326 sttHdr->reserved = 0;
328 sttHdr->key = tunKey->tunnelId;
329 /* Zero out stt padding */
330 *(uint16 *)(sttHdr + 1) = 0;
332 /* Offload IP and TCP checksum */
333 ULONG tcpHeaderOffset = sizeof *outerEthHdr +
336 csumInfo.Transmit.IpHeaderChecksum = 1;
337 csumInfo.Transmit.TcpChecksum = 1;
338 csumInfo.Transmit.IsIPv4 = 1;
339 csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
340 NET_BUFFER_LIST_INFO(curNbl,
341 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
/* Request LSO v2 for the outer TCP segment when the outer IP length exceeds
 * the external MTU minus the outer IP and TCP header sizes. */
343 UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr);
344 if (ipTotalLen > encapMss) {
346 lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
347 lsoInfo.LsoV2Transmit.MSS = encapMss;
348 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
349 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
350 NET_BUFFER_LIST_INFO(curNbl,
351 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
354 return STATUS_SUCCESS;
/* NOTE(review): presumably the error-exit path (its label is not visible):
 * the copied NBL is completed before the failure status is returned. */
357 OvsCompleteNBL(switchContext, *newNbl, TRUE);
363 *----------------------------------------------------------------------------
364 * OvsValidateTCPChecksum
365 * Validate TCP checksum
366 *----------------------------------------------------------------------------
/*
 * Validates the outer TCP checksum of a received packet.
 *
 * Uses the receive checksum metadata first: a reported failure yields
 * NDIS_STATUS_INVALID_PACKET and a reported success yields
 * NDIS_STATUS_SUCCESS. Otherwise, for IPv4, the checksum is recomputed in
 * software and compared with the value carried in the packet. Non-IPv4
 * packets are logged and rejected. On a successful software check the NBL
 * is marked with TcpChecksumSucceeded.
 */
368 static __inline NDIS_STATUS
369 OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
371 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
372 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
373 TcpIpChecksumNetBufferListInfo);
375 /* Check if NIC has indicated TCP checksum failure */
376 if (csumInfo.Receive.TcpChecksumFailed) {
377 return NDIS_STATUS_INVALID_PACKET;
382 /* Check if TCP Checksum has been calculated by NIC */
383 if (csumInfo.Receive.TcpChecksumSucceeded) {
384 return NDIS_STATUS_SUCCESS;
/* NOTE(review): no NULL check of the NdisGetDataBuffer result is visible
 * before 'eth' is dereferenced; the IP and TCP headers are then reached by
 * pointer arithmetic from 'eth', which assumes they are contiguous with
 * the Ethernet header -- confirm. */
387 EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
390 if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) {
391 IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth);
/* L4 length = IP total length minus the IP header length. */
392 UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4;
393 TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + ip->ihl * 4);
/* Remember the checksum carried in the packet, then overwrite the field
 * with the pseudo-header sum so the full computation can run in place. */
394 checkSum = tcp->check;
397 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
398 IPPROTO_TCP, (UINT16)l4Payload);
399 tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload),
400 sizeof(EthHdr) + ip->ihl * 4);
/* NOTE(review): on a mismatch the packet keeps the recomputed value in
 * tcp->check; the original value is not written back here. */
401 if (checkSum != tcp->check) {
402 return NDIS_STATUS_INVALID_PACKET;
405 OVS_LOG_ERROR("IPv6 on STT is not supported");
406 return NDIS_STATUS_INVALID_PACKET;
/* Record the successful software validation in the NBL metadata. */
409 csumInfo.Receive.TcpChecksumSucceeded = 1;
410 NET_BUFFER_LIST_INFO(curNbl,
411 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
412 return NDIS_STATUS_SUCCESS;
416 *----------------------------------------------------------------------------
417 * OvsInitSttDefragmentation
418 * Initialize the components used by the stt lso defragmentation
419 *----------------------------------------------------------------------------
/*
 * Sets up STT defragmentation: allocates the spin lock, allocates and
 * initializes the STT_HASH_TABLE_SIZE list heads of the fragment hash
 * table, and starts the cleaner thread, keeping a referenced thread object
 * in sttDefragThreadCtx.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES when the hash table cannot be
 * allocated and STATUS_SUCCESS at the end of the normal path.
 */
422 OvsInitSttDefragmentation()
425 HANDLE threadHandle = NULL;
427 /* Init the sync-lock */
428 NdisAllocateSpinLock(&OvsSttSpinLock);
430 /* Init the Hash Buffer */
431 OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
432 * STT_HASH_TABLE_SIZE,
434 if (OvsSttPktFragHash == NULL) {
435 NdisFreeSpinLock(&OvsSttSpinLock);
436 return STATUS_INSUFFICIENT_RESOURCES;
439 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
440 InitializeListHead(&OvsSttPktFragHash[i]);
443 /* Init Defrag Cleanup Thread */
444 KeInitializeEvent(&sttDefragThreadCtx.event, NotificationEvent, FALSE);
445 status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
446 NULL, OvsSttDefragCleaner,
447 &sttDefragThreadCtx);
/* NOTE(review): OvsCleanupSttDefragmentation, as it appears in this file,
 * waits on and dereferences sttDefragThreadCtx.threadObject, which is only
 * assigned further down after the thread was created successfully.
 * Calling it on this failure path looks unsafe -- confirm. */
449 if (status != STATUS_SUCCESS) {
450 OvsCleanupSttDefragmentation();
/* Convert the thread handle into a referenced object pointer, then close
 * the handle. NOTE(review): the status returned by
 * ObReferenceObjectByHandle is not examined. */
454 ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
455 &sttDefragThreadCtx.threadObject, NULL);
456 ZwClose(threadHandle);
458 return STATUS_SUCCESS;
462 *----------------------------------------------------------------------------
463 * OvsCleanupSttDefragmentation
464 * Cleanup memory and thread that were spawned for STT LSO defragmentation
465 *----------------------------------------------------------------------------
/*
 * Tears down STT defragmentation: sets the exit flag and signals the
 * cleaner thread's event under the spin lock, waits without timeout for
 * the thread object to become signaled, drops the thread object reference,
 * frees the hash table if present, and frees the spin lock.
 *
 * NOTE(review): entries still linked in the hash table are not visibly
 * released here before the table itself is freed -- confirm whether they
 * are drained elsewhere.
 */
468 OvsCleanupSttDefragmentation(VOID)
470 NdisAcquireSpinLock(&OvsSttSpinLock);
471 sttDefragThreadCtx.exit = 1;
472 KeSetEvent(&sttDefragThreadCtx.event, 0, FALSE);
473 NdisReleaseSpinLock(&OvsSttSpinLock);
/* Block until the cleaner thread has terminated (NULL timeout). */
475 KeWaitForSingleObject(sttDefragThreadCtx.threadObject, Executive,
476 KernelMode, FALSE, NULL);
477 ObDereferenceObject(sttDefragThreadCtx.threadObject);
479 if (OvsSttPktFragHash) {
480 OvsFreeMemoryWithTag(OvsSttPktFragHash, OVS_STT_POOL_TAG);
481 OvsSttPktFragHash = NULL;
484 NdisFreeSpinLock(&OvsSttSpinLock);
488 *----------------------------------------------------------------------------
489 * OvsSttDefragCleaner
490 * Runs periodically and cleans up the buffer to remove expired segments
491 *----------------------------------------------------------------------------
/*
 * Thread routine that removes expired reassembly entries.
 *
 * 'data' is the POVS_STT_THREAD_CTX passed at thread creation. Each pass
 * takes the spin lock, walks every bucket of the fragment hash table and
 * frees entries whose timeout lies before the current system time, then
 * releases the lock and waits on the context event with a timeout of
 * (current time + STT_CLEANUP_INTERVAL). The routine ends by terminating
 * the system thread with STATUS_SUCCESS.
 *
 * NOTE(review): the loop construct and the exit-flag test are on lines not
 * visible in this view.
 */
494 OvsSttDefragCleaner(PVOID data)
496 POVS_STT_THREAD_CTX context = (POVS_STT_THREAD_CTX)data;
497 PLIST_ENTRY link, next;
498 POVS_STT_PKT_ENTRY entry;
499 BOOLEAN success = TRUE;
502 NdisAcquireSpinLock(&OvsSttSpinLock);
504 NdisReleaseSpinLock(&OvsSttSpinLock);
508 /* Set the timeout for the thread and cleanup */
509 UINT64 currentTime, threadSleepTimeout;
/* The address of currentTime is passed so the call can store the system
 * time into it (this token was mis-encoded in the listing). */
510 NdisGetCurrentSystemTime((LARGE_INTEGER *)&currentTime);
511 threadSleepTimeout = currentTime + STT_CLEANUP_INTERVAL;
/* Sweep every bucket; the _SAFE iterator allows unlinking the current
 * entry while walking. */
513 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
514 LIST_FORALL_SAFE(&OvsSttPktFragHash[i], link, next) {
515 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
516 if (entry->timeout < currentTime) {
/* Expired: unlink, then free the reassembly buffer and the entry. */
517 RemoveEntryList(&entry->link);
518 OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
519 OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
524 NdisReleaseSpinLock(&OvsSttSpinLock);
/* Sleep until the event is signaled or the computed time is reached. */
525 KeWaitForSingleObject(&context->event, Executive, KernelMode,
526 FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
529 PsTerminateSystemThread(STATUS_SUCCESS);
/*
 * Builds the reassembly key for a fragment from its outer headers: the
 * IPv4 source and destination addresses as found in the header, and the
 * TCP acknowledgement number converted to host byte order.
 */
532 static OVS_STT_PKT_KEY
533 OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
536 key.sAddr = ipHdr->saddr;
537 key.dAddr = ipHdr->daddr;
538 key.ackSeq = ntohl(tcpHdr->ack_seq);
/*
 * Hashes a reassembly key: the three key fields (ackSeq, dAddr, sAddr) are
 * placed in a word array and hashed with OvsJhashWords using
 * OVS_HASH_BASIS.
 */
543 OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
546 arr[0] = pktKey->ackSeq;
547 arr[1] = pktKey->dAddr;
548 arr[2] = pktKey->sAddr;
549 return OvsJhashWords(arr, 3, OVS_HASH_BASIS);
/*
 * Searches the bucket selected by (hash & STT_HASH_TABLE_MASK) for an entry
 * whose key matches 'pktKey' on all three fields.
 *
 * NOTE(review): the return statements are not visible in this view;
 * presumably the matching entry is returned, or NULL when there is none.
 * This function takes no lock itself; its caller in this file,
 * OvsSttReassemble, holds OvsSttSpinLock around the call.
 */
553 OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
556 POVS_STT_PKT_ENTRY entry;
558 LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) {
559 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
560 if (entry->ovsPktKey.ackSeq == pktKey->ackSeq &&
561 entry->ovsPktKey.dAddr == pktKey->dAddr &&
562 entry->ovsPktKey.sAddr == pktKey->sAddr) {
571 --------------------------------------------------------------------------
572 * OvsSttReassemble --
573 * Reassemble an LSO packet from multiple STT-Fragments.
575 --------------------------------------------------------------------------
/*
 * Collects one STT fragment into its reassembly entry and, once the
 * received length equals the inner packet length, builds a new NBL from
 * the reassembled buffer.
 *
 * The total STT frame length and this fragment's offset are both decoded
 * from the outer TCP sequence number. The first fragment (offset 0) also
 * carries the STT header, which is saved in the entry and copied to
 * 'newSttHdr' when reassembly completes.
 *
 * Returns the new NBL when this fragment completed the packet, otherwise
 * NULL. The whole operation runs under OvsSttSpinLock.
 *
 * NOTE(review): part of the parameter list and several control-flow lines
 * are not visible in this view.
 */
578 OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
579 PNET_BUFFER_LIST curNbl,
585 UINT32 seq = ntohl(tcp->seq);
/* NOTE(review): these values come straight from the packet; no range
 * check is visible on the subtraction of STT_HDR_LEN here or on
 * offset + fragmentLength against innerPacketLen before the copies
 * below -- confirm. */
586 UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN;
587 UINT32 segOffset = STT_SEGMENT_OFF(seq);
/* Offsets are relative to the STT frame; within the inner packet buffer the
 * STT header length is subtracted, except for the first fragment. */
588 UINT32 offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
589 UINT32 startOffset = 0;
590 OVS_STT_PKT_ENTRY *pktFragEntry;
591 PNET_BUFFER_LIST targetPNbl = NULL;
592 BOOLEAN lastPacket = FALSE;
593 PNET_BUFFER sourceNb;
594 UINT32 fragmentLength = payloadLen;
596 SttHdr *sttHdr = NULL;
597 sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
599 /* XXX optimize this lock */
600 NdisAcquireSpinLock(&OvsSttSpinLock);
602 /* If this is the first fragment, copy the STT header */
603 if (segOffset == 0) {
604 sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
605 if (sttHdr == NULL) {
/* NOTE(review): the exit taken after this log line is not visible; it must
 * release OvsSttSpinLock, which is held at this point. */
606 OVS_LOG_ERROR("Unable to retrieve STT header");
/* The first fragment's payload starts after the STT header. */
609 fragmentLength = fragmentLength - STT_HDR_LEN;
610 startOffset = startOffset + STT_HDR_LEN;
613 /* Lookup fragment */
614 OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
615 UINT32 hash = OvsSttGetPktHash(&pktKey);
616 pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
618 if (pktFragEntry == NULL) {
619 /* Create a new Packet Entry */
620 POVS_STT_PKT_ENTRY entry;
/* NOTE(review): no NULL check of this allocation is visible before the
 * entry is zeroed. */
621 entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
623 RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
625 /* Update Key, timestamp and recvdLen */
626 NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
628 entry->recvdLen = fragmentLength;
/* The address of currentTime is passed so the call can store the system
 * time into it (this token was mis-encoded in the listing). */
631 NdisGetCurrentSystemTime((LARGE_INTEGER *) &currentTime);
632 entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
634 if (segOffset == 0) {
635 entry->sttHdr = *sttHdr;
638 /* Copy the data from Source to new buffer */
/* NOTE(review): no NULL check of the packet buffer allocation is visible
 * before it is written to. */
639 entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
641 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
642 entry->packetBuf + offset) == NULL) {
643 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
647 /* Insert the entry in the Static Buffer */
648 InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
651 /* Add to received length to identify if this is the last fragment */
652 pktFragEntry->recvdLen += fragmentLength;
653 lastPacket = (pktFragEntry->recvdLen == innerPacketLen);
655 if (segOffset == 0) {
656 pktFragEntry->sttHdr = *sttHdr;
659 /* Copy the fragment data from Source to existing buffer */
660 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
661 pktFragEntry->packetBuf + offset) == NULL) {
662 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
669 /* Retrieve the original STT header */
670 NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
671 targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry->packetBuf,
674 /* Delete this entry and free up the memory */
675 RemoveEntryList(&pktFragEntry->link);
676 OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
677 OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
680 NdisReleaseSpinLock(&OvsSttSpinLock);
681 return lastPacket ? targetPNbl : NULL;
/*
 * Translates the checksum/protocol flags of a decapsulated STT header into
 * NDIS offload metadata on 'curNbl'.
 *
 * The guard at the top tests for a header that is marked verified or that
 * is not marked partial. Otherwise a transmit checksum request is built
 * from the STT protocol bits and the L4 offset, and an LSO v2 request is
 * written with the same L4 offset.
 *
 * NOTE(review): the guard's body, most 'case' labels of the switch and the
 * initialization of csumInfo/lsoInfo are on lines not visible in this view.
 */
685 OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr)
687 if ((sttHdr->flags & STT_CSUM_VERIFIED)
688 || !(sttHdr->flags & STT_CSUM_PARTIAL)) {
693 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
/* The IP header checksum is not requested; the L4 header offset is taken
 * from the STT header. */
695 csumInfo.Transmit.IpHeaderChecksum = 0;
696 csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset;
697 protoType = sttHdr->flags & STT_PROTO_TYPES;
/* Each visible pair below sets one IP version bit and one L4 checksum bit. */
699 case (STT_PROTO_IPV4 | STT_PROTO_TCP):
701 csumInfo.Transmit.IsIPv4 = 1;
702 csumInfo.Transmit.TcpChecksum = 1;
706 csumInfo.Transmit.IsIPv6 = 1;
707 csumInfo.Transmit.TcpChecksum = 1;
711 csumInfo.Transmit.IsIPv4 = 1;
712 csumInfo.Transmit.UdpChecksum = 1;
716 csumInfo.Transmit.IsIPv6 = 1;
717 csumInfo.Transmit.UdpChecksum = 1;
719 NET_BUFFER_LIST_INFO(curNbl,
720 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
/* LSO v2 request: the MSS expression starts from ETH_DEFAULT_MTU (the rest
 * of the expression is not visible); the IP version follows
 * STT_PROTO_IPV4. */
723 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
725 lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
726 lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU
729 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
730 if (sttHdr->flags & STT_PROTO_IPV4) {
731 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
733 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
735 NET_BUFFER_LIST_INFO(curNbl,
736 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
741 * --------------------------------------------------------------------------
743 * Decapsulates an STT packet.
744 * --------------------------------------------------------------------------
/*
 * Decapsulates a received STT packet.
 *
 * Visible steps: validate the outer TCP checksum, advance past the
 * Ethernet, IP and TCP headers, compare the STT frame length encoded in
 * the TCP sequence number with the payload length to decide whether
 * reassembly is needed, read and skip the STT header, clone the NBL so the
 * clone starts at the inner packet, retreat the original NB by the total
 * advanced byte count, fill in the tunnel key from the outer IP and STT
 * headers, and set offload metadata on the clone.
 *
 * Returns NDIS_STATUS_RESOURCES when the clone cannot be allocated,
 * NDIS_STATUS_FAILURE when the retreat fails, NDIS_STATUS_SUCCESS
 * otherwise (including a visible early success return inside the
 * reassembly branch).
 *
 * NOTE(review): many control-flow lines are not visible in this view.
 */
747 OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
748 PNET_BUFFER_LIST curNbl,
749 OvsIPv4TunnelKey *tunKey,
750 PNET_BUFFER_LIST *newNbl)
752 NDIS_STATUS status = NDIS_STATUS_FAILURE;
753 PNET_BUFFER curNb, newNb;
/* NOTE(review): ipBuf and sttBuf are declared as arrays of 'char *', not
 * 'char'. They are only passed as scratch storage to NdisGetDataBuffer, so
 * this presumably was meant to be a byte array; as written it is
 * pointer-size times larger than needed -- confirm. */
755 char *ipBuf[sizeof(IPHdr)];
758 char *sttBuf[STT_HDR_LEN];
759 UINT32 advanceCnt, hdrLen;
760 BOOLEAN isLsoPacket = FALSE;
762 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
763 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
765 /* Validate the TCP Checksum */
766 status = OvsValidateTCPChecksum(curNbl, curNb);
767 if (status != NDIS_STATUS_SUCCESS) {
771 /* Skip Eth header */
772 hdrLen = sizeof(EthHdr);
773 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
776 ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
/* The TCP header is located using the IP header length field (ihl * 4). */
780 TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
782 /* Skip IP & TCP headers */
/* NOTE(review): the next line ends with a comma, so it forms a single comma
 * expression with the following call; the effect is the same as with a
 * semicolon, but it looks like a typo. The skip uses fixed header sizes,
 * whereas the TCP pointer above uses ihl. */
783 hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
784 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
785 advanceCnt += hdrLen;
/* The upper part of the sequence number carries the STT frame length. */
787 UINT32 seq = ntohl(tcp->seq);
788 UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
789 UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
793 /* Check if incoming packet requires reassembly */
794 if (totalLen != payloadLen) {
796 PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
800 return NDIS_STATUS_SUCCESS;
/* NOTE(review): no NULL check of this NdisGetDataBuffer result is visible
 * before sttHdr is used further down. */
807 sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
808 (PVOID) &sttBuf, 1 /*no align*/, 0);
809 /* Skip stt header, DataOffset points to inner pkt now. */
810 hdrLen = STT_HDR_LEN;
811 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
812 advanceCnt += hdrLen;
/* Clone while the data start points at the inner packet. */
814 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
815 0, FALSE /*copy NBL info*/);
818 if (*newNbl == NULL) {
819 OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
820 return NDIS_STATUS_RESOURCES;
/* Undo all advances on the original NB; on failure the clone is completed
 * and the packet is failed. */
823 status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
824 if (status != NDIS_STATUS_SUCCESS) {
825 OvsCompleteNBL(switchContext, *newNbl, TRUE);
826 return NDIS_STATUS_FAILURE;
828 newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
832 /* Initialize the tunnel key */
833 tunKey->dst = ipHdr->daddr;
834 tunKey->src = ipHdr->saddr;
835 tunKey->tunnelId = sttHdr->key;
836 tunKey->flags = OVS_TNL_F_KEY;
837 tunKey->tos = ipHdr->tos;
838 tunKey->ttl = ipHdr->ttl;
841 /* Set Checksum and LSO offload flags */
842 OvsDecapSetOffloads(*newNbl, sttHdr);
844 return NDIS_STATUS_SUCCESS;