2 * Copyright (c) 2015, 2016 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
27 #include "PacketParser.h"
37 #define OVS_DBG_MOD OVS_DBG_STT
/*
 * File-scope state used by STT fragment reassembly.
 * OvsSttDefragCleaner is the routine handed to PsCreateSystemThread() in
 * OvsInitSttDefragmentation().
 */
40 KSTART_ROUTINE OvsSttDefragCleaner;
/* Array of STT_HASH_TABLE_SIZE list heads; buckets chain OVS_STT_PKT_ENTRY. */
41 static PLIST_ENTRY OvsSttPktFragHash;
/* Held while bucket entries are looked up, inserted or removed. */
42 static NDIS_SPIN_LOCK OvsSttSpinLock;
/* Event, exit flag and thread object used to run and stop the cleaner. */
43 static OVS_STT_THREAD_CTX sttDefragThreadCtx;
/*
 * Forward declaration: OvsEncapStt() calls OvsDoEncapStt() before its
 * definition appears further down in this file.
 */
46 OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
47 const OvsIPv4TunnelKey *tunKey,
48 const POVS_FWD_INFO fwdInfo,
49 POVS_PACKET_HDR_INFO layers,
50 POVS_SWITCH_CONTEXT switchContext,
51 PNET_BUFFER_LIST *newNbl);
54 * --------------------------------------------------------------------------
56 * Initialize STT tunnel module.
57 * --------------------------------------------------------------------------
/*
 * Allocates the per-vport STT state, zeroes it, records the TCP destination
 * port and attaches the state to vport->priv.
 *
 * Returns STATUS_SUCCESS once vport->priv is set, or
 * STATUS_INSUFFICIENT_RESOURCES after logging an allocation error.
 * NOTE(review): the declaration of tcpDestPort and the condition guarding
 * the error return are not visible in this excerpt - presumably a second
 * parameter and a NULL check on sttPort; confirm against the full file.
 */
60 OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
63 POVS_STT_VPORT sttPort;
65 sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort),
68 OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT");
69 return STATUS_INSUFFICIENT_RESOURCES;
/* Start from an all-zero state so every field not set below is 0. */
72 RtlZeroMemory(sttPort, sizeof(*sttPort));
73 sttPort->dstPort = tcpDestPort;
/* Released again by OvsCleanupSttTunnel(). */
74 vport->priv = (PVOID) sttPort;
75 return STATUS_SUCCESS;
79 * --------------------------------------------------------------------------
80 * OvsCleanupSttTunnel --
81 * Cleanup STT Tunnel module.
82 * --------------------------------------------------------------------------
/*
 * Releases the STT state that OvsInitSttTunnel() stored in vport->priv.
 * The guard below singles out vports that are not of type STT or that have
 * no private state.
 * NOTE(review): the body of that guard and anything following the free are
 * not visible in this excerpt - presumably an early return and a reset of
 * vport->priv; confirm against the full file.
 */
85 OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport)
87 if (vport->ovsType != OVS_VPORT_TYPE_STT ||
88 vport->priv == NULL) {
/* Same pool tag that the fragment table in this file is freed with. */
92 OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG);
97 * --------------------------------------------------------------------------
99 * Encapsulates a packet with an STT header.
100 * --------------------------------------------------------------------------
/*
 * Entry point for STT encapsulation.
 *
 * Looks up forwarding information for the tunnel destination
 * (tunKey->dst). If the lookup fails, a request is handed to the IP helper
 * and NDIS_STATUS_FAILURE is returned. Otherwise the work is delegated to
 * OvsDoEncapStt(), which produces the encapsulated packet in *newNbl.
 */
103 OvsEncapStt(POVS_VPORT_ENTRY vport,
104 PNET_BUFFER_LIST curNbl,
105 OvsIPv4TunnelKey *tunKey,
106 POVS_SWITCH_CONTEXT switchContext,
107 POVS_PACKET_HDR_INFO layers,
108 PNET_BUFFER_LIST *newNbl)
110 OVS_FWD_INFO fwdInfo;
/*
 * NOTE(review): switchContext is marked unreferenced here but is passed to
 * OvsDoEncapStt() below, so this macro looks stale.
 */
113 UNREFERENCED_PARAMETER(switchContext);
114 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
115 if (status != STATUS_SUCCESS) {
/* No forwarding entry: ask the IP helper, then fail this packet. */
116 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
118 * XXX This case where the ARP table is not populated is
119 * currently not handled
121 return NDIS_STATUS_FAILURE;
124 status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers,
125 switchContext, newNbl);
130 * --------------------------------------------------------------------------
132 * Internal utility function which actually does the STT encap.
133 * --------------------------------------------------------------------------
/*
 * Builds the outer Ethernet/IPv4/TCP/STT headers in front of the inner
 * frame.
 *
 * Visible steps:
 *   1. Copy the NBL with headRoom bytes reserved (OvsPartialCopyNBL).
 *   2. Fix up the inner IPv4 total length and header checksum, and classify
 *      the inner L4 checksum state from the NBL's checksum info.
 *   3. Retreat the data start by headRoom and fill in the outer headers.
 *   4. Request IP/TCP checksum offload, and LSO when the packet is larger
 *      than encapMss.
 *
 * Returns STATUS_SUCCESS on the success path shown; the error path
 * completes *newNbl via OvsCompleteNBL().
 * NOTE(review): many lines of this function (declarations, several
 * conditions, the error labels) are not visible in this excerpt.
 */
136 OvsDoEncapStt(POVS_VPORT_ENTRY vport,
137 PNET_BUFFER_LIST curNbl,
138 const OvsIPv4TunnelKey *tunKey,
139 const POVS_FWD_INFO fwdInfo,
140 POVS_PACKET_HDR_INFO layers,
141 POVS_SWITCH_CONTEXT switchContext,
142 PNET_BUFFER_LIST *newNbl)
144 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
152 UINT32 innerFrameLen, ipTotalLen;
153 POVS_STT_VPORT vportStt;
/* Bytes reserved in front of the inner frame for the tunnel headers. */
154 UINT32 headRoom = OvsGetSttTunHdrSize();
158 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
160 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
162 /* Verify if inner checksum is verified */
163 BOOLEAN innerChecksumVerified = FALSE;
164 BOOLEAN innerPartialChecksum = FALSE;
167 mss = OVSGetTcpMSS(curNbl);
170 vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport);
/* Checksum info as set for the inner packet; overwritten further down. */
173 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
174 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
175 TcpIpChecksumNetBufferListInfo);
176 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
177 FALSE /*copy NblInfo*/);
178 if (*newNbl == NULL) {
179 OVS_LOG_ERROR("Unable to copy NBL");
180 return NDIS_STATUS_FAILURE;
/*
 * NOTE(review): curNb is re-read from curNbl here; presumably curNbl was
 * repointed at *newNbl on a line not visible in this excerpt - confirm.
 */
184 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
185 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
186 /* NB Chain should be split before */
187 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
188 innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
190 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
192 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
194 if (layers->isIPv4) {
195 IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
/* Inner IPv4 length becomes the whole inner frame minus its Eth header. */
197 ip->tot_len = htons(innerFrameLen - sizeof(EthHdr));
/* Recompute the inner IPv4 header checksum over ihl * 4 bytes. */
200 ip->check = IPChecksum((UINT8 *)ip, ip->ihl * 4, 0);
/*
 * Classify the inner L4 checksum: when the matching Transmit.*Checksum bit
 * is clear the checksum is treated as verified, otherwise as partial. Some
 * of the surrounding conditions are not visible in this excerpt.
 */
206 innerPartialChecksum = TRUE;
208 if (!csumInfo.Transmit.TcpChecksum) {
209 innerChecksumVerified = TRUE;
211 innerPartialChecksum = TRUE;
214 } else if (layers->isUdp) {
215 if(!csumInfo.Transmit.UdpChecksum) {
216 innerChecksumVerified = TRUE;
218 innerPartialChecksum = TRUE;
/* Expose the reserved headroom so the outer headers can be written. */
222 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
223 if (status != NDIS_STATUS_SUCCESS) {
224 ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
225 OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)");
230 * Make sure that the headroom for the tunnel header is continguous in
233 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
234 ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
237 buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
239 ASSERT(!"MmGetSystemAddressForMdlSafe failed");
240 OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed");
241 status = NDIS_STATUS_RESOURCES;
245 buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/* Outer headers are laid out back to back: Eth, IPv4, TCP, STT. */
246 outerEthHdr = (EthHdr *)buf;
247 outerIpHdr = (IPHdr *) (outerEthHdr + 1);
248 outerTcpHdr = (TCPHdr *) (outerIpHdr + 1);
249 sttHdr = (SttHdr *) (outerTcpHdr + 1);
/*
 * Both MAC addresses are copied with a single move, which relies on
 * dstMacAddr being immediately followed by srcMacAddr (asserted first).
 */
252 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
253 (PCHAR)&fwdInfo->srcMacAddr);
254 NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr,
255 sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source);
256 outerEthHdr->Type = htons(ETH_TYPE_IPV4);
259 outerIpHdr->ihl = sizeof(IPHdr) >> 2;
/*
 * NOTE(review): the version field is set from an IP protocol-number
 * constant; this is only correct if IPPROTO_IPV4 equals 4 - confirm.
 */
260 outerIpHdr->version = IPPROTO_IPV4;
261 outerIpHdr->tos = tunKey->tos;
263 ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
/*
 * NOTE(review): the range check below is an ASSERT placed after the value
 * has already been narrowed by htons(); a build without assertions would
 * store a truncated length.
 */
264 outerIpHdr->tot_len = htons(ipTotalLen);
265 ASSERT(ipTotalLen < 65536);
/* IP id comes from a per-vport counter advanced by innerFrameLen. */
267 outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen);
268 outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
/* A zero TTL in the tunnel key falls back to 64. */
270 outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64;
271 outerIpHdr->protocol = IPPROTO_TCP;
272 outerIpHdr->check = 0;
273 outerIpHdr->saddr = fwdInfo->srcIpAddr;
274 outerIpHdr->daddr = tunKey->dst;
277 RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
/* Source port is the flow hash with the top bit forced on (>= 32768). */
278 outerTcpHdr->source = htons(tunKey->flow_hash | 32768);
279 outerTcpHdr->dest = htons(vportStt->dstPort);
/*
 * The sequence number carries the STT frame length (header + inner frame)
 * shifted left; the shift amount is on a line not visible here. The
 * receive side recovers it with seq >> STT_SEQ_LEN_SHIFT.
 */
280 outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) <<
/* ack_seq is a per-vport counter; reassembly keys fragments on it. */
282 outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo));
283 outerTcpHdr->doff = sizeof(TCPHdr) >> 2;
284 outerTcpHdr->psh = 1;
285 outerTcpHdr->ack = 1;
286 outerTcpHdr->window = (uint16) ~0;
288 /* Calculate pseudo header chksum */
289 tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
290 ASSERT(tcpChksumLen < 65535);
291 outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
292 IPPROTO_TCP, (uint16) tcpChksumLen);
/*
 * STT flags describe the inner checksum state to the receiver.
 * NOTE(review): the partial branch ORs into sttHdr->flags; the line that
 * initialises flags beforehand is not visible in this excerpt - confirm.
 */
297 if (innerPartialChecksum) {
298 sttHdr->flags |= STT_CSUM_PARTIAL;
299 if (layers->isIPv4) {
300 sttHdr->flags |= STT_PROTO_IPV4;
303 sttHdr->flags |= STT_PROTO_TCP;
305 sttHdr->l4Offset = (UINT8) layers->l4Offset;
306 sttHdr->mss = (UINT16) htons(mss);
307 } else if (innerChecksumVerified) {
308 sttHdr->flags = STT_CSUM_VERIFIED;
309 sttHdr->l4Offset = 0;
313 sttHdr->reserved = 0;
315 sttHdr->key = tunKey->tunnelId;
316 /* Zero out stt padding */
317 *(uint16 *)(sttHdr + 1) = 0;
319 /* Offload IP and TCP checksum */
320 ULONG tcpHeaderOffset = sizeof *outerEthHdr +
/* csumInfo is reused here to describe the outer headers. */
323 csumInfo.Transmit.IpHeaderChecksum = 1;
324 csumInfo.Transmit.TcpChecksum = 1;
325 csumInfo.Transmit.IsIPv4 = 1;
326 csumInfo.Transmit.TcpHeaderOffset = tcpHeaderOffset;
327 NET_BUFFER_LIST_INFO(curNbl,
328 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
/*
 * NOTE(review): encapMss excludes the outer IP and TCP headers while
 * ipTotalLen includes them; confirm that this comparison is intended.
 * NOTE(review): lsoInfo is declared without an initialiser on the visible
 * lines; confirm that lsoInfo.Value is cleared before the fields are set.
 */
330 UINT32 encapMss = OvsGetExternalMtu(switchContext) - sizeof(IPHdr) - sizeof(TCPHdr);
331 if (ipTotalLen > encapMss) {
333 lsoInfo.LsoV2Transmit.TcpHeaderOffset = tcpHeaderOffset;
334 lsoInfo.LsoV2Transmit.MSS = encapMss;
335 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
336 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
337 NET_BUFFER_LIST_INFO(curNbl,
338 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
341 return STATUS_SUCCESS;
/* Error path: hand the copied NBL back so it is not leaked. */
344 OvsCompleteNBL(switchContext, *newNbl, TRUE);
350 *----------------------------------------------------------------------------
351 * OvsValidateTCPChecksum
352 * Validate TCP checksum
353 *----------------------------------------------------------------------------
/*
 * Validates the TCP checksum of the outer (tunnel) packet in curNb.
 *
 * Order of checks:
 *   - NIC reported a TCP checksum failure -> NDIS_STATUS_INVALID_PACKET.
 *   - NIC reported success                -> NDIS_STATUS_SUCCESS.
 *   - otherwise the checksum is recomputed in software for IPv4; any other
 *     EtherType is logged and rejected with NDIS_STATUS_INVALID_PACKET.
 *
 * On software success, TcpChecksumSucceeded is written back into the NBL's
 * checksum info before returning NDIS_STATUS_SUCCESS.
 */
355 static __inline NDIS_STATUS
356 OvsValidateTCPChecksum(PNET_BUFFER_LIST curNbl, PNET_BUFFER curNb)
358 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
359 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
360 TcpIpChecksumNetBufferListInfo);
362 /* Check if NIC has indicated TCP checksum failure */
363 if (csumInfo.Receive.TcpChecksumFailed) {
364 return NDIS_STATUS_INVALID_PACKET;
369 /* Check if TCP Checksum has been calculated by NIC */
370 if (csumInfo.Receive.TcpChecksumSucceeded) {
371 return NDIS_STATUS_SUCCESS;
/*
 * NOTE(review): only sizeof(EthHdr) bytes are requested here, yet the IP
 * and TCP headers are then read through pointers derived from eth. That
 * assumes the headers are contiguous in memory, and no NULL check on eth
 * is visible in this excerpt - confirm both.
 */
374 EthHdr *eth = (EthHdr *)NdisGetDataBuffer(curNb, sizeof(EthHdr),
377 if (eth->Type == ntohs(NDIS_ETH_TYPE_IPV4)) {
378 IPHdr *ip = (IPHdr *)((PCHAR)eth + sizeof *eth);
/* L4 length = IP total length minus the IP header (ihl is in 32-bit words). */
379 UINT32 l4Payload = ntohs(ip->tot_len) - ip->ihl * 4;
380 TCPHdr *tcp = (TCPHdr *)((PCHAR)ip + ip->ihl * 4);
/* Remember the checksum carried by the packet before overwriting it. */
381 checkSum = tcp->check;
/*
 * The check field is seeded with the pseudo-header sum and then replaced by
 * the checksum computed over the L4 bytes, so the packet's field ends up
 * holding the computed value rather than the received one.
 */
384 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
385 IPPROTO_TCP, (UINT16)l4Payload);
386 tcp->check = CalculateChecksumNB(curNb, (UINT16)(l4Payload),
387 sizeof(EthHdr) + ip->ihl * 4);
388 if (checkSum != tcp->check) {
389 return NDIS_STATUS_INVALID_PACKET;
392 OVS_LOG_ERROR("IPv6 on STT is not supported");
393 return NDIS_STATUS_INVALID_PACKET;
/* Record the result in the NBL's checksum info. */
396 csumInfo.Receive.TcpChecksumSucceeded = 1;
397 NET_BUFFER_LIST_INFO(curNbl,
398 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
399 return NDIS_STATUS_SUCCESS;
403 *----------------------------------------------------------------------------
404 * OvsInitSttDefragmentation
405 * Initialize the components used by the stt lso defragmentation
406 *----------------------------------------------------------------------------
/*
 * Sets up STT reassembly: the spin lock, the fragment hash table
 * (STT_HASH_TABLE_SIZE list heads) and the cleaner thread running
 * OvsSttDefragCleaner().
 *
 * Returns STATUS_SUCCESS, or STATUS_INSUFFICIENT_RESOURCES when the hash
 * table cannot be allocated (the lock is freed again in that case).
 */
409 OvsInitSttDefragmentation()
412 HANDLE threadHandle = NULL;
414 /* Init the sync-lock */
415 NdisAllocateSpinLock(&OvsSttSpinLock);
417 /* Init the Hash Buffer */
418 OvsSttPktFragHash = OvsAllocateMemoryWithTag(sizeof(LIST_ENTRY)
419 * STT_HASH_TABLE_SIZE,
421 if (OvsSttPktFragHash == NULL) {
422 NdisFreeSpinLock(&OvsSttSpinLock);
423 return STATUS_INSUFFICIENT_RESOURCES;
426 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
427 InitializeListHead(&OvsSttPktFragHash[i]);
430 /* Init Defrag Cleanup Thread */
431 KeInitializeEvent(&sttDefragThreadCtx.event, NotificationEvent, FALSE);
432 status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, NULL, NULL,
433 NULL, OvsSttDefragCleaner,
434 &sttDefragThreadCtx);
/*
 * NOTE(review): on thread-creation failure this calls
 * OvsCleanupSttDefragmentation(), which waits on and dereferences
 * sttDefragThreadCtx.threadObject. That field is only filled in by the
 * ObReferenceObjectByHandle() call below, which has not run on this path.
 */
436 if (status != STATUS_SUCCESS) {
437 OvsCleanupSttDefragmentation();
/*
 * Convert the handle into a referenced thread object, then drop the handle.
 * NOTE(review): the return value of ObReferenceObjectByHandle() is ignored.
 */
441 ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, KernelMode,
442 &sttDefragThreadCtx.threadObject, NULL);
443 ZwClose(threadHandle);
445 return STATUS_SUCCESS;
449 *----------------------------------------------------------------------------
450 * OvsCleanupSttDefragmentation
451 * Cleanup memory and thread that were spawned for STT LSO defragmentation
452 *----------------------------------------------------------------------------
/*
 * Stops the cleaner thread and releases the reassembly resources:
 * sets the exit flag and signals the thread's event under the spin lock,
 * waits for the thread object, drops the reference on it, frees the hash
 * table and finally frees the spin lock.
 */
455 OvsCleanupSttDefragmentation(VOID)
457 NdisAcquireSpinLock(&OvsSttSpinLock);
458 sttDefragThreadCtx.exit = 1;
459 KeSetEvent(&sttDefragThreadCtx.event, 0, FALSE);
460 NdisReleaseSpinLock(&OvsSttSpinLock);
/* Block until the cleaner thread has terminated (no timeout). */
462 KeWaitForSingleObject(sttDefragThreadCtx.threadObject, Executive,
463 KernelMode, FALSE, NULL);
464 ObDereferenceObject(sttDefragThreadCtx.threadObject);
/*
 * NOTE(review): only the bucket array is freed on the visible lines;
 * entries still linked into the buckets (and their packetBuf) do not
 * appear to be released here - confirm against the full file.
 */
466 if (OvsSttPktFragHash) {
467 OvsFreeMemoryWithTag(OvsSttPktFragHash, OVS_STT_POOL_TAG);
468 OvsSttPktFragHash = NULL;
471 NdisFreeSpinLock(&OvsSttSpinLock);
475 *----------------------------------------------------------------------------
476 * OvsSttDefragCleaner
477 * Runs periodically and cleans up the buffer to remove expired segments
478 *----------------------------------------------------------------------------
/*
 * Thread routine for the reassembly cleaner.
 *
 * Each pass walks every hash bucket under OvsSttSpinLock, unlinks and frees
 * entries whose timeout lies before the current system time, then waits on
 * the context's event with a timeout of currentTime + STT_CLEANUP_INTERVAL.
 * The thread ends with PsTerminateSystemThread(STATUS_SUCCESS).
 * NOTE(review): the loop construct and the exit-flag test are not visible
 * in this excerpt; presumably the first acquire/release pair below brackets
 * a check of context->exit - confirm against the full file.
 */
481 OvsSttDefragCleaner(PVOID data)
483 POVS_STT_THREAD_CTX context = (POVS_STT_THREAD_CTX)data;
484 PLIST_ENTRY link, next;
485 POVS_STT_PKT_ENTRY entry;
/* NOTE(review): success is not used on any visible line. */
486 BOOLEAN success = TRUE;
489 NdisAcquireSpinLock(&OvsSttSpinLock);
491 NdisReleaseSpinLock(&OvsSttSpinLock);
495 /* Set the timeout for the thread and cleanup */
496 UINT64 currentTime, threadSleepTimeout;
/*
 * NOTE(review): the argument below is mis-encoded in this copy of the file;
 * given the declaration above and the use on the next line it is presumably
 * the address of currentTime.
 */
497 NdisGetCurrentSystemTime((LARGE_INTEGER *)¤tTime);
498 threadSleepTimeout = currentTime + STT_CLEANUP_INTERVAL;
500 for (int i = 0; i < STT_HASH_TABLE_SIZE; i++) {
/* The _SAFE iterator keeps next so the current link may be removed. */
501 LIST_FORALL_SAFE(&OvsSttPktFragHash[i], link, next) {
502 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
503 if (entry->timeout < currentTime) {
/* Expired: unlink, then free the reassembly buffer and the entry. */
504 RemoveEntryList(&entry->link);
505 OvsFreeMemoryWithTag(entry->packetBuf, OVS_STT_POOL_TAG);
506 OvsFreeMemoryWithTag(entry, OVS_STT_POOL_TAG);
511 NdisReleaseSpinLock(&OvsSttSpinLock);
/*
 * Sleep until the event is signalled (see OvsCleanupSttDefragmentation) or
 * the timeout passes. The timeout value is a positive system time;
 * presumably it is meant as an absolute deadline - confirm against the
 * KeWaitForSingleObject documentation.
 */
512 KeWaitForSingleObject(&context->event, Executive, KernelMode,
513 FALSE, (LARGE_INTEGER *)&threadSleepTimeout);
516 PsTerminateSystemThread(STATUS_SUCCESS);
/*
 * Builds the key that identifies which STT frame a fragment belongs to:
 * outer source address, outer destination address and the TCP ack number
 * converted to host byte order. The addresses are copied as found in the
 * IP header.
 * NOTE(review): the declaration of key and the return statement are not
 * visible in this excerpt.
 */
519 static OVS_STT_PKT_KEY
520 OvsGeneratePacketKey(IPHdr *ipHdr, TCPHdr *tcpHdr)
523 key.sAddr = ipHdr->saddr;
524 key.dAddr = ipHdr->daddr;
525 key.ackSeq = ntohl(tcpHdr->ack_seq);
/*
 * Hashes a fragment key: the three key fields are packed into a 3-word
 * array and hashed with OvsJhashWords() using OVS_HASH_BASIS.
 * Callers mask the result with STT_HASH_TABLE_MASK to pick a bucket.
 * NOTE(review): the declaration of arr is not visible in this excerpt.
 */
530 OvsSttGetPktHash(OVS_STT_PKT_KEY *pktKey)
533 arr[0] = pktKey->ackSeq;
534 arr[1] = pktKey->dAddr;
535 arr[2] = pktKey->sAddr;
536 return OvsJhashWords(arr, 3, OVS_HASH_BASIS);
/*
 * Searches the bucket selected by (hash & STT_HASH_TABLE_MASK) for an entry
 * whose key matches pktKey in all three fields (ackSeq, dAddr, sAddr).
 * The visible caller, OvsSttReassemble(), holds OvsSttSpinLock while
 * calling this function and treats a NULL result as "no entry yet".
 * NOTE(review): the return statements are not visible in this excerpt.
 */
540 OvsLookupPktFrag(OVS_STT_PKT_KEY *pktKey, UINT32 hash)
543 POVS_STT_PKT_ENTRY entry;
545 LIST_FORALL(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK], link) {
546 entry = CONTAINING_RECORD(link, OVS_STT_PKT_ENTRY, link);
547 if (entry->ovsPktKey.ackSeq == pktKey->ackSeq &&
548 entry->ovsPktKey.dAddr == pktKey->dAddr &&
549 entry->ovsPktKey.sAddr == pktKey->sAddr) {
558 --------------------------------------------------------------------------
559 * OvsSttReassemble --
560 * Reassemble an LSO packet from multiple STT-Fragments.
562 --------------------------------------------------------------------------
/*
 * Adds one STT fragment to its reassembly entry and, once all bytes of the
 * inner packet have arrived, returns a new NBL built from the reassembled
 * buffer. Returns NULL while the packet is still incomplete.
 *
 * The TCP sequence number encodes both the total STT frame length
 * (seq >> STT_SEQ_LEN_SHIFT) and this fragment's offset
 * (STT_SEGMENT_OFF(seq)). The first fragment (offset 0) also carries the
 * STT header, which is saved in the entry and later copied to newSttHdr.
 * All table work happens under OvsSttSpinLock.
 *
 * NOTE(review): the parameters ipHdr, tcp, newSttHdr and payloadLen are
 * used below but declared on lines not visible in this excerpt.
 */
565 OvsSttReassemble(POVS_SWITCH_CONTEXT switchContext,
566 PNET_BUFFER_LIST curNbl,
572 UINT32 seq = ntohl(tcp->seq);
/*
 * NOTE(review): seq comes from the wire; if the encoded length is smaller
 * than STT_HDR_LEN this unsigned subtraction wraps around.
 */
573 UINT32 innerPacketLen = (seq >> STT_SEQ_LEN_SHIFT) - STT_HDR_LEN;
574 UINT32 segOffset = STT_SEGMENT_OFF(seq);
/* Offset into the inner packet: later fragments discount the STT header. */
575 UINT32 offset = segOffset == 0 ? 0 : segOffset - STT_HDR_LEN;
576 UINT32 startOffset = 0;
577 OVS_STT_PKT_ENTRY *pktFragEntry;
578 PNET_BUFFER_LIST targetPNbl = NULL;
579 BOOLEAN lastPacket = FALSE;
580 PNET_BUFFER sourceNb;
581 UINT32 fragmentLength = payloadLen;
583 SttHdr *sttHdr = NULL;
584 sourceNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
586 /* XXX optimize this lock */
587 NdisAcquireSpinLock(&OvsSttSpinLock);
589 /* If this is the first fragment, copy the STT header */
590 if (segOffset == 0) {
591 sttHdr = NdisGetDataBuffer(sourceNb, sizeof(SttHdr), &stt, 1, 0);
592 if (sttHdr == NULL) {
593 OVS_LOG_ERROR("Unable to retrieve STT header");
/* The first fragment's payload starts after the STT header. */
596 fragmentLength = fragmentLength - STT_HDR_LEN;
597 startOffset = startOffset + STT_HDR_LEN;
600 /* Lookup fragment */
601 OVS_STT_PKT_KEY pktKey = OvsGeneratePacketKey(ipHdr, tcp);
602 UINT32 hash = OvsSttGetPktHash(&pktKey);
603 pktFragEntry = OvsLookupPktFrag(&pktKey, hash);
605 if (pktFragEntry == NULL) {
606 /* Create a new Packet Entry */
607 POVS_STT_PKT_ENTRY entry;
/*
 * NOTE(review): no NULL check on this allocation is visible before the
 * pointer is passed to RtlZeroMemory().
 */
608 entry = OvsAllocateMemoryWithTag(sizeof(OVS_STT_PKT_ENTRY),
610 RtlZeroMemory(entry, sizeof (OVS_STT_PKT_ENTRY));
612 /* Update Key, timestamp and recvdLen */
613 NdisMoveMemory(&entry->ovsPktKey, &pktKey, sizeof (OVS_STT_PKT_KEY));
615 entry->recvdLen = fragmentLength;
/*
 * NOTE(review): the argument below is mis-encoded in this copy of the file;
 * given the use on the next line it is presumably the address of
 * currentTime.
 */
618 NdisGetCurrentSystemTime((LARGE_INTEGER *) ¤tTime);
/* The cleaner thread frees entries whose timeout has passed. */
619 entry->timeout = currentTime + STT_ENTRY_TIMEOUT;
621 if (segOffset == 0) {
622 entry->sttHdr = *sttHdr;
625 /* Copy the data from Source to new buffer */
/*
 * NOTE(review): the buffer is sized from the wire-supplied innerPacketLen.
 * No NULL check on the allocation and no check that
 * offset + fragmentLength fits inside innerPacketLen is visible before the
 * copy below.
 */
626 entry->packetBuf = OvsAllocateMemoryWithTag(innerPacketLen,
628 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
629 entry->packetBuf + offset) == NULL) {
630 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
634 /* Insert the entry in the Static Buffer */
635 InsertHeadList(&OvsSttPktFragHash[hash & STT_HASH_TABLE_MASK],
638 /* Add to received length to identify if this is the last fragment */
/*
 * NOTE(review): completion is detected purely by byte count, so a
 * retransmitted fragment would be counted twice.
 */
639 pktFragEntry->recvdLen += fragmentLength;
640 lastPacket = (pktFragEntry->recvdLen == innerPacketLen);
642 if (segOffset == 0) {
643 pktFragEntry->sttHdr = *sttHdr;
646 /* Copy the fragment data from Source to existing buffer */
/*
 * NOTE(review): the existing buffer was sized from the first fragment seen;
 * no visible check ties this fragment's offset and length to that size.
 */
647 if (OvsGetPacketBytes(curNbl, fragmentLength, startOffset,
648 pktFragEntry->packetBuf + offset) == NULL) {
649 OVS_LOG_ERROR("Error when obtaining bytes from Packet");
656 /* Retrieve the original STT header */
657 NdisMoveMemory(newSttHdr, &pktFragEntry->sttHdr, sizeof (SttHdr));
/*
 * The reassembly buffer is freed right after this call, so
 * OvsAllocateNBLFromBuffer() presumably copies the data - confirm.
 */
658 targetPNbl = OvsAllocateNBLFromBuffer(switchContext, pktFragEntry->packetBuf,
661 /* Delete this entry and free up the memory */
662 RemoveEntryList(&pktFragEntry->link);
663 OvsFreeMemoryWithTag(pktFragEntry->packetBuf, OVS_STT_POOL_TAG);
664 OvsFreeMemoryWithTag(pktFragEntry, OVS_STT_POOL_TAG);
667 NdisReleaseSpinLock(&OvsSttSpinLock);
668 return lastPacket ? targetPNbl : NULL;
/*
 * Translates the STT header's checksum hints into NDIS offload requests on
 * the decapsulated NBL.
 *
 * The first test singles out headers flagged STT_CSUM_VERIFIED or lacking
 * STT_CSUM_PARTIAL. For the remaining (partial-checksum) packets the
 * IPv4/IPv6 and TCP/UDP transmit-checksum bits are chosen from
 * sttHdr->flags, and LSOv2 info is written using sttHdr->l4Offset as the
 * TCP header offset.
 *
 * NOTE(review): the body of the first test, the switch statement and most
 * case labels, and any zeroing of csumInfo/lsoInfo are not visible in this
 * excerpt. Both unions are declared without an initialiser on the visible
 * lines - confirm their Value is cleared before fields are set.
 */
672 OvsDecapSetOffloads(PNET_BUFFER_LIST curNbl, SttHdr *sttHdr)
674 if ((sttHdr->flags & STT_CSUM_VERIFIED)
675 || !(sttHdr->flags & STT_CSUM_PARTIAL)) {
680 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
682 csumInfo.Transmit.IpHeaderChecksum = 0;
683 csumInfo.Transmit.TcpHeaderOffset = sttHdr->l4Offset;
/* Keep only the protocol bits of the flags for the dispatch below. */
684 protoType = sttHdr->flags & STT_PROTO_TYPES;
686 case (STT_PROTO_IPV4 | STT_PROTO_TCP):
688 csumInfo.Transmit.IsIPv4 = 1;
689 csumInfo.Transmit.TcpChecksum = 1;
693 csumInfo.Transmit.IsIPv6 = 1;
694 csumInfo.Transmit.TcpChecksum = 1;
698 csumInfo.Transmit.IsIPv4 = 1;
699 csumInfo.Transmit.UdpChecksum = 1;
703 csumInfo.Transmit.IsIPv6 = 1;
704 csumInfo.Transmit.UdpChecksum = 1;
706 NET_BUFFER_LIST_INFO(curNbl,
707 TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
710 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
712 lsoInfo.LsoV2Transmit.TcpHeaderOffset = sttHdr->l4Offset;
/*
 * The MSS is derived from ETH_DEFAULT_MTU; the rest of the expression is
 * on lines not visible here. sttHdr->mss is not read on any visible line.
 */
713 lsoInfo.LsoV2Transmit.MSS = ETH_DEFAULT_MTU
716 lsoInfo.LsoV2Transmit.Type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
717 if (sttHdr->flags & STT_PROTO_IPV4) {
718 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv4;
720 lsoInfo.LsoV2Transmit.IPVersion = NDIS_TCP_LARGE_SEND_OFFLOAD_IPv6;
722 NET_BUFFER_LIST_INFO(curNbl,
723 TcpLargeSendNetBufferListInfo) = lsoInfo.Value;
728 * --------------------------------------------------------------------------
730 * Decapsulates an STT packet.
731 * --------------------------------------------------------------------------
/*
 * Strips the outer Ethernet/IPv4/TCP/STT headers from a received STT
 * packet.
 *
 * Visible steps:
 *   1. Validate the outer TCP checksum (OvsValidateTCPChecksum).
 *   2. Advance past the Ethernet header, read the IP header, then advance
 *      past the IP and TCP headers.
 *   3. If the STT frame length encoded in the TCP sequence number differs
 *      from this packet's payload length, hand the fragment to
 *      OvsSttReassemble().
 *   4. Otherwise read the STT header, advance past it, clone the NBL into
 *      *newNbl and restore curNb's data start.
 *   5. Fill tunKey from the outer IP and STT headers and set the offload
 *      flags on *newNbl.
 *
 * Returns NDIS_STATUS_SUCCESS on the paths shown, NDIS_STATUS_RESOURCES if
 * the clone fails and NDIS_STATUS_FAILURE if the data start cannot be
 * restored.
 * NOTE(review): several declarations, conditions and the first assignment
 * of advanceCnt are not visible in this excerpt.
 */
734 OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
735 PNET_BUFFER_LIST curNbl,
736 OvsIPv4TunnelKey *tunKey,
737 PNET_BUFFER_LIST *newNbl)
739 NDIS_STATUS status = NDIS_STATUS_FAILURE;
740 PNET_BUFFER curNb, newNb;
/*
 * NOTE(review): ipBuf and sttBuf are arrays of char pointers, not of char.
 * They serve only as scratch storage for NdisGetDataBuffer(), so they are
 * larger than needed; plain char arrays look like the intent.
 */
742 char *ipBuf[sizeof(IPHdr)];
745 char *sttBuf[STT_HDR_LEN];
746 UINT32 advanceCnt, hdrLen;
747 BOOLEAN isLsoPacket = FALSE;
749 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
750 ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
752 /* Validate the TCP Checksum */
753 status = OvsValidateTCPChecksum(curNbl, curNb);
754 if (status != NDIS_STATUS_SUCCESS) {
758 /* Skip Eth header */
759 hdrLen = sizeof(EthHdr);
760 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
763 ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
/*
 * NOTE(review): tcp is derived from ipHdr, but only sizeof *ipHdr bytes
 * were requested above. If the header was copied into ipBuf, tcp points
 * past that storage - confirm the headers are always contiguous.
 */
767 TCPHdr *tcp = (TCPHdr *)((PCHAR)ipHdr + ipHdr->ihl * 4);
769 /* Skip IP & TCP headers */
/*
 * NOTE(review): the next line ends in a comma, so it forms one expression
 * with the call that follows; behaviour is the same as with a semicolon,
 * but it looks like a typo. The skip uses fixed header sizes, whereas tcp
 * above honours ihl.
 */
770 hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
771 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
772 advanceCnt += hdrLen;
774 UINT32 seq = ntohl(tcp->seq);
/* STT frame length as encoded by the sender in the sequence number. */
775 UINT32 totalLen = (seq >> STT_SEQ_LEN_SHIFT);
776 UINT16 payloadLen = (UINT16)ntohs(ipHdr->tot_len)
780 /* Check if incoming packet requires reassembly */
781 if (totalLen != payloadLen) {
783 PNET_BUFFER_LIST pNbl = OvsSttReassemble(switchContext, curNbl,
/*
 * Presumably returned while reassembly is still incomplete; the guarding
 * condition is not visible in this excerpt.
 */
787 return NDIS_STATUS_SUCCESS;
794 sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr,
795 (PVOID) &sttBuf, 1 /*no align*/, 0);
796 /* Skip stt header, DataOffset points to inner pkt now. */
797 hdrLen = STT_HDR_LEN;
798 NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
799 advanceCnt += hdrLen;
801 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0,
802 0, FALSE /*copy NBL info*/);
805 if (*newNbl == NULL) {
806 OVS_LOG_ERROR("Unable to allocate a new cloned NBL");
807 return NDIS_STATUS_RESOURCES;
/* Undo every advance so the original NBL is left as it was received. */
810 status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
811 if (status != NDIS_STATUS_SUCCESS) {
812 OvsCompleteNBL(switchContext, *newNbl, TRUE);
813 return NDIS_STATUS_FAILURE;
815 newNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
819 /* Initialize the tunnel key */
820 tunKey->dst = ipHdr->daddr;
821 tunKey->src = ipHdr->saddr;
822 tunKey->tunnelId = sttHdr->key;
823 tunKey->flags = OVS_TNL_F_KEY;
824 tunKey->tos = ipHdr->tos;
825 tunKey->ttl = ipHdr->ttl;
828 /* Set Checksum and LSO offload flags */
829 OvsDecapSetOffloads(*newNbl, sttHdr);
831 return NDIS_STATUS_SUCCESS;