2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage the packet queue for packet misses and user actions.
34 #include "TunnelIntf.h"
/* Debug-module selector for this file (presumably read by the OVS_LOG_*
 * macros -- confirm against the debug header). */
39 #define OVS_DBG_MOD OVS_DBG_USER
/* Forward declaration; the definition appears further down in this file. */
42 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Globals defined in other translation units. */
43 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
44 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters incremented by the upcall and checksum paths in this file
 * (e.g. ipCsum, l4Csum, dropDuetoResource). */
45 OVS_USER_STATS ovsUserStats;
/* Forward declaration of the Netlink-attribute mapper defined below. */
47 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
48 OvsPacketExecute *execute);
/* Flow-key attribute policy defined elsewhere; used below when parsing the
 * nested OVS_PACKET_ATTR_KEY attribute. */
49 extern NL_POLICY nlFlowKeyPolicy[];
/*
 * OvsPurgePacketQueue --
 *    Detaches all packets currently queued on 'queue' and unlinks each
 *    element from the detached list. Nothing is detached when the queue is
 *    not bound to 'instance'.
 *
 *    queue:    packet queue to purge; its queueLock is taken here.
 *    instance: open instance that is expected to own 'queue'.
 */
52 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
53 POVS_OPEN_INSTANCE instance)
55 PLIST_ENTRY link, next;
57 POVS_PACKET_QUEUE_ELEM elem;
59 InitializeListHead(&tmp);
60 NdisAcquireSpinLock(&queue->queueLock);
/* Ownership check: bail out if the queue belongs to another instance. */
61 if (queue->instance != instance) {
62 NdisReleaseSpinLock(&queue->queueLock);
/* Hand the queued packets over to the local list 'tmp' while the lock is
 * held (presumably OvsAppendList moves the entries -- confirm). */
66 if (queue->numPackets) {
67 OvsAppendList(&tmp, &queue->packetList);
68 queue->numPackets = 0;
70 NdisReleaseSpinLock(&queue->queueLock);
/* The detached elements are processed after the lock has been dropped. */
71 LIST_FORALL_SAFE(&tmp, link, next) {
72 RemoveEntryList(link);
73 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * OvsCleanupPacketQueue --
 *    Tears down the packet queue attached to 'instance': detaches queued
 *    packets, breaks the instance <-> queue association, takes over any
 *    pending IRP, frees the queue spin lock, completes the IRP with
 *    STATUS_SUCCESS and frees the queue memory.
 *
 *    instance: open instance whose packetQueue is being destroyed.
 */
79 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
81 POVS_USER_PACKET_QUEUE queue;
82 POVS_PACKET_QUEUE_ELEM elem;
83 PLIST_ENTRY link, next;
87 InitializeListHead(&tmp);
88 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
90 PDRIVER_CANCEL cancelRoutine;
91 NdisAcquireSpinLock(&queue->queueLock);
92 ASSERT(queue->instance == instance);
93 /* XXX Should not happen */
/* Defensive path for a queue owned by a different instance: the lock is
 * released and freed. */
94 if (queue->instance != instance) {
95 NdisReleaseSpinLock(&queue->queueLock);
96 NdisFreeSpinLock(&queue->queueLock);
/* Detach the queued packets onto the local list while holding the lock. */
100 if (queue->numPackets) {
101 OvsAppendList(&tmp, &queue->packetList);
102 queue->numPackets = 0;
/* Break the association in both directions. */
104 queue->instance = NULL;
105 instance->packetQueue = NULL;
/* Take ownership of the pending IRP (if any) under the lock. */
106 irp = queue->pendingIrp;
107 queue->pendingIrp = NULL;
/* IoSetCancelRoutine returns the previously registered routine; a NULL
 * result means no cancel routine was registered any more. */
109 cancelRoutine = IoSetCancelRoutine(irp, NULL);
110 if (cancelRoutine == NULL) {
114 NdisReleaseSpinLock(&queue->queueLock);
115 NdisFreeSpinLock(&queue->queueLock);
/* Process the detached packets outside the lock. */
117 LIST_FORALL_SAFE(&tmp, link, next) {
118 RemoveEntryList(link);
119 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/* Complete the waiter with zero bytes of information. */
123 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
126 OvsFreeMemory(queue);
/*
 * OvsSubscribeDpIoctl --
 *    Subscribes or unsubscribes an open instance to packet upcalls.
 *      - queue present and 'join' is zero:  the queue is cleaned up.
 *      - queue absent and 'join' is non-zero: a new queue is allocated,
 *        initialised and attached to the instance.
 *      - any other combination: STATUS_INVALID_PARAMETER.
 *    Returns STATUS_NO_MEMORY when the queue cannot be allocated and
 *    STATUS_SUCCESS otherwise.
 */
131 OvsSubscribeDpIoctl(PVOID instanceP,
135 POVS_USER_PACKET_QUEUE queue;
136 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
138 if (instance->packetQueue && !join) {
140 OvsCleanupPacketQueue(instance);
141 } else if (instance->packetQueue == NULL && join) {
142 queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
144 return STATUS_NO_MEMORY;
/* NOTE(review): the queue pointer is published on the instance here, before
 * the memory is zeroed and the lock/list are initialised, and is assigned
 * again further down. The early assignment looks redundant and could expose
 * an uninitialised queue to a concurrent reader -- confirm and consider
 * removing it. */
146 instance->packetQueue = queue;
147 RtlZeroMemory(queue, sizeof (*queue));
148 NdisAllocateSpinLock(&queue->queueLock);
149 NdisAcquireSpinLock(&queue->queueLock);
150 InitializeListHead(&queue->packetList);
152 queue->instance = instance;
153 instance->packetQueue = queue;
154 NdisReleaseSpinLock(&queue->queueLock);
156 /* user mode should call only once for subscribe */
157 return STATUS_INVALID_PARAMETER;
159 return STATUS_SUCCESS;
/*
 * OvsReadDpIoctl --
 *    Copies the next queued packet of the caller's instance into
 *    'outputBuffer'. When a TCP/UDP checksum is still owed and the whole
 *    packet fits, the L4 checksum is computed while the L4 data is copied.
 *
 *    Returns STATUS_INVALID_PARAMETER when the instance has no packet queue,
 *    STATUS_BUFFER_TOO_SMALL when 'outputLength' is below
 *    sizeof(OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE, STATUS_SUCCESS otherwise.
 */
164 OvsReadDpIoctl(PFILE_OBJECT fileObject,
169 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
170 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offsets of the checksum field within the TCP and UDP headers. These
 * macros are also used by OvsCompletePacketHeader later in this file. */
173 #define TCP_CSUM_OFFSET 16
174 #define UDP_CSUM_OFFSET 6
177 if (instance->packetQueue == NULL) {
178 return STATUS_INVALID_PARAMETER;
180 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
181 return STATUS_BUFFER_TOO_SMALL;
184 elem = OvsGetNextPacket(instance);
187 * XXX revisit this later
/* Truncate the copy to the caller's buffer size. */
189 len = elem->packet.totalLen > outputLength ? outputLength :
190 elem->packet.totalLen;
/* The checksum path is taken only when the packet was not truncated. */
192 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
193 len == elem->packet.totalLen) {
/* 'size' = bytes from the start of packet.data up to the L4 header. */
195 UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
196 elem->hdrInfo.l4Offset);
197 RtlCopyMemory(outputBuffer, &elem->packet.data, size);
/* NOTE(review): this bound is checked by ASSERT only -- presumably not
 * enforced in release builds; confirm that l4PayLoad cannot exceed
 * len - size for crafted packets. */
198 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
/* Copy the L4 bytes and obtain their checksum in one pass. */
199 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
200 (UINT8 *)&elem->packet.data + size,
201 elem->hdrInfo.l4PayLoad, 0);
/* Locate the checksum field of the copied TCP or UDP header. */
202 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
203 (elem->hdrInfo.tcpCsumNeeded ?
204 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
206 ovsUserStats.l4Csum++;
/* No checksum work required (or packet truncated): plain copy. */
208 RtlCopyMemory(outputBuffer, &elem->packet.data, len);
214 return STATUS_SUCCESS;
217 /* Helper function to allocate a Forwarding Context for an NBL */
/*
 * Thin wrapper: forwards to the switch's
 * AllocateNetBufferListForwardingContext handler, passing the NDIS switch
 * context and 'nbl', and returns that handler's result unchanged.
 */
219 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
220 PNET_BUFFER_LIST nbl)
222 return switchContext->NdisSwitchHandlers.
223 AllocateNetBufferListForwardingContext(
224 switchContext->NdisSwitchContext, nbl);
228 * --------------------------------------------------------------------------
229 * This function allocates all the stuff necessary for creating an NBL from the
230 * input buffer of specified length, namely, a nonpaged data buffer of size
231 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
232 * context yet. It also copies data from the specified buffer to the NBL.
233 * --------------------------------------------------------------------------
/*
 * OvsAllocateNBLForUserBuffer --
 *    Allocates an NBL able to hold 'length' bytes (with
 *    OVS_DEFAULT_HEADROOM_SIZE headroom) and copies 'length' bytes from
 *    'userBuffer' into the first NET_BUFFER's current MDL.
 */
236 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
241 PNET_BUFFER_LIST nbl = NULL;
/* Payloads larger than the default data size use the variable-size
 * allocator; everything else uses the fixed-size one. */
245 if (length > OVS_DEFAULT_DATA_SIZE) {
246 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
247 OVS_DEFAULT_HEADROOM_SIZE);
250 nbl = OvsAllocateFixSizeNBL(switchContext, length,
251 OVS_DEFAULT_HEADROOM_SIZE);
257 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
258 mdl = NET_BUFFER_CURRENT_MDL(nb);
/* NOTE(review): the MDL offset is added to the mapping result before any
 * NULL test can see it. If the failure check that guards the cleanup below
 * tests 'data', a failed mapping combined with a non-zero offset would not
 * be detected -- confirm and test the mapping result first. */
259 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
260 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
/* Failure path: give the NBL back. */
262 OvsCompleteNBL(switchContext, nbl, TRUE);
266 NdisMoveMemory(data, userBuffer, length);
272 *----------------------------------------------------------------------------
273 * OvsNlExecuteCmdHandler --
274 * Handler for OVS_PACKET_CMD_EXECUTE command.
275 *----------------------------------------------------------------------------
/*
 * Parses the Netlink request in usrParamsCtx->inputBuffer, maps it to an
 * OvsPacketExecute, runs it through OvsExecuteDpIoctl and writes either a
 * default reply or a Netlink error message to usrParamsCtx->outputBuffer.
 * '*replyLen' receives the length of whichever message was written.
 */
278 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
281 NTSTATUS status = STATUS_SUCCESS;
282 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
283 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
284 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
285 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
286 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
288 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
289 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Top-level attributes start after the Netlink, Generic Netlink and OVS
 * headers. */
291 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
292 UINT32 keyAttrOffset = 0;
293 OvsPacketExecute execute;
294 NL_ERROR nlError = NL_ERROR_SUCCESS;
/* Parse policy: PACKET, KEY and ACTIONS are mandatory; USERDATA is
 * optional. */
297 static const NL_POLICY nlPktExecPolicy[] = {
298 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
299 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
300 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
301 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
302 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
306 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
308 /* Get all the top level Flow attributes */
309 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
310 nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
312 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
314 status = STATUS_UNSUCCESSFUL;
/* Offset of the nested key attribute, derived from its address. */
318 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
321 /* Get flow keys attributes */
322 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
323 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
324 nlFlowKeyPolicy, keyAttrs,
325 ARRAY_SIZE(keyAttrs))) != TRUE) {
326 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
327 status = STATUS_UNSUCCESSFUL;
331 execute.dpNo = ovsHdr->dp_ifindex;
333 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
335 status = OvsExecuteDpIoctl(&execute);
337 /* Default reply that we want to send */
338 if (status == STATUS_SUCCESS) {
339 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
340 usrParamsCtx->outputLength);
342 /* Prepare nl Msg headers */
/* The reply echoes the request's type, sequence number, pid and command. */
343 status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
344 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
345 genlMsgHdr->cmd, OVS_PACKET_VERSION,
348 if (status == STATUS_SUCCESS) {
349 *replyLen = msgOut->nlMsg.nlmsgLen;
352 /* Map NTSTATUS to NL_ERROR */
353 nlError = NlMapStatusToNlErr(status);
355 /* As of now there are no transactional errors in the implementation.
356 * Once we have them then we need to map status to correct
357 * nlError value, so that below mentioned code gets hit. */
/* On error, and when an output buffer exists, the failure is reported
 * in-band as a Netlink error message and the NTSTATUS is reset to success. */
358 if ((nlError != NL_ERROR_SUCCESS) &&
359 (usrParamsCtx->outputBuffer)) {
361 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
362 usrParamsCtx->outputBuffer;
363 BuildErrorMsg(msgIn, msgError, nlError);
364 *replyLen = msgError->nlMsg.nlmsgLen;
365 status = STATUS_SUCCESS;
375 *----------------------------------------------------------------------------
376 * _MapNlAttrToOvsPktExec --
377 * Maps input Netlink attributes to OvsPacketExecute.
378 *----------------------------------------------------------------------------
/*
 * Fills 'execute' from parsed attributes: packet buffer and length from
 * OVS_PACKET_ATTR_PACKET, actions and their length from
 * OVS_PACKET_ATTR_ACTIONS, and the input port from OVS_KEY_ATTR_IN_PORT.
 * The stored pointers reference the attribute payloads; no data is copied
 * here.
 */
381 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
382 OvsPacketExecute *execute)
384 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
385 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
387 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
388 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
/* NOTE(review): keyAttrs[OVS_KEY_ATTR_IN_PORT] is dereferenced without a
 * visible NULL check -- confirm the key policy makes IN_PORT mandatory. */
390 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * OvsExecuteDpIoctl --
 *    Builds an NBL from the user-supplied packet in 'execute', extracts its
 *    flow key and runs the supplied actions on it. gOvsCtrlLock is acquired
 *    at entry and released before returning.
 *
 *    Visible status values: STATUS_INVALID_PARAMETER (no switch context or
 *    zero packet length), STATUS_NO_MEMORY (NBL allocation),
 *    STATUS_NOT_SUPPORTED / STATUS_UNSUCCESSFUL (mapped from the NDIS
 *    status), STATUS_SUCCESS otherwise.
 */
394 OvsExecuteDpIoctl(OvsPacketExecute *execute)
396 NTSTATUS status = STATUS_SUCCESS;
398 LOCK_STATE_EX lockState;
399 PNET_BUFFER_LIST pNbl;
401 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
403 OVS_PACKET_HDR_INFO layers;
404 POVS_VPORT_ENTRY vport;
406 NdisAcquireSpinLock(gOvsCtrlLock);
407 if (gOvsSwitchContext == NULL) {
408 status = STATUS_INVALID_PARAMETER;
412 if (execute->packetLen == 0) {
413 status = STATUS_INVALID_PARAMETER;
417 actions = execute->actions;
422 * Allocate the NBL, copy the data from the userspace buffer. Allocate
423 * also, the forwarding context for the packet.
425 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
428 status = STATUS_NO_MEMORY;
/* Stamp the source port/NIC on the NBL: taken from the vport found for
 * execute->inPort, otherwise the default port id and NIC index 0. */
432 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
433 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
435 fwdDetail->SourcePortId = vport->portId;
436 fwdDetail->SourceNicIndex = vport->nicIndex;
438 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
439 fwdDetail->SourceNicIndex = 0;
441 // XXX: Figure out if any of the other members of fwdDetail need to be set.
443 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
445 if (ndisStatus == NDIS_STATUS_SUCCESS) {
/* The spin lock taken at entry is still held here, which is consistent
 * with the DISPATCH_LEVEL assertion and flag below. */
446 ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
447 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
448 NDIS_RWL_AT_DISPATCH_LEVEL);
449 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
450 vport ? vport->portNo :
452 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
453 &key, NULL, &layers, actions,
454 execute->actionsLen);
456 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Translate the NDIS status into an NTSTATUS for the caller. */
458 if (ndisStatus != NDIS_STATUS_SUCCESS) {
459 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
460 status = STATUS_NOT_SUPPORTED;
462 status = STATUS_UNSUCCESSFUL;
467 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
470 NdisReleaseSpinLock(gOvsCtrlLock);
/*
 * Purges the packet queue attached to the instance behind 'fileObject'.
 * Has a STATUS_INVALID_PARAMETER exit (presumably for an instance without a
 * queue -- the guarding condition should be confirmed) and returns
 * STATUS_SUCCESS after the purge.
 */
476 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
478 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
479 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
482 return STATUS_INVALID_PARAMETER;
484 OvsPurgePacketQueue(queue, instance);
485 return STATUS_SUCCESS;
/*
 * Cancel routine registered for IRPs parked by OvsWaitDpIoctl. Clears the
 * queue's pendingIrp slot if it still refers to 'irp' and completes the IRP
 * with STATUS_CANCELLED. 'deviceObject' is unused.
 */
489 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
492 PIO_STACK_LOCATION irpSp;
493 PFILE_OBJECT fileObject;
494 POVS_OPEN_INSTANCE instance;
495 POVS_USER_PACKET_QUEUE queue = NULL;
497 UNREFERENCED_PARAMETER(deviceObject);
/* The cancel spin lock is dropped first, restoring the IRQL saved in the
 * IRP. */
499 IoReleaseCancelSpinLock(irp->CancelIrql);
500 irpSp = IoGetCurrentIrpStackLocation(irp);
501 fileObject = irpSp->FileObject;
503 if (fileObject == NULL) {
/* Look up the instance and its queue under the control lock. */
506 NdisAcquireSpinLock(gOvsCtrlLock);
507 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
509 queue = instance->packetQueue;
511 if (instance == NULL || queue == NULL) {
512 NdisReleaseSpinLock(gOvsCtrlLock);
/* NOTE(review): 'queue' is used after gOvsCtrlLock is released; confirm
 * that it cannot be freed (see OvsCleanupPacketQueue) in between. */
515 NdisReleaseSpinLock(gOvsCtrlLock);
516 NdisAcquireSpinLock(&queue->queueLock);
/* Only clear the slot if this IRP is still the parked one. */
517 if (queue->pendingIrp == irp) {
518 queue->pendingIrp = NULL;
520 NdisReleaseSpinLock(&queue->queueLock);
522 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * OvsWaitDpIoctl --
 *    Parks 'irp' on the instance's packet queue when no packets are
 *    available so that it can be completed later.
 *
 *    Visible status values: STATUS_INVALID_PARAMETER (one unconditional-
 *    looking exit plus a queue owned by another instance),
 *    STATUS_DEVICE_BUSY (an IRP is already parked), STATUS_PENDING (IRP
 *    parked), STATUS_SUCCESS (initial value, packets already queued).
 */
527 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
529 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
530 POVS_USER_PACKET_QUEUE queue =
531 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
532 NTSTATUS status = STATUS_SUCCESS;
533 BOOLEAN cancelled = FALSE;
536 return STATUS_INVALID_PARAMETER;
538 NdisAcquireSpinLock(&queue->queueLock);
539 if (queue->instance != instance) {
540 NdisReleaseSpinLock(&queue->queueLock);
541 return STATUS_INVALID_PARAMETER;
/* Only one waiter may be parked per queue. */
543 if (queue->pendingIrp) {
544 NdisReleaseSpinLock(&queue->queueLock);
545 return STATUS_DEVICE_BUSY;
547 if (queue->numPackets == 0) {
548 PDRIVER_CANCEL cancelRoutine;
549 IoMarkIrpPending(irp);
550 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
/* The cancel routine is taken back here -- presumably only when the IRP was
 * already cancelled before the routine was installed; confirm the guarding
 * condition. */
552 cancelRoutine = IoSetCancelRoutine(irp, NULL);
557 queue->pendingIrp = irp;
559 status = STATUS_PENDING;
561 NdisReleaseSpinLock(&queue->queueLock);
/* Cancellation is completed outside the queue lock. */
563 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
564 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/*
 * Removes and returns the element at the head of the instance's packet
 * queue. Takes the early-exit path (lock released) when the queue is owned
 * by another instance or holds no packets.
 */
570 POVS_PACKET_QUEUE_ELEM
571 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
573 POVS_USER_PACKET_QUEUE queue;
575 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
579 NdisAcquireSpinLock(&queue->queueLock);
580 if (queue->instance != instance || queue->numPackets == 0) {
581 NdisReleaseSpinLock(&queue->queueLock);
584 link = RemoveHeadList(&queue->packetList);
586 NdisReleaseSpinLock(&queue->queueLock);
/* Convert the list link back into its containing queue element. */
587 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * Placeholder lookup of the packet queue for a given pid; 'pid' is currently
 * unused.
 */
591 POVS_USER_PACKET_QUEUE
592 OvsGetQueue(UINT32 pid)
594 /* XXX To be implemented. Return the queue associated with the pid */
595 UNREFERENCED_PARAMETER(pid);
/*
 * OvsQueuePackets --
 *    Appends the elements of 'packetList' to the queue selected by
 *    'queueId' and, when a reader IRP is parked on that queue, completes it
 *    with STATUS_SUCCESS. Elements still left on 'packetList' afterwards are
 *    removed one by one and counted as dropped in the exit log line.
 */
601 OvsQueuePackets(UINT32 queueId,
602 PLIST_ENTRY packetList,
605 POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
606 POVS_PACKET_QUEUE_ELEM elem;
611 OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
617 NdisAcquireSpinLock(&queue->queueLock);
/* A queue without an owning instance is not used. */
618 if (queue->instance == NULL) {
619 NdisReleaseSpinLock(&queue->queueLock);
622 OvsAppendList(&queue->packetList, packetList);
623 queue->numPackets += numElems;
/* Wake a parked reader: take the IRP out of the queue under the lock. */
625 if (queue->pendingIrp) {
626 PDRIVER_CANCEL cancelRoutine;
627 irp = queue->pendingIrp;
628 queue->pendingIrp = NULL;
/* IoSetCancelRoutine returns the previously registered routine; NULL means
 * none was registered any more. */
629 cancelRoutine = IoSetCancelRoutine(irp, NULL);
630 if (cancelRoutine == NULL) {
634 NdisReleaseSpinLock(&queue->queueLock);
636 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
/* Drain whatever is still on the caller's list. */
640 while (!IsListEmpty(packetList)) {
641 link = RemoveHeadList(packetList);
642 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
646 OVS_LOG_LOUD("Exit: drop %u packets", num);
651 *----------------------------------------------------------------------------
652 * OvsCreateAndAddPackets --
654 * Create a packet and forward it to user space.
656 * This function would fragment packet if needed, and queue
657 * each segment to user space.
658 *----------------------------------------------------------------------------
/*
 * For a TCP packet carrying a non-zero LSO MSS, the NBL is first segmented
 * with OvsTcpSegmentNBL. A queue element is then created per NET_BUFFER via
 * OvsCreateQueueNlPacket and appended to 'list'. Returns NDIS_STATUS_FAILURE
 * when segmentation fails and NDIS_STATUS_SUCCESS otherwise.
 */
661 OvsCreateAndAddPackets(PVOID userData,
666 PNET_BUFFER_LIST nbl,
668 POVS_PACKET_HDR_INFO hdrInfo,
669 POVS_SWITCH_CONTEXT switchContext,
673 POVS_PACKET_QUEUE_ELEM elem;
674 PNET_BUFFER_LIST newNbl = NULL;
677 if (hdrInfo->isTcp) {
678 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
/* Read the large-send-offload info attached to the NBL. */
681 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
682 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
683 packetLength = NET_BUFFER_DATA_LENGTH(nb);
685 OVS_LOG_TRACE("MSS %u packet len %u",
686 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS triggers software segmentation. */
687 if (tsoInfo.LsoV1Transmit.MSS) {
688 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
689 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
690 tsoInfo.LsoV1Transmit.MSS , 0);
691 if (newNbl == NULL) {
692 return NDIS_STATUS_FAILURE;
698 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
700 elem = OvsCreateQueueNlPacket(userData, userDataLen,
701 cmd, inPort, key, nbl, nb,
704 InsertTailList(list, &elem->link);
707 nb = NET_BUFFER_NEXT_NB(nb);
/* The segmented NBL created above is given back once the elements exist. */
710 OvsCompleteNBL(switchContext, newNbl, TRUE);
712 return NDIS_STATUS_SUCCESS;
/*
 * Computes the buffer size for an upcall message: the aligned ovs_header,
 * the packet payload attribute and the flow-key attribute, plus the
 * user-data and tunnel-key attributes that are added further down.
 */
715 static __inline UINT32
716 OvsGetUpcallMsgSize(PVOID userData,
718 OvsIPv4TunnelKey *tunnelKey,
721 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
722 NlAttrSize(payload) +
723 NlAttrSize(OvsFlowKeyAttrSize());
725 /* OVS_PACKET_ATTR_USERDATA */
727 size += NlAttrTotalSize(userDataLen);
729 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
730 /* Is it included in the flow key attr XXX */
732 size += NlAttrTotalSize(OvsTunKeyAttrSize());
738 *----------------------------------------------------------------------------
739 * This function completes the IP header csum, records the L4 payload offset and
740 * whether the TCP or UDP csum still needs to be calculated. The actual csum will be
741 * calculated simultaneously with the copy of the payload to the destination
742 * buffer when the packet is read.
743 *----------------------------------------------------------------------------
/*
 * OvsCompletePacketHeader --
 *    packet:     start of the copied frame; offsets in hdrInfoIn/hdrInfoOut
 *                are applied to this pointer.
 *    csumInfo:   NDIS checksum offload info of the source NBL.
 *    hdrInfoIn:  header info from flow extraction (read only here).
 *    hdrInfoOut: per-element header info; l4PayLoad, tcpCsumNeeded and
 *                udpCsumNeeded are written here.
 */
746 OvsCompletePacketHeader(UINT8 *packet,
748 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
749 POVS_PACKET_HDR_INFO hdrInfoIn,
750 POVS_PACKET_HDR_INFO hdrInfoOut)
/* Finish the IPv4 header checksum when it is flagged invalid on receive or
 * was requested for offload on transmit. */
752 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
753 (!isRecv && csumInfo.Transmit.IsIPv4 &&
754 csumInfo.Transmit.IpHeaderChecksum)) {
/* NOTE(review): this is the only place that locates a header through
 * hdrInfoOut->l3Offset; every other lookup below uses hdrInfoIn -- confirm
 * both hold the same offset at this point. */
755 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
756 ASSERT(hdrInfoIn->isIPv4);
757 ASSERT(ipHdr->Version == 4);
758 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
759 ipHdr->HeaderLength << 2,
760 (UINT16)~ipHdr->HeaderChecksum);
761 ovsUserStats.ipCsum++;
/* NOTE(review): the assertion checks tcpCsumNeeded on hdrInfoIn but
 * udpCsumNeeded on hdrInfoOut; the mix looks unintentional -- confirm. */
763 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
765 * calculate TCP/UDP pseudo checksum
767 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
769 * Only this case, we need to reclaculate pseudo checksum
770 * all other cases, it is assumed the pseudo checksum is
774 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
775 if (hdrInfoIn->isIPv4) {
776 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
/* IPv4: L4 length = total length minus the IP header length. */
777 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
778 (ipHdr->HeaderLength << 2));
779 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
780 (UINT32 *)&ipHdr->DestinationAddress,
781 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
783 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
/* IPv6: payload length minus whatever lies between the fixed IPv6 header
 * and the L4 header. */
784 hdrInfoOut->l4PayLoad =
785 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
786 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
787 hdrInfoIn->l4Offset);
788 ASSERT(hdrInfoIn->isIPv6);
790 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
791 (UINT32 *)&ipv6Hdr->DestinationAddress,
792 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
794 hdrInfoOut->tcpCsumNeeded = 1;
795 ovsUserStats.recalTcpCsum++;
796 } else if (!isRecv) {
/* Transmit path: the offload request decides which L4 checksum is owed. */
797 if (csumInfo.Transmit.TcpChecksum) {
798 hdrInfoOut->tcpCsumNeeded = 1;
799 } else if (csumInfo.Transmit.UdpChecksum) {
800 hdrInfoOut->udpCsumNeeded = 1;
802 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
806 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
808 if (hdrInfoIn->isIPv4) {
809 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
810 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
811 (ipHdr->HeaderLength << 2));
813 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
814 (UINT32 *)&ipHdr->DestinationAddress,
815 proto, hdrInfoOut->l4PayLoad);
818 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
819 hdrInfoIn->l3Offset);
820 hdrInfoOut->l4PayLoad =
821 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
822 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
823 hdrInfoIn->l4Offset);
824 ASSERT(hdrInfoIn->isIPv6);
826 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
827 (UINT32 *)&ipv6Hdr->DestinationAddress,
828 proto, hdrInfoOut->l4PayLoad);
/* Address of the checksum field inside the TCP or UDP header. The offset
 * macros are the ones #defined inside OvsReadDpIoctl earlier in this file. */
832 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
833 (hdrInfoOut->tcpCsumNeeded ?
834 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/*
 * Stores the vport's upcall pid in '*pid'. 'nb' is currently unused. The
 * only visible return value is STATUS_SUCCESS, so callers must compare the
 * result against STATUS_SUCCESS rather than treat it as a boolean.
 */
842 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
844 UNREFERENCED_PARAMETER(nb);
846 /* XXX select a pid from an array of pids using a flow based hash */
847 *pid = vport->upcallPid;
848 return STATUS_SUCCESS;
852 *----------------------------------------------------------------------------
853 * OvsCreateQueueNlPacket --
855 * Create a packet which will be forwarded to user space.
858 * userData: when cmd is user action, this field contains
860 * userDataLen: as the name indicates
861 * cmd: either miss or user action
862 * inPort: datapath port id from which the packet is received.
863 * key: flow Key with a tunnel key if available
864 * nbl: the NET_BUFFER_LIST which contain the packet
866 * isRecv: This is used to decide how to interpret the csum info
867 * hdrInfo: includes hdr info initialized during flow extraction.
870 * NULL if fail to create the packet
871 * The packet element otherwise
872 *----------------------------------------------------------------------------
/*
 * Builds a queue element whose payload is a Netlink upcall message for one
 * NET_BUFFER: OVS message header, flow key, optional user data and the
 * packet bytes (with a VLAN tag re-inserted when the NBL carries one).
 * The element is freed again on the failure path at the end.
 */
874 POVS_PACKET_QUEUE_ELEM
875 OvsCreateQueueNlPacket(PVOID userData,
880 PNET_BUFFER_LIST nbl,
883 POVS_PACKET_HDR_INFO hdrInfo)
/* Size of an 802.1Q tag in bytes. */
885 #define VLAN_TAG_SIZE 4
886 UINT32 allocLen, dataLen, extraLen;
887 POVS_PACKET_QUEUE_ELEM elem;
889 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
890 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
891 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
897 /* XXX pass vport in the stack rather than portNo */
898 POVS_VPORT_ENTRY vport =
899 OvsFindVportByPortNo(gOvsSwitchContext, inPort);
902 /* No vport is not fatal. */
/* NOTE(review): OvsGetPid's only visible return value is STATUS_SUCCESS,
 * which is 0 in the NTSTATUS convention, so '!OvsGetPid(...)' is true on
 * success. If the body of this branch bails out (as the comment below
 * suggests), every call would take it -- confirm and compare against
 * STATUS_SUCCESS instead. */
906 if (!OvsGetPid(vport, nb, &pid)) {
908 * There is no userspace queue created yet, so there is no point for
909 * creating a new packet to be queued.
914 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
/* Received packets that failed hardware checksum validation are dropped; a
 * failed UDP checksum is tolerated when hdrInfo->udpCsumZero is set. */
916 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
917 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
918 csumInfo.Receive.IpChecksumFailed)) {
919 OVS_LOG_INFO("Packet dropped due to checksum failure.");
920 ovsUserStats.dropDuetoChecksum++;
/* Reserve room for a VLAN tag when the NBL carries a non-zero VLAN id. */
924 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
925 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
927 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/* NOTE(review): this limit check uses dataLen only, while the packet
 * attribute written below is (UINT16)(dataLen + extraLen) -- confirm the
 * VLAN tag cannot push the length past MAXUINT16. */
929 if (NlAttrSize(dataLen) > MAXUINT16) {
933 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
/* One allocation holds the element header followed by the Netlink message. */
936 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
937 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
939 ovsUserStats.dropDuetoResource++;
942 elem->hdrInfo.value = hdrInfo->value;
943 elem->packet.totalLen = nlMsgSize;
944 /* XXX remove queueid */
945 elem->packet.queue = 0;
946 /* XXX no need as the length is already in the NL attrib */
947 elem->packet.userDataLen = userDataLen;
948 elem->packet.inPort = inPort;
949 elem->packet.cmd = cmd;
950 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
952 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
953 ovsUserStats.action++;
958 /* XXX Should we have both packetLen and TotalLen*/
959 elem->packet.packetLen = dataLen + extraLen;
/* The Netlink buffer is laid over the element's trailing data area. */
961 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
964 * Initialize the OVS header
965 * Since we are pre allocating memory for the NL buffer
966 * the attribute settings should not fail
968 if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
969 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
970 gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
974 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
975 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
979 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswitchd */
981 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
982 userData, (UINT16)userDataLen)) {
988 * Make space for the payload to be copied and set the attribute
989 * XXX Uninit set initilizes the buffer with xero, we don't actually need
990 * that the payload to be initailized
992 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
993 (UINT16)(dataLen + extraLen));
998 /* Store the payload for csum calculation when packet is read */
999 elem->packet.payload = dst;
/* NdisGetDataBuffer either returns a pointer to contiguous packet data or
 * copies the data into the supplied storage 'dst' and returns that. */
1002 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1004 ovsUserStats.dropDuetoResource++;
1006 } else if (src != dst) {
1007 /* Copy the data from the NDIS buffer to dst. */
1008 RtlCopyMemory(dst, src, dataLen);
1011 /* Set csum if was offloaded */
1012 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1015 * Finally insert VLAN tag
/* Copy 12 bytes (three UINT32 words) from 'extraLen' bytes further on back
 * to the start of the payload area, then write the 0x8100 TPID and the tag
 * control value (VLAN id | priority << 13). */
1018 dst = elem->packet.payload;
1019 src = dst + extraLen;
1020 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1021 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1022 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1024 ((UINT16 *)dst)[0] = htons(0x8100);
1025 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1026 (vlanInfo.TagHeader.UserPriority << 13));
/* Header offsets shift by the size of the inserted tag. */
1027 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1028 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1029 ovsUserStats.vlanInsert++;
/* Patch the final message length into the Netlink header. */
1032 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1033 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1034 /* 'totalLen' should be size of valid data. */
1035 elem->packet.totalLen = nlMsg->nlmsgLen;
1039 OvsFreeMemory(elem);