2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage packet queue for packet miss for userAction.
34 #include "TunnelIntf.h"
39 #define OVS_DBG_MOD OVS_DBG_USER
/* Table of user packet queues, indexed by queue id. */
42 OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES];
/* Forward declaration; the definition appears further down in this file. */
44 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Defined elsewhere. OvsExecuteDpIoctl holds gOvsCtrlLock while it uses
 * gOvsSwitchContext. */
45 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
46 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters incremented by the packet paths in this file. */
47 OVS_USER_STATS ovsUserStats;
/* Forward declaration of the Netlink-attribute to OvsPacketExecute mapper. */
49 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
50 OvsPacketExecute *execute);
/* Flow-key policy defined elsewhere; used here to parse the nested key
 * attributes of an execute request. */
51 extern NL_POLICY nlFlowKeyPolicy[];
/*
 * Initialization pass over ovsPacketQueues: every slot is zeroed, then its
 * packet list head is initialized and its spin lock is allocated. The visible
 * code ends by returning STATUS_SUCCESS.
 * NOTE(review): the enclosing function's signature and the declaration of
 * 'i' are not visible in this excerpt.
 */
57 POVS_USER_PACKET_QUEUE queue;
58 for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
59 queue = &ovsPacketQueues[i];
/* Zero the whole slot first; the list head and lock are set up afterwards. */
60 RtlZeroMemory(queue, sizeof (*queue));
61 InitializeListHead(&queue->packetList);
62 NdisAllocateSpinLock(&queue->queueLock);
64 return STATUS_SUCCESS;
/*
 * Teardown pass over ovsPacketQueues: asserts that each slot holds no
 * packets, has no owning instance and no pending IRP, then frees the slot's
 * spin lock.
 * NOTE(review): the enclosing function's signature and the declaration of
 * 'i' are not visible in this excerpt.
 */
71 POVS_USER_PACKET_QUEUE queue;
72 for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
73 queue = &ovsPacketQueues[i];
/* The asserts only check state; nothing here drains or detaches a queue. */
74 ASSERT(IsListEmpty(&queue->packetList));
75 ASSERT(queue->instance == NULL);
76 ASSERT(queue->pendingIrp == NULL);
77 NdisFreeSpinLock(&queue->queueLock);
/*
 * Removes every packet currently queued on 'queue', provided the queue is
 * still owned by 'instance'.
 *
 * queue:    user packet queue to drain.
 * instance: open instance expected to own the queue; when queue->instance
 *           differs, the lock is released without touching the list.
 *
 * Queued elements are moved to the local list 'tmp' while queueLock is held
 * and are unlinked one by one only after the lock has been released.
 * NOTE(review): what is done with each unlinked element (presumably it is
 * freed) is not visible in this excerpt -- confirm.
 */
82 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
83 POVS_OPEN_INSTANCE instance)
85 PLIST_ENTRY link, next;
87 POVS_PACKET_QUEUE_ELEM elem;
89 InitializeListHead(&tmp);
90 NdisAcquireSpinLock(&queue->queueLock);
/* Ownership check: only the instance bound to the queue may purge it. */
91 if (queue->instance != instance) {
92 NdisReleaseSpinLock(&queue->queueLock);
96 if (queue->numPackets) {
97 OvsAppendList(&tmp, &queue->packetList);
98 queue->numPackets = 0;
100 NdisReleaseSpinLock(&queue->queueLock);
/* Walk the private list outside the lock. */
101 LIST_FORALL_SAFE(&tmp, link, next) {
102 RemoveEntryList(link);
103 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * Detaches 'instance' from its packet queue.
 *
 * Under queueLock the function verifies that the queue is owned by
 * 'instance', moves any queued packets to the local list 'tmp', clears the
 * queue/instance binding in both directions, resets queueId to
 * OVS_MAX_NUM_PACKET_QUEUES and takes over the queue's pending IRP after
 * clearing that IRP's cancel routine. After the lock is dropped the moved
 * elements are unlinked and the IRP is completed with STATUS_SUCCESS.
 *
 * NOTE(review): the guards around the IRP handling, and what is done with
 * each unlinked element, are on lines not visible in this excerpt.
 */
110 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
112 POVS_USER_PACKET_QUEUE queue;
113 POVS_PACKET_QUEUE_ELEM elem;
114 PLIST_ENTRY link, next;
118 InitializeListHead(&tmp);
119 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
121 PDRIVER_CANCEL cancelRoutine;
122 NdisAcquireSpinLock(&queue->queueLock);
123 if (queue->instance != instance) {
124 NdisReleaseSpinLock(&queue->queueLock);
128 if (queue->numPackets) {
129 OvsAppendList(&tmp, &queue->packetList);
130 queue->numPackets = 0;
/* Break the binding in both directions while the lock is still held. */
132 queue->instance = NULL;
133 queue->queueId = OVS_MAX_NUM_PACKET_QUEUES;
134 instance->packetQueue = NULL;
135 irp = queue->pendingIrp;
136 queue->pendingIrp = NULL;
/* IoSetCancelRoutine returns the previously installed routine; a NULL result
 * is handled specially below (handling not visible in this excerpt). */
138 cancelRoutine = IoSetCancelRoutine(irp, NULL);
139 if (cancelRoutine == NULL) {
143 NdisReleaseSpinLock(&queue->queueLock);
145 LIST_FORALL_SAFE(&tmp, link, next) {
146 RemoveEntryList(link);
147 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
151 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
/*
 * Binds the calling open instance to a packet queue, or unbinds it.
 *
 * The first UINT32 of inputBuffer is the requested queue id:
 *   - instance already has a queue and id >= OVS_MAX_NUM_PACKET_QUEUES:
 *     the instance's queue is cleaned up (unsubscribe);
 *   - instance has no queue and id < OVS_MAX_NUM_PACKET_QUEUES:
 *     the slot is claimed under its queueLock;
 *   - any other combination: STATUS_INVALID_PARAMETER.
 *
 * Returns STATUS_INVALID_PARAMETER when inputLength < sizeof (UINT32),
 * STATUS_INSUFFICIENT_RESOURCES when the slot belongs to another instance,
 * and STATUS_SUCCESS otherwise.
 * NOTE(review): the remaining parameters of the signature are not visible in
 * this excerpt.
 */
156 OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
160 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
162 POVS_USER_PACKET_QUEUE queue;
163 if (inputLength < sizeof (UINT32)) {
164 return STATUS_INVALID_PARAMETER;
166 queueId = *(UINT32 *)inputBuffer;
167 if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
171 OvsCleanupPacketQueue(instance);
172 } else if (instance->packetQueue == NULL &&
173 queueId < OVS_MAX_NUM_PACKET_QUEUES) {
174 queue = &ovsPacketQueues[queueId];
175 NdisAcquireSpinLock(&queue->queueLock);
/* Slot already owned: fail if the owner is someone else, otherwise treat
 * the request as already satisfied. */
176 if (ovsPacketQueues[queueId].instance) {
177 if (ovsPacketQueues[queueId].instance != instance) {
178 NdisReleaseSpinLock(&queue->queueLock);
179 return STATUS_INSUFFICIENT_RESOURCES;
181 NdisReleaseSpinLock(&queue->queueLock);
182 return STATUS_SUCCESS;
/* Free slot: record the binding on both the queue and the instance. */
185 queue->queueId = queueId;
186 queue->instance = instance;
187 instance->packetQueue = queue;
188 ASSERT(IsListEmpty(&queue->packetList));
189 NdisReleaseSpinLock(&queue->queueLock);
191 return STATUS_INVALID_PARAMETER;
193 return STATUS_SUCCESS;
/*
 * Copies the next queued packet of the calling instance into outputBuffer.
 *
 * Returns STATUS_INVALID_PARAMETER when the instance has no packet queue and
 * STATUS_BUFFER_TOO_SMALL when outputLength is below
 * sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE; the visible code otherwise
 * ends with STATUS_SUCCESS.
 *
 * The copy length is the smaller of elem->packet.totalLen and outputLength.
 * When a TCP or UDP checksum is still needed and the whole packet fits, the
 * first 'size' bytes are copied verbatim and the following l4PayLoad bytes
 * are copied through CopyAndCalculateChecksum; otherwise a plain copy of
 * 'len' bytes is done.
 * NOTE(review): the handling of an empty queue, the store of the computed
 * checksum and the release of 'elem' are on lines not visible here.
 */
198 OvsReadDpIoctl(PFILE_OBJECT fileObject,
203 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
204 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offsets added to the start of the L4 header to reach the checksum
 * field; also used by OvsCompletePacketHeader later in this file. */
207 #define TCP_CSUM_OFFSET 16
208 #define UDP_CSUM_OFFSET 6
211 if (instance->packetQueue == NULL) {
212 return STATUS_INVALID_PARAMETER;
214 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
215 return STATUS_BUFFER_TOO_SMALL;
218 elem = OvsGetNextPacket(instance);
221 * XXX revisit this later
223 len = elem->packet.totalLen > outputLength ? outputLength :
224 elem->packet.totalLen;
226 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
227 len == elem->packet.totalLen) {
229 UINT16 size = (UINT16)(elem->packet.userDataLen +
230 elem->hdrInfo.l4Offset +
231 (UINT16)sizeof (OVS_PACKET_INFO));
232 RtlCopyMemory(outputBuffer, &elem->packet, size);
233 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
234 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
235 (UINT8 *)&elem->packet + size,
236 elem->hdrInfo.l4PayLoad, 0);
/* 'ptr' addresses the checksum field inside the copy in outputBuffer. */
237 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
238 (elem->hdrInfo.tcpCsumNeeded ?
239 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
241 ovsUserStats.l4Csum++;
243 RtlCopyMemory(outputBuffer, &elem->packet, len);
249 return STATUS_SUCCESS;
252 /* Helper function to allocate a Forwarding Context for an NBL */
/*
 * switchContext: supplies both the NDIS switch handler table and the NDIS
 *                switch context passed to the handler.
 * nbl:           NET_BUFFER_LIST that receives the forwarding context.
 *
 * Returns whatever the AllocateNetBufferListForwardingContext handler
 * returns; no additional processing is done here.
 */
254 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
255 PNET_BUFFER_LIST nbl)
257 return switchContext->NdisSwitchHandlers.
258 AllocateNetBufferListForwardingContext(
259 switchContext->NdisSwitchContext, nbl);
263 * --------------------------------------------------------------------------
264 * This function allocates all the stuff necessary for creating an NBL from the
265 * input buffer of specified length, namely, a nonpaged data buffer of size
266 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
267 * context yet. It also copies data from the specified buffer to the NBL.
268 * --------------------------------------------------------------------------
/*
 * Builds an NBL holding a copy of 'length' bytes from 'userBuffer'.
 *
 * The allocator is chosen by size: OvsAllocateVariableSizeNBL when length
 * exceeds OVS_DEFAULT_DATA_SIZE, OvsAllocateFixSizeNBL otherwise; both are
 * called with OVS_DEFAULT_HEADROOM_SIZE. The destination address is the
 * system mapping of the first NET_BUFFER's current MDL plus the current MDL
 * offset.
 * NOTE(review): the failure checks (allocation failure, NULL mapping) and
 * the return statements are on lines not visible in this excerpt; the
 * OvsCompleteNBL call below presumably sits on the mapping-failure path.
 */
271 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
276 PNET_BUFFER_LIST nbl = NULL;
280 if (length > OVS_DEFAULT_DATA_SIZE) {
281 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
282 OVS_DEFAULT_HEADROOM_SIZE);
285 nbl = OvsAllocateFixSizeNBL(switchContext, length,
286 OVS_DEFAULT_HEADROOM_SIZE);
292 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
293 mdl = NET_BUFFER_CURRENT_MDL(nb);
294 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
295 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
297 OvsCompleteNBL(switchContext, nbl, TRUE);
/* Copy the caller's bytes into the freshly allocated buffer. */
301 NdisMoveMemory(data, userBuffer, length);
307 *----------------------------------------------------------------------------
308 * OvsNlExecuteCmdHandler --
309 * Handler for OVS_PACKET_CMD_EXECUTE command.
310 *----------------------------------------------------------------------------
/*
 * Steps visible in this excerpt:
 *   1. Parse the top-level packet attributes of the request against
 *      nlPktExecPolicy; a parse failure sets STATUS_UNSUCCESSFUL.
 *   2. Parse the attributes nested in OVS_PACKET_ATTR_KEY against
 *      nlFlowKeyPolicy; a parse failure sets STATUS_UNSUCCESSFUL.
 *   3. Fill an OvsPacketExecute (dpNo comes from ovsHdr->dp_ifindex, the
 *      rest from _MapNlAttrToOvsPktExec) and run OvsExecuteDpIoctl.
 *   4. On success, build the reply header in the output buffer with
 *      NlFillOvsMsg and report its length through *replyLen.
 *   5. Map the final status to an NL_ERROR; if it is not NL_ERROR_SUCCESS and
 *      an output buffer exists, write an error message there, set *replyLen
 *      to its length and reset status to STATUS_SUCCESS.
 *
 * usrParamsCtx: supplies inputBuffer, outputBuffer and outputLength.
 * NOTE(review): the replyLen parameter, the jumps taken after the parse
 * failures and the final return are on lines not visible in this excerpt.
 */
313 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
316 NTSTATUS status = STATUS_SUCCESS;
317 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
318 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
319 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
320 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
321 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
323 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
324 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Top-level attributes start right after the three fixed headers. */
326 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
327 UINT32 keyAttrOffset = 0;
328 OvsPacketExecute execute;
329 NL_ERROR nlError = NL_ERROR_SUCCESS;
/* PACKET, KEY and ACTIONS are mandatory; USERDATA is optional. */
332 static const NL_POLICY nlPktExecPolicy[] = {
333 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
334 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
335 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
336 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
337 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
341 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
343 /* Get all the top level Flow attributes */
344 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
345 nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
347 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
349 status = STATUS_UNSUCCESSFUL;
353 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
356 /* Get flow keys attributes */
357 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
358 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
359 nlFlowKeyPolicy, keyAttrs,
360 ARRAY_SIZE(keyAttrs))) != TRUE) {
361 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
362 status = STATUS_UNSUCCESSFUL;
366 execute.dpNo = ovsHdr->dp_ifindex;
368 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
370 status = OvsExecuteDpIoctl(&execute);
372 /* Default reply that we want to send */
373 if (status == STATUS_SUCCESS) {
374 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
375 usrParamsCtx->outputLength);
377 /* Prepare nl Msg headers */
378 status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
379 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
380 genlMsgHdr->cmd, OVS_PACKET_VERSION,
383 if (status == STATUS_SUCCESS) {
384 *replyLen = msgOut->nlMsg.nlmsgLen;
387 /* Map NTSTATUS to NL_ERROR */
388 nlError = NlMapStatusToNlErr(status);
390 /* As of now there are no transactional errors in the implementation.
391 * Once we have them then we need to map status to correct
392 * nlError value, so that below mentioned code gets hit. */
393 if ((nlError != NL_ERROR_SUCCESS) &&
394 (usrParamsCtx->outputBuffer)) {
396 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
397 usrParamsCtx->outputBuffer;
398 BuildErrorMsg(msgIn, msgError, nlError);
399 *replyLen = msgError->nlMsg.nlmsgLen;
/* The failure is now carried by the error message in the output buffer. */
400 status = STATUS_SUCCESS;
410 *----------------------------------------------------------------------------
411 * _MapNlAttrToOvsPktExec --
412 * Maps input Netlink attributes to OvsPacketExecute.
413 *----------------------------------------------------------------------------
/*
 * nlAttrs:  parsed top-level packet attributes; the PACKET and ACTIONS
 *           entries are read here.
 * keyAttrs: parsed flow-key attributes; only OVS_KEY_ATTR_IN_PORT is read.
 * execute:  output; packetBuf/packetLen, actions/actionsLen and inPort are
 *           filled in. Other fields are left untouched.
 *
 * The buffers are referenced via NlAttrGet, not copied, so 'execute' points
 * into the memory that backs the attributes.
 */
416 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
417 OvsPacketExecute *execute)
419 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
420 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
422 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
423 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
425 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * Injects a user-supplied packet into the datapath and runs the supplied
 * actions on it.
 *
 * execute: packetBuf/packetLen give the packet bytes, actions/actionsLen the
 *          action list, inPort the source port number.
 *
 * Status values assigned in the visible code:
 *   STATUS_INVALID_PARAMETER - gOvsSwitchContext is NULL or packetLen is 0;
 *   STATUS_NO_MEMORY         - on the NBL allocation failure path;
 *   STATUS_NOT_SUPPORTED     - ndisStatus is NDIS_STATUS_NOT_SUPPORTED;
 *   STATUS_UNSUCCESSFUL      - any other non-success ndisStatus.
 *
 * gOvsCtrlLock is held from entry until the final release below.
 * NOTE(review): the branch conditions around the vport lookup, the condition
 * guarding OvsCompleteNBL and the return statement are not visible here.
 */
429 OvsExecuteDpIoctl(OvsPacketExecute *execute)
431 NTSTATUS status = STATUS_SUCCESS;
433 LOCK_STATE_EX lockState;
434 PNET_BUFFER_LIST pNbl;
436 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
438 OVS_PACKET_HDR_INFO layers;
439 POVS_VPORT_ENTRY vport;
441 NdisAcquireSpinLock(gOvsCtrlLock);
442 if (gOvsSwitchContext == NULL) {
443 status = STATUS_INVALID_PARAMETER;
447 if (execute->packetLen == 0) {
448 status = STATUS_INVALID_PARAMETER;
452 actions = execute->actions;
457 * Allocate the NBL, copy the data from the userspace buffer. Allocate
458 * also, the forwarding context for the packet.
460 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
463 status = STATUS_NO_MEMORY;
467 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
468 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
470 fwdDetail->SourcePortId = vport->portId;
471 fwdDetail->SourceNicIndex = vport->nicIndex;
473 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
474 fwdDetail->SourceNicIndex = 0;
476 // XXX: Figure out if any of the other members of fwdDetail need to be set.
478 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
480 if (ndisStatus == NDIS_STATUS_SUCCESS) {
/* The spin lock taken at entry is still held here, which is presumably why
 * DISPATCH_LEVEL can be asserted and passed to the RW lock -- confirm. */
481 ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
482 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
483 NDIS_RWL_AT_DISPATCH_LEVEL);
484 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
485 vport ? vport->portNo :
487 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
488 &key, NULL, &layers, actions,
489 execute->actionsLen);
491 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Collapse the NDIS status into one of two NTSTATUS failure codes. */
493 if (ndisStatus != NDIS_STATUS_SUCCESS) {
494 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
495 status = STATUS_NOT_SUPPORTED;
497 status = STATUS_UNSUCCESSFUL;
502 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
505 NdisReleaseSpinLock(gOvsCtrlLock);
/*
 * Purges the packet queue bound to the open instance behind 'fileObject'.
 *
 * Returns STATUS_SUCCESS after calling OvsPurgePacketQueue, or
 * STATUS_INVALID_PARAMETER on the early-exit path.
 * NOTE(review): the condition guarding the STATUS_INVALID_PARAMETER return
 * (presumably a NULL queue) is not visible in this excerpt.
 */
511 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
513 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
514 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
517 return STATUS_INVALID_PARAMETER;
519 OvsPurgePacketQueue(queue, instance);
520 return STATUS_SUCCESS;
/*
 * Cancel routine installed on a pending wait IRP by OvsWaitDpIoctl.
 *
 * deviceObject: unused.
 * irp:          the IRP being cancelled (parameter line not visible here).
 *
 * The system cancel spin lock is released first. The queue is then looked up
 * from the IRP's file object under gOvsCtrlLock, and under the queue's own
 * lock pendingIrp is cleared if it still refers to this IRP. The visible
 * code ends by completing the IRP with STATUS_CANCELLED.
 * NOTE(review): the early exits for a NULL file object, instance or queue
 * and any guard before reading instance->packetQueue are on lines not
 * visible in this excerpt.
 */
524 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
527 PIO_STACK_LOCATION irpSp;
528 PFILE_OBJECT fileObject;
529 POVS_OPEN_INSTANCE instance;
530 POVS_USER_PACKET_QUEUE queue = NULL;
532 UNREFERENCED_PARAMETER(deviceObject);
534 IoReleaseCancelSpinLock(irp->CancelIrql);
535 irpSp = IoGetCurrentIrpStackLocation(irp);
536 fileObject = irpSp->FileObject;
538 if (fileObject == NULL) {
541 NdisAcquireSpinLock(gOvsCtrlLock);
542 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
544 queue = instance->packetQueue;
546 if (instance == NULL || queue == NULL) {
547 NdisReleaseSpinLock(gOvsCtrlLock);
/* gOvsCtrlLock is dropped before queueLock is taken; the two are never held
 * together in this routine. */
550 NdisReleaseSpinLock(gOvsCtrlLock);
551 NdisAcquireSpinLock(&queue->queueLock);
552 if (queue->pendingIrp == irp) {
553 queue->pendingIrp = NULL;
555 NdisReleaseSpinLock(&queue->queueLock);
557 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * Parks 'irp' on the caller's packet queue until packets arrive.
 *
 * Return values visible in this excerpt:
 *   STATUS_INVALID_PARAMETER - early-exit path, or the queue is not owned by
 *                              this instance;
 *   STATUS_DEVICE_BUSY       - the queue already has a pending IRP;
 *   STATUS_PENDING           - assigned after the IRP is stored as
 *                              queue->pendingIrp (queue was empty).
 * status starts as STATUS_SUCCESS, which is what remains when the queue
 * already holds packets.
 *
 * NOTE(review): the condition for the first STATUS_INVALID_PARAMETER return,
 * the guard around the second IoSetCancelRoutine call (presumably a check of
 * irp->Cancel), the guard on the STATUS_CANCELLED completion and the final
 * return are on lines not visible in this excerpt.
 */
562 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
564 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
565 POVS_USER_PACKET_QUEUE queue =
566 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
567 NTSTATUS status = STATUS_SUCCESS;
568 BOOLEAN cancelled = FALSE;
571 return STATUS_INVALID_PARAMETER;
573 NdisAcquireSpinLock(&queue->queueLock);
574 if (queue->instance != instance) {
575 NdisReleaseSpinLock(&queue->queueLock);
576 return STATUS_INVALID_PARAMETER;
/* Only one waiter per queue is supported. */
578 if (queue->pendingIrp) {
579 NdisReleaseSpinLock(&queue->queueLock);
580 return STATUS_DEVICE_BUSY;
582 if (queue->numPackets == 0) {
583 PDRIVER_CANCEL cancelRoutine;
584 IoMarkIrpPending(irp);
585 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
587 cancelRoutine = IoSetCancelRoutine(irp, NULL);
592 queue->pendingIrp = irp;
594 status = STATUS_PENDING;
596 NdisReleaseSpinLock(&queue->queueLock);
598 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
599 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/*
 * Dequeues the head element of the packet queue bound to 'instance'.
 *
 * Under queueLock the head of packetList is removed and the element that
 * embeds the list entry is returned. When the queue is not owned by
 * 'instance' or holds no packets, the lock is released without dequeuing.
 * NOTE(review): the values returned on the early-exit paths (presumably
 * NULL), any NULL check on the queue pointer and the numPackets decrement
 * are on lines not visible in this excerpt.
 */
605 POVS_PACKET_QUEUE_ELEM
606 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
608 POVS_USER_PACKET_QUEUE queue;
610 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
614 NdisAcquireSpinLock(&queue->queueLock);
615 if (queue->instance != instance || queue->numPackets == 0) {
616 NdisReleaseSpinLock(&queue->queueLock);
619 link = RemoveHeadList(&queue->packetList);
621 NdisReleaseSpinLock(&queue->queueLock);
622 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * Maps a queue id to its slot in ovsPacketQueues.
 *
 * Returns the slot when an instance is bound to it and NULL when it is
 * unbound. queue->instance is read here without taking queueLock;
 * OvsQueuePackets re-checks it under the lock.
 * NOTE(review): the value returned for an out-of-range queueId (presumably
 * NULL) is on a line not visible in this excerpt.
 */
626 POVS_USER_PACKET_QUEUE
627 OvsGetQueue(UINT32 queueId)
629 POVS_USER_PACKET_QUEUE queue;
630 if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
633 queue = &ovsPacketQueues[queueId];
634 return queue->instance != NULL ? queue : NULL;
/*
 * Hands a list of packet elements to the user packet queue 'queueId'.
 *
 * queueId:    index of the target queue.
 * packetList: list of OVS_PACKET_QUEUE_ELEM entries to enqueue.
 * numElems:   number of entries in packetList (parameter line not visible
 *             here); added to queue->numPackets.
 *
 * Under queueLock the list is appended to the queue; if an IRP is pending on
 * the queue it is detached and its cancel routine cleared, and after the
 * lock is released it is completed with STATUS_SUCCESS so the waiter wakes
 * up. The trailing loop empties packetList and logs a drop count.
 * NOTE(review): the jumps into the drop loop (queue lookup failed, queue no
 * longer bound), the handling of a NULL cancel routine and the disposal of
 * each dropped element are on lines not visible in this excerpt.
 */
638 OvsQueuePackets(UINT32 queueId,
639 PLIST_ENTRY packetList,
642 POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
643 POVS_PACKET_QUEUE_ELEM elem;
648 OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
654 NdisAcquireSpinLock(&queue->queueLock);
/* Re-check the binding now that the lock is held. */
655 if (queue->instance == NULL) {
656 NdisReleaseSpinLock(&queue->queueLock);
659 OvsAppendList(&queue->packetList, packetList);
660 queue->numPackets += numElems;
662 if (queue->pendingIrp) {
663 PDRIVER_CANCEL cancelRoutine;
664 irp = queue->pendingIrp;
665 queue->pendingIrp = NULL;
666 cancelRoutine = IoSetCancelRoutine(irp, NULL);
667 if (cancelRoutine == NULL) {
671 NdisReleaseSpinLock(&queue->queueLock);
673 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
677 while (!IsListEmpty(packetList)) {
678 link = RemoveHeadList(packetList);
679 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
683 OVS_LOG_LOUD("Exit: drop %u packets", num);
688 *----------------------------------------------------------------------------
689 * OvsCreateAndAddPackets --
691 * Create a packet and forward it to user space.
693 * This function would fragment packet if needed, and queue
694 * each segment to user space.
695 *----------------------------------------------------------------------------
/*
 * Steps visible in this excerpt:
 *   - For TCP packets the large-send-offload info of 'nbl' is read; when an
 *     MSS is set the NBL is segmented with OvsTcpSegmentNBL, and a NULL
 *     result returns NDIS_STATUS_FAILURE.
 *   - Queue elements are built with OvsCreateQueueNlPacket and appended to
 *     'list', advancing with NET_BUFFER_NEXT_NB.
 *   - The segmented NBL is completed via OvsCompleteNBL and
 *     NDIS_STATUS_SUCCESS is returned.
 * NOTE(review): several parameters, the loop construct, the handling of a
 * NULL element and the guard on OvsCompleteNBL are on lines not visible in
 * this excerpt.
 */
698 OvsCreateAndAddPackets(PVOID userData,
703 PNET_BUFFER_LIST nbl,
705 POVS_PACKET_HDR_INFO hdrInfo,
706 POVS_SWITCH_CONTEXT switchContext,
710 POVS_PACKET_QUEUE_ELEM elem;
711 PNET_BUFFER_LIST newNbl = NULL;
714 if (hdrInfo->isTcp) {
715 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
718 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
719 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
720 packetLength = NET_BUFFER_DATA_LENGTH(nb);
722 OVS_LOG_TRACE("MSS %u packet len %u",
723 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS is what triggers segmentation. */
724 if (tsoInfo.LsoV1Transmit.MSS) {
725 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
726 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
727 tsoInfo.LsoV1Transmit.MSS , 0);
728 if (newNbl == NULL) {
729 return NDIS_STATUS_FAILURE;
735 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
737 elem = OvsCreateQueueNlPacket(userData, userDataLen,
738 cmd, inPort, key, nbl, nb,
741 InsertTailList(list, &elem->link);
744 nb = NET_BUFFER_NEXT_NB(nb);
747 OvsCompleteNBL(switchContext, newNbl, TRUE);
749 return NDIS_STATUS_SUCCESS;
/*
 * Computes the buffer size reserved for an upcall Netlink message.
 *
 * The base size covers the aligned ovs_header, an attribute carrying
 * 'payload' bytes and an attribute sized by OvsFlowKeyAttrSize(). Space for
 * the user-data attribute (userDataLen bytes) and for the egress tunnel key
 * attribute (OvsTunKeyAttrSize()) is added on top.
 * NOTE(review): the conditions guarding the two additions, the remaining
 * parameters and the return statement are on lines not visible in this
 * excerpt.
 */
752 static __inline UINT32
753 OvsGetUpcallMsgSize(PVOID userData,
755 OvsIPv4TunnelKey *tunnelKey,
758 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
759 NlAttrSize(payload) +
760 NlAttrSize(OvsFlowKeyAttrSize());
762 /* OVS_PACKET_ATTR_USERDATA */
764 size += NlAttrTotalSize(userDataLen);
766 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
767 /* Is it included in the flow key attr XXX */
769 size += NlAttrTotalSize(OvsTunKeyAttrSize());
775 *----------------------------------------------------------------------------
776 * This function completes the IP header csum, records the L4 payload offset and
777 * whether the TCP or UDP csum still needs to be calculated. The actual csum is
778 * calculated simultaneously with the copy of the payload to the destination
779 * buffer when the packet is read.
780 *----------------------------------------------------------------------------
/*
 * packet:     start of the copied packet bytes; header offsets from the
 *             hdrInfo structures are applied to it.
 * isRecv:     selects whether csumInfo is read through its Receive or its
 *             Transmit view (parameter line not visible here).
 * csumInfo:   NDIS checksum info taken from the NBL.
 * hdrInfoIn:  header info from flow extraction; read only.
 * hdrInfoOut: receives l4PayLoad and the tcpCsumNeeded/udpCsumNeeded flags.
 *
 * Visible behaviour:
 *   - The IPv4 header checksum is recomputed in place when the receive side
 *     reports IpChecksumValueInvalid, or when the transmit side has IsIPv4
 *     and IpHeaderChecksum set.
 *   - Receive with TcpChecksumValueInvalid: l4PayLoad is derived from the
 *     IPv4/IPv6 header, a pseudo-header checksum is computed and
 *     tcpCsumNeeded is set.
 *   - Transmit: tcpCsumNeeded or udpCsumNeeded follows the Transmit
 *     TcpChecksum/UdpChecksum flags; when either is set, l4PayLoad and a
 *     pseudo-header checksum are computed and 'ptr' is pointed at the L4
 *     checksum field.
 * NOTE(review): the stores of the IPv6 TCP pseudo checksum and of 'sum'
 * through 'ptr', plus the 'else' lines, are not visible in this excerpt.
 */
783 OvsCompletePacketHeader(UINT8 *packet,
785 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
786 POVS_PACKET_HDR_INFO hdrInfoIn,
787 POVS_PACKET_HDR_INFO hdrInfoOut)
789 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
790 (!isRecv && csumInfo.Transmit.IsIPv4 &&
791 csumInfo.Transmit.IpHeaderChecksum)) {
792 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
793 ASSERT(hdrInfoIn->isIPv4);
794 ASSERT(ipHdr->Version == 4);
795 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
796 ipHdr->HeaderLength << 2,
797 (UINT16)~ipHdr->HeaderChecksum);
798 ovsUserStats.ipCsum++;
800 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
802 * calculate TCP/UDP pseudo checksum
804 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
806 * Only this case, we need to reclaculate pseudo checksum
807 * all other cases, it is assumed the pseudo checksum is
811 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
812 if (hdrInfoIn->isIPv4) {
813 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
814 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
815 (ipHdr->HeaderLength << 2));
816 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
817 (UINT32 *)&ipHdr->DestinationAddress,
818 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
820 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
821 hdrInfoOut->l4PayLoad =
822 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
823 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
824 hdrInfoIn->l4Offset);
825 ASSERT(hdrInfoIn->isIPv6);
827 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
828 (UINT32 *)&ipv6Hdr->DestinationAddress,
829 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
831 hdrInfoOut->tcpCsumNeeded = 1;
832 ovsUserStats.recalTcpCsum++;
833 } else if (!isRecv) {
834 if (csumInfo.Transmit.TcpChecksum) {
835 hdrInfoOut->tcpCsumNeeded = 1;
836 } else if (csumInfo.Transmit.UdpChecksum) {
837 hdrInfoOut->udpCsumNeeded = 1;
839 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
843 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
845 if (hdrInfoIn->isIPv4) {
846 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
847 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
848 (ipHdr->HeaderLength << 2));
850 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
851 (UINT32 *)&ipHdr->DestinationAddress,
852 proto, hdrInfoOut->l4PayLoad);
855 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
856 hdrInfoIn->l3Offset);
857 hdrInfoOut->l4PayLoad =
858 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
859 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
860 hdrInfoIn->l4Offset);
861 ASSERT(hdrInfoIn->isIPv6);
863 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
864 (UINT32 *)&ipv6Hdr->DestinationAddress,
865 proto, hdrInfoOut->l4PayLoad);
/* Locate the checksum field inside the L4 header for the chosen protocol. */
869 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
870 (hdrInfoOut->tcpCsumNeeded ?
871 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/*
 * Selects the user-space pid that should receive an upcall for 'vport'.
 *
 * vport: port whose upcallPid is reported.
 * nb:    unused for now.
 * pid:   output; receives vport->upcallPid.
 *
 * NOTE(review): the visible body returns STATUS_SUCCESS, while the caller in
 * OvsCreateQueueNlPacket tests the result with '!'. If STATUS_SUCCESS is 0
 * that test is true on success -- confirm the declared return type (not
 * visible in this excerpt) and the intended convention.
 */
879 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
881 UNREFERENCED_PARAMETER(nb);
883 /* XXX select a pid from an array of pids using a flow based hash */
884 *pid = vport->upcallPid;
885 return STATUS_SUCCESS;
889 *----------------------------------------------------------------------------
890 * OvsCreateQueueNlPacket --
892 * Create a packet which will be forwarded to user space.
895 * userData: when cmd is user action, this field contains
897 * userDataLen: as name indicated
898 * cmd: either miss or user action
899 * inPort: datapath port id from which the packet is received.
900 * key: flow Key with a tunnel key if available
901 * nbl: the NET_BUFFER_LIST which contain the packet
903 * isRecv: This is used to decide how to interpret the csum info
904 * hdrInfo: include hdr info initialized during flow extraction.
907 * NULL if fail to create the packet
908 * The packet element otherwise
909 *----------------------------------------------------------------------------
/*
 * Implementation outline, as far as it is visible in this excerpt:
 *   1. Look up the vport for 'inPort' and obtain the upcall pid.
 *   2. Drop received packets whose NDIS checksum info reports a TCP, IP or
 *      (non-zero) UDP checksum failure, counting them in dropDuetoChecksum.
 *   3. Reserve VLAN_TAG_SIZE extra bytes when the NBL carries a VLAN id.
 *   4. Allocate one block holding the queue element plus the Netlink message
 *      sized by OvsGetUpcallMsgSize, and fill in the element's bookkeeping.
 *   5. Emit the OVS message header, the flow key, the user data and an
 *      uninitialized PACKET attribute, then copy the frame into it.
 *   6. Finish the checksums via OvsCompletePacketHeader and insert the
 *      802.1Q tag, shifting l3Offset/l4Offset by VLAN_TAG_SIZE.
 * OvsFreeMemory(elem) at the end is presumably the shared failure path.
 * NOTE(review): most branch conditions, the 'goto' targets and the return
 * statements are on lines not visible in this excerpt.
 */
911 POVS_PACKET_QUEUE_ELEM
912 OvsCreateQueueNlPacket(PVOID userData,
917 PNET_BUFFER_LIST nbl,
920 POVS_PACKET_HDR_INFO hdrInfo)
922 #define VLAN_TAG_SIZE 4
923 UINT32 allocLen, dataLen, extraLen;
924 POVS_PACKET_QUEUE_ELEM elem;
926 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
927 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
928 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
933 /* XXX pass vport in the stack rather than portNo */
934 POVS_VPORT_ENTRY vport =
935 OvsFindVportByPortNo(gOvsSwitchContext, inPort);
938 /* Should never happen as dispatch lock is held */
/* NOTE(review): OvsGetPid() as visible in this file returns STATUS_SUCCESS;
 * if that value is 0, this '!' test is true on success -- confirm. */
943 if (!OvsGetPid(vport, nb, &pid)) {
945 * There is no userspace queue created yet, so there is no point for
946 * creating a new packet to be queued.
951 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
953 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
954 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
955 csumInfo.Receive.IpChecksumFailed)) {
956 OVS_LOG_INFO("Packet dropped due to checksum failure.");
957 ovsUserStats.dropDuetoChecksum++;
961 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
/* Extra room is reserved only when a VLAN id is present. */
962 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
964 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/* The attribute lengths below are passed as UINT16, hence this bound. */
966 if (NlAttrSize(dataLen) > MAXUINT16) {
970 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
/* Element header and Netlink message share a single allocation. */
973 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
974 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
976 ovsUserStats.dropDuetoResource++;
979 elem->hdrInfo.value = hdrInfo->value;
980 elem->packet.totalLen = nlMsgSize;
981 /* XXX remove queueid */
982 elem->packet.queue = 0;
983 /* XXX no need as the length is already in the NL attrib */
984 elem->packet.userDataLen = userDataLen;
985 elem->packet.inPort = inPort;
986 elem->packet.cmd = cmd;
987 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
989 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
990 ovsUserStats.action++;
995 /* XXX Should we have both packetLen and TotalLen*/
996 elem->packet.packetLen = dataLen + extraLen;
998 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1001 * Initialize the OVS header
1002 * Since we are pre allocating memory for the NL buffer
1003 * the attribute settings should not fail
1005 if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1006 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1007 gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1011 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1012 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1016 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswitchd */
1018 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1019 userData, (UINT16)userDataLen)) {
1025 * Make space for the payload to be copied and set the attribute
1026 * XXX Uninit set initilizes the buffer with xero, we don't actually need
1027 * that the payload to be initailized
1029 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1030 (UINT16)(dataLen + extraLen));
1035 /* Store the payload for csum calculation when packet is read */
1036 elem->packet.payload = dst;
1039 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1041 ovsUserStats.dropDuetoResource++;
1043 } else if (src != dst) {
1044 /* Copy the data from the NDIS buffer to dst. */
1045 RtlCopyMemory(dst, src, dataLen);
1048 /* Set csum if was offloaded */
1049 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1052 * Finally insert VLAN tag
1055 dst = elem->packet.payload;
1056 src = dst + extraLen;
1057 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1058 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1059 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1061 ((UINT16 *)dst)[0] = htons(0x8100);
1062 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1063 (vlanInfo.TagHeader.UserPriority << 13));
1064 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1065 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1066 ovsUserStats.vlanInsert++;
1070 OvsFreeMemory(elem);