2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage packet queue for packet miss for userAction.
34 #include "TunnelIntf.h"
40 #define OVS_DBG_MOD OVS_DBG_USER
/* Forward declaration; OvsGetNextPacket is defined later in this file. */
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Globals defined outside this file: control spin lock and switch context. */
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped throughout this file (miss, action, csum, drop, ...). */
46 OVS_USER_STATS ovsUserStats;
/* Forward declaration of the file-local Netlink attribute mapper. */
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49 OvsPacketExecute *execute);
/* Policy used below to parse the nested flow-key attributes. */
50 extern NL_POLICY nlFlowKeyPolicy[];
/*
 * Acquires the pidHashLock spin lock of the global switch context.
 * gOvsSwitchContext is dereferenced without a NULL check.
 */
53 OvsAcquirePidHashLock()
55 NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
/*
 * Releases the pidHashLock spin lock of the global switch context; the
 * counterpart of OvsAcquirePidHashLock().
 */
59 OvsReleasePidHashLock()
61 NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
/*
 * Drains the packets queued on 'queue' on behalf of 'instance'.  While
 * queueLock is held the queued packets are moved onto a local list and
 * numPackets is reset; the elements are unlinked one by one only after the
 * lock has been released.
 */
66 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
67 POVS_OPEN_INSTANCE instance)
69 PLIST_ENTRY link, next;
71 POVS_PACKET_QUEUE_ELEM elem;
73 InitializeListHead(&tmp);
74 NdisAcquireSpinLock(&queue->queueLock);
/* Ownership check: on a mismatch the lock is released straight away. */
75 if (queue->instance != instance) {
76 NdisReleaseSpinLock(&queue->queueLock);
/* Detach everything that is queued so the lock can be dropped quickly. */
80 if (queue->numPackets) {
81 OvsAppendList(&tmp, &queue->packetList);
82 queue->numPackets = 0;
84 NdisReleaseSpinLock(&queue->queueLock);
/* Walk the detached list outside the lock. */
85 LIST_FORALL_SAFE(&tmp, link, next) {
86 RemoveEntryList(link);
87 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * Tears down the packet queue attached to 'instance'.  Under queueLock the
 * queued packets are moved onto a local list, the queue and the instance
 * are unlinked from each other and the pending IRP is detached from the
 * queue.  Once the lock has been released and freed, the detached elements
 * are unlinked, the IRP is completed with STATUS_SUCCESS, the queue memory
 * is freed and the instance is removed from pidHashArray.
 */
93 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
95 POVS_USER_PACKET_QUEUE queue;
96 POVS_PACKET_QUEUE_ELEM elem;
97 PLIST_ENTRY link, next;
102 InitializeListHead(&tmp);
103 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
105 PDRIVER_CANCEL cancelRoutine;
106 NdisAcquireSpinLock(&queue->queueLock);
107 ASSERT(queue->instance == instance);
108 /* XXX Should not happen */
109 if (queue->instance != instance) {
110 NdisReleaseSpinLock(&queue->queueLock);
111 NdisFreeSpinLock(&queue->queueLock);
/* Move the queued packets aside; they are handled after the lock is gone. */
115 if (queue->numPackets) {
116 OvsAppendList(&tmp, &queue->packetList);
117 queue->numPackets = 0;
/* Break the queue <-> instance association in both directions. */
119 queue->instance = NULL;
120 instance->packetQueue = NULL;
/* Take ownership of the pending IRP away from the queue. */
121 irp = queue->pendingIrp;
122 queue->pendingIrp = NULL;
/*
 * IoSetCancelRoutine returns the previously installed routine; a NULL
 * result is treated specially below (presumably the cancel routine is
 * already running and owns the IRP -- confirm).
 */
124 cancelRoutine = IoSetCancelRoutine(irp, NULL);
125 if (cancelRoutine == NULL) {
129 NdisReleaseSpinLock(&queue->queueLock);
130 NdisFreeSpinLock(&queue->queueLock);
132 LIST_FORALL_SAFE(&tmp, link, next) {
133 RemoveEntryList(link);
134 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
138 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
141 OvsFreeMemory(queue);
144 /* Remove the instance from pidHashArray */
145 OvsAcquirePidHashLock();
146 OvsDelPidInstance(gOvsSwitchContext, instance->pid);
147 OvsReleasePidHashLock();
/*
 * Subscribes an open instance to packet upcalls, or unsubscribes it,
 * depending on 'join'.
 *
 * Unsubscribe (queue present, 'join' clear): the instance's packet queue is
 * cleaned up and 'pid' is removed from pidHashArray.
 * Subscribe (no queue, 'join' set): a queue is allocated, zeroed and linked
 * to the instance, and the instance is inserted into pidHashArray under
 * 'pid'.
 *
 * STATUS_INVALID_PARAMETER is returned when there is no switch context and
 * for a request that matches neither case above; STATUS_NO_MEMORY is
 * returned on the allocation-failure path; STATUS_SUCCESS otherwise.
 */
151 OvsSubscribeDpIoctl(PVOID instanceP,
155 POVS_USER_PACKET_QUEUE queue;
156 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
/* The control lock is held only for the switch-context existence check. */
158 OvsAcquireCtrlLock();
159 if (!gOvsSwitchContext) {
160 OvsReleaseCtrlLock();
161 return STATUS_INVALID_PARAMETER;
163 OvsReleaseCtrlLock();
165 if (instance->packetQueue && !join) {
167 OvsCleanupPacketQueue(instance);
/*
 * NOTE(review): OvsCleanupPacketQueue() already removes instance->pid from
 * pidHashArray; confirm whether this second removal by 'pid' is needed.
 */
169 OvsAcquirePidHashLock();
170 /* Remove the instance from pidHashArray */
171 OvsDelPidInstance(gOvsSwitchContext, pid);
172 OvsReleasePidHashLock();
174 } else if (instance->packetQueue == NULL && join) {
175 queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
177 return STATUS_NO_MEMORY;
/*
 * NOTE(review): packetQueue is assigned here, before the queue is zeroed
 * and its spin lock allocated, and is assigned again below under the lock.
 * Confirm that no other path can observe the half-initialised queue.
 */
179 InitializeListHead(&(instance->pidLink));
180 instance->packetQueue = queue;
181 RtlZeroMemory(queue, sizeof (*queue));
182 NdisAllocateSpinLock(&queue->queueLock);
183 NdisAcquireSpinLock(&queue->queueLock);
184 InitializeListHead(&queue->packetList);
186 queue->instance = instance;
187 instance->packetQueue = queue;
188 NdisReleaseSpinLock(&queue->queueLock);
190 OvsAcquirePidHashLock();
191 /* Insert the instance to pidHashArray */
192 OvsAddPidInstance(gOvsSwitchContext, pid, instance);
193 OvsReleasePidHashLock();
196 /* user mode should call only once for subscribe */
197 return STATUS_INVALID_PARAMETER;
200 return STATUS_SUCCESS;
/*
 * Copies the next queued upcall packet of the open instance stored in
 * fileObject->FsContext into 'outputBuffer'.
 *
 * Returns STATUS_INVALID_PARAMETER when the instance has no packet queue,
 * STATUS_BUFFER_TOO_SMALL when 'outputLength' is smaller than
 * sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE, and STATUS_SUCCESS
 * otherwise.
 *
 * When a TCP or UDP checksum is still pending and the whole packet fits in
 * the output buffer, everything before the L4 header is copied verbatim and
 * the L4 part is copied through CopyAndCalculateChecksum(); in all other
 * cases a plain copy of 'len' bytes is done.
 */
205 OvsReadDpIoctl(PFILE_OBJECT fileObject,
210 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
211 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offsets of the checksum field from the start of the L4 header. */
214 #define TCP_CSUM_OFFSET 16
215 #define UDP_CSUM_OFFSET 6
218 if (instance->packetQueue == NULL) {
219 return STATUS_INVALID_PARAMETER;
221 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
222 return STATUS_BUFFER_TOO_SMALL;
225 elem = OvsGetNextPacket(instance);
228 * XXX revisit this later
/* Never copy more than the caller's buffer can hold. */
230 len = elem->packet.totalLen > outputLength ? outputLength :
231 elem->packet.totalLen;
233 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
234 len == elem->packet.totalLen) {
/* Number of bytes that precede the L4 header within packet.data. */
236 UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
237 elem->hdrInfo.l4Offset);
238 RtlCopyMemory(outputBuffer, &elem->packet.data, size);
239 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
/* Copy the L4 part and compute its checksum in the same pass. */
240 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
241 (UINT8 *)&elem->packet.data + size,
242 elem->hdrInfo.l4PayLoad, 0);
/* Address of the checksum field inside the copied L4 header. */
243 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
244 (elem->hdrInfo.tcpCsumNeeded ?
245 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
247 ovsUserStats.l4Csum++;
249 RtlCopyMemory(outputBuffer, &elem->packet.data, len);
255 return STATUS_SUCCESS;
258 /* Helper function to allocate a Forwarding Context for an NBL */
/*
 * Delegates to the AllocateNetBufferListForwardingContext handler stored in
 * switchContext->NdisSwitchHandlers, passing the switch's NdisSwitchContext
 * and 'nbl', and returns that handler's result unchanged.
 */
260 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
261 PNET_BUFFER_LIST nbl)
263 return switchContext->NdisSwitchHandlers.
264 AllocateNetBufferListForwardingContext(
265 switchContext->NdisSwitchContext, nbl);
269 * --------------------------------------------------------------------------
270 * This function allocates all the stuff necessary for creating an NBL from the
271 * input buffer of specified length, namely, a nonpaged data buffer of size
272 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
273 * context yet. It also copies data from the specified buffer to the NBL.
274 * --------------------------------------------------------------------------
/*
 * Allocates an NBL able to hold 'length' bytes (with
 * OVS_DEFAULT_HEADROOM_SIZE of headroom) and copies 'length' bytes from
 * 'userBuffer' into the data area of its first NET_BUFFER.
 */
277 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
282 PNET_BUFFER_LIST nbl = NULL;
/* Payloads above OVS_DEFAULT_DATA_SIZE use the variable-size allocator. */
286 if (length > OVS_DEFAULT_DATA_SIZE) {
287 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
288 OVS_DEFAULT_HEADROOM_SIZE);
291 nbl = OvsAllocateFixSizeNBL(switchContext, length,
292 OVS_DEFAULT_HEADROOM_SIZE);
298 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
299 mdl = NET_BUFFER_CURRENT_MDL(nb);
/*
 * NOTE(review): MmGetSystemAddressForMdlSafe() can return NULL.  The MDL
 * offset is added before 'data' can be checked, so a failed mapping may
 * yield a non-NULL 'data' -- confirm the failure check that follows.
 */
300 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
301 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
303 OvsCompleteNBL(switchContext, nbl, TRUE);
307 NdisMoveMemory(data, userBuffer, length);
313 *----------------------------------------------------------------------------
314 * OvsNlExecuteCmdHandler --
315 * Handler for OVS_PACKET_CMD_EXECUTE command.
316 *----------------------------------------------------------------------------
/*
 * Parses an OVS_PACKET_CMD_EXECUTE Netlink request from
 * usrParamsCtx->inputBuffer, maps it to an OvsPacketExecute and runs it via
 * OvsExecuteDpIoctl().  On success a reply header is built in the output
 * buffer and *replyLen is set to its length.  On failure the status is
 * mapped to an NL_ERROR; if an output buffer exists an error message is
 * built there, *replyLen is set to its length and the status is reset to
 * STATUS_SUCCESS.
 */
319 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
322 NTSTATUS status = STATUS_SUCCESS;
323 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
324 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
325 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
326 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
327 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
/*
 * NOTE(review): unlike keyAttrs, nlAttrs is not zero-initialised here;
 * presumably NlAttrParse() fills every slot -- confirm.
 */
329 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
330 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Top-level attributes start right after the three message headers. */
332 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
333 UINT32 keyAttrOffset = 0;
334 OvsPacketExecute execute;
335 NL_ERROR nlError = NL_ERROR_SUCCESS;
/* PACKET, KEY and ACTIONS are mandatory; USERDATA is optional. */
338 static const NL_POLICY nlPktExecPolicy[] = {
339 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
340 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
341 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
342 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
343 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
347 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
349 /* Get all the top level Flow attributes */
350 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
351 nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
353 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
355 status = STATUS_UNSUCCESSFUL;
/* Offset of the KEY attribute, derived from its pointer. */
359 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
362 /* Get flow keys attributes */
363 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
364 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
365 nlFlowKeyPolicy, keyAttrs,
366 ARRAY_SIZE(keyAttrs))) != TRUE) {
367 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
368 status = STATUS_UNSUCCESSFUL;
372 execute.dpNo = ovsHdr->dp_ifindex;
374 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
376 status = OvsExecuteDpIoctl(&execute);
378 /* Default reply that we want to send */
379 if (status == STATUS_SUCCESS) {
380 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
381 usrParamsCtx->outputLength);
383 /* Prepare nl Msg headers */
384 status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
385 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
386 genlMsgHdr->cmd, OVS_PACKET_VERSION,
389 if (status == STATUS_SUCCESS) {
390 *replyLen = msgOut->nlMsg.nlmsgLen;
393 /* Map NTSTATUS to NL_ERROR */
394 nlError = NlMapStatusToNlErr(status);
396 /* As of now there are no transactional errors in the implementation.
397 * Once we have them then we need to map status to correct
398 * nlError value, so that below mentioned code gets hit. */
399 if ((nlError != NL_ERROR_SUCCESS) &&
400 (usrParamsCtx->outputBuffer)) {
402 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
403 usrParamsCtx->outputBuffer;
404 BuildErrorMsg(msgIn, msgError, nlError);
405 *replyLen = msgError->nlMsg.nlmsgLen;
/* The failure is reported in-band through the Netlink error message. */
406 status = STATUS_SUCCESS;
416 *----------------------------------------------------------------------------
417 * _MapNlAttrToOvsPktExec --
418 * Maps input Netlink attributes to OvsPacketExecute.
419 *----------------------------------------------------------------------------
/*
 * Fills 'execute' from already-parsed attributes: the packet buffer and its
 * length from OVS_PACKET_ATTR_PACKET, the actions and their length from
 * OVS_PACKET_ATTR_ACTIONS, and the input port from OVS_KEY_ATTR_IN_PORT.
 * Only pointers into the Netlink message are stored; nothing is copied.
 */
422 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
423 OvsPacketExecute *execute)
425 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
426 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
428 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
429 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
/*
 * NOTE(review): assumes the IN_PORT key attribute is present; confirm that
 * nlFlowKeyPolicy makes it mandatory.
 */
431 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * Injects a user-supplied packet into the datapath and runs the supplied
 * actions on it.  gOvsCtrlLock is taken at entry and released at exit.
 *
 * The packet is copied into a freshly allocated NBL, its source port and
 * NIC index are taken from the vport found for execute->inPort (with
 * NDIS_SWITCH_DEFAULT_PORT_ID / 0 as the alternative), a flow key is
 * extracted and, if that succeeds, the actions are executed under the
 * dispatch read lock.
 *
 * Status values set here: STATUS_INVALID_PARAMETER (no switch context or
 * zero packetLen), STATUS_NO_MEMORY (NBL allocation path),
 * STATUS_NOT_SUPPORTED when ndisStatus is NDIS_STATUS_NOT_SUPPORTED, and
 * STATUS_UNSUCCESSFUL for any other NDIS failure.
 */
435 OvsExecuteDpIoctl(OvsPacketExecute *execute)
437 NTSTATUS status = STATUS_SUCCESS;
439 LOCK_STATE_EX lockState;
440 PNET_BUFFER_LIST pNbl;
442 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
444 OVS_PACKET_HDR_INFO layers;
445 POVS_VPORT_ENTRY vport;
447 NdisAcquireSpinLock(gOvsCtrlLock);
448 if (gOvsSwitchContext == NULL) {
449 status = STATUS_INVALID_PARAMETER;
453 if (execute->packetLen == 0) {
454 status = STATUS_INVALID_PARAMETER;
458 actions = execute->actions;
463 * Allocate the NBL, copy the data from the userspace buffer. Allocate
464 * also, the forwarding context for the packet.
466 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
469 status = STATUS_NO_MEMORY;
473 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
474 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
476 fwdDetail->SourcePortId = vport->portId;
477 fwdDetail->SourceNicIndex = vport->nicIndex;
479 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
480 fwdDetail->SourceNicIndex = 0;
482 // XXX: Figure out if any of the other members of fwdDetail need to be set.
484 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
486 if (ndisStatus == NDIS_STATUS_SUCCESS) {
/*
 * The spin lock taken above leaves us at DISPATCH_LEVEL, which is what the
 * NDIS_RWL_AT_DISPATCH_LEVEL flag below asserts to NDIS.
 */
487 ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
488 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
489 NDIS_RWL_AT_DISPATCH_LEVEL);
490 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
491 vport ? vport->portNo :
493 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
494 &key, NULL, &layers, actions,
495 execute->actionsLen);
497 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Translate the NDIS result into the NTSTATUS reported to the caller. */
499 if (ndisStatus != NDIS_STATUS_SUCCESS) {
500 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
501 status = STATUS_NOT_SUPPORTED;
503 status = STATUS_UNSUCCESSFUL;
508 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
511 NdisReleaseSpinLock(gOvsCtrlLock);
/*
 * Purges the packet queue of the open instance stored in
 * fileObject->FsContext by calling OvsPurgePacketQueue().  Returns
 * STATUS_SUCCESS after the purge; STATUS_INVALID_PARAMETER is the rejection
 * status (presumably for an instance without a queue -- confirm).
 */
517 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
519 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
520 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
523 return STATUS_INVALID_PARAMETER;
525 OvsPurgePacketQueue(queue, instance);
526 return STATUS_SUCCESS;
/*
 * Cancel routine installed on pending IRPs by OvsWaitDpIoctl().  It drops
 * the system cancel spin lock, looks up the instance and its packet queue
 * from the IRP's file object under gOvsCtrlLock, clears queue->pendingIrp
 * if it still refers to this IRP, and completes the IRP with
 * STATUS_CANCELLED.
 */
530 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
533 PIO_STACK_LOCATION irpSp;
534 PFILE_OBJECT fileObject;
535 POVS_OPEN_INSTANCE instance;
536 POVS_USER_PACKET_QUEUE queue = NULL;
538 UNREFERENCED_PARAMETER(deviceObject);
/* Cancel routines are entered with the cancel spin lock held; drop it. */
540 IoReleaseCancelSpinLock(irp->CancelIrql);
541 irpSp = IoGetCurrentIrpStackLocation(irp);
542 fileObject = irpSp->FileObject;
544 if (fileObject == NULL) {
547 NdisAcquireSpinLock(gOvsCtrlLock);
548 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
550 queue = instance->packetQueue;
552 if (instance == NULL || queue == NULL) {
553 NdisReleaseSpinLock(gOvsCtrlLock);
556 NdisReleaseSpinLock(gOvsCtrlLock);
/* Detach the IRP only if the queue still considers it the pending one. */
557 NdisAcquireSpinLock(&queue->queueLock);
558 if (queue->pendingIrp == irp) {
559 queue->pendingIrp = NULL;
561 NdisReleaseSpinLock(&queue->queueLock);
563 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * Parks 'irp' on the packet queue of the open instance stored in
 * fileObject->FsContext until a packet is queued.
 *
 * With queueLock held: a queue owned by another instance yields
 * STATUS_INVALID_PARAMETER, and an already pending IRP yields
 * STATUS_DEVICE_BUSY.  If the queue holds no packets, the IRP is marked
 * pending, OvsCancelIrpDatapath is installed as its cancel routine, the IRP
 * is stored in queue->pendingIrp and the status becomes STATUS_PENDING.
 * The cancellation path at the end completes the IRP with STATUS_CANCELLED.
 */
568 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
570 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
571 POVS_USER_PACKET_QUEUE queue =
572 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
573 NTSTATUS status = STATUS_SUCCESS;
574 BOOLEAN cancelled = FALSE;
577 return STATUS_INVALID_PARAMETER;
579 NdisAcquireSpinLock(&queue->queueLock);
580 if (queue->instance != instance) {
581 NdisReleaseSpinLock(&queue->queueLock);
582 return STATUS_INVALID_PARAMETER;
/* Only one waiter per queue is supported. */
584 if (queue->pendingIrp) {
585 NdisReleaseSpinLock(&queue->queueLock);
586 return STATUS_DEVICE_BUSY;
588 if (queue->numPackets == 0) {
589 PDRIVER_CANCEL cancelRoutine;
590 IoMarkIrpPending(irp);
591 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
/*
 * Taking the cancel routine back tells us, through its return value,
 * whether it is still installed (presumably this handles an IRP that was
 * cancelled before the routine was set -- confirm).
 */
593 cancelRoutine = IoSetCancelRoutine(irp, NULL);
598 queue->pendingIrp = irp;
600 status = STATUS_PENDING;
602 NdisReleaseSpinLock(&queue->queueLock);
604 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
605 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/*
 * Dequeues the packet at the head of the instance's packet queue and
 * returns its OVS_PACKET_QUEUE_ELEM.  The head is removed with queueLock
 * held.  If the queue is owned by another instance or holds no packets, the
 * lock is released without dequeuing anything.
 */
611 POVS_PACKET_QUEUE_ELEM
612 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
614 POVS_USER_PACKET_QUEUE queue;
616 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
620 NdisAcquireSpinLock(&queue->queueLock);
621 if (queue->instance != instance || queue->numPackets == 0) {
622 NdisReleaseSpinLock(&queue->queueLock);
625 link = RemoveHeadList(&queue->packetList);
627 NdisReleaseSpinLock(&queue->queueLock);
/* Recover the element from its embedded list link. */
628 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
632 * ---------------------------------------------------------------------------
633 * Given a pid, returns the corresponding USER_PACKET_QUEUE.
634 * pidHashLock must be acquired before calling this API.
635 * ---------------------------------------------------------------------------
/*
 * Looks up the open instance registered for 'pid' via OvsGetPidInstance()
 * and yields that instance's packetQueue; 'ret' starts out as NULL.
 */
637 POVS_USER_PACKET_QUEUE
638 OvsGetQueue(UINT32 pid)
640 POVS_OPEN_INSTANCE instance;
641 POVS_USER_PACKET_QUEUE ret = NULL;
643 instance = OvsGetPidInstance(gOvsSwitchContext, pid);
646 ret = instance->packetQueue;
653 * ---------------------------------------------------------------------------
654 * Given a pid, returns the corresponding instance.
655 * pidHashLock must be acquired before calling this API.
656 * ---------------------------------------------------------------------------
/*
 * Searches one bucket of switchContext->pidHashArray for an open instance
 * whose pid equals 'pid'.  The bucket is selected by hashing the pid bytes
 * with OvsJhashBytes() and masking the hash with OVS_PID_MASK.
 */
659 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
661 POVS_OPEN_INSTANCE instance;
662 PLIST_ENTRY head, link;
663 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
665 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
/* Instances are chained into the bucket through their pidLink member. */
666 LIST_FORALL(head, link) {
667 instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
668 if (instance->pid == pid) {
676 * ---------------------------------------------------------------------------
677 * Given a pid and an instance. This API adds instance to pidHashArray.
678 * pidHashLock must be acquired before calling this API.
679 * ---------------------------------------------------------------------------
/*
 * Inserts 'instance' at the head of the pidHashArray bucket selected by
 * hashing 'pid' (OvsJhashBytes() masked with OVS_PID_MASK).  No check for
 * an existing entry with the same pid is made here.
 */
682 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
683 POVS_OPEN_INSTANCE instance)
686 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
688 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
689 InsertHeadList(head, &(instance->pidLink));
693 * ---------------------------------------------------------------------------
694 * Given a pid and an instance. This API removes instance from pidHashArray.
695 * pidHashLock must be acquired before calling this API.
696 * ---------------------------------------------------------------------------
/*
 * Looks up the instance registered for 'pid' with OvsGetPidInstance() and
 * unlinks its pidLink from the hash bucket it is chained into.
 */
699 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
701 POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
704 RemoveEntryList(&(instance->pidLink));
/*
 * Distributes the elements of 'packetList' to the upcall queues selected by
 * each element's upcallPid.  The queue lookup and insertion run with the
 * pid hash lock held.  Elements with no queue, or whose queue has no
 * instance, are collected on a local drop list that is drained at the end.
 * When the target queue has a pending IRP, the IRP is detached, its cancel
 * routine is cleared and it is completed with STATUS_SUCCESS.
 */
709 OvsQueuePackets(PLIST_ENTRY packetList,
712 POVS_USER_PACKET_QUEUE upcallQueue = NULL;
713 POVS_PACKET_QUEUE_ELEM elem;
717 LIST_ENTRY dropPackets;
719 OVS_LOG_LOUD("Enter: numELems: %u", numElems);
721 InitializeListHead(&dropPackets);
723 while (!IsListEmpty(packetList)) {
724 link = RemoveHeadList(packetList);
725 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
729 OvsAcquirePidHashLock();
731 upcallQueue = OvsGetQueue(elem->upcallPid);
733 /* No upcall queue found, drop this packet. */
734 InsertTailList(&dropPackets, &elem->link);
736 NdisAcquireSpinLock(&upcallQueue->queueLock);
/* A queue without an instance is being torn down; drop the packet. */
738 if (upcallQueue->instance == NULL) {
739 InsertTailList(&dropPackets, &elem->link);
741 InsertTailList(&upcallQueue->packetList, &elem->link);
742 upcallQueue->numPackets++;
/* Wake the waiter, if any, now that a packet is available. */
743 if (upcallQueue->pendingIrp) {
744 PDRIVER_CANCEL cancelRoutine;
745 irp = upcallQueue->pendingIrp;
746 upcallQueue->pendingIrp = NULL;
747 cancelRoutine = IoSetCancelRoutine(irp, NULL);
748 if (cancelRoutine == NULL) {
755 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
758 NdisReleaseSpinLock(&upcallQueue->queueLock);
761 OvsReleasePidHashLock();
/* Drain the packets that could not be delivered. */
764 while (!IsListEmpty(&dropPackets)) {
765 link = RemoveHeadList(&dropPackets);
766 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
771 OVS_LOG_LOUD("Exit: drop %u packets", num);
775 *----------------------------------------------------------------------------
776 * OvsCreateAndAddPackets --
778 * Create a packet and forward it to user space.
780 * This function fragments the packet if needed, and queues
781 * each segment to user space.
782 *----------------------------------------------------------------------------
/*
 * Builds one queue element per NET_BUFFER of the packet, using
 * OvsCreateQueueNlPacket(), and appends the elements to 'list'.  For a TCP
 * packet whose LSO info carries a non-zero MSS, the NBL is first segmented
 * with OvsTcpSegmentNBL(); NDIS_STATUS_FAILURE is returned if segmentation
 * yields NULL.  The segmented NBL is handed to OvsCompleteNBL() afterwards.
 * Returns NDIS_STATUS_SUCCESS otherwise.
 */
785 OvsCreateAndAddPackets(PVOID userData,
790 PNET_BUFFER_LIST nbl,
792 POVS_PACKET_HDR_INFO hdrInfo,
793 POVS_SWITCH_CONTEXT switchContext,
797 POVS_PACKET_QUEUE_ELEM elem;
798 PNET_BUFFER_LIST newNbl = NULL;
801 if (hdrInfo->isTcp) {
802 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
805 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
806 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
807 packetLength = NET_BUFFER_DATA_LENGTH(nb);
809 OVS_LOG_TRACE("MSS %u packet len %u",
810 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS means the packet must be segmented before the upcall. */
811 if (tsoInfo.LsoV1Transmit.MSS) {
812 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
813 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
814 tsoInfo.LsoV1Transmit.MSS , 0);
815 if (newNbl == NULL) {
816 return NDIS_STATUS_FAILURE;
822 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
824 elem = OvsCreateQueueNlPacket(userData, userDataLen,
825 cmd, inPort, key, nbl, nb,
828 InsertTailList(list, &elem->link);
831 nb = NET_BUFFER_NEXT_NB(nb);
834 OvsCompleteNBL(switchContext, newNbl, TRUE);
836 return NDIS_STATUS_SUCCESS;
/*
 * Computes the buffer size needed for an upcall Netlink message: the
 * aligned ovs_header, the packet attribute holding 'payload' bytes and the
 * flow-key attribute, plus the user-data and tunnel-key attributes that are
 * added further down.
 */
839 static __inline UINT32
840 OvsGetUpcallMsgSize(PVOID userData,
842 OvsIPv4TunnelKey *tunnelKey,
845 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
846 NlAttrSize(payload) +
847 NlAttrSize(OvsFlowKeyAttrSize());
849 /* OVS_PACKET_ATTR_USERDATA */
851 size += NlAttrTotalSize(userDataLen);
853 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
854 /* Is it included in the flow key attr XXX */
856 size += NlAttrTotalSize(OvsTunKeyAttrSize());
862 *----------------------------------------------------------------------------
863 * This function completes the IP header csum, and records the L4 payload
864 * length and whether the TCP or UDP csum needs to be calculated. The
865 * actual csum will be calculated simultaneously with the copy of the
866 * payload to the destination buffer when the packet is read.
867 *----------------------------------------------------------------------------
/*
 * Finishes the checksum-related header work on the copied packet.
 *
 * packet:     start of the copied frame; the l3Offset/l4Offset values from
 *             the header info are applied to this pointer.
 * isRecv:     selects whether csumInfo.Receive or csumInfo.Transmit is
 *             consulted.
 * csumInfo:   checksum offload information of the NBL.
 * hdrInfoIn:  header info from flow extraction (read only here).
 * hdrInfoOut: receives l4PayLoad and the tcpCsumNeeded/udpCsumNeeded flags.
 *
 * The IPv4 header checksum is recomputed when the offload info asks for
 * it.  For L4, a pseudo-header checksum is computed and the
 * tcpCsumNeeded/udpCsumNeeded flags are set so that the full checksum can
 * be finished when the packet is read.
 */
870 OvsCompletePacketHeader(UINT8 *packet,
872 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
873 POVS_PACKET_HDR_INFO hdrInfoIn,
874 POVS_PACKET_HDR_INFO hdrInfoOut)
876 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
877 (!isRecv && csumInfo.Transmit.IsIPv4 &&
878 csumInfo.Transmit.IpHeaderChecksum)) {
879 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
880 ASSERT(hdrInfoIn->isIPv4);
881 ASSERT(ipHdr->Version == 4);
/* HeaderLength counts 32-bit words, hence the shift to get bytes. */
882 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
883 ipHdr->HeaderLength << 2,
884 (UINT16)~ipHdr->HeaderChecksum);
885 ovsUserStats.ipCsum++;
/*
 * NOTE(review): this assertion reads tcpCsumNeeded from hdrInfoIn but
 * udpCsumNeeded from hdrInfoOut; confirm the mix is intentional.
 */
887 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
889 * calculate TCP/UDP pseudo checksum
891 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
893 * Only this case, we need to reclaculate pseudo checksum
894 * all other cases, it is assumed the pseudo checksum is
898 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
899 if (hdrInfoIn->isIPv4) {
900 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
901 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
902 (ipHdr->HeaderLength << 2));
903 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
904 (UINT32 *)&ipHdr->DestinationAddress,
905 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
907 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
908 hdrInfoOut->l4PayLoad =
909 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
910 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
911 hdrInfoIn->l4Offset);
912 ASSERT(hdrInfoIn->isIPv6);
914 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
915 (UINT32 *)&ipv6Hdr->DestinationAddress,
916 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
918 hdrInfoOut->tcpCsumNeeded = 1;
919 ovsUserStats.recalTcpCsum++;
920 } else if (!isRecv) {
/* Transmit path: the offload request says which L4 checksum is pending. */
921 if (csumInfo.Transmit.TcpChecksum) {
922 hdrInfoOut->tcpCsumNeeded = 1;
923 } else if (csumInfo.Transmit.UdpChecksum) {
924 hdrInfoOut->udpCsumNeeded = 1;
926 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
930 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
932 if (hdrInfoIn->isIPv4) {
933 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
934 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
935 (ipHdr->HeaderLength << 2));
937 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
938 (UINT32 *)&ipHdr->DestinationAddress,
939 proto, hdrInfoOut->l4PayLoad);
942 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
943 hdrInfoIn->l3Offset);
944 hdrInfoOut->l4PayLoad =
945 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
946 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
947 hdrInfoIn->l4Offset);
948 ASSERT(hdrInfoIn->isIPv6);
950 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
951 (UINT32 *)&ipv6Hdr->DestinationAddress,
952 proto, hdrInfoOut->l4PayLoad);
/* Address of the checksum field inside the L4 header. */
956 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
957 (hdrInfoOut->tcpCsumNeeded ?
958 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/*
 * Stores the upcall pid to use for a packet in '*pid'.  Currently this is
 * simply vport->upcallPid; 'nb' is unused.  Returns STATUS_SUCCESS on this
 * path.
 */
966 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
968 UNREFERENCED_PARAMETER(nb);
972 /* XXX select a pid from an array of pids using a flow based hash */
973 *pid = vport->upcallPid;
974 return STATUS_SUCCESS;
978 *----------------------------------------------------------------------------
979 * OvsCreateQueueNlPacket --
981 * Create a packet which will be forwarded to user space.
984 * userData: when cmd is user action, this field contain
986 * userDataLen: as name indicated
987 * cmd: either miss or user action
988 * inPort: datapath port id from which the packet is received.
989 * key: flow Key with a tunnel key if available
990 * nbl: the NET_BUFFER_LIST which contain the packet
992 * isRecv: This is used to decide how to interpret the csum info
993 * hdrInfo: include hdr info initialized during flow extraction.
996 * NULL if fail to create the packet
997 * The packet element otherwise
998 *----------------------------------------------------------------------------
/*
 * Builds a queue element that carries one NET_BUFFER as a Netlink upcall
 * message.  The element is allocated with room for the whole message, the
 * OVS header, flow key, optional user data and packet payload attributes
 * are written into it, pending checksum work is recorded via
 * OvsCompletePacketHeader(), and a VLAN tag is inserted into the copied
 * frame when the NBL carries a VLAN id.  Packets flagged with a receive
 * checksum failure are counted in dropDuetoChecksum; allocation and data
 * access failures are counted in dropDuetoResource.
 */
1000 POVS_PACKET_QUEUE_ELEM
1001 OvsCreateQueueNlPacket(PVOID userData,
1006 PNET_BUFFER_LIST nbl,
1009 POVS_PACKET_HDR_INFO hdrInfo)
1011 #define VLAN_TAG_SIZE 4
1012 UINT32 allocLen, dataLen, extraLen;
1013 POVS_PACKET_QUEUE_ELEM elem;
1015 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
1016 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
1017 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
1023 /* XXX pass vport in the stack rather than portNo */
1024 POVS_VPORT_ENTRY vport =
1025 OvsFindVportByPortNo(gOvsSwitchContext, inPort);
1028 /* No vport is not fatal. */
1032 OvsGetPid(vport, nb, &pid);
1036 * There is no userspace queue created yet, so there is no point for
1037 * creating a new packet to be queued.
1042 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
1044 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1045 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1046 csumInfo.Receive.IpChecksumFailed)) {
1047 OVS_LOG_INFO("Packet dropped due to checksum failure.");
1048 ovsUserStats.dropDuetoChecksum++;
/* Reserve room for a VLAN tag only when the NBL carries a VLAN id. */
1052 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1053 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1055 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/*
 * NOTE(review): this bound uses dataLen only, while the packet attribute is
 * later sized as (UINT16)(dataLen + extraLen); confirm that a VLAN-tagged
 * packet close to the limit cannot be truncated by that cast.
 */
1057 if (NlAttrSize(dataLen) > MAXUINT16) {
1061 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1062 dataLen + extraLen);
/* The Netlink message lives in the same allocation as the element. */
1064 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1065 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
1067 ovsUserStats.dropDuetoResource++;
1070 elem->hdrInfo.value = hdrInfo->value;
1071 elem->upcallPid = pid;
1072 elem->packet.totalLen = nlMsgSize;
1073 /* XXX remove queueid */
1074 elem->packet.queue = 0;
1075 /* XXX no need as the length is already in the NL attrib */
1076 elem->packet.userDataLen = userDataLen;
1077 elem->packet.inPort = inPort;
1078 elem->packet.cmd = cmd;
1079 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1080 ovsUserStats.miss++;
1081 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1082 ovsUserStats.action++;
1087 /* XXX Should we have both packetLen and TotalLen*/
1088 elem->packet.packetLen = dataLen + extraLen;
1090 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1093 * Initialize the OVS header
1094 * Since we are pre allocating memory for the NL buffer
1095 * the attribute settings should not fail
1097 if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1098 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1099 gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1103 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1104 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1108 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswitchd */
1110 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1111 userData, (UINT16)userDataLen)) {
1117 * Make space for the payload to be copied and set the attribute
1118 * XXX Uninit set initilizes the buffer with xero, we don't actually need
1119 * that the payload to be initailized
1121 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1122 (UINT16)(dataLen + extraLen));
1127 /* Store the payload for csum calculation when packet is read */
1128 elem->packet.payload = dst;
/*
 * NdisGetDataBuffer() hands back either a pointer into the NET_BUFFER's own
 * contiguous data or the supplied storage 'dst' after copying into it; only
 * the first case needs the explicit copy below.
 */
1131 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1133 ovsUserStats.dropDuetoResource++;
1135 } else if (src != dst) {
1136 /* Copy the data from the NDIS buffer to dst. */
1137 RtlCopyMemory(dst, src, dataLen);
1140 /* Set csum if was offloaded */
1141 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1144 * Finally insert VLAN tag
/*
 * Move the first 12 bytes (three 32-bit words) of the frame from 'src'
 * down to 'dst', i.e. extraLen bytes towards the start of the payload.
 */
1147 dst = elem->packet.payload;
1148 src = dst + extraLen;
1149 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1150 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1151 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
/* 0x8100 is the 802.1Q TPID; the priority occupies the top three bits. */
1153 ((UINT16 *)dst)[0] = htons(0x8100);
1154 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1155 (vlanInfo.TagHeader.UserPriority << 13));
/* The inserted tag shifts the L3 and L4 headers by VLAN_TAG_SIZE bytes. */
1156 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1157 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1158 ovsUserStats.vlanInsert++;
1161 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1162 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1163 /* 'totalLen' should be size of valid data. */
1164 elem->packet.totalLen = nlMsg->nlmsgLen;
1168 OvsFreeMemory(elem);