2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage packet queue for packet miss for userAction.
34 #include "TunnelIntf.h"
/* Debug module selector for this file; presumably consumed by the OVS_LOG_* macros -- TODO confirm. */
40 #define OVS_DBG_MOD OVS_DBG_USER
/* Forward declaration; the definition appears later in this file. */
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Driver-wide objects defined in other translation units. */
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by the upcall code in this file (miss, action, csum, drops, ...). */
46 OVS_USER_STATS ovsUserStats;
/* Forward declaration of the attribute-to-OvsPacketExecute mapper defined below. */
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49 OvsPacketExecute *execute);
/* Flow-key netlink policy table and its length, defined elsewhere. */
50 extern NL_POLICY nlFlowKeyPolicy[];
51 extern UINT32 nlFlowKeyPolicyLen;
/*
 * Acquires gOvsSwitchContext->pidHashLock, the spin lock that callers in this
 * file hold around pidHashArray lookups, insertions and removals.
 */
54 OvsAcquirePidHashLock()
56 NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
/*
 * Releases gOvsSwitchContext->pidHashLock; counterpart of
 * OvsAcquirePidHashLock().
 */
60 OvsReleasePidHashLock()
62 NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
/*
 * Discards every packet element currently held on 'queue', as long as the
 * queue is still bound to 'instance'.
 *
 * The elements are detached onto a local list while queue->queueLock is held
 * and are freed (OVS_USER_POOL_TAG) only after the lock has been released.
 */
67 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
68 POVS_OPEN_INSTANCE instance)
70 PLIST_ENTRY link, next;
72 POVS_PACKET_QUEUE_ELEM elem;
74 InitializeListHead(&tmp);
75 NdisAcquireSpinLock(&queue->queueLock);
/* Queue is bound to some other instance: the lock is dropped on this path. */
76 if (queue->instance != instance) {
77 NdisReleaseSpinLock(&queue->queueLock);
/* Hand the queued elements over to 'tmp' (OvsAppendList presumably splices the lists) and reset the count. */
81 if (queue->numPackets) {
82 OvsAppendList(&tmp, &queue->packetList);
83 queue->numPackets = 0;
85 NdisReleaseSpinLock(&queue->queueLock);
/* Free the detached elements outside the lock. */
86 LIST_FORALL_SAFE(&tmp, link, next) {
87 RemoveEntryList(link);
88 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
89 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
/*
 * Tears down the packet queue attached to 'instance'.
 *
 * Under queue->queueLock the queued elements are detached, the
 * queue <-> instance links are cleared and any pending IRP is taken off the
 * queue with its cancel routine removed. After the lock is released and
 * freed, the detached elements are freed, the IRP is completed with
 * STATUS_SUCCESS, the queue itself is freed, and -- if gOvsSwitchContext
 * exists -- the instance is removed from the pid hash under pidHashLock.
 */
94 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
96 POVS_USER_PACKET_QUEUE queue;
97 POVS_PACKET_QUEUE_ELEM elem;
98 PLIST_ENTRY link, next;
103 InitializeListHead(&tmp);
104 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
106 PDRIVER_CANCEL cancelRoutine;
107 NdisAcquireSpinLock(&queue->queueLock);
108 ASSERT(queue->instance == instance);
109 /* XXX Should not happen */
110 if (queue->instance != instance) {
111 NdisReleaseSpinLock(&queue->queueLock);
112 NdisFreeSpinLock(&queue->queueLock);
/* Detach whatever is still queued so it can be freed after unlocking. */
116 if (queue->numPackets) {
117 OvsAppendList(&tmp, &queue->packetList);
118 queue->numPackets = 0;
/* Unlink queue and instance from each other and claim the pending IRP. */
120 queue->instance = NULL;
121 instance->packetQueue = NULL;
122 irp = queue->pendingIrp;
123 queue->pendingIrp = NULL;
/* IoSetCancelRoutine returns the previously installed routine; NULL means none was set any more. */
125 cancelRoutine = IoSetCancelRoutine(irp, NULL);
126 if (cancelRoutine == NULL) {
130 NdisReleaseSpinLock(&queue->queueLock);
131 NdisFreeSpinLock(&queue->queueLock);
/* Free the detached packet elements. */
133 LIST_FORALL_SAFE(&tmp, link, next) {
134 RemoveEntryList(link);
135 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
136 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
139 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
142 OvsFreeMemoryWithTag(queue, OVS_USER_POOL_TAG);
145 /* Verify if gOvsSwitchContext exists. */
146 if (gOvsSwitchContext) {
147 /* Remove the instance from pidHashArray */
148 OvsAcquirePidHashLock();
149 OvsDelPidInstance(gOvsSwitchContext, instance->pid);
150 OvsReleasePidHashLock();
/*
 * Subscribes or unsubscribes an open instance to packet upcalls.
 *
 *  - instance has a queue and 'join' is false: the queue is torn down via
 *    OvsCleanupPacketQueue().
 *  - instance has no queue and 'join' is true: a queue is allocated,
 *    initialised, bound to the instance, and the instance is inserted into
 *    the pid hash under pidHashLock.
 *  - any other combination: STATUS_INVALID_PARAMETER.
 *
 * Returns STATUS_SUCCESS on the two valid transitions; STATUS_NO_MEMORY is
 * returned on the allocation path (presumably when the allocation fails --
 * TODO confirm).
 */
155 OvsSubscribeDpIoctl(PVOID instanceP,
159 POVS_USER_PACKET_QUEUE queue;
160 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
162 if (instance->packetQueue && !join) {
164 OvsCleanupPacketQueue(instance);
165 } else if (instance->packetQueue == NULL && join) {
166 queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemoryWithTag(
167 sizeof *queue, OVS_USER_POOL_TAG);
169 return STATUS_NO_MEMORY;
171 InitializeListHead(&(instance->pidLink));
/* NOTE(review): instance->packetQueue is assigned here and again below; one of the two looks redundant. */
172 instance->packetQueue = queue;
173 RtlZeroMemory(queue, sizeof (*queue));
174 NdisAllocateSpinLock(&queue->queueLock);
175 NdisAcquireSpinLock(&queue->queueLock);
176 InitializeListHead(&queue->packetList);
178 queue->instance = instance;
179 instance->packetQueue = queue;
180 NdisReleaseSpinLock(&queue->queueLock);
182 OvsAcquirePidHashLock();
183 /* Insert the instance to pidHashArray */
184 OvsAddPidInstance(gOvsSwitchContext, pid, instance);
185 OvsReleasePidHashLock();
188 /* user mode should call only once for subscribe */
189 return STATUS_INVALID_PARAMETER;
192 return STATUS_SUCCESS;
/*
 * Copies the next queued upcall packet of the instance behind 'fileObject'
 * into 'outputBuffer' and frees the queue element.
 *
 * Returns STATUS_INVALID_PARAMETER when the instance has no packet queue and
 * STATUS_BUFFER_TOO_SMALL when 'outputLength' is below
 * sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE; otherwise STATUS_SUCCESS.
 *
 * The amount copied is the smaller of the element's totalLen and
 * 'outputLength'. When a TCP/UDP checksum is flagged as needed and the whole
 * packet fits, the bytes up to the L4 header are copied verbatim and the L4
 * part is copied through CopyAndCalculateChecksum().
 */
197 OvsReadDpIoctl(PFILE_OBJECT fileObject,
202 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
203 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offset of the checksum field inside the TCP and UDP headers. */
206 #define TCP_CSUM_OFFSET 16
207 #define UDP_CSUM_OFFSET 6
210 if (instance->packetQueue == NULL) {
211 return STATUS_INVALID_PARAMETER;
213 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
214 return STATUS_BUFFER_TOO_SMALL;
217 elem = OvsGetNextPacket(instance);
220 * XXX revisit this later
222 len = elem->packet.totalLen > outputLength ? outputLength :
223 elem->packet.totalLen;
225 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
226 len == elem->packet.totalLen) {
/* 'size' = bytes from the start of packet.data up to the L4 header. */
228 UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
229 elem->hdrInfo.l4Offset);
230 RtlCopyMemory(outputBuffer, &elem->packet.data, size);
231 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
232 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
233 (UINT8 *)&elem->packet.data + size,
234 elem->hdrInfo.l4PayLoad, 0);
/* Locate the checksum field of the copied L4 header in the output buffer. */
235 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
236 (elem->hdrInfo.tcpCsumNeeded ?
237 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
239 ovsUserStats.l4Csum++;
241 RtlCopyMemory(outputBuffer, &elem->packet.data, len);
245 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
247 return STATUS_SUCCESS;
250 /* Helper function to allocate a Forwarding Context for an NBL */
/*
 * Thin wrapper: forwards to the switch's
 * NdisSwitchHandlers.AllocateNetBufferListForwardingContext callback with
 * switchContext->NdisSwitchContext and 'nbl', and returns its result.
 */
252 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
253 PNET_BUFFER_LIST nbl)
255 return switchContext->NdisSwitchHandlers.
256 AllocateNetBufferListForwardingContext(
257 switchContext->NdisSwitchContext, nbl);
261 * --------------------------------------------------------------------------
262 * This function allocates all the stuff necessary for creating an NBL from the
263 * input buffer of specified length, namely, a nonpaged data buffer of size
264 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
265 * context yet. It also copies data from the specified buffer to the NBL.
266 * --------------------------------------------------------------------------
/*
 * Builds an NBL holding a copy of 'userBuffer' ('length' bytes).
 *
 * Buffers larger than OVS_DEFAULT_DATA_SIZE use OvsAllocateVariableSizeNBL(),
 * smaller ones OvsAllocateFixSizeNBL(); both are asked for
 * OVS_DEFAULT_HEADROOM_SIZE of headroom. The data pointer is derived from the
 * first NB's current MDL plus its current MDL offset, and the user data is
 * copied there with NdisMoveMemory().
 */
269 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
274 PNET_BUFFER_LIST nbl = NULL;
278 if (length > OVS_DEFAULT_DATA_SIZE) {
279 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
280 OVS_DEFAULT_HEADROOM_SIZE);
283 nbl = OvsAllocateFixSizeNBL(switchContext, length,
284 OVS_DEFAULT_HEADROOM_SIZE);
290 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
291 mdl = NET_BUFFER_CURRENT_MDL(nb);
/*
 * NOTE(review): the MDL offset is added to the mapped address before 'data'
 * can be tested; if the mapping call returns NULL and the offset is
 * non-zero, a later NULL test on 'data' would not catch it -- confirm.
 */
292 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
293 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
/* Presumably the failure path for the mapping above: the NBL is completed (released). */
295 OvsCompleteNBL(switchContext, nbl, TRUE);
299 NdisMoveMemory(data, userBuffer, length);
305 *----------------------------------------------------------------------------
306 * OvsNlExecuteCmdHandler --
307 * Handler for OVS_PACKET_CMD_EXECUTE command.
308 *----------------------------------------------------------------------------
/*
 * Parses an OVS_PACKET_CMD_EXECUTE netlink request and runs it.
 *
 * Steps visible here:
 *  1. Parse the top-level packet attributes against nlPktExecPolicy
 *     (PACKET, KEY and ACTIONS are mandatory, USERDATA is optional).
 *  2. Parse the nested flow-key attributes against nlFlowKeyPolicy.
 *  3. Fill an OvsPacketExecute via _MapNlAttrToOvsPktExec() and call
 *     OvsExecuteDpIoctl().
 *  4. On success, build the default reply header in the output buffer and
 *     report its length through 'replyLen'.
 *  5. Map the final NTSTATUS to an NL_ERROR; when that is an error and an
 *     output buffer exists, an error message is written there and the
 *     function's status is reset to STATUS_SUCCESS.
 */
311 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
314 NTSTATUS status = STATUS_SUCCESS;
315 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
316 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
317 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
318 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
319 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
321 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
322 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Attributes start after the netlink, generic-netlink and OVS headers. */
324 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
325 UINT32 keyAttrOffset = 0;
326 OvsPacketExecute execute;
327 NL_ERROR nlError = NL_ERROR_SUCCESS;
330 static const NL_POLICY nlPktExecPolicy[] = {
331 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
332 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
333 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
334 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
335 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
339 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
341 /* Get all the top level Flow attributes */
342 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
343 nlPktExecPolicy, ARRAY_SIZE(nlPktExecPolicy),
344 nlAttrs, ARRAY_SIZE(nlAttrs)))
346 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
348 status = STATUS_UNSUCCESSFUL;
352 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
355 /* Get flow keys attributes */
356 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
357 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
358 nlFlowKeyPolicy, nlFlowKeyPolicyLen,
359 keyAttrs, ARRAY_SIZE(keyAttrs))) != TRUE) {
360 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
361 status = STATUS_UNSUCCESSFUL;
365 execute.dpNo = ovsHdr->dp_ifindex;
367 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
369 status = OvsExecuteDpIoctl(&execute);
371 /* Default reply that we want to send */
372 if (status == STATUS_SUCCESS) {
375 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
376 usrParamsCtx->outputLength);
378 /* Prepare nl Msg headers */
379 ok = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
380 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
381 genlMsgHdr->cmd, OVS_PACKET_VERSION,
385 *replyLen = msgOut->nlMsg.nlmsgLen;
387 status = STATUS_INVALID_BUFFER_SIZE;
390 /* Map NTSTATUS to NL_ERROR */
391 nlError = NlMapStatusToNlErr(status);
393 /* As of now there are no transactional errors in the implementation.
394 * Once we have them then we need to map status to correct
395 * nlError value, so that below mentioned code gets hit. */
396 if ((nlError != NL_ERROR_SUCCESS) &&
397 (usrParamsCtx->outputBuffer)) {
399 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
400 usrParamsCtx->outputBuffer;
401 NlBuildErrorMsg(msgIn, msgError, nlError);
402 *replyLen = msgError->nlMsg.nlmsgLen;
/* The failure is reported inside the netlink error message, so the call itself succeeds. */
403 status = STATUS_SUCCESS;
413 *----------------------------------------------------------------------------
414 * _MapNlAttrToOvsPktExec --
415 * Maps input Netlink attributes to OvsPacketExecute.
416 *----------------------------------------------------------------------------
/*
 * Fills 'execute' from already-parsed netlink attributes:
 *   packetBuf/packetLen   <- OVS_PACKET_ATTR_PACKET payload and size
 *   actions/actionsLen    <- OVS_PACKET_ATTR_ACTIONS payload and size
 *   inPort                <- OVS_KEY_ATTR_IN_PORT (u32) from the nested key
 * The attribute pointers are used without NULL checks, so the caller must
 * have parsed them successfully.
 */
419 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
420 OvsPacketExecute *execute)
422 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
423 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
425 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
426 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
428 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * Executes a user-supplied action list on a user-supplied packet.
 *
 * A zero-length packet yields STATUS_INVALID_PARAMETER. Otherwise an NBL is
 * built from the packet bytes (STATUS_NO_MEMORY appears on that path,
 * presumably on allocation failure), its forwarding detail gets the source
 * port/NIC of the vport matching execute->inPort (or the default port id and
 * NIC index 0), the flow key is extracted, and on success the actions are
 * run while holding dispatchLock for read. NDIS_STATUS_NOT_SUPPORTED maps to
 * STATUS_NOT_SUPPORTED, any other NDIS failure to STATUS_UNSUCCESSFUL.
 */
432 OvsExecuteDpIoctl(OvsPacketExecute *execute)
434 NTSTATUS status = STATUS_SUCCESS;
436 LOCK_STATE_EX lockState;
437 PNET_BUFFER_LIST pNbl;
439 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
441 OVS_PACKET_HDR_INFO layers;
442 POVS_VPORT_ENTRY vport;
444 if (execute->packetLen == 0) {
445 status = STATUS_INVALID_PARAMETER;
449 actions = execute->actions;
454 * Allocate the NBL, copy the data from the userspace buffer. Allocate
455 * also, the forwarding context for the packet.
457 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
460 status = STATUS_NO_MEMORY;
464 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
465 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
467 fwdDetail->SourcePortId = vport->portId;
468 fwdDetail->SourceNicIndex = vport->nicIndex;
470 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
471 fwdDetail->SourceNicIndex = 0;
473 // XXX: Figure out if any of the other members of fwdDetail need to be set.
475 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
477 if (ndisStatus == NDIS_STATUS_SUCCESS) {
478 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, 0);
479 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
480 vport ? vport->portNo :
482 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
483 &key, NULL, &layers, actions,
484 execute->actionsLen);
486 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Translate the NDIS result into the NTSTATUS handed back to the caller. */
488 if (ndisStatus != NDIS_STATUS_SUCCESS) {
489 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
490 status = STATUS_NOT_SUPPORTED;
492 status = STATUS_UNSUCCESSFUL;
497 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
/*
 * Purges the packet queue of the instance stored in fileObject->FsContext
 * by calling OvsPurgePacketQueue(). Returns STATUS_SUCCESS after the purge;
 * STATUS_INVALID_PARAMETER is returned on the early-exit path (presumably
 * when the instance has no queue -- TODO confirm).
 */
505 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
507 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
508 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
511 return STATUS_INVALID_PARAMETER;
513 OvsPurgePacketQueue(queue, instance);
514 return STATUS_SUCCESS;
/*
 * Cancel routine for a pended datapath IRP (installed by OvsWaitDpIoctl()).
 *
 * The system cancel spin lock is released first. The instance and its
 * packet queue are then looked up from the IRP's file object under
 * gOvsCtrlLock; if the queue still records this IRP as pending, that record
 * is cleared under queue->queueLock. The IRP is completed with
 * STATUS_CANCELLED.
 */
518 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
521 PIO_STACK_LOCATION irpSp;
522 PFILE_OBJECT fileObject;
523 POVS_OPEN_INSTANCE instance;
524 POVS_USER_PACKET_QUEUE queue = NULL;
526 UNREFERENCED_PARAMETER(deviceObject);
/* The cancel spin lock is held on entry to a cancel routine and must be released here. */
528 IoReleaseCancelSpinLock(irp->CancelIrql);
529 irpSp = IoGetCurrentIrpStackLocation(irp);
530 fileObject = irpSp->FileObject;
532 if (fileObject == NULL) {
535 NdisAcquireSpinLock(gOvsCtrlLock);
536 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
538 queue = instance->packetQueue;
540 if (instance == NULL || queue == NULL) {
541 NdisReleaseSpinLock(gOvsCtrlLock);
544 NdisReleaseSpinLock(gOvsCtrlLock);
545 NdisAcquireSpinLock(&queue->queueLock);
/* Only forget the IRP if it is still the one parked on the queue. */
546 if (queue->pendingIrp == irp) {
547 queue->pendingIrp = NULL;
549 NdisReleaseSpinLock(&queue->queueLock);
551 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * Parks 'irp' on the instance's packet queue until a packet arrives.
 *
 * Return values visible in this function:
 *  - STATUS_INVALID_PARAMETER: early exit (presumably no queue -- TODO
 *    confirm), or the queue is bound to a different instance.
 *  - STATUS_DEVICE_BUSY: another IRP is already pending on the queue.
 *  - STATUS_PENDING: the queue was empty; the IRP was marked pending, given
 *    OvsCancelIrpDatapath as cancel routine and stored in queue->pendingIrp.
 *  - STATUS_SUCCESS (initial value) otherwise.
 * A path that completes the IRP with STATUS_CANCELLED also exists; it is
 * presumably taken when the IRP was cancelled while being queued.
 */
556 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
558 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
559 POVS_USER_PACKET_QUEUE queue =
560 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
561 NTSTATUS status = STATUS_SUCCESS;
562 BOOLEAN cancelled = FALSE;
565 return STATUS_INVALID_PARAMETER;
567 NdisAcquireSpinLock(&queue->queueLock);
568 if (queue->instance != instance) {
569 NdisReleaseSpinLock(&queue->queueLock);
570 return STATUS_INVALID_PARAMETER;
/* Only one outstanding wait IRP per queue is allowed. */
572 if (queue->pendingIrp) {
573 NdisReleaseSpinLock(&queue->queueLock);
574 return STATUS_DEVICE_BUSY;
576 if (queue->numPackets == 0) {
577 PDRIVER_CANCEL cancelRoutine;
578 IoMarkIrpPending(irp);
579 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
581 cancelRoutine = IoSetCancelRoutine(irp, NULL);
586 queue->pendingIrp = irp;
588 status = STATUS_PENDING;
590 NdisReleaseSpinLock(&queue->queueLock);
592 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
593 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/*
 * Removes and returns the element at the head of the instance's packet
 * queue. Under queue->queueLock the function bails out when the queue is
 * bound to another instance or holds no packets; otherwise the head entry
 * is unlinked and converted back to its OVS_PACKET_QUEUE_ELEM. Ownership of
 * the returned element passes to the caller (OvsReadDpIoctl frees it).
 */
599 POVS_PACKET_QUEUE_ELEM
600 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
602 POVS_USER_PACKET_QUEUE queue;
604 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
608 NdisAcquireSpinLock(&queue->queueLock);
609 if (queue->instance != instance || queue->numPackets == 0) {
610 NdisReleaseSpinLock(&queue->queueLock);
613 link = RemoveHeadList(&queue->packetList);
615 NdisReleaseSpinLock(&queue->queueLock);
616 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
620 * ---------------------------------------------------------------------------
621 * Given a pid, returns the corresponding USER_PACKET_QUEUE.
622 * ---------------------------------------------------------------------------
/*
 * Looks up the open instance registered for 'pid' and yields its
 * packetQueue; 'ret' starts out NULL for the not-found case. Because the
 * lookup goes through OvsGetPidInstance(), pidHashLock must be held by the
 * caller (OvsQueuePackets() does so).
 */
624 POVS_USER_PACKET_QUEUE
625 OvsGetQueue(UINT32 pid)
627 POVS_OPEN_INSTANCE instance;
628 POVS_USER_PACKET_QUEUE ret = NULL;
630 instance = OvsGetPidInstance(gOvsSwitchContext, pid);
633 ret = instance->packetQueue;
640 * ---------------------------------------------------------------------------
641 * Given a pid, returns the corresponding instance.
642 * pidHashLock must be acquired before calling this API.
643 * ---------------------------------------------------------------------------
/*
 * Hashes 'pid' with OvsJhashBytes(), selects the bucket
 * pidHashArray[hash & OVS_PID_MASK] and walks it, comparing each linked
 * instance's pid against 'pid'.
 */
646 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
648 POVS_OPEN_INSTANCE instance;
649 PLIST_ENTRY head, link;
650 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
652 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
653 LIST_FORALL(head, link) {
/* Entries are linked through their pidLink member. */
654 instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
655 if (instance->pid == pid) {
663 * ---------------------------------------------------------------------------
664 * Given a pid and an instance, this API adds the instance to pidHashArray.
665 * pidHashLock must be acquired before calling this API.
666 * ---------------------------------------------------------------------------
/*
 * Inserts 'instance' (via its pidLink) at the head of the pidHashArray
 * bucket selected by the hash of 'pid'. No duplicate check is performed
 * here.
 */
669 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
670 POVS_OPEN_INSTANCE instance)
673 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
675 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
676 InsertHeadList(head, &(instance->pidLink));
680 * ---------------------------------------------------------------------------
681 * Given a pid, this API removes the matching instance from pidHashArray.
682 * pidHashLock must be acquired before calling this API.
683 * ---------------------------------------------------------------------------
/*
 * Finds the instance registered for 'pid' through OvsGetPidInstance() and
 * unlinks its pidLink from the hash bucket.
 */
686 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
688 POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
691 RemoveEntryList(&(instance->pidLink));
/*
 * Distributes the elements on 'packetList' to the per-pid upcall queues.
 *
 * For every element the target queue is looked up by elem->upcallPid while
 * pidHashLock is held. Elements with no queue, or whose queue is no longer
 * bound to an instance, are collected on a local drop list. Other elements
 * are appended to the queue under its queueLock; if the queue has a pending
 * IRP, the IRP is detached, its cancel routine cleared, and it is completed
 * with STATUS_SUCCESS to wake the reader. Dropped elements are freed at the
 * end.
 */
696 OvsQueuePackets(PLIST_ENTRY packetList,
699 POVS_USER_PACKET_QUEUE upcallQueue = NULL;
700 POVS_PACKET_QUEUE_ELEM elem;
704 LIST_ENTRY dropPackets;
706 OVS_LOG_LOUD("Enter: numELems: %u", numElems);
708 InitializeListHead(&dropPackets);
710 while (!IsListEmpty(packetList)) {
711 link = RemoveHeadList(packetList);
712 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
716 OvsAcquirePidHashLock();
718 upcallQueue = OvsGetQueue(elem->upcallPid);
720 /* No upcall queue found, drop this packet. */
721 InsertTailList(&dropPackets, &elem->link);
723 NdisAcquireSpinLock(&upcallQueue->queueLock);
/* A queue without an instance is being torn down; drop instead of queueing. */
725 if (upcallQueue->instance == NULL) {
726 InsertTailList(&dropPackets, &elem->link);
728 InsertTailList(&upcallQueue->packetList, &elem->link);
729 upcallQueue->numPackets++;
730 if (upcallQueue->pendingIrp) {
731 PDRIVER_CANCEL cancelRoutine;
732 irp = upcallQueue->pendingIrp;
733 upcallQueue->pendingIrp = NULL;
734 cancelRoutine = IoSetCancelRoutine(irp, NULL);
735 if (cancelRoutine == NULL) {
742 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
745 NdisReleaseSpinLock(&upcallQueue->queueLock);
748 OvsReleasePidHashLock();
/* Free everything that could not be delivered. */
751 while (!IsListEmpty(&dropPackets)) {
752 link = RemoveHeadList(&dropPackets);
753 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
754 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
758 OVS_LOG_LOUD("Exit: drop %u packets", num);
762 *----------------------------------------------------------------------------
763 * OvsCreateAndAddPackets --
765 * Creates packets and forwards them to user space.
767 * This function fragments the packet if needed, and queues
768 * each segment to user space.
769 *----------------------------------------------------------------------------
/*
 * Turns the packet(s) in 'nbl' into upcall queue elements appended to
 * 'list'.
 *
 * For TCP packets whose LSO info carries a non-zero MSS, the NBL is first
 * segmented with OvsTcpSegmentNBL(); a NULL result there returns
 * NDIS_STATUS_FAILURE. Each NET_BUFFER is then passed to
 * OvsCreateQueueNlPacket() and the resulting element is tail-inserted into
 * 'list'. The segmented NBL is completed before NDIS_STATUS_SUCCESS is
 * returned.
 */
772 OvsCreateAndAddPackets(PVOID userData,
775 POVS_VPORT_ENTRY vport,
777 PNET_BUFFER_LIST nbl,
779 POVS_PACKET_HDR_INFO hdrInfo,
780 POVS_SWITCH_CONTEXT switchContext,
784 POVS_PACKET_QUEUE_ELEM elem;
785 PNET_BUFFER_LIST newNbl = NULL;
788 if (hdrInfo->isTcp) {
789 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
792 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
793 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
794 packetLength = NET_BUFFER_DATA_LENGTH(nb);
796 OVS_LOG_TRACE("MSS %u packet len %u",
797 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS means the packet relies on large-send offload and must be segmented in software. */
798 if (tsoInfo.LsoV1Transmit.MSS) {
799 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
800 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
801 tsoInfo.LsoV1Transmit.MSS , 0);
802 if (newNbl == NULL) {
803 return NDIS_STATUS_FAILURE;
809 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
811 elem = OvsCreateQueueNlPacket(userData, userDataLen,
812 cmd, vport, key, nbl, nb,
815 InsertTailList(list, &elem->link);
818 nb = NET_BUFFER_NEXT_NB(nb);
821 OvsCompleteNBL(switchContext, newNbl, TRUE);
823 return NDIS_STATUS_SUCCESS;
/*
 * Computes the buffer size reserved for one upcall netlink message: the
 * aligned struct ovs_header, a packet attribute of 'payload' bytes and a
 * flow-key attribute of OvsFlowKeyAttrSize() bytes, plus the user-data
 * attribute and the tunnel-key attribute where the code adds them (the
 * guarding conditions are presumably "present / non-NULL" -- TODO confirm).
 */
826 static __inline UINT32
827 OvsGetUpcallMsgSize(PVOID userData,
829 OvsIPv4TunnelKey *tunnelKey,
832 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
833 NlAttrSize(payload) +
834 NlAttrSize(OvsFlowKeyAttrSize());
836 /* OVS_PACKET_ATTR_USERDATA */
838 size += NlAttrTotalSize(userDataLen);
840 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
841 /* Is it included in the flow key attr XXX */
843 size += NlAttrTotalSize(OvsTunKeyAttrSize());
849 *----------------------------------------------------------------------------
850 * This function completes the IP header csum, and records the L4 payload offset
851 * and whether the TCP or UDP csum needs to be calculated. The actual csum will
852 * be calculated simultaneously with the copy of the payload to the destination
853 * buffer when the packet is read.
854 *----------------------------------------------------------------------------
/*
 * Fixes up checksum state of a packet copy at 'packet' and records in
 * 'hdrInfoOut' what still has to be computed when the packet is read.
 *
 *  - IPv4 header checksum: recomputed in place when the receive info marks
 *    it invalid, or on transmit when IPv4 header checksum offload was
 *    requested.
 *  - Receive with an invalid TCP checksum: the TCP pseudo-header checksum is
 *    recomputed, l4PayLoad is set and tcpCsumNeeded is raised.
 *  - Transmit with TCP or UDP checksum offload requested: the matching
 *    *CsumNeeded flag is raised, l4PayLoad is set and the pseudo-header
 *    checksum is computed for the L4 header's checksum field.
 *
 * l4PayLoad is the L4 length: IPv4 total length minus the IP header length,
 * or for IPv6 the payload length adjusted by the distance between the end of
 * the fixed IPv6 header and the L4 offset.
 */
857 OvsCompletePacketHeader(UINT8 *packet,
859 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
860 POVS_PACKET_HDR_INFO hdrInfoIn,
861 POVS_PACKET_HDR_INFO hdrInfoOut)
863 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
864 (!isRecv && csumInfo.Transmit.IsIPv4 &&
865 csumInfo.Transmit.IpHeaderChecksum)) {
866 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
867 ASSERT(hdrInfoIn->isIPv4);
868 ASSERT(ipHdr->Version == 4);
869 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
870 ipHdr->HeaderLength << 2,
871 (UINT16)~ipHdr->HeaderChecksum);
872 ovsUserStats.ipCsum++;
/* NOTE(review): this assert reads tcpCsumNeeded from hdrInfoIn but udpCsumNeeded from hdrInfoOut -- confirm the mix is intended. */
874 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
876 * calculate TCP/UDP pseudo checksum
878 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
880 * Only this case, we need to reclaculate pseudo checksum
881 * all other cases, it is assumed the pseudo checksum is
885 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
886 if (hdrInfoIn->isIPv4) {
887 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
888 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
889 (ipHdr->HeaderLength << 2));
890 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
891 (UINT32 *)&ipHdr->DestinationAddress,
892 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
894 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
895 hdrInfoOut->l4PayLoad =
896 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
897 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
898 hdrInfoIn->l4Offset);
899 ASSERT(hdrInfoIn->isIPv6);
901 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
902 (UINT32 *)&ipv6Hdr->DestinationAddress,
903 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
905 hdrInfoOut->tcpCsumNeeded = 1;
906 ovsUserStats.recalTcpCsum++;
907 } else if (!isRecv) {
908 if (csumInfo.Transmit.TcpChecksum) {
909 hdrInfoOut->tcpCsumNeeded = 1;
910 } else if (csumInfo.Transmit.UdpChecksum) {
911 hdrInfoOut->udpCsumNeeded = 1;
913 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
917 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
919 if (hdrInfoIn->isIPv4) {
920 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
921 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
922 (ipHdr->HeaderLength << 2));
924 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
925 (UINT32 *)&ipHdr->DestinationAddress,
926 proto, hdrInfoOut->l4PayLoad);
929 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
930 hdrInfoIn->l3Offset);
931 hdrInfoOut->l4PayLoad =
932 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
933 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
934 hdrInfoIn->l4Offset);
935 ASSERT(hdrInfoIn->isIPv6);
937 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
938 (UINT32 *)&ipv6Hdr->DestinationAddress,
939 proto, hdrInfoOut->l4PayLoad);
/* Address of the checksum field inside the TCP or UDP header. */
943 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
944 (hdrInfoOut->tcpCsumNeeded ?
945 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/*
 * Selects the upcall pid for a packet arriving on 'vport'. Currently this
 * is simply vport->upcallPid; 'nb' is unused. Returns STATUS_SUCCESS after
 * storing the pid in '*pid'.
 */
953 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
955 UNREFERENCED_PARAMETER(nb);
959 /* XXX select a pid from an array of pids using a flow based hash */
960 *pid = vport->upcallPid;
961 return STATUS_SUCCESS;
965 *----------------------------------------------------------------------------
966 * OvsCreateQueueNlPacket --
968 * Create a packet which will be forwarded to user space.
971 * userData: when cmd is user action, this field contain
973 * userDataLen: as name indicated
974 * cmd: either miss or user action
975 * inPort: datapath port id from which the packet is received.
976 * key: flow Key with a tunnel key if available
977 * nbl: the NET_BUFFER_LIST which contain the packet
979 * isRecv: This is used to decide how to interpret the csum info
980 * hdrInfo: include hdr info initialized during flow extraction.
983 * NULL if fail to create the packet
984 * The packet element otherwise
985 *----------------------------------------------------------------------------
/*
 * Builds one upcall queue element for the packet in 'nb'.
 *
 * Visible flow: pick the upcall pid from 'vport'; drop packets whose receive
 * checksum info reports a TCP/IP failure (UDP failures only when the UDP
 * checksum was not zero); size and allocate the element; fill the bookkeeping
 * fields of elem->packet; write the netlink message (OVS header, flow key,
 * optional user data, packet payload) into elem->packet.data; complete the
 * checksum state via OvsCompletePacketHeader(); re-insert an 802.1Q tag when
 * the NBL carries a VLAN id; finally set nlmsgLen/totalLen to the bytes
 * actually written. The element is freed on the failure path at the end.
 */
987 POVS_PACKET_QUEUE_ELEM
988 OvsCreateQueueNlPacket(PVOID userData,
991 POVS_VPORT_ENTRY vport,
993 PNET_BUFFER_LIST nbl,
996 POVS_PACKET_HDR_INFO hdrInfo)
998 #define VLAN_TAG_SIZE 4
999 UINT32 allocLen, dataLen, extraLen;
1000 POVS_PACKET_QUEUE_ELEM elem;
1002 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
1003 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
1004 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
1011 /* No vport is not fatal. */
1015 OvsGetPid(vport, nb, &pid);
1019 * There is no userspace queue created yet, so there is no point for
1020 * creating a new packet to be queued.
1025 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
1027 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1028 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1029 csumInfo.Receive.IpChecksumFailed)) {
1030 OVS_LOG_INFO("Packet dropped due to checksum failure.");
1031 ovsUserStats.dropDuetoChecksum++;
/* Room for a re-inserted VLAN tag is reserved only when a VLAN id is present. */
1035 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1036 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1038 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/*
 * NOTE(review): this limit is checked for dataLen only, while the packet
 * attribute written below is dataLen + extraLen bytes -- confirm that a
 * VLAN tag cannot push the attribute length past MAXUINT16.
 */
1040 if (NlAttrSize(dataLen) > MAXUINT16) {
1044 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1045 dataLen + extraLen);
1047 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1048 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemoryWithTag(allocLen,
1051 ovsUserStats.dropDuetoResource++;
1054 elem->hdrInfo.value = hdrInfo->value;
1055 elem->upcallPid = pid;
1056 elem->packet.totalLen = nlMsgSize;
1057 /* XXX remove queueid */
1058 elem->packet.queue = 0;
1059 /* XXX no need as the length is already in the NL attrib */
1060 elem->packet.userDataLen = userDataLen;
1061 elem->packet.inPort = vport->portNo;
1062 elem->packet.cmd = cmd;
1063 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1064 ovsUserStats.miss++;
1065 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1066 ovsUserStats.action++;
1071 /* XXX Should we have both packetLen and TotalLen*/
1072 elem->packet.packetLen = dataLen + extraLen;
1074 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1077 * Initialize the OVS header
1078 * Since we are pre allocating memory for the NL buffer
1079 * the attribute settings should not fail
1081 if (!NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1082 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1083 gOvsSwitchContext->dpNo)) {
1087 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1088 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1092 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1094 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1095 userData, (UINT16)userDataLen)) {
1101 * Make space for the payload to be copied and set the attribute
1102 * XXX Uninit set initilizes the buffer with xero, we don't actually need
1103 * that the payload to be initailized
1105 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1106 (UINT16)(dataLen + extraLen));
1111 /* Store the payload for csum calculation when packet is read */
1112 elem->packet.payload = dst;
/* NdisGetDataBuffer may return a pointer into the NB itself or fill 'dst'; copy only in the former case. */
1115 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1117 ovsUserStats.dropDuetoResource++;
1119 } else if (src != dst) {
1120 /* Copy the data from the NDIS buffer to dst. */
1121 RtlCopyMemory(dst, src, dataLen);
1124 /* Set csum if was offloaded */
1125 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1128 * Finally insert VLAN tag
1131 dst = elem->packet.payload;
1132 src = dst + extraLen;
/* Move 12 bytes (the two Ethernet MAC addresses) forward over the reserved gap. */
1133 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1134 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1135 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
/* Write the 802.1Q TPID (0x8100) followed by the priority and VLAN id. */
1137 ((UINT16 *)dst)[0] = htons(0x8100);
1138 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1139 (vlanInfo.TagHeader.UserPriority << 13));
1140 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1141 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1142 ovsUserStats.vlanInsert++;
1145 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1146 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1147 /* 'totalLen' should be size of valid data. */
1148 elem->packet.totalLen = nlMsg->nlmsgLen;
1152 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
1157 * --------------------------------------------------------------------------
1158 * Handler for the subscription for a packet queue
1159 * --------------------------------------------------------------------------
/*
 * Netlink handler that subscribes or unsubscribes the calling instance to
 * packet upcalls.
 *
 * The request carries two attributes, both described by 'policy':
 *   OVS_NL_ATTR_PACKET_PID        (u32) - pid of the subscribing socket
 *   OVS_NL_ATTR_PACKET_SUBSCRIBE  (u8)  - non-zero to join, zero to leave
 * A parse failure yields STATUS_INVALID_PARAMETER. Otherwise the request is
 * handed to OvsSubscribeDpIoctl() and its status is returned. No reply
 * payload is produced ('replyLen' is unused).
 */
1162 OvsSubscribePacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1169 const NL_POLICY policy[] = {
1170 [OVS_NL_ATTR_PACKET_PID] = {.type = NL_A_U32 },
1171 [OVS_NL_ATTR_PACKET_SUBSCRIBE] = {.type = NL_A_U8 }
1173 PNL_ATTR attrs[ARRAY_SIZE(policy)];
1175 UNREFERENCED_PARAMETER(replyLen);
1177 POVS_OPEN_INSTANCE instance =
1178 (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1179 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1181 rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
1182 NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, ARRAY_SIZE(policy),
1183 attrs, ARRAY_SIZE(attrs));
1185 status = STATUS_INVALID_PARAMETER;
/*
 * The join flag lives in the u8 SUBSCRIBE attribute. Reading it from the
 * PID attribute would return the low byte of the pid instead, making the
 * subscribe/unsubscribe decision depend on the pid value.
 */
1189 join = NlAttrGetU8(attrs[OVS_NL_ATTR_PACKET_SUBSCRIBE]);
1190 pid = NlAttrGetU32(attrs[OVS_NL_ATTR_PACKET_PID]);
1192 /* The socket subscribed with must be the same socket we perform receive*/
1193 ASSERT(pid == instance->pid);
1195 status = OvsSubscribeDpIoctl(instance, pid, join);
1198 * XXX Need to add this instance to a global data structure
1199 * which hold all packet based instances. The data structure (hash)
1200 * should be searched through the pid field of the instance for
1201 * placing the missed packet into the correct queue
1208 * --------------------------------------------------------------------------
1209 * Handler for queueing an IRP used for missed packet notification. The IRP is
1210 * completed when a packet is received and mismatched. STATUS_PENDING is
1211 * returned on success. User mode keeps a pending IRP at all times.
1212 * --------------------------------------------------------------------------
/*
 * Forwards the request's IRP to OvsWaitDpIoctl() together with the file
 * object of the calling instance and returns that function's status.
 * 'replyLen' is unused.
 */
1215 OvsPendPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1218 UNREFERENCED_PARAMETER(replyLen);
1220 POVS_OPEN_INSTANCE instance =
1221 (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1224 * XXX access to packet queue must be through acquiring a lock as user mode
1225 * could unsubscribe and the instnace will be freed.
1227 return OvsWaitDpIoctl(usrParamsCtx->irp, instance->fileObject);
1231 * --------------------------------------------------------------------------
1232 * Handler for reading missed packets from the driver event queue. This
1233 * handler is executed when user mode issues a socket receive on a socket
1234 * --------------------------------------------------------------------------
/*
 * Read-path handler: after asserting its preconditions (read device
 * operation, not a dump socket, a packet queue exists, output buffer large
 * enough for an OVS_MESSAGE) it reads one packet into the output buffer via
 * OvsReadDpIoctl(), which also reports the length through 'replyLen'.
 */
1237 OvsReadPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1241 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1243 POVS_OPEN_INSTANCE instance =
1244 (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1247 ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1249 /* Should never read events with a dump socket */
1250 ASSERT(instance->dumpState.ovsMsg == NULL);
1252 /* Must have an packet queue */
1253 ASSERT(instance->packetQueue != NULL);
1255 /* Output buffer has been validated while validating read dev op. */
1256 ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1258 /* Read a packet from the instance queue */
1259 status = OvsReadDpIoctl(instance->fileObject, usrParamsCtx->outputBuffer,
1260 usrParamsCtx->outputLength, replyLen);