2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage packet queue for packet miss for userAction.
34 #include "TunnelIntf.h"
/* Debug module tag for this file; presumably selects the category used by
 * the OVS_LOG_* macros -- confirm against the logging header. */
40 #define OVS_DBG_MOD OVS_DBG_USER
/* Forward declaration of a helper that is defined later in this file. */
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Globals that are defined outside this file. */
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by the functions in this file (miss, action, ipCsum,
 * l4Csum, recalTcpCsum, vlanInsert, dropDuetoChecksum, dropDuetoResource). */
46 OVS_USER_STATS ovsUserStats;
/* Forward declaration of the Netlink-attribute mapper defined further down. */
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49 OvsPacketExecute *execute);
/* Netlink policy used by OvsNlExecuteCmdHandler to parse the nested flow-key
 * attributes of a packet-execute request. */
50 extern NL_POLICY nlFlowKeyPolicy[];
/* Acquires the NDIS spin lock gOvsSwitchContext->pidHashLock. Pair every call
 * with OvsReleasePidHashLock(). No NULL check on gOvsSwitchContext is visible
 * here, so callers must ensure the switch context exists. */
53 OvsAcquirePidHashLock()
55 NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
/* Releases the NDIS spin lock gOvsSwitchContext->pidHashLock that was taken
 * by OvsAcquirePidHashLock(). */
59 OvsReleasePidHashLock()
61 NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
/*
 * Drops every packet currently queued on 'queue', provided the queue is still
 * owned by 'instance'.
 *
 * The queued elements are detached onto a local list while queueLock is held
 * and are freed only after the lock has been released.
 *
 * NOTE(review): some lines of this function (e.g. the declaration of 'tmp' and
 * the return statements) are not visible in this extract.
 */
66 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
67 POVS_OPEN_INSTANCE instance)
69 PLIST_ENTRY link, next;
71 POVS_PACKET_QUEUE_ELEM elem;
73 InitializeListHead(&tmp);
74 NdisAcquireSpinLock(&queue->queueLock);
/* The queue is owned by a different instance: drop the lock (the rest of
 * this branch is not visible in this extract). */
75 if (queue->instance != instance) {
76 NdisReleaseSpinLock(&queue->queueLock);
/* Detach all queued packets onto the local list; OvsAppendList presumably
 * moves the entries of packetList onto 'tmp' -- confirm. */
80 if (queue->numPackets) {
81 OvsAppendList(&tmp, &queue->packetList);
82 queue->numPackets = 0;
84 NdisReleaseSpinLock(&queue->queueLock);
/* Free each detached element outside the lock. */
85 LIST_FORALL_SAFE(&tmp, link, next) {
86 RemoveEntryList(link);
87 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
88 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
/*
 * Tears down the packet queue attached to 'instance'.
 *
 * Under queueLock it detaches all queued packets, clears the links between
 * the queue and the instance, and takes over any pending IRP. After the lock
 * is released and freed it frees the detached packets, completes the IRP,
 * frees the queue itself and, if gOvsSwitchContext exists, removes the
 * instance from the pid hash.
 *
 * NOTE(review): several lines (declarations of 'tmp' and 'irp', some
 * conditions and the return statements) are not visible in this extract.
 */
93 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
95 POVS_USER_PACKET_QUEUE queue;
96 POVS_PACKET_QUEUE_ELEM elem;
97 PLIST_ENTRY link, next;
102 InitializeListHead(&tmp);
103 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
105 PDRIVER_CANCEL cancelRoutine;
106 NdisAcquireSpinLock(&queue->queueLock);
107 ASSERT(queue->instance == instance);
108 /* XXX Should not happen */
109 if (queue->instance != instance) {
110 NdisReleaseSpinLock(&queue->queueLock);
111 NdisFreeSpinLock(&queue->queueLock);
/* Move any queued packets onto the local list so they can be freed once the
 * lock has been dropped. */
115 if (queue->numPackets) {
116 OvsAppendList(&tmp, &queue->packetList);
117 queue->numPackets = 0;
/* Break the queue <-> instance association in both directions. */
119 queue->instance = NULL;
120 instance->packetQueue = NULL;
/* Take ownership of the pending IRP, if any. */
121 irp = queue->pendingIrp;
122 queue->pendingIrp = NULL;
/* Clear the IRP's cancel routine. A NULL previous routine is treated
 * specially; the branch body is not visible in this extract (presumably the
 * cancel routine is already running and owns the IRP -- confirm). */
124 cancelRoutine = IoSetCancelRoutine(irp, NULL);
125 if (cancelRoutine == NULL) {
129 NdisReleaseSpinLock(&queue->queueLock);
130 NdisFreeSpinLock(&queue->queueLock);
/* Free the detached packet elements. */
132 LIST_FORALL_SAFE(&tmp, link, next) {
133 RemoveEntryList(link);
134 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
135 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
/* Complete the taken-over IRP with STATUS_SUCCESS (the guarding condition is
 * not visible in this extract). */
138 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
141 OvsFreeMemoryWithTag(queue, OVS_USER_POOL_TAG);
144 /* Verify if gOvsSwitchContext exists. */
145 if (gOvsSwitchContext) {
146 /* Remove the instance from pidHashArray */
147 OvsAcquirePidHashLock();
148 OvsDelPidInstance(gOvsSwitchContext, instance->pid);
149 OvsReleasePidHashLock();
/*
 * Subscribes or unsubscribes an open instance to packet upcalls.
 *
 *  - Queue present and 'join' false: the queue is torn down through
 *    OvsCleanupPacketQueue().
 *  - No queue and 'join' true: a queue is allocated and initialized, linked
 *    to the instance, and the instance is added to the pid hash under 'pid'.
 *  - The visible code also returns STATUS_INVALID_PARAMETER next to the
 *    "call only once" comment, presumably for the remaining combinations.
 *
 * Returns STATUS_NO_MEMORY when the queue allocation fails (the check itself
 * is not visible in this extract) and STATUS_SUCCESS otherwise.
 *
 * NOTE(review): the 'pid' and 'join' parameters are used below but their
 * declarations are not visible in this extract.
 */
154 OvsSubscribeDpIoctl(PVOID instanceP,
158 POVS_USER_PACKET_QUEUE queue;
159 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
161 if (instance->packetQueue && !join) {
163 OvsCleanupPacketQueue(instance);
164 } else if (instance->packetQueue == NULL && join) {
165 queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemoryWithTag(
166 sizeof *queue, OVS_USER_POOL_TAG);
168 return STATUS_NO_MEMORY;
170 InitializeListHead(&(instance->pidLink));
/* NOTE(review): instance->packetQueue is assigned here, before the queue is
 * zeroed and its lock allocated, and again below under queueLock; this first
 * assignment looks redundant. */
171 instance->packetQueue = queue;
172 RtlZeroMemory(queue, sizeof (*queue));
173 NdisAllocateSpinLock(&queue->queueLock);
174 NdisAcquireSpinLock(&queue->queueLock);
175 InitializeListHead(&queue->packetList);
177 queue->instance = instance;
178 instance->packetQueue = queue;
179 NdisReleaseSpinLock(&queue->queueLock);
181 OvsAcquirePidHashLock();
182 /* Insert the instance to pidHashArray */
183 OvsAddPidInstance(gOvsSwitchContext, pid, instance);
184 OvsReleasePidHashLock();
187 /* user mode should call only once for subscribe */
188 return STATUS_INVALID_PARAMETER;
191 return STATUS_SUCCESS;
/*
 * Copies the next queued upcall packet of the caller's open instance into
 * 'outputBuffer' and frees the queue element.
 *
 * Returns STATUS_INVALID_PARAMETER when the instance has no packet queue and
 * STATUS_BUFFER_TOO_SMALL when 'outputLength' is smaller than
 * sizeof(OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE; otherwise STATUS_SUCCESS.
 *
 * NOTE(review): the remaining parameters, several local declarations and the
 * handling of an empty queue are not visible in this extract.
 */
196 OvsReadDpIoctl(PFILE_OBJECT fileObject,
201 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
202 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offsets of the checksum field inside the TCP and UDP headers. */
205 #define TCP_CSUM_OFFSET 16
206 #define UDP_CSUM_OFFSET 6
209 if (instance->packetQueue == NULL) {
210 return STATUS_INVALID_PARAMETER;
212 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
213 return STATUS_BUFFER_TOO_SMALL;
216 elem = OvsGetNextPacket(instance);
219 * XXX revisit this later
/* Copy at most 'outputLength' bytes of the message. */
221 len = elem->packet.totalLen > outputLength ? outputLength :
222 elem->packet.totalLen;
/* A deferred L4 checksum is only filled in when the whole message fits. */
224 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
225 len == elem->packet.totalLen) {
/* 'size' is the number of bytes from the start of packet.data up to the L4
 * header: the offset of the payload attribute plus l4Offset. */
227 UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
228 elem->hdrInfo.l4Offset);
229 RtlCopyMemory(outputBuffer, &elem->packet.data, size);
230 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
/* Copy the L4 part while computing its checksum in the same pass. */
231 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
232 (UINT8 *)&elem->packet.data + size,
233 elem->hdrInfo.l4PayLoad, 0);
/* 'ptr' addresses the checksum field in the output copy; the store through
 * it is not visible in this extract. */
234 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
235 (elem->hdrInfo.tcpCsumNeeded ?
236 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
238 ovsUserStats.l4Csum++;
/* Plain copy of 'len' bytes (presumably the else branch -- confirm). */
240 RtlCopyMemory(outputBuffer, &elem->packet.data, len);
/* The element has been consumed; release it. */
244 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
246 return STATUS_SUCCESS;
249 /* Helper function to allocate a Forwarding Context for an NBL. It delegates to the switch's AllocateNetBufferListForwardingContext handler and returns that handler's result. */
251 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
252 PNET_BUFFER_LIST nbl)
254 return switchContext->NdisSwitchHandlers.
255 AllocateNetBufferListForwardingContext(
256 switchContext->NdisSwitchContext, nbl);
260 * --------------------------------------------------------------------------
261 * This function allocates all the stuff necessary for creating an NBL from the
262 * input buffer of specified length, namely, a nonpaged data buffer of size
263 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
264 * context yet. It also copies data from the specified buffer to the NBL.
265 * --------------------------------------------------------------------------
/* Builds an NBL of 'length' bytes and copies 'userBuffer' into it.
 * NOTE(review): the remaining parameters, local declarations, NULL checks and
 * return statements are not visible in this extract. */
268 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
273 PNET_BUFFER_LIST nbl = NULL;
/* Payloads larger than OVS_DEFAULT_DATA_SIZE use the variable-size
 * allocator; the fixed-size call below is presumably the else branch. */
277 if (length > OVS_DEFAULT_DATA_SIZE) {
278 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
279 OVS_DEFAULT_HEADROOM_SIZE);
282 nbl = OvsAllocateFixSizeNBL(switchContext, length,
283 OVS_DEFAULT_HEADROOM_SIZE);
/* Locate the start of the data in the first NET_BUFFER's current MDL. */
289 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
290 mdl = NET_BUFFER_CURRENT_MDL(nb);
291 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
292 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
/* Error path: give the NBL back (the guarding condition is not visible in
 * this extract; presumably the MDL could not be mapped -- confirm). */
294 OvsCompleteNBL(switchContext, nbl, TRUE);
/* Copy the caller's packet bytes into the NBL data area. */
298 NdisMoveMemory(data, userBuffer, length);
304 *----------------------------------------------------------------------------
305 * OvsNlExecuteCmdHandler --
306 * Handler for OVS_PACKET_CMD_EXECUTE command.
307 *----------------------------------------------------------------------------
/*
 * Parses a Netlink OVS_PACKET_CMD_EXECUTE request from
 * usrParamsCtx->inputBuffer, maps it to an OvsPacketExecute, runs it through
 * OvsExecuteDpIoctl() and writes either a reply header or a Netlink error
 * message into usrParamsCtx->outputBuffer, storing its length in *replyLen.
 *
 * NOTE(review): the 'replyLen' parameter, some locals ('ok', 'nlBuf'), labels
 * and the final return are not visible in this extract.
 */
310 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
313 NTSTATUS status = STATUS_SUCCESS;
314 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
315 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
316 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
317 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
318 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
320 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
321 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Top-level attributes start right after the three fixed headers. */
323 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
324 UINT32 keyAttrOffset = 0;
325 OvsPacketExecute execute;
326 NL_ERROR nlError = NL_ERROR_SUCCESS;
/* Parse policy: PACKET, KEY and ACTIONS are mandatory; USERDATA is optional
 * (the EGRESS_TUN_KEY entry is only partly visible in this extract). */
329 static const NL_POLICY nlPktExecPolicy[] = {
330 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
331 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
332 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
333 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
334 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
338 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
340 /* Get all the top level Flow attributes */
341 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
342 nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
344 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
346 status = STATUS_UNSUCCESSFUL;
/* Offset of the nested key attribute (the rest of this expression is not
 * visible in this extract). */
350 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
353 /* Get flow keys attributes */
354 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
355 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
356 nlFlowKeyPolicy, keyAttrs,
357 ARRAY_SIZE(keyAttrs))) != TRUE) {
358 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
359 status = STATUS_UNSUCCESSFUL;
363 execute.dpNo = ovsHdr->dp_ifindex;
365 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
367 status = OvsExecuteDpIoctl(&execute);
369 /* Default reply that we want to send */
370 if (status == STATUS_SUCCESS) {
373 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
374 usrParamsCtx->outputLength);
376 /* Prepare nl Msg headers */
377 ok = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
378 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
379 genlMsgHdr->cmd, OVS_PACKET_VERSION,
/* Report the reply length, or STATUS_INVALID_BUFFER_SIZE (presumably chosen
 * by the value of 'ok'; the condition is not visible in this extract). */
383 *replyLen = msgOut->nlMsg.nlmsgLen;
385 status = STATUS_INVALID_BUFFER_SIZE;
388 /* Map NTSTATUS to NL_ERROR */
389 nlError = NlMapStatusToNlErr(status);
391 /* As of now there are no transactional errors in the implementation.
392 * Once we have them then we need to map status to correct
393 * nlError value, so that below mentioned code gets hit. */
394 if ((nlError != NL_ERROR_SUCCESS) &&
395 (usrParamsCtx->outputBuffer)) {
/* Deliver the failure as a Netlink error message and report success for the
 * request itself. */
397 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
398 usrParamsCtx->outputBuffer;
399 NlBuildErrorMsg(msgIn, msgError, nlError);
400 *replyLen = msgError->nlMsg.nlmsgLen;
401 status = STATUS_SUCCESS;
411 *----------------------------------------------------------------------------
412 * _MapNlAttrToOvsPktExec --
413 * Maps input Netlink attributes to OvsPacketExecute.
414 *----------------------------------------------------------------------------
/* Fills 'execute' from parsed Netlink attributes: packet buffer and length
 * from OVS_PACKET_ATTR_PACKET, actions and their length from
 * OVS_PACKET_ATTR_ACTIONS, and the input port from OVS_KEY_ATTR_IN_PORT.
 * The attribute pointers are dereferenced without a visible NULL check, so
 * the caller is expected to have validated them. */
417 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
418 OvsPacketExecute *execute)
420 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
421 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
423 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
424 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
426 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * Executes a user-supplied action list on a user-supplied packet.
 *
 * The packet is copied into a fresh NBL, its source port is taken from the
 * vport named by execute->inPort, a flow key is extracted and the actions are
 * run while the dispatch lock is held for read.
 *
 * Status values visible here: STATUS_INVALID_PARAMETER for a zero-length
 * packet, STATUS_NO_MEMORY when the NBL cannot be allocated,
 * STATUS_NOT_SUPPORTED or STATUS_UNSUCCESSFUL when extraction or execution
 * fails.
 *
 * NOTE(review): several locals, conditions and the return statement are not
 * visible in this extract.
 */
430 OvsExecuteDpIoctl(OvsPacketExecute *execute)
432 NTSTATUS status = STATUS_SUCCESS;
434 LOCK_STATE_EX lockState;
435 PNET_BUFFER_LIST pNbl;
437 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
439 OVS_PACKET_HDR_INFO layers;
440 POVS_VPORT_ENTRY vport;
442 if (execute->packetLen == 0) {
443 status = STATUS_INVALID_PARAMETER;
447 actions = execute->actions;
452 * Allocate the NBL, copy the data from the userspace buffer. Allocate
453 * also, the forwarding context for the packet.
455 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
458 status = STATUS_NO_MEMORY;
462 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
463 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
/* Use the vport's port id and NIC index as the packet source, or the default
 * port with NIC index 0 (presumably when no vport was found -- the
 * condition is not visible in this extract). */
465 fwdDetail->SourcePortId = vport->portId;
466 fwdDetail->SourceNicIndex = vport->nicIndex;
468 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
469 fwdDetail->SourceNicIndex = 0;
471 // XXX: Figure out if any of the other members of fwdDetail need to be set.
473 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
475 if (ndisStatus == NDIS_STATUS_SUCCESS) {
/* Run the actions with the dispatch lock held for read. */
476 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, 0);
477 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
478 vport ? vport->portNo :
480 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
481 &key, NULL, &layers, actions,
482 execute->actionsLen);
484 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Translate the NDIS status into an NTSTATUS. */
486 if (ndisStatus != NDIS_STATUS_SUCCESS) {
487 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
488 status = STATUS_NOT_SUPPORTED;
490 status = STATUS_UNSUCCESSFUL;
/* Hand the NBL back (the guarding condition is not visible in this
 * extract). */
495 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
/* Purges all packets queued for the open instance behind 'fileObject'.
 * Returns STATUS_INVALID_PARAMETER on the visible early-out (presumably when
 * the instance has no queue -- the condition is not visible in this extract)
 * and STATUS_SUCCESS after the purge. */
503 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
505 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
506 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
509 return STATUS_INVALID_PARAMETER;
511 OvsPurgePacketQueue(queue, instance);
512 return STATUS_SUCCESS;
/*
 * Cancel routine for a pending datapath read/wait IRP (it is registered in
 * OvsWaitDpIoctl).
 *
 * It releases the system cancel spin lock, looks up the packet queue of the
 * IRP's file object under gOvsCtrlLock, clears queue->pendingIrp if it still
 * refers to this IRP, and completes the IRP with STATUS_CANCELLED.
 *
 * NOTE(review): the 'irp' parameter, some conditions and the jump targets of
 * the early-out branches are not visible in this extract.
 */
516 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
519 PIO_STACK_LOCATION irpSp;
520 PFILE_OBJECT fileObject;
521 POVS_OPEN_INSTANCE instance;
522 POVS_USER_PACKET_QUEUE queue = NULL;
524 UNREFERENCED_PARAMETER(deviceObject);
/* Drop the cancel spin lock right away, at the IRQL saved in the IRP. */
526 IoReleaseCancelSpinLock(irp->CancelIrql);
527 irpSp = IoGetCurrentIrpStackLocation(irp);
528 fileObject = irpSp->FileObject;
530 if (fileObject == NULL) {
/* Read the instance and its queue under the control lock. */
533 NdisAcquireSpinLock(gOvsCtrlLock);
534 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
536 queue = instance->packetQueue;
538 if (instance == NULL || queue == NULL) {
539 NdisReleaseSpinLock(gOvsCtrlLock);
542 NdisReleaseSpinLock(gOvsCtrlLock);
/* Forget the IRP if the queue still points at it. */
543 NdisAcquireSpinLock(&queue->queueLock);
544 if (queue->pendingIrp == irp) {
545 queue->pendingIrp = NULL;
547 NdisReleaseSpinLock(&queue->queueLock);
549 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * Parks 'irp' on the instance's packet queue until a packet arrives.
 *
 * Visible return values:
 *  - STATUS_INVALID_PARAMETER: early-out before the lock is taken
 *    (presumably no queue -- the condition is not visible), or the queue is
 *    not owned by this instance.
 *  - STATUS_DEVICE_BUSY: another IRP is already pending on the queue.
 *  - STATUS_PENDING: the queue is empty and the IRP was stored as pendingIrp.
 *  - STATUS_SUCCESS: the initial value, kept when packets are already queued.
 *
 * NOTE(review): the conditions around the cancellation handling and the
 * final return are not visible in this extract.
 */
554 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
556 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
557 POVS_USER_PACKET_QUEUE queue =
558 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
559 NTSTATUS status = STATUS_SUCCESS;
560 BOOLEAN cancelled = FALSE;
563 return STATUS_INVALID_PARAMETER;
565 NdisAcquireSpinLock(&queue->queueLock);
566 if (queue->instance != instance) {
567 NdisReleaseSpinLock(&queue->queueLock);
568 return STATUS_INVALID_PARAMETER;
/* Only one waiter per queue is allowed. */
570 if (queue->pendingIrp) {
571 NdisReleaseSpinLock(&queue->queueLock);
572 return STATUS_DEVICE_BUSY;
/* Nothing queued: mark the IRP pending and register the cancel routine. */
574 if (queue->numPackets == 0) {
575 PDRIVER_CANCEL cancelRoutine;
576 IoMarkIrpPending(irp);
577 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
/* The cancel routine is withdrawn again here; presumably this runs only when
 * the IRP was already cancelled (the condition is not visible in this
 * extract). */
579 cancelRoutine = IoSetCancelRoutine(irp, NULL);
584 queue->pendingIrp = irp;
586 status = STATUS_PENDING;
588 NdisReleaseSpinLock(&queue->queueLock);
/* Complete a cancelled IRP outside the lock (presumably guarded by
 * 'cancelled' -- the condition is not visible in this extract). */
590 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
591 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/* Removes and returns the head element of the instance's packet queue. The
 * queue is left untouched when it is not owned by 'instance' or holds no
 * packets (the value returned on that path, and the numPackets update after
 * the removal, are not visible in this extract). */
597 POVS_PACKET_QUEUE_ELEM
598 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
600 POVS_USER_PACKET_QUEUE queue;
602 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
606 NdisAcquireSpinLock(&queue->queueLock);
607 if (queue->instance != instance || queue->numPackets == 0) {
608 NdisReleaseSpinLock(&queue->queueLock);
/* Detach the oldest element while the lock is held. */
611 link = RemoveHeadList(&queue->packetList);
613 NdisReleaseSpinLock(&queue->queueLock);
614 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
618 * ---------------------------------------------------------------------------
619 * Given a pid, returns the corresponding USER_PACKET_QUEUE.
620 * ---------------------------------------------------------------------------
/* Looks up the open instance registered for 'pid' and returns its packet
 * queue; 'ret' starts as NULL. OvsGetPidInstance() is documented as requiring
 * pidHashLock, so the caller must hold it. The instance NULL check and the
 * return statement are not visible in this extract. */
622 POVS_USER_PACKET_QUEUE
623 OvsGetQueue(UINT32 pid)
625 POVS_OPEN_INSTANCE instance;
626 POVS_USER_PACKET_QUEUE ret = NULL;
628 instance = OvsGetPidInstance(gOvsSwitchContext, pid);
631 ret = instance->packetQueue;
638 * ---------------------------------------------------------------------------
639 * Given a pid, returns the corresponding instance.
640 * pidHashLock must be acquired before calling this API.
641 * ---------------------------------------------------------------------------
/* Hashes 'pid' to a bucket of switchContext->pidHashArray and walks that
 * bucket looking for an instance whose pid matches. The return statements
 * are not visible in this extract. */
644 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
646 POVS_OPEN_INSTANCE instance;
647 PLIST_ENTRY head, link;
648 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
/* The bucket index is the hash masked with OVS_PID_MASK. */
650 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
651 LIST_FORALL(head, link) {
652 instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
653 if (instance->pid == pid) {
661 * ---------------------------------------------------------------------------
662 * Given a pid and an instance, this API adds the instance to pidHashArray.
663 * pidHashLock must be acquired before calling this API.
664 * ---------------------------------------------------------------------------
/* Inserts 'instance' at the head of the pidHashArray bucket selected by the
 * hash of 'pid'. No check for an existing entry with the same pid is visible
 * here. */
667 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
668 POVS_OPEN_INSTANCE instance)
671 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
673 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
674 InsertHeadList(head, &(instance->pidLink));
678 * ---------------------------------------------------------------------------
679 * Given a pid, this API removes the matching instance from pidHashArray.
680 * pidHashLock must be acquired before calling this API.
681 * ---------------------------------------------------------------------------
/* Finds the instance registered for 'pid' and unlinks it from its hash
 * bucket. The NULL check on the lookup result is not visible in this
 * extract. */
684 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
686 POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
689 RemoveEntryList(&(instance->pidLink));
/*
 * Distributes the elements of 'packetList' to the upcall queues selected by
 * each element's upcallPid.
 *
 * Elements whose pid has no queue, or whose queue is no longer attached to an
 * instance, are collected on a local drop list and freed at the end. When a
 * target queue has a pending IRP, that IRP is taken off the queue and
 * completed with STATUS_SUCCESS.
 *
 * NOTE(review): the 'numElems' parameter, some locals ('irp', 'link', 'num')
 * and several conditions are not visible in this extract.
 */
694 OvsQueuePackets(PLIST_ENTRY packetList,
697 POVS_USER_PACKET_QUEUE upcallQueue = NULL;
698 POVS_PACKET_QUEUE_ELEM elem;
702 LIST_ENTRY dropPackets;
704 OVS_LOG_LOUD("Enter: numELems: %u", numElems);
706 InitializeListHead(&dropPackets);
708 while (!IsListEmpty(packetList)) {
709 link = RemoveHeadList(packetList);
710 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/* The pid hash lock is held for the lookup and for the enqueue below. */
714 OvsAcquirePidHashLock();
716 upcallQueue = OvsGetQueue(elem->upcallPid);
718 /* No upcall queue found, drop this packet. */
719 InsertTailList(&dropPackets, &elem->link);
721 NdisAcquireSpinLock(&upcallQueue->queueLock);
/* A queue without an instance is being torn down: drop the packet. */
723 if (upcallQueue->instance == NULL) {
724 InsertTailList(&dropPackets, &elem->link);
726 InsertTailList(&upcallQueue->packetList, &elem->link);
727 upcallQueue->numPackets++;
/* Wake up a reader that is waiting on this queue. */
728 if (upcallQueue->pendingIrp) {
729 PDRIVER_CANCEL cancelRoutine;
730 irp = upcallQueue->pendingIrp;
731 upcallQueue->pendingIrp = NULL;
732 cancelRoutine = IoSetCancelRoutine(irp, NULL);
733 if (cancelRoutine == NULL) {
740 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
743 NdisReleaseSpinLock(&upcallQueue->queueLock);
746 OvsReleasePidHashLock();
/* Free everything that could not be delivered. */
749 while (!IsListEmpty(&dropPackets)) {
750 link = RemoveHeadList(&dropPackets);
751 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
752 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
756 OVS_LOG_LOUD("Exit: drop %u packets", num);
760 *----------------------------------------------------------------------------
761 * OvsCreateAndAddPackets --
763 * Create a packet and forward it to user space.
765 * This function would fragment packet if needed, and queue
766 * each segment to user space.
767 *----------------------------------------------------------------------------
/* Builds one queue element per NET_BUFFER and appends each to 'list'. For
 * TCP packets that carry a non-zero LSO MSS the NBL is first segmented with
 * OvsTcpSegmentNBL(); NDIS_STATUS_FAILURE is returned if that fails.
 * NOTE(review): most parameters, the loop construct and some conditions are
 * not visible in this extract. */
770 OvsCreateAndAddPackets(PVOID userData,
775 PNET_BUFFER_LIST nbl,
777 POVS_PACKET_HDR_INFO hdrInfo,
778 POVS_SWITCH_CONTEXT switchContext,
782 POVS_PACKET_QUEUE_ELEM elem;
783 PNET_BUFFER_LIST newNbl = NULL;
786 if (hdrInfo->isTcp) {
787 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
/* Read the large-send-offload info attached to the NBL. */
790 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
791 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
792 packetLength = NET_BUFFER_DATA_LENGTH(nb);
794 OVS_LOG_TRACE("MSS %u packet len %u",
795 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS means the packet must be split into MSS-sized segments. */
796 if (tsoInfo.LsoV1Transmit.MSS) {
797 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
798 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
799 tsoInfo.LsoV1Transmit.MSS , 0);
800 if (newNbl == NULL) {
801 return NDIS_STATUS_FAILURE;
807 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
/* Create a queue element for the current NET_BUFFER and append it. */
809 elem = OvsCreateQueueNlPacket(userData, userDataLen,
810 cmd, inPort, key, nbl, nb,
813 InsertTailList(list, &elem->link);
816 nb = NET_BUFFER_NEXT_NB(nb);
/* Release the segmented copy (presumably only when one was created -- the
 * condition is not visible in this extract). */
819 OvsCompleteNBL(switchContext, newNbl, TRUE);
821 return NDIS_STATUS_SUCCESS;
/* Computes the buffer size needed for an upcall Netlink message: the aligned
 * OVS header, the packet payload attribute and the flow key attribute, plus
 * the user-data and tunnel-key attributes (the conditions guarding the two
 * additions, and the return statement, are not visible in this extract). */
824 static __inline UINT32
825 OvsGetUpcallMsgSize(PVOID userData,
827 OvsIPv4TunnelKey *tunnelKey,
830 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
831 NlAttrSize(payload) +
832 NlAttrSize(OvsFlowKeyAttrSize());
834 /* OVS_PACKET_ATTR_USERDATA */
836 size += NlAttrTotalSize(userDataLen);
838 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
839 /* XXX: Is it already included in the flow key attr? */
841 size += NlAttrTotalSize(OvsTunKeyAttrSize());
847 *----------------------------------------------------------------------------
848 * This function completes the IP header csum and records the L4 payload
849 * length and whether the TCP or UDP csum must be calculated. The actual csum
850 * is calculated simultaneously with the copy of the payload to the
851 * destination buffer when the packet is read.
852 *----------------------------------------------------------------------------
/*
 * Parameters visible in this extract:
 *   packet     - start of the copied frame.
 *   csumInfo   - NDIS checksum offload info for the NBL.
 *   hdrInfoIn  - header offsets and flags from flow extraction.
 *   hdrInfoOut - header info stored with the queue element; receives
 *                l4PayLoad and the tcpCsumNeeded / udpCsumNeeded flags.
 * 'isRecv' is used below but its declaration is not visible in this extract.
 */
855 OvsCompletePacketHeader(UINT8 *packet,
857 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
858 POVS_PACKET_HDR_INFO hdrInfoIn,
859 POVS_PACKET_HDR_INFO hdrInfoOut)
/* Recompute the IPv4 header checksum when the receive path flags it invalid
 * or the transmit path requested IP header checksum offload. */
861 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
862 (!isRecv && csumInfo.Transmit.IsIPv4 &&
863 csumInfo.Transmit.IpHeaderChecksum)) {
/* NOTE(review): this uses hdrInfoOut->l3Offset while the code below uses
 * hdrInfoIn->l3Offset; confirm both hold the same offset at this point. */
864 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
865 ASSERT(hdrInfoIn->isIPv4);
866 ASSERT(ipHdr->Version == 4);
867 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
868 ipHdr->HeaderLength << 2,
869 (UINT16)~ipHdr->HeaderChecksum);
870 ovsUserStats.ipCsum++;
/* NOTE(review): the assertion checks tcpCsumNeeded on hdrInfoIn but
 * udpCsumNeeded on hdrInfoOut; this looks inconsistent -- confirm intent. */
872 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
874 * calculate TCP/UDP pseudo checksum
876 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
878 * Only this case, we need to reclaculate pseudo checksum
879 * all other cases, it is assumed the pseudo checksum is
883 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
884 if (hdrInfoIn->isIPv4) {
885 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
/* L4 length for IPv4 = IP total length minus the IP header length. */
886 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
887 (ipHdr->HeaderLength << 2));
888 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
889 (UINT32 *)&ipHdr->DestinationAddress,
890 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
892 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
/* L4 length for IPv6 = payload length plus the end of the fixed IPv6 header
 * minus the L4 offset, i.e. the payload length without whatever lies between
 * the fixed header and the L4 header. */
893 hdrInfoOut->l4PayLoad =
894 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
895 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
896 hdrInfoIn->l4Offset);
897 ASSERT(hdrInfoIn->isIPv6);
899 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
900 (UINT32 *)&ipv6Hdr->DestinationAddress,
901 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
/* The full TCP checksum is finished later, when the packet is read. */
903 hdrInfoOut->tcpCsumNeeded = 1;
904 ovsUserStats.recalTcpCsum++;
905 } else if (!isRecv) {
/* Transmit path: record which L4 checksum was left to offload. */
906 if (csumInfo.Transmit.TcpChecksum) {
907 hdrInfoOut->tcpCsumNeeded = 1;
908 } else if (csumInfo.Transmit.UdpChecksum) {
909 hdrInfoOut->udpCsumNeeded = 1;
911 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
915 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
917 if (hdrInfoIn->isIPv4) {
918 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
919 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
920 (ipHdr->HeaderLength << 2));
922 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
923 (UINT32 *)&ipHdr->DestinationAddress,
924 proto, hdrInfoOut->l4PayLoad);
927 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
928 hdrInfoIn->l3Offset);
929 hdrInfoOut->l4PayLoad =
930 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
931 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
932 hdrInfoIn->l4Offset);
933 ASSERT(hdrInfoIn->isIPv6);
935 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
936 (UINT32 *)&ipv6Hdr->DestinationAddress,
937 proto, hdrInfoOut->l4PayLoad);
/* 'ptr' addresses the checksum field of the L4 header; the store of the
 * pseudo checksum through it is not visible in this extract. */
941 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
942 (hdrInfoOut->tcpCsumNeeded ?
943 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/* Stores the upcall pid of 'vport' in '*pid' and returns STATUS_SUCCESS.
 * 'nb' is currently unused. Any validation of 'vport' or 'pid' is not
 * visible in this extract. */
951 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
953 UNREFERENCED_PARAMETER(nb);
957 /* XXX select a pid from an array of pids using a flow based hash */
958 *pid = vport->upcallPid;
959 return STATUS_SUCCESS;
963 *----------------------------------------------------------------------------
964 * OvsCreateQueueNlPacket --
966 * Create a packet which will be forwarded to user space.
969 * userData: when cmd is user action, this field contain
971 * userDataLen: as name indicated
972 * cmd: either miss or user action
973 * inPort: datapath port id from which the packet is received.
974 * key: flow Key with a tunnel key if available
975 * nbl: the NET_BUFFER_LIST which contain the packet
977 * isRecv: This is used to decide how to interpret the csum info
978 * hdrInfo: include hdr info initialized during flow extraction.
981 * NULL if fail to create the packet
982 * The packet element otherwise
983 *----------------------------------------------------------------------------
/*
 * Implementation outline, as far as it is visible in this extract:
 *   1. Resolve the vport for 'inPort' and its upcall pid.
 *   2. Drop packets with a failed receive checksum.
 *   3. Size and allocate a queue element holding the Netlink message.
 *   4. Emit the OVS header, the flow key, optional user data and the packet
 *      attribute, copying the frame out of the NET_BUFFER.
 *   5. Complete the header checksums and insert a VLAN tag if one is set.
 *   6. Record the final message length; the element is freed on the failure
 *      path.
 * NOTE(review): several parameters, locals, conditions, labels and return
 * statements are not visible in this extract.
 */
985 POVS_PACKET_QUEUE_ELEM
986 OvsCreateQueueNlPacket(PVOID userData,
991 PNET_BUFFER_LIST nbl,
994 POVS_PACKET_HDR_INFO hdrInfo)
/* Size in bytes of an 802.1Q tag (TPID + TCI). */
996 #define VLAN_TAG_SIZE 4
997 UINT32 allocLen, dataLen, extraLen;
998 POVS_PACKET_QUEUE_ELEM elem;
1000 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
1001 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
1002 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
1008 /* XXX pass vport in the stack rather than portNo */
1009 POVS_VPORT_ENTRY vport =
1010 OvsFindVportByPortNo(gOvsSwitchContext, inPort);
1013 /* No vport is not fatal. */
1017 OvsGetPid(vport, nb, &pid);
1021 * There is no userspace queue created yet, so there is no point for
1022 * creating a new packet to be queued.
1027 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
/* Received packets with a failed IP/TCP checksum, or a failed UDP checksum
 * unless udpCsumZero is set, are counted and dropped. */
1029 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1030 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1031 csumInfo.Receive.IpChecksumFailed)) {
1032 OVS_LOG_INFO("Packet dropped due to checksum failure.");
1033 ovsUserStats.dropDuetoChecksum++;
/* Reserve room for a VLAN tag when the NBL carries a non-zero VLAN id. */
1037 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1038 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1040 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/* The packet attribute must fit in a 16-bit length. */
1042 if (NlAttrSize(dataLen) > MAXUINT16) {
1046 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1047 dataLen + extraLen);
/* The Netlink message lives in the same allocation as the element. */
1049 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1050 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemoryWithTag(allocLen,
1053 ovsUserStats.dropDuetoResource++;
1056 elem->hdrInfo.value = hdrInfo->value;
1057 elem->upcallPid = pid;
1058 elem->packet.totalLen = nlMsgSize;
1059 /* XXX remove queueid */
1060 elem->packet.queue = 0;
1061 /* XXX no need as the length is already in the NL attrib */
1062 elem->packet.userDataLen = userDataLen;
1063 elem->packet.inPort = inPort;
1064 elem->packet.cmd = cmd;
1065 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1066 ovsUserStats.miss++;
1067 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1068 ovsUserStats.action++;
1073 /* XXX Should we have both packetLen and TotalLen*/
1074 elem->packet.packetLen = dataLen + extraLen;
1076 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1079 * Initialize the OVS header
1080 * Since we are pre allocating memory for the NL buffer
1081 * the attribute settings should not fail
1083 if (!NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1084 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1085 gOvsSwitchContext->dpNo)) {
1089 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1090 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1094 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1096 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1097 userData, (UINT16)userDataLen)) {
1103 * Make space for the payload to be copied and set the attribute
1104 * XXX Uninit set initilizes the buffer with xero, we don't actually need
1105 * that the payload to be initailized
1107 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1108 (UINT16)(dataLen + extraLen));
1113 /* Store the payload for csum calculation when packet is read */
1114 elem->packet.payload = dst;
/* NdisGetDataBuffer returns a pointer to the contiguous data, or 'dst' when
 * it had to copy into the supplied storage; only the former needs an
 * explicit copy. */
1117 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1119 ovsUserStats.dropDuetoResource++;
1121 } else if (src != dst) {
1122 /* Copy the data from the NDIS buffer to dst. */
1123 RtlCopyMemory(dst, src, dataLen);
1126 /* Set csum if was offloaded */
1127 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1130 * Finally insert VLAN tag
/* Move the first 12 bytes of the frame (three 32-bit words) to the start of
 * the payload, opening a gap of extraLen bytes for the tag. This presumes
 * the frame was copied at payload + extraLen; that adjustment is not visible
 * in this extract. */
1133 dst = elem->packet.payload;
1134 src = dst + extraLen;
1135 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1136 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1137 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
/* Write the 802.1Q TPID (0x8100) and the TCI (VLAN id | priority << 13);
 * presumably 'dst' has been advanced past the moved bytes first -- that step
 * is not visible in this extract. */
1139 ((UINT16 *)dst)[0] = htons(0x8100);
1140 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1141 (vlanInfo.TagHeader.UserPriority << 13));
/* The L3 and L4 headers moved back by the size of the inserted tag. */
1142 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1143 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1144 ovsUserStats.vlanInsert++;
/* Patch the Netlink header with the number of bytes actually written. */
1147 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1148 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1149 /* 'totalLen' should be size of valid data. */
1150 elem->packet.totalLen = nlMsg->nlmsgLen;
/* Failure path: release the partially built element. */
1154 OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);