2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * Manage packet queue for packet miss for userAction.
34 #include "TunnelIntf.h"
/* Debug module selector for this file; presumably consumed by the OVS_LOG_*
 * macros used below -- confirm against the debug header. */
40 #define OVS_DBG_MOD OVS_DBG_USER
/* Forward declaration; OvsGetNextPacket is defined later in this file. */
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Control lock and switch context are defined in another translation unit. */
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by the code in this file (miss, action, ipCsum, l4Csum,
 * vlanInsert, dropDuetoChecksum, dropDuetoResource, ...). */
46 OVS_USER_STATS ovsUserStats;
/* Forward declaration; the definition follows OvsNlExecuteCmdHandler. */
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49 OvsPacketExecute *execute);
/* Flow-key attribute policy defined elsewhere; handed to NlAttrParseNested()
 * by OvsNlExecuteCmdHandler. */
50 extern NL_POLICY nlFlowKeyPolicy[];
/*
 * OvsPurgePacketQueue --
 *    Discards the packets currently queued on 'queue', provided the queue is
 *    owned by 'instance'.
 *
 *    The backlog is handed to a local list 'tmp' while queue->queueLock is
 *    held; the list is walked only after the lock has been released.
 *
 * NOTE(review): the embedded line numbers skip (57, 64, 75, ...), so this
 * listing is missing lines: the declaration of 'tmp', the early return on
 * owner mismatch and whatever is done with each 'elem' are not visible.
 * Presumably each element is freed -- confirm against the full file.
 */
53 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
54 POVS_OPEN_INSTANCE instance)
56 PLIST_ENTRY link, next;
58 POVS_PACKET_QUEUE_ELEM elem;
60 InitializeListHead(&tmp);
61 NdisAcquireSpinLock(&queue->queueLock);
/* Only the instance that owns the queue may purge it. */
62 if (queue->instance != instance) {
63 NdisReleaseSpinLock(&queue->queueLock);
/* Hand the backlog to 'tmp' and reset the count while the lock is held. */
67 if (queue->numPackets) {
68 OvsAppendList(&tmp, &queue->packetList);
69 queue->numPackets = 0;
71 NdisReleaseSpinLock(&queue->queueLock);
/* The detached elements are processed outside the spin lock. */
72 LIST_FORALL_SAFE(&tmp, link, next) {
73 RemoveEntryList(link);
74 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
/*
 * OvsCleanupPacketQueue --
 *    Tears down the packet queue attached to 'instance': detaches the queue
 *    from the instance, takes over any pending IRP, frees the queue's spin
 *    lock, walks the leftover packets, completes the IRP with STATUS_SUCCESS
 *    and frees the queue memory.
 *
 * NOTE(review): lines are missing from this listing (the declarations of
 * 'tmp' and 'irp', the guards around the IRP handling, the body of the
 * cancelRoutine == NULL branch and the per-element action in the loop).
 * The comments below describe only what is visible.
 */
80 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
82 POVS_USER_PACKET_QUEUE queue;
83 POVS_PACKET_QUEUE_ELEM elem;
84 PLIST_ENTRY link, next;
88 InitializeListHead(&tmp);
89 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
91 PDRIVER_CANCEL cancelRoutine;
92 NdisAcquireSpinLock(&queue->queueLock);
93 ASSERT(queue->instance == instance);
94 /* XXX Should not happen */
95 if (queue->instance != instance) {
96 NdisReleaseSpinLock(&queue->queueLock);
97 NdisFreeSpinLock(&queue->queueLock);
/* Take the backlog off the queue so it can be handled without the lock. */
101 if (queue->numPackets) {
102 OvsAppendList(&tmp, &queue->packetList);
103 queue->numPackets = 0;
/* Break the instance <-> queue association in both directions. */
105 queue->instance = NULL;
106 instance->packetQueue = NULL;
/* Claim the pending IRP (if any) so that nobody else completes it. */
107 irp = queue->pendingIrp;
108 queue->pendingIrp = NULL;
/* IoSetCancelRoutine() returns the previously registered routine; a NULL
 * result means the cancel routine was already cleared or is running. */
110 cancelRoutine = IoSetCancelRoutine(irp, NULL);
111 if (cancelRoutine == NULL) {
115 NdisReleaseSpinLock(&queue->queueLock);
116 NdisFreeSpinLock(&queue->queueLock);
118 LIST_FORALL_SAFE(&tmp, link, next) {
119 RemoveEntryList(link);
120 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
124 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
127 OvsFreeMemory(queue);
/*
 * OvsSubscribeDpIoctl --
 *    Subscribes ('join' set) or unsubscribes ('join' clear) the open
 *    instance 'instanceP' to packet upcalls.
 *
 *    Visible return values:
 *      STATUS_INVALID_PARAMETER  gOvsSwitchContext is NULL, or the request
 *                                does not match the current state (e.g. a
 *                                second subscribe).
 *      STATUS_NO_MEMORY          returned in the allocation path (the guard
 *                                itself is not visible in this listing).
 *      STATUS_SUCCESS            otherwise.
 *
 * NOTE(review): 'pid' and 'join' are used below but their declarations are
 * on lines missing from this listing -- presumably the remaining parameters.
 */
132 OvsSubscribeDpIoctl(PVOID instanceP,
136 POVS_USER_PACKET_QUEUE queue;
137 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
/* The switch context is only sampled under the control lock here; the lock
 * is dropped again before the queue is touched. */
139 OvsAcquireCtrlLock();
140 if (!gOvsSwitchContext) {
141 OvsReleaseCtrlLock();
142 return STATUS_INVALID_PARAMETER;
144 OvsReleaseCtrlLock();
/* Unsubscribe: destroy the queue, then drop the pid -> instance mapping. */
146 if (instance->packetQueue && !join) {
148 OvsCleanupPacketQueue(instance);
150 OvsAcquireCtrlLock();
151 /* Remove the instance from pidHashArray */
152 OvsDelPidInstance(gOvsSwitchContext, pid);
153 OvsReleaseCtrlLock();
/* Subscribe: allocate and initialise a fresh queue for this instance. */
155 } else if (instance->packetQueue == NULL && join) {
156 queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
158 return STATUS_NO_MEMORY;
160 InitializeListHead(&(instance->pidLink));
/* NOTE(review): instance->packetQueue is assigned here, before the queue is
 * zeroed and its lock allocated, and again further down under the queue
 * lock. The first assignment looks redundant -- confirm and consider
 * removing it. */
161 instance->packetQueue = queue;
162 RtlZeroMemory(queue, sizeof (*queue));
163 NdisAllocateSpinLock(&queue->queueLock);
164 NdisAcquireSpinLock(&queue->queueLock);
165 InitializeListHead(&queue->packetList);
167 queue->instance = instance;
168 instance->packetQueue = queue;
169 NdisReleaseSpinLock(&queue->queueLock);
171 OvsAcquireCtrlLock();
172 /* Insert the instance into pidHashArray */
173 OvsAddPidInstance(gOvsSwitchContext, pid, instance);
174 OvsReleaseCtrlLock();
177 /* user mode should call only once for subscribe */
178 return STATUS_INVALID_PARAMETER;
181 return STATUS_SUCCESS;
/*
 * OvsReadDpIoctl --
 *    Copies the next queued upcall packet of the calling instance into
 *    'outputBuffer', computing the L4 checksum during the copy when the
 *    element is flagged as needing one.
 *
 *    Visible return values:
 *      STATUS_INVALID_PARAMETER  the instance has no packet queue.
 *      STATUS_BUFFER_TOO_SMALL   outputLength is below
 *                                sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE.
 *      STATUS_SUCCESS            otherwise.
 *
 * NOTE(review): the remaining parameters, several local declarations, the
 * NULL check on 'elem', the store of the checksum through 'ptr' and the
 * release of 'elem' are on lines missing from this listing.
 */
186 OvsReadDpIoctl(PFILE_OBJECT fileObject,
191 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
192 POVS_PACKET_QUEUE_ELEM elem;
/* Byte offsets added to the start of the L4 header to reach its checksum
 * field (see the 'ptr' computation below). */
195 #define TCP_CSUM_OFFSET 16
196 #define UDP_CSUM_OFFSET 6
199 if (instance->packetQueue == NULL) {
200 return STATUS_INVALID_PARAMETER;
202 if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
203 return STATUS_BUFFER_TOO_SMALL;
206 elem = OvsGetNextPacket(instance);
209 * XXX revisit this later
211 len = elem->packet.totalLen > outputLength ? outputLength :
212 elem->packet.totalLen;
/* The checksum is only completed when the whole message fits in the
 * caller's buffer; a truncated copy takes the plain-copy path below. */
214 if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
215 len == elem->packet.totalLen) {
/* 'size' = bytes from the start of the message up to the L4 header. */
217 UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
218 elem->hdrInfo.l4Offset);
219 RtlCopyMemory(outputBuffer, &elem->packet.data, size);
220 ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
/* Copy the L4 portion and obtain its checksum in the same pass. */
221 sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
222 (UINT8 *)&elem->packet.data + size,
223 elem->hdrInfo.l4PayLoad, 0);
224 ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
225 (elem->hdrInfo.tcpCsumNeeded ?
226 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
228 ovsUserStats.l4Csum++;
230 RtlCopyMemory(outputBuffer, &elem->packet.data, len);
236 return STATUS_SUCCESS;
239 /* Helper function to allocate a Forwarding Context for an NBL.
 *
 * Forwards to the AllocateNetBufferListForwardingContext handler stored in
 * switchContext->NdisSwitchHandlers, passing the NDIS switch context and
 * 'nbl', and returns that handler's result unchanged.
 * NOTE(review): the return type line is missing from this listing. */
241 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
242 PNET_BUFFER_LIST nbl)
244 return switchContext->NdisSwitchHandlers.
245 AllocateNetBufferListForwardingContext(
246 switchContext->NdisSwitchContext, nbl);
250 * --------------------------------------------------------------------------
251 * This function allocates all the stuff necessary for creating an NBL from the
252 * input buffer of specified length, namely, a nonpaged data buffer of size
253 * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
254 * context yet. It also copies data from the specified buffer to the NBL.
255 * --------------------------------------------------------------------------
/* NOTE(review): the return type, the 'userBuffer' / 'length' parameters, the
 * declarations of 'nb', 'mdl' and 'data', and the failure checks are on
 * lines missing from this listing. */
258 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
263 PNET_BUFFER_LIST nbl = NULL;
/* Payloads larger than OVS_DEFAULT_DATA_SIZE use the variable-size
 * allocator, everything else the fixed-size one; both reserve
 * OVS_DEFAULT_HEADROOM_SIZE. */
267 if (length > OVS_DEFAULT_DATA_SIZE) {
268 nbl = OvsAllocateVariableSizeNBL(switchContext, length,
269 OVS_DEFAULT_HEADROOM_SIZE);
272 nbl = OvsAllocateFixSizeNBL(switchContext, length,
273 OVS_DEFAULT_HEADROOM_SIZE);
279 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
280 mdl = NET_BUFFER_CURRENT_MDL(nb);
/* NOTE(review): the MDL offset is added before any NULL check of the mapping
 * is visible. If the hidden check tests 'data', a failed mapping combined
 * with a non-zero offset would not be detected -- confirm. */
281 data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
282 NET_BUFFER_CURRENT_MDL_OFFSET(nb);
284 OvsCompleteNBL(switchContext, nbl, TRUE);
/* Copy the caller's bytes into the NBL's data area. */
288 NdisMoveMemory(data, userBuffer, length);
294 *----------------------------------------------------------------------------
295 * OvsNlExecuteCmdHandler --
296 * Handler for OVS_PACKET_CMD_EXECUTE command.
297 *----------------------------------------------------------------------------
/* Flow of the handler as visible here:
 *   1. parse the top-level packet attributes against nlPktExecPolicy;
 *   2. parse the nested flow-key attributes against nlFlowKeyPolicy;
 *   3. translate the attributes into an OvsPacketExecute and run it through
 *      OvsExecuteDpIoctl();
 *   4. on success write a reply header into the output buffer, otherwise
 *      (when an output buffer exists) write a Netlink error message and
 *      report STATUS_SUCCESS to the caller.
 * NOTE(review): the 'replyLen' parameter, the 'nlBuf' declaration, the
 * goto/exit handling after the parse failures and the final return are on
 * lines missing from this listing. */
300 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
303 NTSTATUS status = STATUS_SUCCESS;
304 POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
305 POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
306 PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
307 PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
308 POVS_HDR ovsHdr = &(msgIn->ovsHdr);
310 PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
311 PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
/* Attributes start right after the Netlink, Generic Netlink and OVS headers. */
313 UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
314 UINT32 keyAttrOffset = 0;
315 OvsPacketExecute execute;
316 NL_ERROR nlError = NL_ERROR_SUCCESS;
/* PACKET, KEY and ACTIONS are mandatory; USERDATA is optional. */
319 static const NL_POLICY nlPktExecPolicy[] = {
320 [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
321 [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
322 [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
323 [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
324 [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
328 RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
330 /* Get all the top level Flow attributes */
331 if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
332 nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
334 OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
336 status = STATUS_UNSUCCESSFUL;
/* Offset of the KEY attribute, derived from its pointer; the subtrahend is
 * on a line missing from this listing. */
340 keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
343 /* Get flow keys attributes */
344 if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
345 NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
346 nlFlowKeyPolicy, keyAttrs,
347 ARRAY_SIZE(keyAttrs))) != TRUE) {
348 OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
349 status = STATUS_UNSUCCESSFUL;
353 execute.dpNo = ovsHdr->dp_ifindex;
355 _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
357 status = OvsExecuteDpIoctl(&execute);
359 /* Default reply that we want to send */
360 if (status == STATUS_SUCCESS) {
361 NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
362 usrParamsCtx->outputLength);
364 /* Prepare nl Msg headers */
365 status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
366 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
367 genlMsgHdr->cmd, OVS_PACKET_VERSION,
370 if (status == STATUS_SUCCESS) {
371 *replyLen = msgOut->nlMsg.nlmsgLen;
374 /* Map NTSTATUS to NL_ERROR */
375 nlError = NlMapStatusToNlErr(status);
377 /* As of now there are no transactional errors in the implementation.
378 * Once we have them then we need to map status to correct
379 * nlError value, so that below mentioned code gets hit. */
380 if ((nlError != NL_ERROR_SUCCESS) &&
381 (usrParamsCtx->outputBuffer)) {
/* The failure is delivered in-band as a Netlink error message, so the
 * function's own status is reset to STATUS_SUCCESS. */
383 POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
384 usrParamsCtx->outputBuffer;
385 BuildErrorMsg(msgIn, msgError, nlError);
386 *replyLen = msgError->nlMsg.nlmsgLen;
387 status = STATUS_SUCCESS;
397 *----------------------------------------------------------------------------
398 * _MapNlAttrToOvsPktExec --
399 * Maps input Netlink attributes to OvsPacketExecute.
400 *----------------------------------------------------------------------------
/* Fills 'execute' with pointers into the parsed message; nothing is copied:
 *   packetBuf / packetLen    <- OVS_PACKET_ATTR_PACKET payload and size
 *   actions   / actionsLen   <- OVS_PACKET_ATTR_ACTIONS payload and size
 *   inPort                   <- OVS_KEY_ATTR_IN_PORT (32-bit value)
 * NOTE(review): keyAttrs[OVS_KEY_ATTR_IN_PORT] is read without a visible
 * NULL check; whether it can be absent depends on nlFlowKeyPolicy, which is
 * defined elsewhere -- confirm. */
403 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
404 OvsPacketExecute *execute)
406 execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
407 execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
409 execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
410 execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
412 execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
/*
 * OvsExecuteDpIoctl --
 *    Builds an NBL from the user-supplied packet in 'execute', extracts its
 *    flow key and runs the supplied actions on it, all while holding
 *    gOvsCtrlLock.
 *
 *    Visible status values: STATUS_INVALID_PARAMETER (no switch context or
 *    zero-length packet), STATUS_NO_MEMORY (NBL allocation path),
 *    STATUS_NOT_SUPPORTED / STATUS_UNSUCCESSFUL (mapped from the NDIS status
 *    of flow extraction or action execution), STATUS_SUCCESS otherwise.
 *
 * NOTE(review): the return type, several declarations ('ndisStatus', 'key',
 * 'actions'), the goto/label lines of the error paths, the vport NULL check
 * and the final return are on lines missing from this listing.
 */
416 OvsExecuteDpIoctl(OvsPacketExecute *execute)
418 NTSTATUS status = STATUS_SUCCESS;
420 LOCK_STATE_EX lockState;
421 PNET_BUFFER_LIST pNbl;
423 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
425 OVS_PACKET_HDR_INFO layers;
426 POVS_VPORT_ENTRY vport;
/* The control lock is taken directly here, whereas OvsSubscribeDpIoctl goes
 * through OvsAcquireCtrlLock(); presumably equivalent -- confirm. */
428 NdisAcquireSpinLock(gOvsCtrlLock);
429 if (gOvsSwitchContext == NULL) {
430 status = STATUS_INVALID_PARAMETER;
434 if (execute->packetLen == 0) {
435 status = STATUS_INVALID_PARAMETER;
439 actions = execute->actions;
444 * Allocate the NBL, copy the data from the userspace buffer. Allocate
445 * also, the forwarding context for the packet.
447 pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
450 status = STATUS_NO_MEMORY;
/* Source port information comes from the vport matching execute->inPort
 * when one is found; otherwise the default port id and NIC index 0 are
 * used. */
454 fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
455 vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
457 fwdDetail->SourcePortId = vport->portId;
458 fwdDetail->SourceNicIndex = vport->nicIndex;
460 fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
461 fwdDetail->SourceNicIndex = 0;
463 // XXX: Figure out if any of the other members of fwdDetail need to be set.
465 ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
467 if (ndisStatus == NDIS_STATUS_SUCCESS) {
/* gOvsCtrlLock (a spin lock) is held, hence DISPATCH_LEVEL; that is why the
 * read lock is taken with NDIS_RWL_AT_DISPATCH_LEVEL. */
468 ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
469 NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
470 NDIS_RWL_AT_DISPATCH_LEVEL);
471 ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
472 vport ? vport->portNo :
474 NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
475 &key, NULL, &layers, actions,
476 execute->actionsLen);
478 NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
/* Collapse the NDIS status into the NTSTATUS values this ioctl reports. */
480 if (ndisStatus != NDIS_STATUS_SUCCESS) {
481 if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
482 status = STATUS_NOT_SUPPORTED;
484 status = STATUS_UNSUCCESSFUL;
489 OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
492 NdisReleaseSpinLock(gOvsCtrlLock);
/*
 * OvsPurgeDpIoctl --
 *    Ioctl entry point that purges the packet queue of the open instance
 *    stored in fileObject->FsContext via OvsPurgePacketQueue().
 *    Returns STATUS_INVALID_PARAMETER on the visible error path and
 *    STATUS_SUCCESS after purging.
 *
 * NOTE(review): the condition guarding the STATUS_INVALID_PARAMETER return
 * is on a line missing from this listing; presumably a NULL check on
 * 'queue' -- confirm.
 */
498 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
500 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
501 POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
504 return STATUS_INVALID_PARAMETER;
506 OvsPurgePacketQueue(queue, instance);
507 return STATUS_SUCCESS;
/*
 * OvsCancelIrpDatapath --
 *    Cancel routine installed by OvsWaitDpIoctl() for a pending wait IRP.
 *    Releases the system cancel spin lock, clears queue->pendingIrp when it
 *    still refers to 'irp', and completes the IRP with STATUS_CANCELLED.
 *
 * NOTE(review): the 'irp' parameter line, the goto/label lines of the early
 * exits and the guard before 'queue = instance->packetQueue' are missing
 * from this listing. As shown, 'instance' is dereferenced before the
 * 'instance == NULL' test below -- confirm a guard exists on the hidden
 * line.
 */
511 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
514 PIO_STACK_LOCATION irpSp;
515 PFILE_OBJECT fileObject;
516 POVS_OPEN_INSTANCE instance;
517 POVS_USER_PACKET_QUEUE queue = NULL;
519 UNREFERENCED_PARAMETER(deviceObject);
/* Drop the cancel spin lock first, using the IRQL saved in the IRP. */
521 IoReleaseCancelSpinLock(irp->CancelIrql);
522 irpSp = IoGetCurrentIrpStackLocation(irp);
523 fileObject = irpSp->FileObject;
525 if (fileObject == NULL) {
/* Look up the instance and its queue under the control lock. */
528 NdisAcquireSpinLock(gOvsCtrlLock);
529 instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
531 queue = instance->packetQueue;
533 if (instance == NULL || queue == NULL) {
534 NdisReleaseSpinLock(gOvsCtrlLock);
537 NdisReleaseSpinLock(gOvsCtrlLock);
/* Forget the IRP only if the queue still points at this one. */
538 NdisAcquireSpinLock(&queue->queueLock);
539 if (queue->pendingIrp == irp) {
540 queue->pendingIrp = NULL;
542 NdisReleaseSpinLock(&queue->queueLock);
544 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
/*
 * OvsWaitDpIoctl --
 *    Parks 'irp' on the instance's packet queue until packets arrive.
 *
 *    Visible return values:
 *      STATUS_INVALID_PARAMETER  error path before the lock is taken, or the
 *                                queue is not owned by this instance.
 *      STATUS_DEVICE_BUSY        another IRP is already pending.
 *      STATUS_PENDING            the queue was empty and the IRP was marked
 *                                pending.
 *      STATUS_SUCCESS            initial value, i.e. packets were available.
 *
 * NOTE(review): the first guard (presumably 'queue == NULL'), the test that
 * decides between clearing the cancel routine and storing the IRP
 * (presumably irp->Cancel), the assignment of 'cancelled' and the final
 * return are on lines missing from this listing.
 */
549 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
551 POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
552 POVS_USER_PACKET_QUEUE queue =
553 (POVS_USER_PACKET_QUEUE)instance->packetQueue;
554 NTSTATUS status = STATUS_SUCCESS;
555 BOOLEAN cancelled = FALSE;
558 return STATUS_INVALID_PARAMETER;
560 NdisAcquireSpinLock(&queue->queueLock);
561 if (queue->instance != instance) {
562 NdisReleaseSpinLock(&queue->queueLock);
563 return STATUS_INVALID_PARAMETER;
/* Only one waiter per queue is supported. */
565 if (queue->pendingIrp) {
566 NdisReleaseSpinLock(&queue->queueLock);
567 return STATUS_DEVICE_BUSY;
569 if (queue->numPackets == 0) {
570 PDRIVER_CANCEL cancelRoutine;
/* Mark the IRP pending and register the cancel routine before it is
 * published in queue->pendingIrp. */
571 IoMarkIrpPending(irp);
572 IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
574 cancelRoutine = IoSetCancelRoutine(irp, NULL);
579 queue->pendingIrp = irp;
581 status = STATUS_PENDING;
583 NdisReleaseSpinLock(&queue->queueLock);
585 OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
586 OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
/*
 * OvsGetNextPacket --
 *    Removes the element at the head of the instance's packet list and
 *    returns it. The removal happens under queue->queueLock.
 *
 * NOTE(review): the 'link' declaration, the NULL-queue guard, the value
 * returned when the queue is empty or owned by another instance, and the
 * numPackets bookkeeping are on lines missing from this listing.
 */
592 POVS_PACKET_QUEUE_ELEM
593 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
595 POVS_USER_PACKET_QUEUE queue;
597 queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
601 NdisAcquireSpinLock(&queue->queueLock);
/* Nothing to hand out if the queue belongs to someone else or is empty. */
602 if (queue->instance != instance || queue->numPackets == 0) {
603 NdisReleaseSpinLock(&queue->queueLock);
606 link = RemoveHeadList(&queue->packetList);
608 NdisReleaseSpinLock(&queue->queueLock);
/* Convert the list link back into its enclosing queue element. */
609 return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
613 * ---------------------------------------------------------------------------
614 * Given a pid, returns the corresponding USER_PACKET_QUEUE.
615 * gOvsCtrlLock must be acquired before calling this API.
616 * ---------------------------------------------------------------------------
/* Looks the instance up through OvsGetPidInstance() and reads its
 * packetQueue; 'ret' starts out as NULL.
 * NOTE(review): the guard around the dereference of 'instance' and the
 * return statement are on lines missing from this listing. */
618 POVS_USER_PACKET_QUEUE
619 OvsGetQueue(UINT32 pid)
621 POVS_OPEN_INSTANCE instance;
622 POVS_USER_PACKET_QUEUE ret = NULL;
624 instance = OvsGetPidInstance(gOvsSwitchContext, pid);
627 ret = instance->packetQueue;
634 * ---------------------------------------------------------------------------
635 * Given a pid, returns the corresponding instance.
636 * gOvsCtrlLock must be acquired before calling this API.
637 * ---------------------------------------------------------------------------
/* Hashes the pid with OvsJhashBytes(), selects the bucket
 * pidHashArray[hash & OVS_PID_MASK] and scans it for an instance whose pid
 * matches.
 * NOTE(review): the return type, the hash seed argument and both return
 * statements (match / no match) are on lines missing from this listing. */
640 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
642 POVS_OPEN_INSTANCE instance;
643 PLIST_ENTRY head, link;
644 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
646 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
647 LIST_FORALL(head, link) {
648 instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
649 if (instance->pid == pid) {
657 * ---------------------------------------------------------------------------
658 * Given a pid and an instance. This API adds instance to pidHashArray.
659 * gOvsCtrlLock must be acquired before calling this API.
660 * ---------------------------------------------------------------------------
/* Uses the same bucket selection as OvsGetPidInstance() (OvsJhashBytes() of
 * the pid, masked with OVS_PID_MASK) and inserts instance->pidLink at the
 * head of that bucket.
 * NOTE(review): the 'head' declaration, the hash seed argument and any
 * assignment of instance->pid are on lines missing from this listing. */
663 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
664 POVS_OPEN_INSTANCE instance)
667 UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
669 head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
670 InsertHeadList(head, &(instance->pidLink));
674 * ---------------------------------------------------------------------------
675 * Given a pid and an instance. This API removes instance from pidHashArray.
676 * gOvsCtrlLock must be acquired before calling this API.
677 * ---------------------------------------------------------------------------
/* Finds the instance registered for 'pid' and unlinks its pidLink from the
 * hash bucket.
 * NOTE(review): the guard for a failed lookup is on a line missing from this
 * listing; presumably 'if (instance)' -- confirm. */
680 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
682 POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
685 RemoveEntryList(&(instance->pidLink));
/*
 * OvsQueuePackets --
 *    Appends the elements on 'packetList' to the user packet queue selected
 *    by 'queueId' and, if a wait IRP is pending on that queue, completes it
 *    with STATUS_SUCCESS after the queue lock has been released.
 *
 *    The queue is obtained through OvsGetQueue(), whose header requires
 *    gOvsCtrlLock to be held; presumably the caller holds it -- confirm.
 *
 * NOTE(review): the 'numElems' parameter, the declarations of 'irp', 'link'
 * and 'num', the goto/label lines of the drop path, the body of the
 * cancelRoutine == NULL branch and the per-element action in the final loop
 * are on lines missing from this listing.
 */
690 OvsQueuePackets(UINT32 queueId,
691 PLIST_ENTRY packetList,
694 POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
695 POVS_PACKET_QUEUE_ELEM elem;
700 OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
706 NdisAcquireSpinLock(&queue->queueLock);
/* A queue without an owning instance does not accept packets. */
707 if (queue->instance == NULL) {
708 NdisReleaseSpinLock(&queue->queueLock);
711 OvsAppendList(&queue->packetList, packetList);
712 queue->numPackets += numElems;
/* Claim the waiter under the lock; it is completed after the lock is
 * dropped. */
714 if (queue->pendingIrp) {
715 PDRIVER_CANCEL cancelRoutine;
716 irp = queue->pendingIrp;
717 queue->pendingIrp = NULL;
718 cancelRoutine = IoSetCancelRoutine(irp, NULL);
719 if (cancelRoutine == NULL) {
723 NdisReleaseSpinLock(&queue->queueLock);
725 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
/* Whatever is still on 'packetList' at this point is drained one element at
 * a time. */
729 while (!IsListEmpty(packetList)) {
730 link = RemoveHeadList(packetList);
731 elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
735 OVS_LOG_LOUD("Exit: drop %u packets", num);
740 *----------------------------------------------------------------------------
741 * OvsCreateAndAddPackets --
743 * Create a packet and forward it to user space.
745 * This function fragments the packet if needed, and queues
746 * each segment to user space.
747 *----------------------------------------------------------------------------
/* Visible behaviour: for a TCP packet whose LSO info carries a non-zero MSS
 * the NBL is segmented with OvsTcpSegmentNBL(); queue elements created by
 * OvsCreateQueueNlPacket() are appended to 'list' while stepping through the
 * NET_BUFFERs; the segmented NBL is completed before returning.
 * Returns NDIS_STATUS_FAILURE when segmentation fails, NDIS_STATUS_SUCCESS
 * otherwise.
 * NOTE(review): several parameters, the declarations of 'nb' and
 * 'packetLength', the switch of 'nbl' to 'newNbl', the loop header and the
 * guards around InsertTailList()/OvsCompleteNBL() are on lines missing from
 * this listing. */
750 OvsCreateAndAddPackets(PVOID userData,
755 PNET_BUFFER_LIST nbl,
757 POVS_PACKET_HDR_INFO hdrInfo,
758 POVS_SWITCH_CONTEXT switchContext,
762 POVS_PACKET_QUEUE_ELEM elem;
763 PNET_BUFFER_LIST newNbl = NULL;
766 if (hdrInfo->isTcp) {
767 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
/* Read the large-send-offload info attached to the NBL. */
770 tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
771 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
772 packetLength = NET_BUFFER_DATA_LENGTH(nb);
774 OVS_LOG_TRACE("MSS %u packet len %u",
775 tsoInfo.LsoV1Transmit.MSS, packetLength);
/* A non-zero MSS triggers segmentation of the NBL. */
776 if (tsoInfo.LsoV1Transmit.MSS) {
777 OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
778 newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
779 tsoInfo.LsoV1Transmit.MSS , 0);
780 if (newNbl == NULL) {
781 return NDIS_STATUS_FAILURE;
787 nb = NET_BUFFER_LIST_FIRST_NB(nbl);
789 elem = OvsCreateQueueNlPacket(userData, userDataLen,
790 cmd, inPort, key, nbl, nb,
793 InsertTailList(list, &elem->link);
796 nb = NET_BUFFER_NEXT_NB(nb);
799 OvsCompleteNBL(switchContext, newNbl, TRUE);
801 return NDIS_STATUS_SUCCESS;
/*
 * OvsGetUpcallMsgSize --
 *    Computes the buffer size for an upcall Netlink message: the aligned
 *    ovs_header, the packet payload attribute and the flow-key attribute,
 *    plus the user-data and tunnel-key attributes added further down.
 *
 * NOTE(review): the 'userDataLen' and 'payload' parameters, the conditions
 * guarding the two optional additions and the return statement are on lines
 * missing from this listing.
 */
804 static __inline UINT32
805 OvsGetUpcallMsgSize(PVOID userData,
807 OvsIPv4TunnelKey *tunnelKey,
810 UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
811 NlAttrSize(payload) +
812 NlAttrSize(OvsFlowKeyAttrSize());
814 /* OVS_PACKET_ATTR_USERDATA */
816 size += NlAttrTotalSize(userDataLen);
818 /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
819 /* XXX Is it already included in the flow key attr? */
821 size += NlAttrTotalSize(OvsTunKeyAttrSize());
827 *----------------------------------------------------------------------------
828 * This function completes the IP header csum, records the L4 payload length,
829 * and notes whether the TCP or UDP csum must be calculated. The actual csum is
830 * calculated simultaneously with the copy of the payload to the destination
831 * buffer when the packet is read.
832 *----------------------------------------------------------------------------
/* Parameters as visible here:
 *   packet      start of the copied frame whose headers are patched in place
 *   csumInfo    NDIS checksum offload info taken from the NBL
 *   hdrInfoIn   header offsets/flags from flow extraction (read)
 *   hdrInfoOut  per-element header info; l4PayLoad, tcpCsumNeeded and
 *               udpCsumNeeded are written here
 * NOTE(review): the return type, the 'isRecv' parameter, the declarations of
 * 'sum', 'ptr' and 'proto', the else lines of the IPv4/IPv6 branches, the
 * IPv6 th_sum assignment target and the store through 'ptr' are on lines
 * missing from this listing. */
835 OvsCompletePacketHeader(UINT8 *packet,
837 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
838 POVS_PACKET_HDR_INFO hdrInfoIn,
839 POVS_PACKET_HDR_INFO hdrInfoOut)
/* Step 1: fill in the IPv4 header checksum when it was left to offload
 * (receive: value flagged invalid; transmit: IPv4 header checksum requested). */
841 if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
842 (!isRecv && csumInfo.Transmit.IsIPv4 &&
843 csumInfo.Transmit.IpHeaderChecksum)) {
/* NOTE(review): this is the only place that takes l3Offset from hdrInfoOut;
 * every other access below uses hdrInfoIn -- confirm this is intended. */
844 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
845 ASSERT(hdrInfoIn->isIPv4);
846 ASSERT(ipHdr->Version == 4);
/* HeaderLength << 2 is the IPv4 header length in bytes. */
847 ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
848 ipHdr->HeaderLength << 2,
849 (UINT16)~ipHdr->HeaderChecksum);
850 ovsUserStats.ipCsum++;
/* NOTE(review): the assertion checks tcpCsumNeeded on hdrInfoIn but
 * udpCsumNeeded on hdrInfoOut -- confirm the mix is intended. */
852 ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
854 * calculate TCP/UDP pseudo checksum
856 if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
858 * Only this case, we need to reclaculate pseudo checksum
859 * all other cases, it is assumed the pseudo checksum is
863 PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
864 if (hdrInfoIn->isIPv4) {
865 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
/* L4 length = IPv4 total length minus the IPv4 header length. */
866 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
867 (ipHdr->HeaderLength << 2));
868 tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
869 (UINT32 *)&ipHdr->DestinationAddress,
870 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
872 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
/* L4 length = IPv6 payload length minus the bytes that sit between the
 * fixed IPv6 header and the L4 header. */
873 hdrInfoOut->l4PayLoad =
874 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
875 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
876 hdrInfoIn->l4Offset);
877 ASSERT(hdrInfoIn->isIPv6);
879 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
880 (UINT32 *)&ipv6Hdr->DestinationAddress,
881 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
883 hdrInfoOut->tcpCsumNeeded = 1;
884 ovsUserStats.recalTcpCsum++;
/* Transmit path: the offload request decides which L4 checksum is deferred. */
885 } else if (!isRecv) {
886 if (csumInfo.Transmit.TcpChecksum) {
887 hdrInfoOut->tcpCsumNeeded = 1;
888 } else if (csumInfo.Transmit.UdpChecksum) {
889 hdrInfoOut->udpCsumNeeded = 1;
891 if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
895 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
897 if (hdrInfoIn->isIPv4) {
898 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
899 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
900 (ipHdr->HeaderLength << 2));
902 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
903 (UINT32 *)&ipHdr->DestinationAddress,
904 proto, hdrInfoOut->l4PayLoad);
907 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
908 hdrInfoIn->l3Offset);
909 hdrInfoOut->l4PayLoad =
910 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
911 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
912 hdrInfoIn->l4Offset);
913 ASSERT(hdrInfoIn->isIPv6);
915 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
916 (UINT32 *)&ipv6Hdr->DestinationAddress,
917 proto, hdrInfoOut->l4PayLoad);
/* Address of the checksum field inside the TCP or UDP header. */
921 ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
922 (hdrInfoOut->tcpCsumNeeded ?
923 TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
/*
 * OvsGetPid --
 *    Stores the upcall pid configured on 'vport' in '*pid' and returns
 *    STATUS_SUCCESS. 'nb' is currently unused.
 *
 * NOTE(review): 'vport' is dereferenced without a NULL check in the visible
 * code, and the return type line is missing from this listing.
 */
931 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
933 UNREFERENCED_PARAMETER(nb);
935 /* XXX select a pid from an array of pids using a flow based hash */
936 *pid = vport->upcallPid;
937 return STATUS_SUCCESS;
941 *----------------------------------------------------------------------------
942 * OvsCreateQueueNlPacket --
944 * Create a packet which will be forwarded to user space.
947 * userData: when cmd is user action, this field contain
949 * userDataLen: as name indicated
950 * cmd: either miss or user action
951 * inPort: datapath port id from which the packet is received.
952 * key: flow Key with a tunnel key if available
953 * nbl: the NET_BUFFER_LIST which contains the packet
955 * isRecv: This is used to decide how to interpret the csum info
956 * hdrInfo: includes hdr info initialized during flow extraction.
959 * NULL if the packet cannot be created
960 * The packet element otherwise
961 *----------------------------------------------------------------------------
/* Visible steps: look up the vport and its upcall pid, reject packets whose
 * receive checksum is flagged as failed, size and allocate a queue element,
 * build the Netlink message (OVS header, flow key, optional user data,
 * packet payload), copy the frame out of the NET_BUFFER, complete the header
 * checksums, optionally re-insert the VLAN tag and fix up the message
 * length.
 * NOTE(review): this listing is missing lines throughout the function (some
 * parameters, the declarations of 'nlBuf', 'nlMsg', 'dst', 'src', 'pid' and
 * 'nlMsgSize', the guards and goto/label lines of every failure path, the
 * pointer adjustments around the VLAN handling, and the returns). The tail
 * of the function is not visible at all. */
963 POVS_PACKET_QUEUE_ELEM
964 OvsCreateQueueNlPacket(PVOID userData,
969 PNET_BUFFER_LIST nbl,
972 POVS_PACKET_HDR_INFO hdrInfo)
/* Number of bytes added to the frame when a VLAN tag is re-inserted. */
974 #define VLAN_TAG_SIZE 4
975 UINT32 allocLen, dataLen, extraLen;
976 POVS_PACKET_QUEUE_ELEM elem;
978 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
979 NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
980 OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
986 /* XXX pass vport in the stack rather than portNo */
987 POVS_VPORT_ENTRY vport =
988 OvsFindVportByPortNo(gOvsSwitchContext, inPort);
991 /* No vport is not fatal. */
995 OvsGetPid(vport, nb, &pid);
999 * There is no userspace queue created yet, so there is no point for
1000 * creating a new packet to be queued.
1005 csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
/* Received packets flagged with a failed IP/TCP checksum are dropped; a
 * failed UDP checksum is ignored when hdrInfo->udpCsumZero is set. */
1007 if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1008 (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1009 csumInfo.Receive.IpChecksumFailed)) {
1010 OVS_LOG_INFO("Packet dropped due to checksum failure.");
1011 ovsUserStats.dropDuetoChecksum++;
/* Room for the VLAN tag is reserved only when the NBL carries a VLAN id. */
1015 vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1016 extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1018 dataLen = NET_BUFFER_DATA_LENGTH(nb);
/* The payload attribute size is later passed as a UINT16, so it has to fit. */
1020 if (NlAttrSize(dataLen) > MAXUINT16) {
1024 nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1025 dataLen + extraLen);
/* The Netlink message is stored inline, directly after the element header. */
1027 allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1028 elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
1030 ovsUserStats.dropDuetoResource++;
1033 elem->hdrInfo.value = hdrInfo->value;
1034 elem->packet.totalLen = nlMsgSize;
1035 /* XXX remove queueid */
1036 elem->packet.queue = 0;
1037 /* XXX no need as the length is already in the NL attrib */
1038 elem->packet.userDataLen = userDataLen;
1039 elem->packet.inPort = inPort;
1040 elem->packet.cmd = cmd;
1041 if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1042 ovsUserStats.miss++;
1043 } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1044 ovsUserStats.action++;
1049 /* XXX Should we have both packetLen and TotalLen*/
1050 elem->packet.packetLen = dataLen + extraLen;
1052 NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1055 * Initialize the OVS header
1056 * Since we are pre allocating memory for the NL buffer
1057 * the attribute settings should not fail
1059 if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1060 0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1061 gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1065 if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1066 OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1070 /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswitchd */
1072 if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1073 userData, (UINT16)userDataLen)) {
1079 * Make space for the payload to be copied and set the attribute
1080 * XXX Uninit set initilizes the buffer with xero, we don't actually need
1081 * that the payload to be initailized
1083 dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1084 (UINT16)(dataLen + extraLen));
1089 /* Store the payload for csum calculation when packet is read */
1090 elem->packet.payload = dst;
/* NdisGetDataBuffer() returns a pointer into the NET_BUFFER when the data is
 * contiguous, otherwise it copies into the supplied storage ('dst') and
 * returns that; hence the 'src != dst' test below. */
1093 src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1095 ovsUserStats.dropDuetoResource++;
1097 } else if (src != dst) {
1098 /* Copy the data from the NDIS buffer to dst. */
1099 RtlCopyMemory(dst, src, dataLen);
1102 /* Set csum if was offloaded */
1103 OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1106 * Finally insert VLAN tag
/* Slide the first 12 bytes of the frame back by 'extraLen' (three 32-bit
 * copies), making room for the tag; presumably the frame was written
 * 'extraLen' bytes into the payload area by a hidden line -- confirm. */
1109 dst = elem->packet.payload;
1110 src = dst + extraLen;
1111 ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1112 ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1113 ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
/* Tag = 0x8100 followed by the VLAN id with the priority in the top three
 * bits. */
1115 ((UINT16 *)dst)[0] = htons(0x8100);
1116 ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1117 (vlanInfo.TagHeader.UserPriority << 13));
/* L3/L4 offsets recorded for the element shift by the inserted tag. */
1118 elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1119 elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1120 ovsUserStats.vlanInsert++;
/* Patch the final message length into the Netlink header. */
1123 nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1124 nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1125 /* 'totalLen' should be size of valid data. */
1126 elem->packet.totalLen = nlMsg->nlmsgLen;
1130 OvsFreeMemory(elem);