datapath-windows: Fixes in packet created for userspace
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Switch.h"
26 #include "Vport.h"
27 #include "Event.h"
28 #include "User.h"
29 #include "Datapath.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35
36 #ifdef OVS_DBG_MOD
37 #undef OVS_DBG_MOD
38 #endif
39 #define OVS_DBG_MOD OVS_DBG_USER
40 #include "Debug.h"
41
42 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
43 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
44 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
45 OVS_USER_STATS ovsUserStats;
46
47 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
48                                    OvsPacketExecute  *execute);
49 extern NL_POLICY nlFlowKeyPolicy[];
50
51 static VOID
52 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
53                     POVS_OPEN_INSTANCE instance)
54 {
55     PLIST_ENTRY link, next;
56     LIST_ENTRY tmp;
57     POVS_PACKET_QUEUE_ELEM elem;
58
59     InitializeListHead(&tmp);
60     NdisAcquireSpinLock(&queue->queueLock);
61     if (queue->instance != instance) {
62         NdisReleaseSpinLock(&queue->queueLock);
63         return;
64     }
65
66     if (queue->numPackets) {
67         OvsAppendList(&tmp, &queue->packetList);
68         queue->numPackets = 0;
69     }
70     NdisReleaseSpinLock(&queue->queueLock);
71     LIST_FORALL_SAFE(&tmp, link, next) {
72         RemoveEntryList(link);
73         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
74         OvsFreeMemory(elem);
75     }
76 }
77
78 VOID
79 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
80 {
81     POVS_USER_PACKET_QUEUE queue;
82     POVS_PACKET_QUEUE_ELEM elem;
83     PLIST_ENTRY link, next;
84     LIST_ENTRY tmp;
85     PIRP irp = NULL;
86
87     InitializeListHead(&tmp);
88     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
89     if (queue) {
90         PDRIVER_CANCEL cancelRoutine;
91         NdisAcquireSpinLock(&queue->queueLock);
92         ASSERT(queue->instance == instance);
93         /* XXX Should not happen */
94         if (queue->instance != instance) {
95             NdisReleaseSpinLock(&queue->queueLock);
96             NdisFreeSpinLock(&queue->queueLock);
97             return;
98         }
99
100         if (queue->numPackets) {
101             OvsAppendList(&tmp, &queue->packetList);
102             queue->numPackets = 0;
103         }
104         queue->instance = NULL;
105         instance->packetQueue = NULL;
106         irp = queue->pendingIrp;
107         queue->pendingIrp = NULL;
108         if (irp) {
109             cancelRoutine = IoSetCancelRoutine(irp, NULL);
110             if (cancelRoutine == NULL) {
111                 irp = NULL;
112             }
113         }
114         NdisReleaseSpinLock(&queue->queueLock);
115         NdisFreeSpinLock(&queue->queueLock);
116     }
117     LIST_FORALL_SAFE(&tmp, link, next) {
118         RemoveEntryList(link);
119         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
120         OvsFreeMemory(elem);
121     }
122     if (irp) {
123         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
124     }
125     if (queue) {
126         OvsFreeMemory(queue);
127     }
128 }
129
130 NTSTATUS
131 OvsSubscribeDpIoctl(PVOID instanceP,
132                     UINT32 pid,
133                     UINT8 join)
134 {
135     POVS_USER_PACKET_QUEUE queue;
136     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
137
138     if (instance->packetQueue && !join) {
139         /* unsubscribe */
140         OvsCleanupPacketQueue(instance);
141     } else if (instance->packetQueue == NULL && join) {
142         queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
143         if (queue == NULL) {
144             return STATUS_NO_MEMORY;
145         }
146         instance->packetQueue = queue;
147         RtlZeroMemory(queue, sizeof (*queue));
148         NdisAllocateSpinLock(&queue->queueLock);
149         NdisAcquireSpinLock(&queue->queueLock);
150         InitializeListHead(&queue->packetList);
151         queue->pid = pid;
152         queue->instance = instance;
153         instance->packetQueue = queue;
154         NdisReleaseSpinLock(&queue->queueLock);
155     } else {
156         /* user mode should call only once for subscribe */
157         return STATUS_INVALID_PARAMETER;
158     }
159     return STATUS_SUCCESS;
160 }
161
162
163 NTSTATUS
164 OvsReadDpIoctl(PFILE_OBJECT fileObject,
165                PVOID outputBuffer,
166                UINT32 outputLength,
167                UINT32 *replyLen)
168 {
169     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
170     POVS_PACKET_QUEUE_ELEM elem;
171     UINT32 len;
172
173 #define TCP_CSUM_OFFSET  16
174 #define UDP_CSUM_OFFSET  6
175     ASSERT(instance);
176
177     if (instance->packetQueue == NULL) {
178         return STATUS_INVALID_PARAMETER;
179     }
180     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
181         return STATUS_BUFFER_TOO_SMALL;
182     }
183
184     elem = OvsGetNextPacket(instance);
185     if (elem) {
186         /*
187          * XXX revisit this later
188          */
189         len = elem->packet.totalLen > outputLength ? outputLength :
190                  elem->packet.totalLen;
191
192         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
193             len == elem->packet.totalLen) {
194             UINT16 sum, *ptr;
195             UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
196                                   elem->hdrInfo.l4Offset);
197             RtlCopyMemory(outputBuffer, &elem->packet.data, size);
198             ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
199             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
200                                            (UINT8 *)&elem->packet.data + size,
201                                            elem->hdrInfo.l4PayLoad, 0);
202             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
203                             (elem->hdrInfo.tcpCsumNeeded ?
204                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
205             *ptr = sum;
206             ovsUserStats.l4Csum++;
207         } else {
208             RtlCopyMemory(outputBuffer, &elem->packet.data, len);
209         }
210
211         *replyLen = len;
212         OvsFreeMemory(elem);
213     }
214     return STATUS_SUCCESS;
215 }
216
217 /* Helper function to allocate a Forwarding Context for an NBL */
218 NTSTATUS
219 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
220                                    PNET_BUFFER_LIST nbl)
221 {
222     return switchContext->NdisSwitchHandlers.
223         AllocateNetBufferListForwardingContext(
224             switchContext->NdisSwitchContext, nbl);
225 }
226
227 /*
228  * --------------------------------------------------------------------------
229  * This function allocates all the stuff necessary for creating an NBL from the
230  * input buffer of specified length, namely, a nonpaged data buffer of size
231  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
232  * context yet. It also copies data from the specified buffer to the NBL.
233  * --------------------------------------------------------------------------
234  */
235 PNET_BUFFER_LIST
236 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
237                             PVOID userBuffer,
238                             ULONG length)
239 {
240     UINT8 *data = NULL;
241     PNET_BUFFER_LIST nbl = NULL;
242     PNET_BUFFER nb;
243     PMDL mdl;
244
245     if (length > OVS_DEFAULT_DATA_SIZE) {
246         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
247                                          OVS_DEFAULT_HEADROOM_SIZE);
248
249     } else {
250         nbl = OvsAllocateFixSizeNBL(switchContext, length,
251                                     OVS_DEFAULT_HEADROOM_SIZE);
252     }
253     if (nbl == NULL) {
254         return NULL;
255     }
256
257     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
258     mdl = NET_BUFFER_CURRENT_MDL(nb);
259     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
260                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
261     if (!data) {
262         OvsCompleteNBL(switchContext, nbl, TRUE);
263         return NULL;
264     }
265
266     NdisMoveMemory(data, userBuffer, length);
267
268     return nbl;
269 }
270
271 /*
272  *----------------------------------------------------------------------------
273  *  OvsNlExecuteCmdHandler --
274  *    Handler for OVS_PACKET_CMD_EXECUTE command.
275  *----------------------------------------------------------------------------
276  */
277 NTSTATUS
278 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
279                        UINT32 *replyLen)
280 {
281     NTSTATUS status = STATUS_SUCCESS;
282     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
283     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
284     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
285     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
286     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
287
288     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
289     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
290
291     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
292     UINT32 keyAttrOffset = 0;
293     OvsPacketExecute execute;
294     NL_ERROR nlError = NL_ERROR_SUCCESS;
295     NL_BUFFER nlBuf;
296
297     static const NL_POLICY nlPktExecPolicy[] = {
298         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
299         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
300         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
301         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
302         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
303                                             .optional = TRUE}
304     };
305
306     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
307
308     /* Get all the top level Flow attributes */
309     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
310                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
311                      != TRUE) {
312         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
313                        nlMsgHdr);
314         status = STATUS_UNSUCCESSFUL;
315         goto done;
316     }
317
318     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
319                     (PCHAR)nlMsgHdr);
320
321     /* Get flow keys attributes */
322     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
323                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
324                            nlFlowKeyPolicy, keyAttrs,
325                            ARRAY_SIZE(keyAttrs))) != TRUE) {
326         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
327         status = STATUS_UNSUCCESSFUL;
328         goto done;
329     }
330
331     execute.dpNo = ovsHdr->dp_ifindex;
332
333     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
334
335     status = OvsExecuteDpIoctl(&execute);
336
337     /* Default reply that we want to send */
338     if (status == STATUS_SUCCESS) {
339         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
340                   usrParamsCtx->outputLength);
341
342         /* Prepare nl Msg headers */
343         status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
344                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
345                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
346                  ovsHdr->dp_ifindex);
347
348         if (status == STATUS_SUCCESS) {
349             *replyLen = msgOut->nlMsg.nlmsgLen;
350         }
351     } else {
352         /* Map NTSTATUS to NL_ERROR */
353         nlError = NlMapStatusToNlErr(status);
354
355         /* As of now there are no transactional errors in the implementation.
356          * Once we have them then we need to map status to correct
357          * nlError value, so that below mentioned code gets hit. */
358         if ((nlError != NL_ERROR_SUCCESS) &&
359             (usrParamsCtx->outputBuffer)) {
360
361             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
362                                            usrParamsCtx->outputBuffer;
363             BuildErrorMsg(msgIn, msgError, nlError);
364             *replyLen = msgError->nlMsg.nlmsgLen;
365             status = STATUS_SUCCESS;
366             goto done;
367         }
368     }
369
370 done:
371     return status;
372 }
373
374 /*
375  *----------------------------------------------------------------------------
376  *  _MapNlAttrToOvsPktExec --
377  *    Maps input Netlink attributes to OvsPacketExecute.
378  *----------------------------------------------------------------------------
379  */
380 static VOID
381 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
382                        OvsPacketExecute *execute)
383 {
384     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
385     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
386
387     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
388     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
389
390     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
391 }
392
393 NTSTATUS
394 OvsExecuteDpIoctl(OvsPacketExecute *execute)
395 {
396     NTSTATUS                    status = STATUS_SUCCESS;
397     NTSTATUS                    ndisStatus;
398     LOCK_STATE_EX               lockState;
399     PNET_BUFFER_LIST pNbl;
400     PNL_ATTR actions;
401     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
402     OvsFlowKey key;
403     OVS_PACKET_HDR_INFO layers;
404     POVS_VPORT_ENTRY vport;
405
406     NdisAcquireSpinLock(gOvsCtrlLock);
407     if (gOvsSwitchContext == NULL) {
408         status = STATUS_INVALID_PARAMETER;
409         goto unlock;
410     }
411
412     if (execute->packetLen == 0) {
413         status = STATUS_INVALID_PARAMETER;
414         goto unlock;
415     }
416
417     actions = execute->actions;
418
419     ASSERT(actions);
420
421     /*
422      * Allocate the NBL, copy the data from the userspace buffer. Allocate
423      * also, the forwarding context for the packet.
424      */
425     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
426                                        execute->packetLen);
427     if (pNbl == NULL) {
428         status = STATUS_NO_MEMORY;
429         goto unlock;
430     }
431
432     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
433     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
434     if (vport) {
435         fwdDetail->SourcePortId = vport->portId;
436         fwdDetail->SourceNicIndex = vport->nicIndex;
437     } else {
438         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
439         fwdDetail->SourceNicIndex = 0;
440     }
441     // XXX: Figure out if any of the other members of fwdDetail need to be set.
442
443     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
444                               NULL);
445     if (ndisStatus == NDIS_STATUS_SUCCESS) {
446         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
447         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
448                               NDIS_RWL_AT_DISPATCH_LEVEL);
449         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
450                                        vport ? vport->portNo :
451                                                OVS_DEFAULT_PORT_NO,
452                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
453                                        &key, NULL, &layers, actions,
454                                        execute->actionsLen);
455         pNbl = NULL;
456         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
457     }
458     if (ndisStatus != NDIS_STATUS_SUCCESS) {
459         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
460             status = STATUS_NOT_SUPPORTED;
461         } else {
462             status = STATUS_UNSUCCESSFUL;
463         }
464     }
465
466     if (pNbl) {
467         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
468     }
469 unlock:
470     NdisReleaseSpinLock(gOvsCtrlLock);
471     return status;
472 }
473
474
475 NTSTATUS
476 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
477 {
478     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
479     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
480
481     if (queue == NULL) {
482         return STATUS_INVALID_PARAMETER;
483     }
484     OvsPurgePacketQueue(queue, instance);
485     return STATUS_SUCCESS;
486 }
487
488 VOID
489 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
490                      PIRP irp)
491 {
492     PIO_STACK_LOCATION irpSp;
493     PFILE_OBJECT fileObject;
494     POVS_OPEN_INSTANCE instance;
495     POVS_USER_PACKET_QUEUE queue = NULL;
496
497     UNREFERENCED_PARAMETER(deviceObject);
498
499     IoReleaseCancelSpinLock(irp->CancelIrql);
500     irpSp = IoGetCurrentIrpStackLocation(irp);
501     fileObject = irpSp->FileObject;
502
503     if (fileObject == NULL) {
504         goto done;
505     }
506     NdisAcquireSpinLock(gOvsCtrlLock);
507     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
508     if (instance) {
509         queue = instance->packetQueue;
510     }
511     if (instance == NULL || queue == NULL) {
512         NdisReleaseSpinLock(gOvsCtrlLock);
513         goto done;
514     }
515     NdisReleaseSpinLock(gOvsCtrlLock);
516     NdisAcquireSpinLock(&queue->queueLock);
517     if (queue->pendingIrp == irp) {
518         queue->pendingIrp = NULL;
519     }
520     NdisReleaseSpinLock(&queue->queueLock);
521 done:
522     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
523 }
524
525
526 NTSTATUS
527 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
528 {
529     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
530     POVS_USER_PACKET_QUEUE queue =
531                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
532     NTSTATUS status = STATUS_SUCCESS;
533     BOOLEAN cancelled = FALSE;
534
535     if (queue == NULL) {
536         return STATUS_INVALID_PARAMETER;
537     }
538     NdisAcquireSpinLock(&queue->queueLock);
539     if (queue->instance != instance) {
540         NdisReleaseSpinLock(&queue->queueLock);
541         return STATUS_INVALID_PARAMETER;
542     }
543     if (queue->pendingIrp) {
544         NdisReleaseSpinLock(&queue->queueLock);
545         return STATUS_DEVICE_BUSY;
546     }
547     if (queue->numPackets == 0) {
548         PDRIVER_CANCEL cancelRoutine;
549         IoMarkIrpPending(irp);
550         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
551         if (irp->Cancel) {
552             cancelRoutine = IoSetCancelRoutine(irp, NULL);
553             if (cancelRoutine) {
554                 cancelled = TRUE;
555             }
556         } else {
557             queue->pendingIrp = irp;
558         }
559         status = STATUS_PENDING;
560     }
561     NdisReleaseSpinLock(&queue->queueLock);
562     if (cancelled) {
563         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
564         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
565     }
566     return status;
567 }
568
569
570 POVS_PACKET_QUEUE_ELEM
571 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
572 {
573     POVS_USER_PACKET_QUEUE queue;
574     PLIST_ENTRY link;
575     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
576     if (queue == NULL) {
577         return NULL;
578     }
579     NdisAcquireSpinLock(&queue->queueLock);
580     if (queue->instance != instance || queue->numPackets == 0) {
581         NdisReleaseSpinLock(&queue->queueLock);
582         return NULL;
583     }
584     link = RemoveHeadList(&queue->packetList);
585     queue->numPackets--;
586     NdisReleaseSpinLock(&queue->queueLock);
587     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
588 }
589
590
591 POVS_USER_PACKET_QUEUE
592 OvsGetQueue(UINT32 pid)
593 {
594     /* XXX To be implemented. Return the queue assoiated with the pid*/
595     UNREFERENCED_PARAMETER(pid);
596     ASSERT(FALSE);
597     return NULL;
598 }
599
600 VOID
601 OvsQueuePackets(UINT32 queueId,
602                 PLIST_ENTRY packetList,
603                 UINT32 numElems)
604 {
605     POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
606     POVS_PACKET_QUEUE_ELEM elem;
607     PIRP irp = NULL;
608     PLIST_ENTRY  link;
609     UINT32 num = 0;
610
611     OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
612                   queueId, numElems);
613     if (queue == NULL) {
614         goto cleanup;
615     }
616
617     NdisAcquireSpinLock(&queue->queueLock);
618     if (queue->instance == NULL) {
619         NdisReleaseSpinLock(&queue->queueLock);
620         goto cleanup;
621     } else {
622         OvsAppendList(&queue->packetList, packetList);
623         queue->numPackets += numElems;
624     }
625     if (queue->pendingIrp) {
626         PDRIVER_CANCEL cancelRoutine;
627         irp = queue->pendingIrp;
628         queue->pendingIrp = NULL;
629         cancelRoutine = IoSetCancelRoutine(irp, NULL);
630         if (cancelRoutine == NULL) {
631             irp = NULL;
632         }
633     }
634     NdisReleaseSpinLock(&queue->queueLock);
635     if (irp) {
636         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
637     }
638
639 cleanup:
640     while (!IsListEmpty(packetList)) {
641         link = RemoveHeadList(packetList);
642         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
643         OvsFreeMemory(elem);
644         num++;
645     }
646     OVS_LOG_LOUD("Exit: drop %u packets", num);
647 }
648
649
650 /*
651  *----------------------------------------------------------------------------
652  * OvsCreateAndAddPackets --
653  *
654  *  Create a packet and forwarded to user space.
655  *
656  *  This function would fragment packet if needed, and queue
657  *  each segment to user space.
658  *----------------------------------------------------------------------------
659  */
660 NTSTATUS
661 OvsCreateAndAddPackets(PVOID userData,
662                        UINT32 userDataLen,
663                        UINT32 cmd,
664                        UINT32 inPort,
665                        OvsFlowKey *key,
666                        PNET_BUFFER_LIST nbl,
667                        BOOLEAN isRecv,
668                        POVS_PACKET_HDR_INFO hdrInfo,
669                        POVS_SWITCH_CONTEXT switchContext,
670                        LIST_ENTRY *list,
671                        UINT32 *num)
672 {
673     POVS_PACKET_QUEUE_ELEM elem;
674     PNET_BUFFER_LIST newNbl = NULL;
675     PNET_BUFFER nb;
676
677     if (hdrInfo->isTcp) {
678         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
679         UINT32 packetLength;
680
681         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
682         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
683         packetLength = NET_BUFFER_DATA_LENGTH(nb);
684
685         OVS_LOG_TRACE("MSS %u packet len %u",
686                 tsoInfo.LsoV1Transmit.MSS, packetLength);
687         if (tsoInfo.LsoV1Transmit.MSS) {
688             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
689             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
690                     tsoInfo.LsoV1Transmit.MSS , 0);
691             if (newNbl == NULL) {
692                 return NDIS_STATUS_FAILURE;
693             }
694             nbl = newNbl;
695         }
696     }
697
698     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
699     while (nb) {
700         elem = OvsCreateQueueNlPacket(userData, userDataLen,
701                                     cmd, inPort, key, nbl, nb,
702                                     isRecv, hdrInfo);
703         if (elem) {
704             InsertTailList(list, &elem->link);
705             (*num)++;
706         }
707         nb = NET_BUFFER_NEXT_NB(nb);
708     }
709     if (newNbl) {
710         OvsCompleteNBL(switchContext, newNbl, TRUE);
711     }
712     return NDIS_STATUS_SUCCESS;
713 }
714
715 static __inline UINT32
716 OvsGetUpcallMsgSize(PVOID userData,
717                     UINT32 userDataLen,
718                     OvsIPv4TunnelKey *tunnelKey,
719                     UINT32 payload)
720 {
721     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
722                   NlAttrSize(payload) +
723                   NlAttrSize(OvsFlowKeyAttrSize());
724
725     /* OVS_PACKET_ATTR_USERDATA */
726     if (userData) {
727         size += NlAttrTotalSize(userDataLen);
728     }
729     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
730     /* Is it included in the the flwo key attr XXX */
731     if (tunnelKey) {
732         size += NlAttrTotalSize(OvsTunKeyAttrSize());
733     }
734     return size;
735 }
736
737 /*
738  *----------------------------------------------------------------------------
739  * This function completes the IP Header csum. record the L4 payload offset and
740  * if there is a need to calculate the TCP or UDP csum. The actual csum will be
741  * caluculated simopultaneossly with the copy of the payload to the destination
742  * buffer when the packet is read.
743  *----------------------------------------------------------------------------
744  */
745 static VOID
746 OvsCompletePacketHeader(UINT8 *packet,
747                         BOOLEAN isRecv,
748                         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
749                         POVS_PACKET_HDR_INFO hdrInfoIn,
750                         POVS_PACKET_HDR_INFO hdrInfoOut)
751 {
752     if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
753         (!isRecv && csumInfo.Transmit.IsIPv4 &&
754         csumInfo.Transmit.IpHeaderChecksum)) {
755         PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
756         ASSERT(hdrInfoIn->isIPv4);
757         ASSERT(ipHdr->Version == 4);
758         ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
759             ipHdr->HeaderLength << 2,
760             (UINT16)~ipHdr->HeaderChecksum);
761         ovsUserStats.ipCsum++;
762     }
763     ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
764     /*
765      * calculate TCP/UDP pseudo checksum
766      */
767     if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
768         /*
769          * Only this case, we need to reclaculate pseudo checksum
770          * all other cases, it is assumed the pseudo checksum is
771          * filled already.
772          *
773          */
774         PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
775         if (hdrInfoIn->isIPv4) {
776             PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
777             hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
778                                     (ipHdr->HeaderLength << 2));
779             tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
780                                          (UINT32 *)&ipHdr->DestinationAddress,
781                                          IPPROTO_TCP, hdrInfoOut->l4PayLoad);
782         } else {
783             PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
784             hdrInfoOut->l4PayLoad =
785                 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
786                 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
787                 hdrInfoIn->l4Offset);
788             ASSERT(hdrInfoIn->isIPv6);
789             tcpHdr->th_sum =
790                 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
791                 (UINT32 *)&ipv6Hdr->DestinationAddress,
792                 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
793         }
794         hdrInfoOut->tcpCsumNeeded = 1;
795         ovsUserStats.recalTcpCsum++;
796     } else if (!isRecv) {
797         if (csumInfo.Transmit.TcpChecksum) {
798             hdrInfoOut->tcpCsumNeeded = 1;
799         } else if (csumInfo.Transmit.UdpChecksum) {
800             hdrInfoOut->udpCsumNeeded = 1;
801         }
802         if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
803 #ifdef DBG
804             UINT16 sum, *ptr;
805             UINT8 proto =
806                 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
807 #endif
808             if (hdrInfoIn->isIPv4) {
809                 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
810                 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
811                     (ipHdr->HeaderLength << 2));
812 #ifdef DBG
813                 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
814                     (UINT32 *)&ipHdr->DestinationAddress,
815                     proto, hdrInfoOut->l4PayLoad);
816 #endif
817             } else {
818                 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
819                     hdrInfoIn->l3Offset);
820                 hdrInfoOut->l4PayLoad =
821                     (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
822                     hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
823                     hdrInfoIn->l4Offset);
824                 ASSERT(hdrInfoIn->isIPv6);
825 #ifdef DBG
826                 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
827                     (UINT32 *)&ipv6Hdr->DestinationAddress,
828                     proto, hdrInfoOut->l4PayLoad);
829 #endif
830             }
831 #ifdef DBG
832             ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
833                 (hdrInfoOut->tcpCsumNeeded ?
834             TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
835             ASSERT(*ptr == sum);
836 #endif
837         }
838     }
839 }
840
841 static NTSTATUS
842 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
843 {
844     UNREFERENCED_PARAMETER(nb);
845
846     /* XXX select a pid from an array of pids using a flow based hash */
847     *pid = vport->upcallPid;
848     return STATUS_SUCCESS;
849 }
850
851 /*
852  *----------------------------------------------------------------------------
853  * OvsCreateQueueNlPacket --
854  *
855  *  Create a packet which will be forwarded to user space.
856  *
857  * InputParameter:
858  *   userData: when cmd is user action, this field contain
859  *      user action data.
860  *   userDataLen: as name indicated
861  *   cmd: either miss or user action
862  *   inPort: datapath port id from which the packet is received.
863  *   key: flow Key with a tunnel key if available
864  *   nbl:  the NET_BUFFER_LIST which contain the packet
865  *   nb: the packet
866  *   isRecv: This is used to decide how to interprete the csum info
867  *   hdrInfo: include hdr info initialized during flow extraction.
868  *
869  * Results:
870  *    NULL if fail to create the packet
871  *    The packet element otherwise
872  *----------------------------------------------------------------------------
873  */
874 POVS_PACKET_QUEUE_ELEM
875 OvsCreateQueueNlPacket(PVOID userData,
876                        UINT32 userDataLen,
877                        UINT32 cmd,
878                        UINT32 inPort,
879                        OvsFlowKey *key,
880                        PNET_BUFFER_LIST nbl,
881                        PNET_BUFFER nb,
882                        BOOLEAN isRecv,
883                        POVS_PACKET_HDR_INFO hdrInfo)
884 {
885 #define VLAN_TAG_SIZE 4
886     UINT32 allocLen, dataLen, extraLen;
887     POVS_PACKET_QUEUE_ELEM elem;
888     UINT8 *src, *dst;
889     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
890     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
891     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
892     UINT32 pid;
893     UINT32 nlMsgSize;
894     NL_BUFFER nlBuf;
895     PNL_MSG_HDR nlMsg;
896
897     /* XXX pass vport in the stack rather than portNo */
898     POVS_VPORT_ENTRY vport =
899         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
900
901     if (vport == NULL){
902         /* No vport is not fatal. */
903         return NULL;
904     }
905
906     if (!OvsGetPid(vport, nb, &pid)) {
907         /*
908          * There is no userspace queue created yet, so there is no point for
909          * creating a new packet to be queued.
910          */
911         return NULL;
912     }
913
914     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
915
916     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
917                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
918                   csumInfo.Receive.IpChecksumFailed)) {
919         OVS_LOG_INFO("Packet dropped due to checksum failure.");
920         ovsUserStats.dropDuetoChecksum++;
921         return NULL;
922     }
923
924     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
925     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
926
927     dataLen = NET_BUFFER_DATA_LENGTH(nb);
928
929     if (NlAttrSize(dataLen) > MAXUINT16) {
930         return NULL;
931     }
932
933     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
934                                     dataLen + extraLen);
935
936     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
937     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
938     if (elem == NULL) {
939         ovsUserStats.dropDuetoResource++;
940         return NULL;
941     }
942     elem->hdrInfo.value = hdrInfo->value;
943     elem->packet.totalLen = nlMsgSize;
944     /* XXX remove queueid */
945     elem->packet.queue = 0;
946     /* XXX  no need as the length is already in the NL attrib */
947     elem->packet.userDataLen = userDataLen;
948     elem->packet.inPort = inPort;
949     elem->packet.cmd = cmd;
950     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
951         ovsUserStats.miss++;
952     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
953         ovsUserStats.action++;
954     } else {
955         ASSERT(FALSE);
956         goto fail;
957     }
958     /* XXX Should we have both packetLen and TotalLen*/
959     elem->packet.packetLen = dataLen + extraLen;
960
961     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
962
963     /*
964      * Initialize the OVS header
965      * Since we are pre allocating memory for the NL buffer
966      * the attribute settings should not fail
967      */
968     if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
969                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
970                       gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
971         goto fail;
972     }
973
974     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
975                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
976         goto fail;
977     }
978
979     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
980     if (userData){
981         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
982                                 userData, (UINT16)userDataLen)) {
983             goto fail;
984         }
985     }
986
987     /*
988      * Make space for the payload to be copied and set the attribute
989      * XXX Uninit set initilizes the buffer with xero, we don't actually need
990      * that the payload to be initailized
991      */
992     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
993                                             (UINT16)(dataLen + extraLen));
994     if (!dst) {
995         goto fail;
996     }
997
998     /* Store the payload for csum calculation when packet is read */
999     elem->packet.payload = dst;
1000     dst += extraLen;
1001
1002     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1003     if (src == NULL) {
1004         ovsUserStats.dropDuetoResource++;
1005         goto fail;
1006     }    else if (src != dst) {
1007         /* Copy the data from the NDIS buffer to dst. */
1008         RtlCopyMemory(dst, src, dataLen);
1009     }
1010
1011     /* Set csum if was offloaded */
1012     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1013
1014     /*
1015      * Finally insert VLAN tag
1016      */
1017     if (extraLen) {
1018         dst = elem->packet.payload;
1019         src = dst + extraLen;
1020         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1021         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1022         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1023         dst += 12;
1024         ((UINT16 *)dst)[0] = htons(0x8100);
1025         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1026             (vlanInfo.TagHeader.UserPriority << 13));
1027         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1028         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1029         ovsUserStats.vlanInsert++;
1030     }
1031
1032     nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1033     nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1034     /* 'totalLen' should be size of valid data. */
1035     elem->packet.totalLen = nlMsg->nlmsgLen;
1036
1037     return elem;
1038 fail:
1039     OvsFreeMemory(elem);
1040     return NULL;
1041 }