datapath-windows: Add packet miss read Netlink command.
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Datapath.h"
26 #include "Switch.h"
27 #include "Vport.h"
28 #include "Event.h"
29 #include "User.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35
36 #ifdef OVS_DBG_MOD
37 #undef OVS_DBG_MOD
38 #endif
39 #define OVS_DBG_MOD OVS_DBG_USER
40 #include "Debug.h"
41
/*
 * Table of user packet queues, indexed by queue id. Each slot carries its own
 * spin lock (queueLock) that guards the slot's packet list, owning instance
 * and pending IRP.
 */
OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES];

/* Forward declaration; the definition appears later in this file. */
POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Control lock and switch context, defined in another translation unit. */
extern PNDIS_SPIN_LOCK gOvsCtrlLock;
extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by this file (ipCsum, l4Csum, recalTcpCsum). */
OVS_USER_STATS ovsUserStats;

/* Forward declaration; the definition appears later in this file. */
static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
                                   OvsPacketExecute  *execute);
/* Netlink policy used to parse the nested flow key attributes. */
extern NL_POLICY nlFlowKeyPolicy[];
52
53 NTSTATUS
54 OvsUserInit()
55 {
56     UINT32 i;
57     POVS_USER_PACKET_QUEUE queue;
58     for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
59         queue = &ovsPacketQueues[i];
60         RtlZeroMemory(queue, sizeof (*queue));
61         InitializeListHead(&queue->packetList);
62         NdisAllocateSpinLock(&queue->queueLock);
63     }
64     return STATUS_SUCCESS;
65 }
66
67 VOID
68 OvsUserCleanup()
69 {
70     UINT32 i;
71     POVS_USER_PACKET_QUEUE queue;
72     for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
73         queue = &ovsPacketQueues[i];
74         ASSERT(IsListEmpty(&queue->packetList));
75         ASSERT(queue->instance == NULL);
76         ASSERT(queue->pendingIrp == NULL);
77         NdisFreeSpinLock(&queue->queueLock);
78     }
79 }
80
81 static VOID
82 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
83                     POVS_OPEN_INSTANCE instance)
84 {
85     PLIST_ENTRY link, next;
86     LIST_ENTRY tmp;
87     POVS_PACKET_QUEUE_ELEM elem;
88
89     InitializeListHead(&tmp);
90     NdisAcquireSpinLock(&queue->queueLock);
91     if (queue->instance != instance) {
92         NdisReleaseSpinLock(&queue->queueLock);
93         return;
94     }
95
96     if (queue->numPackets) {
97         OvsAppendList(&tmp, &queue->packetList);
98         queue->numPackets = 0;
99     }
100     NdisReleaseSpinLock(&queue->queueLock);
101     LIST_FORALL_SAFE(&tmp, link, next) {
102         RemoveEntryList(link);
103         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
104         OvsFreeMemory(elem);
105     }
106 }
107
108
109 VOID
110 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
111 {
112     POVS_USER_PACKET_QUEUE queue;
113     POVS_PACKET_QUEUE_ELEM elem;
114     PLIST_ENTRY link, next;
115     LIST_ENTRY tmp;
116     PIRP irp = NULL;
117
118     InitializeListHead(&tmp);
119     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
120     if (queue) {
121         PDRIVER_CANCEL cancelRoutine;
122         NdisAcquireSpinLock(&queue->queueLock);
123         if (queue->instance != instance) {
124             NdisReleaseSpinLock(&queue->queueLock);
125             return;
126         }
127
128         if (queue->numPackets) {
129             OvsAppendList(&tmp, &queue->packetList);
130             queue->numPackets = 0;
131         }
132         queue->instance = NULL;
133         queue->queueId = OVS_MAX_NUM_PACKET_QUEUES;
134         instance->packetQueue = NULL;
135         irp = queue->pendingIrp;
136         queue->pendingIrp = NULL;
137         if (irp) {
138             cancelRoutine = IoSetCancelRoutine(irp, NULL);
139             if (cancelRoutine == NULL) {
140                 irp = NULL;
141             }
142         }
143         NdisReleaseSpinLock(&queue->queueLock);
144     }
145     LIST_FORALL_SAFE(&tmp, link, next) {
146         RemoveEntryList(link);
147         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
148         OvsFreeMemory(elem);
149     }
150     if (irp) {
151         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
152     }
153 }
154
155 NTSTATUS
156 OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
157                     PVOID inputBuffer,
158                     UINT32 inputLength)
159 {
160     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
161     UINT32 queueId;
162     POVS_USER_PACKET_QUEUE queue;
163     if (inputLength < sizeof (UINT32)) {
164         return STATUS_INVALID_PARAMETER;
165     }
166     queueId = *(UINT32 *)inputBuffer;
167     if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
168         /*
169          * unsubscribe
170          */
171         OvsCleanupPacketQueue(instance);
172     } else if (instance->packetQueue == NULL &&
173                queueId < OVS_MAX_NUM_PACKET_QUEUES) {
174         queue = &ovsPacketQueues[queueId];
175         NdisAcquireSpinLock(&queue->queueLock);
176         if (ovsPacketQueues[queueId].instance) {
177              if (ovsPacketQueues[queueId].instance != instance) {
178                  NdisReleaseSpinLock(&queue->queueLock);
179                  return STATUS_INSUFFICIENT_RESOURCES;
180              } else {
181                  NdisReleaseSpinLock(&queue->queueLock);
182                  return STATUS_SUCCESS;
183              }
184         }
185         queue->queueId = queueId;
186         queue->instance = instance;
187         instance->packetQueue = queue;
188         ASSERT(IsListEmpty(&queue->packetList));
189         NdisReleaseSpinLock(&queue->queueLock);
190     } else {
191         return STATUS_INVALID_PARAMETER;
192     }
193     return STATUS_SUCCESS;
194 }
195
196
197 NTSTATUS
198 OvsReadDpIoctl(PFILE_OBJECT fileObject,
199                PVOID outputBuffer,
200                UINT32 outputLength,
201                UINT32 *replyLen)
202 {
203     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
204     POVS_PACKET_QUEUE_ELEM elem;
205     UINT32 len;
206
207 #define TCP_CSUM_OFFSET  16
208 #define UDP_CSUM_OFFSET  6
209     ASSERT(instance);
210
211     if (instance->packetQueue == NULL) {
212         return STATUS_INVALID_PARAMETER;
213     }
214     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
215         return STATUS_BUFFER_TOO_SMALL;
216     }
217
218     elem = OvsGetNextPacket(instance);
219     if (elem) {
220         /*
221          * XXX revisit this later
222          */
223         len = elem->packet.totalLen > outputLength ? outputLength :
224                  elem->packet.totalLen;
225
226         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
227             len == elem->packet.totalLen) {
228             UINT16 sum, *ptr;
229             UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
230                                   elem->hdrInfo.l4Offset);
231             RtlCopyMemory(outputBuffer, &elem->packet.data, size);
232             ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
233             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
234                                            (UINT8 *)&elem->packet.data + size,
235                                            elem->hdrInfo.l4PayLoad, 0);
236             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
237                             (elem->hdrInfo.tcpCsumNeeded ?
238                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
239             *ptr = sum;
240             ovsUserStats.l4Csum++;
241         } else {
242             RtlCopyMemory(outputBuffer, &elem->packet, len);
243         }
244
245         *replyLen = len;
246         OvsFreeMemory(elem);
247     }
248     return STATUS_SUCCESS;
249 }
250
251 /* Helper function to allocate a Forwarding Context for an NBL */
252 NTSTATUS
253 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
254                                    PNET_BUFFER_LIST nbl)
255 {
256     return switchContext->NdisSwitchHandlers.
257         AllocateNetBufferListForwardingContext(
258             switchContext->NdisSwitchContext, nbl);
259 }
260
261 /*
262  * --------------------------------------------------------------------------
263  * This function allocates all the stuff necessary for creating an NBL from the
264  * input buffer of specified length, namely, a nonpaged data buffer of size
265  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
266  * context yet. It also copies data from the specified buffer to the NBL.
267  * --------------------------------------------------------------------------
268  */
269 PNET_BUFFER_LIST
270 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
271                             PVOID userBuffer,
272                             ULONG length)
273 {
274     UINT8 *data = NULL;
275     PNET_BUFFER_LIST nbl = NULL;
276     PNET_BUFFER nb;
277     PMDL mdl;
278
279     if (length > OVS_DEFAULT_DATA_SIZE) {
280         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
281                                          OVS_DEFAULT_HEADROOM_SIZE);
282
283     } else {
284         nbl = OvsAllocateFixSizeNBL(switchContext, length,
285                                     OVS_DEFAULT_HEADROOM_SIZE);
286     }
287     if (nbl == NULL) {
288         return NULL;
289     }
290
291     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
292     mdl = NET_BUFFER_CURRENT_MDL(nb);
293     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
294                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
295     if (!data) {
296         OvsCompleteNBL(switchContext, nbl, TRUE);
297         return NULL;
298     }
299
300     NdisMoveMemory(data, userBuffer, length);
301
302     return nbl;
303 }
304
305 /*
306  *----------------------------------------------------------------------------
307  *  OvsNlExecuteCmdHandler --
308  *    Handler for OVS_PACKET_CMD_EXECUTE command.
309  *----------------------------------------------------------------------------
310  */
311 NTSTATUS
312 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
313                        UINT32 *replyLen)
314 {
315     NTSTATUS status = STATUS_SUCCESS;
316     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
317     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
318     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
319     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
320     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
321
322     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
323     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
324
325     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
326     UINT32 keyAttrOffset = 0;
327     OvsPacketExecute execute;
328     NL_ERROR nlError = NL_ERROR_SUCCESS;
329     NL_BUFFER nlBuf;
330
331     static const NL_POLICY nlPktExecPolicy[] = {
332         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
333         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
334         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
335         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
336         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
337                                             .optional = TRUE}
338     };
339
340     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
341
342     /* Get all the top level Flow attributes */
343     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
344                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
345                      != TRUE) {
346         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
347                        nlMsgHdr);
348         status = STATUS_UNSUCCESSFUL;
349         goto done;
350     }
351
352     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
353                     (PCHAR)nlMsgHdr);
354
355     /* Get flow keys attributes */
356     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
357                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
358                            nlFlowKeyPolicy, keyAttrs,
359                            ARRAY_SIZE(keyAttrs))) != TRUE) {
360         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
361         status = STATUS_UNSUCCESSFUL;
362         goto done;
363     }
364
365     execute.dpNo = ovsHdr->dp_ifindex;
366
367     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
368
369     status = OvsExecuteDpIoctl(&execute);
370
371     /* Default reply that we want to send */
372     if (status == STATUS_SUCCESS) {
373         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
374                   usrParamsCtx->outputLength);
375
376         /* Prepare nl Msg headers */
377         status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
378                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
379                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
380                  ovsHdr->dp_ifindex);
381
382         if (status == STATUS_SUCCESS) {
383             *replyLen = msgOut->nlMsg.nlmsgLen;
384         }
385     } else {
386         /* Map NTSTATUS to NL_ERROR */
387         nlError = NlMapStatusToNlErr(status);
388
389         /* As of now there are no transactional errors in the implementation.
390          * Once we have them then we need to map status to correct
391          * nlError value, so that below mentioned code gets hit. */
392         if ((nlError != NL_ERROR_SUCCESS) &&
393             (usrParamsCtx->outputBuffer)) {
394
395             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
396                                            usrParamsCtx->outputBuffer;
397             BuildErrorMsg(msgIn, msgError, nlError);
398             *replyLen = msgError->nlMsg.nlmsgLen;
399             status = STATUS_SUCCESS;
400             goto done;
401         }
402     }
403
404 done:
405     return status;
406 }
407
408 /*
409  *----------------------------------------------------------------------------
410  *  _MapNlAttrToOvsPktExec --
411  *    Maps input Netlink attributes to OvsPacketExecute.
412  *----------------------------------------------------------------------------
413  */
414 static VOID
415 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
416                        OvsPacketExecute *execute)
417 {
418     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
419     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
420
421     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
422     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
423
424     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
425 }
426
427 NTSTATUS
428 OvsExecuteDpIoctl(OvsPacketExecute *execute)
429 {
430     NTSTATUS                    status = STATUS_SUCCESS;
431     NTSTATUS                    ndisStatus;
432     LOCK_STATE_EX               lockState;
433     PNET_BUFFER_LIST pNbl;
434     PNL_ATTR actions;
435     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
436     OvsFlowKey key;
437     OVS_PACKET_HDR_INFO layers;
438     POVS_VPORT_ENTRY vport;
439
440     NdisAcquireSpinLock(gOvsCtrlLock);
441     if (gOvsSwitchContext == NULL) {
442         status = STATUS_INVALID_PARAMETER;
443         goto unlock;
444     }
445
446     if (execute->packetLen == 0) {
447         status = STATUS_INVALID_PARAMETER;
448         goto unlock;
449     }
450
451     actions = execute->actions;
452
453     ASSERT(actions);
454
455     /*
456      * Allocate the NBL, copy the data from the userspace buffer. Allocate
457      * also, the forwarding context for the packet.
458      */
459     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
460                                        execute->packetLen);
461     if (pNbl == NULL) {
462         status = STATUS_NO_MEMORY;
463         goto unlock;
464     }
465
466     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
467     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
468     if (vport) {
469         fwdDetail->SourcePortId = vport->portId;
470         fwdDetail->SourceNicIndex = vport->nicIndex;
471     } else {
472         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
473         fwdDetail->SourceNicIndex = 0;
474     }
475     // XXX: Figure out if any of the other members of fwdDetail need to be set.
476
477     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
478                               NULL);
479     if (ndisStatus == NDIS_STATUS_SUCCESS) {
480         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
481         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
482                               NDIS_RWL_AT_DISPATCH_LEVEL);
483         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
484                                        vport ? vport->portNo :
485                                                OVS_DEFAULT_PORT_NO,
486                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
487                                        &key, NULL, &layers, actions,
488                                        execute->actionsLen);
489         pNbl = NULL;
490         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
491     }
492     if (ndisStatus != NDIS_STATUS_SUCCESS) {
493         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
494             status = STATUS_NOT_SUPPORTED;
495         } else {
496             status = STATUS_UNSUCCESSFUL;
497         }
498     }
499
500     if (pNbl) {
501         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
502     }
503 unlock:
504     NdisReleaseSpinLock(gOvsCtrlLock);
505     return status;
506 }
507
508
509 NTSTATUS
510 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
511 {
512     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
513     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
514
515     if (queue == NULL) {
516         return STATUS_INVALID_PARAMETER;
517     }
518     OvsPurgePacketQueue(queue, instance);
519     return STATUS_SUCCESS;
520 }
521
522 VOID
523 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
524                      PIRP irp)
525 {
526     PIO_STACK_LOCATION irpSp;
527     PFILE_OBJECT fileObject;
528     POVS_OPEN_INSTANCE instance;
529     POVS_USER_PACKET_QUEUE queue = NULL;
530
531     UNREFERENCED_PARAMETER(deviceObject);
532
533     IoReleaseCancelSpinLock(irp->CancelIrql);
534     irpSp = IoGetCurrentIrpStackLocation(irp);
535     fileObject = irpSp->FileObject;
536
537     if (fileObject == NULL) {
538         goto done;
539     }
540     NdisAcquireSpinLock(gOvsCtrlLock);
541     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
542     if (instance) {
543         queue = instance->packetQueue;
544     }
545     if (instance == NULL || queue == NULL) {
546         NdisReleaseSpinLock(gOvsCtrlLock);
547         goto done;
548     }
549     NdisReleaseSpinLock(gOvsCtrlLock);
550     NdisAcquireSpinLock(&queue->queueLock);
551     if (queue->pendingIrp == irp) {
552         queue->pendingIrp = NULL;
553     }
554     NdisReleaseSpinLock(&queue->queueLock);
555 done:
556     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
557 }
558
559
560 NTSTATUS
561 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
562 {
563     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
564     POVS_USER_PACKET_QUEUE queue =
565                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
566     NTSTATUS status = STATUS_SUCCESS;
567     BOOLEAN cancelled = FALSE;
568
569     if (queue == NULL) {
570         return STATUS_INVALID_PARAMETER;
571     }
572     NdisAcquireSpinLock(&queue->queueLock);
573     if (queue->instance != instance) {
574         NdisReleaseSpinLock(&queue->queueLock);
575         return STATUS_INVALID_PARAMETER;
576     }
577     if (queue->pendingIrp) {
578         NdisReleaseSpinLock(&queue->queueLock);
579         return STATUS_DEVICE_BUSY;
580     }
581     if (queue->numPackets == 0) {
582         PDRIVER_CANCEL cancelRoutine;
583         IoMarkIrpPending(irp);
584         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
585         if (irp->Cancel) {
586             cancelRoutine = IoSetCancelRoutine(irp, NULL);
587             if (cancelRoutine) {
588                 cancelled = TRUE;
589             }
590         } else {
591             queue->pendingIrp = irp;
592         }
593         status = STATUS_PENDING;
594     }
595     NdisReleaseSpinLock(&queue->queueLock);
596     if (cancelled) {
597         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
598         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
599     }
600     return status;
601 }
602
603
604 POVS_PACKET_QUEUE_ELEM
605 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
606 {
607     POVS_USER_PACKET_QUEUE queue;
608     PLIST_ENTRY link;
609     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
610     if (queue == NULL) {
611         return NULL;
612     }
613     NdisAcquireSpinLock(&queue->queueLock);
614     if (queue->instance != instance || queue->numPackets == 0) {
615         NdisReleaseSpinLock(&queue->queueLock);
616         return NULL;
617     }
618     link = RemoveHeadList(&queue->packetList);
619     queue->numPackets--;
620     NdisReleaseSpinLock(&queue->queueLock);
621     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
622 }
623
624
625 POVS_USER_PACKET_QUEUE
626 OvsGetQueue(UINT32 queueId)
627 {
628     POVS_USER_PACKET_QUEUE queue;
629     if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
630         return NULL;
631     }
632     queue = &ovsPacketQueues[queueId];
633     return queue->instance != NULL ? queue : NULL;
634 }
635
636 VOID
637 OvsQueuePackets(UINT32 queueId,
638                 PLIST_ENTRY packetList,
639                 UINT32 numElems)
640 {
641     POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
642     POVS_PACKET_QUEUE_ELEM elem;
643     PIRP irp = NULL;
644     PLIST_ENTRY  link;
645     UINT32 num = 0;
646
647     OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
648                   queueId, numElems);
649     if (queue == NULL) {
650         goto cleanup;
651     }
652
653     NdisAcquireSpinLock(&queue->queueLock);
654     if (queue->instance == NULL) {
655         NdisReleaseSpinLock(&queue->queueLock);
656         goto cleanup;
657     } else {
658         OvsAppendList(&queue->packetList, packetList);
659         queue->numPackets += numElems;
660     }
661     if (queue->pendingIrp) {
662         PDRIVER_CANCEL cancelRoutine;
663         irp = queue->pendingIrp;
664         queue->pendingIrp = NULL;
665         cancelRoutine = IoSetCancelRoutine(irp, NULL);
666         if (cancelRoutine == NULL) {
667             irp = NULL;
668         }
669     }
670     NdisReleaseSpinLock(&queue->queueLock);
671     if (irp) {
672         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
673     }
674
675 cleanup:
676     while (!IsListEmpty(packetList)) {
677         link = RemoveHeadList(packetList);
678         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
679         OvsFreeMemory(elem);
680         num++;
681     }
682     OVS_LOG_LOUD("Exit: drop %u packets", num);
683 }
684
685
686 /*
687  *----------------------------------------------------------------------------
688  * OvsCreateAndAddPackets --
689  *
690  *  Create a packet and forwarded to user space.
691  *
692  *  This function would fragment packet if needed, and queue
693  *  each segment to user space.
694  *----------------------------------------------------------------------------
695  */
696 NTSTATUS
697 OvsCreateAndAddPackets(PVOID userData,
698                        UINT32 userDataLen,
699                        UINT32 cmd,
700                        UINT32 inPort,
701                        OvsFlowKey *key,
702                        PNET_BUFFER_LIST nbl,
703                        BOOLEAN isRecv,
704                        POVS_PACKET_HDR_INFO hdrInfo,
705                        POVS_SWITCH_CONTEXT switchContext,
706                        LIST_ENTRY *list,
707                        UINT32 *num)
708 {
709     POVS_PACKET_QUEUE_ELEM elem;
710     PNET_BUFFER_LIST newNbl = NULL;
711     PNET_BUFFER nb;
712
713     if (hdrInfo->isTcp) {
714         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
715         UINT32 packetLength;
716
717         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
718         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
719         packetLength = NET_BUFFER_DATA_LENGTH(nb);
720
721         OVS_LOG_TRACE("MSS %u packet len %u",
722                 tsoInfo.LsoV1Transmit.MSS, packetLength);
723         if (tsoInfo.LsoV1Transmit.MSS) {
724             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
725             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
726                     tsoInfo.LsoV1Transmit.MSS , 0);
727             if (newNbl == NULL) {
728                 return NDIS_STATUS_FAILURE;
729             }
730             nbl = newNbl;
731         }
732     }
733
734     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
735     while (nb) {
736         elem = OvsCreateQueueNlPacket(userData, userDataLen,
737                                     cmd, inPort, key, nbl, nb,
738                                     isRecv, hdrInfo);
739         if (elem) {
740             InsertTailList(list, &elem->link);
741             (*num)++;
742         }
743         nb = NET_BUFFER_NEXT_NB(nb);
744     }
745     if (newNbl) {
746         OvsCompleteNBL(switchContext, newNbl, TRUE);
747     }
748     return NDIS_STATUS_SUCCESS;
749 }
750
751 static __inline UINT32
752 OvsGetUpcallMsgSize(PVOID userData,
753                     UINT32 userDataLen,
754                     OvsIPv4TunnelKey *tunnelKey,
755                     UINT32 payload)
756 {
757     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
758                   NlAttrSize(payload) +
759                   NlAttrSize(OvsFlowKeyAttrSize());
760
761     /* OVS_PACKET_ATTR_USERDATA */
762     if (userData) {
763         size += NlAttrTotalSize(userDataLen);
764     }
765     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
766     /* Is it included in the the flwo key attr XXX */
767     if (tunnelKey) {
768         size += NlAttrTotalSize(OvsTunKeyAttrSize());
769     }
770     return size;
771 }
772
/*
 *----------------------------------------------------------------------------
 * OvsCompletePacketHeader --
 *    Completes the IPv4 header checksum in 'packet' when the checksum info
 *    says it is invalid (receive) or was left to be offloaded (transmit).
 *    It also records in 'hdrInfoOut' the L4 length (l4PayLoad) and whether a
 *    TCP or UDP checksum still has to be calculated. The actual L4 checksum
 *    is calculated simultaneously with the copy of the payload to the
 *    destination buffer when the packet is read.
 *
 *    packet:     start of the packet data; headers are located through the
 *                l3Offset/l4Offset fields of the header info.
 *    isRecv:     selects whether 'csumInfo' is read through its Receive or
 *                its Transmit view.
 *    csumInfo:   NDIS checksum info of the NBL (passed by value).
 *    hdrInfoIn:  header info the offsets and protocol flags are read from.
 *    hdrInfoOut: header info that receives l4PayLoad, tcpCsumNeeded and
 *                udpCsumNeeded.
 *
 *    TCP_CSUM_OFFSET and UDP_CSUM_OFFSET used in the DBG-only check are the
 *    macros #defined inside OvsReadDpIoctl() earlier in this file.
 *----------------------------------------------------------------------------
 */
static VOID
OvsCompletePacketHeader(UINT8 *packet,
                        BOOLEAN isRecv,
                        NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
                        POVS_PACKET_HDR_INFO hdrInfoIn,
                        POVS_PACKET_HDR_INFO hdrInfoOut)
{
    if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
        (!isRecv && csumInfo.Transmit.IsIPv4 &&
        csumInfo.Transmit.IpHeaderChecksum)) {
        /*
         * NOTE(review): l3Offset is taken from hdrInfoOut here but from
         * hdrInfoIn everywhere else in this function -- confirm intended.
         */
        PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
        ASSERT(hdrInfoIn->isIPv4);
        ASSERT(ipHdr->Version == 4);
        /* HeaderLength is in 32-bit words, hence the shift to get bytes. */
        ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
            ipHdr->HeaderLength << 2,
            (UINT16)~ipHdr->HeaderChecksum);
        ovsUserStats.ipCsum++;
    }
    /*
     * NOTE(review): this checks tcpCsumNeeded on hdrInfoIn but udpCsumNeeded
     * on hdrInfoOut -- confirm the mix is intended.
     */
    ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
    /*
     * calculate TCP/UDP pseudo checksum
     */
    if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
        /*
         * Only in this case do we need to recalculate the pseudo checksum;
         * in all other cases it is assumed that the pseudo checksum is
         * filled in already.
         */
        PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
        if (hdrInfoIn->isIPv4) {
            PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
            /* L4 length = IP total length minus the IP header length. */
            hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
                                    (ipHdr->HeaderLength << 2));
            tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
                                         (UINT32 *)&ipHdr->DestinationAddress,
                                         IPPROTO_TCP, hdrInfoOut->l4PayLoad);
        } else {
            PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
            /*
             * L4 length = end of the IPv6 payload (l3Offset + fixed header +
             * PayloadLength) minus the start of the L4 header.
             */
            hdrInfoOut->l4PayLoad =
                (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
                hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
                hdrInfoIn->l4Offset);
            ASSERT(hdrInfoIn->isIPv6);
            tcpHdr->th_sum =
                IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
                (UINT32 *)&ipv6Hdr->DestinationAddress,
                IPPROTO_TCP, hdrInfoOut->l4PayLoad);
        }
        hdrInfoOut->tcpCsumNeeded = 1;
        ovsUserStats.recalTcpCsum++;
    } else if (!isRecv) {
        /* Transmit path: an L4 checksum is pending if offload was requested. */
        if (csumInfo.Transmit.TcpChecksum) {
            hdrInfoOut->tcpCsumNeeded = 1;
        } else if (csumInfo.Transmit.UdpChecksum) {
            hdrInfoOut->udpCsumNeeded = 1;
        }
        if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
#ifdef DBG
            /* Debug builds verify the pseudo checksum already in the header. */
            UINT16 sum, *ptr;
            UINT8 proto =
                hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
#endif
            if (hdrInfoIn->isIPv4) {
                PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
                hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
                    (ipHdr->HeaderLength << 2));
#ifdef DBG
                sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
                    (UINT32 *)&ipHdr->DestinationAddress,
                    proto, hdrInfoOut->l4PayLoad);
#endif
            } else {
                PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
                    hdrInfoIn->l3Offset);
                hdrInfoOut->l4PayLoad =
                    (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
                    hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
                    hdrInfoIn->l4Offset);
                ASSERT(hdrInfoIn->isIPv6);
#ifdef DBG
                sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
                    (UINT32 *)&ipv6Hdr->DestinationAddress,
                    proto, hdrInfoOut->l4PayLoad);
#endif
            }
#ifdef DBG
            /* Locate the checksum field inside the L4 header and compare. */
            ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
                (hdrInfoOut->tcpCsumNeeded ?
            TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
            ASSERT(*ptr == sum);
#endif
        }
    }
}
876
877 static NTSTATUS
878 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
879 {
880     UNREFERENCED_PARAMETER(nb);
881
882     /* XXX select a pid from an array of pids using a flow based hash */
883     *pid = vport->upcallPid;
884     return STATUS_SUCCESS;
885 }
886
887 /*
888  *----------------------------------------------------------------------------
889  * OvsCreateQueueNlPacket --
890  *
891  *  Create a packet which will be forwarded to user space.
892  *
893  * InputParameter:
894  *   userData: when cmd is user action, this field contain
895  *      user action data.
896  *   userDataLen: as name indicated
897  *   cmd: either miss or user action
898  *   inPort: datapath port id from which the packet is received.
899  *   key: flow Key with a tunnel key if available
900  *   nbl:  the NET_BUFFER_LIST which contain the packet
901  *   nb: the packet
902  *   isRecv: This is used to decide how to interprete the csum info
903  *   hdrInfo: include hdr info initialized during flow extraction.
904  *
905  * Results:
906  *    NULL if fail to create the packet
907  *    The packet element otherwise
908  *----------------------------------------------------------------------------
909  */
910 POVS_PACKET_QUEUE_ELEM
911 OvsCreateQueueNlPacket(PVOID userData,
912                        UINT32 userDataLen,
913                        UINT32 cmd,
914                        UINT32 inPort,
915                        OvsFlowKey *key,
916                        PNET_BUFFER_LIST nbl,
917                        PNET_BUFFER nb,
918                        BOOLEAN isRecv,
919                        POVS_PACKET_HDR_INFO hdrInfo)
920 {
921 #define VLAN_TAG_SIZE 4
922     UINT32 allocLen, dataLen, extraLen;
923     POVS_PACKET_QUEUE_ELEM elem;
924     UINT8 *src, *dst;
925     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
926     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
927     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
928     UINT32 pid;
929     UINT32 nlMsgSize;
930     NL_BUFFER nlBuf;
931
932     /* XXX pass vport in the stack rather than portNo */
933     POVS_VPORT_ENTRY vport =
934         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
935
936     if (vport == NULL){
937         /* Should never happen as dispatch lock is held */
938         ASSERT(vport);
939         return NULL;
940     }
941
942     if (!OvsGetPid(vport, nb, &pid)) {
943         /*
944          * There is no userspace queue created yet, so there is no point for
945          * creating a new packet to be queued.
946          */
947         return NULL;
948     }
949
950     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
951
952     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
953                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
954                   csumInfo.Receive.IpChecksumFailed)) {
955         OVS_LOG_INFO("Packet dropped due to checksum failure.");
956         ovsUserStats.dropDuetoChecksum++;
957         return NULL;
958     }
959
960     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
961     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
962
963     dataLen = NET_BUFFER_DATA_LENGTH(nb);
964
965     if (NlAttrSize(dataLen) > MAXUINT16) {
966         return NULL;
967     }
968
969     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
970                                     dataLen + extraLen);
971
972     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
973     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
974     if (elem == NULL) {
975         ovsUserStats.dropDuetoResource++;
976         return NULL;
977     }
978     elem->hdrInfo.value = hdrInfo->value;
979     elem->packet.totalLen = nlMsgSize;
980     /* XXX remove queueid */
981     elem->packet.queue = 0;
982     /* XXX  no need as the length is already in the NL attrib */
983     elem->packet.userDataLen = userDataLen;
984     elem->packet.inPort = inPort;
985     elem->packet.cmd = cmd;
986     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
987         ovsUserStats.miss++;
988     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
989         ovsUserStats.action++;
990     } else {
991         ASSERT(FALSE);
992         goto fail;
993     }
994     /* XXX Should we have both packetLen and TotalLen*/
995     elem->packet.packetLen = dataLen + extraLen;
996
997     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
998
999     /*
1000      * Initialize the OVS header
1001      * Since we are pre allocating memory for the NL buffer
1002      * the attribute settings should not fail
1003      */
1004     if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1005                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1006                       gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1007         goto fail;
1008     }
1009
1010     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1011                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1012         goto fail;
1013     }
1014
1015     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1016     if (userData){
1017         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1018                                 userData, (UINT16)userDataLen)) {
1019             goto fail;
1020         }
1021     }
1022
1023     /*
1024      * Make space for the payload to be copied and set the attribute
1025      * XXX Uninit set initilizes the buffer with xero, we don't actually need
1026      * that the payload to be initailized
1027      */
1028     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1029                                             (UINT16)(dataLen + extraLen));
1030     if (!dst) {
1031         goto fail;
1032     }
1033
1034     /* Store the payload for csum calculation when packet is read */
1035     elem->packet.payload = dst;
1036     dst += extraLen;
1037
1038     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1039     if (src == NULL) {
1040         ovsUserStats.dropDuetoResource++;
1041         goto fail;
1042     }    else if (src != dst) {
1043         /* Copy the data from the NDIS buffer to dst. */
1044         RtlCopyMemory(dst, src, dataLen);
1045     }
1046
1047     /* Set csum if was offloaded */
1048     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1049
1050     /*
1051      * Finally insert VLAN tag
1052      */
1053     if (extraLen) {
1054         dst = elem->packet.payload;
1055         src = dst + extraLen;
1056         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1057         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1058         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1059         dst += 12;
1060         ((UINT16 *)dst)[0] = htons(0x8100);
1061         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1062             (vlanInfo.TagHeader.UserPriority << 13));
1063         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1064         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1065         ovsUserStats.vlanInsert++;
1066     }
1067     return elem;
1068 fail:
1069     OvsFreeMemory(elem);
1070     return NULL;
1071 }