f24c4e3f1678cee0c77860f0b6e097ee3ef9ce78
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Datapath.h"
26 #include "Switch.h"
27 #include "Vport.h"
28 #include "Event.h"
29 #include "User.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35
36 #ifdef OVS_DBG_MOD
37 #undef OVS_DBG_MOD
38 #endif
39 #define OVS_DBG_MOD OVS_DBG_USER
40 #include "Debug.h"
41
/*
 * Table of user packet queues. OvsSubscribeDpIoctl() and OvsGetQueue() index
 * it directly with the queue id.
 */
OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES];

/* Defined further down in this file; used by OvsReadDpIoctl(). */
POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Defined elsewhere; taken by OvsExecuteDpIoctl() and OvsCancelIrpDatapath(). */
extern PNDIS_SPIN_LOCK gOvsCtrlLock;
/* Defined elsewhere; OvsExecuteDpIoctl() rejects requests while it is NULL. */
extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by this file (ipCsum, l4Csum, recalTcpCsum). */
OVS_USER_STATS ovsUserStats;

static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
                                   OvsPacketExecute  *execute);
/* Policy used to parse the nested OVS_PACKET_ATTR_KEY attribute. */
extern NL_POLICY nlFlowKeyPolicy[];
52
53 NTSTATUS
54 OvsUserInit()
55 {
56     UINT32 i;
57     POVS_USER_PACKET_QUEUE queue;
58     for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
59         queue = &ovsPacketQueues[i];
60         RtlZeroMemory(queue, sizeof (*queue));
61         InitializeListHead(&queue->packetList);
62         NdisAllocateSpinLock(&queue->queueLock);
63     }
64     return STATUS_SUCCESS;
65 }
66
67 VOID
68 OvsUserCleanup()
69 {
70     UINT32 i;
71     POVS_USER_PACKET_QUEUE queue;
72     for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
73         queue = &ovsPacketQueues[i];
74         ASSERT(IsListEmpty(&queue->packetList));
75         ASSERT(queue->instance == NULL);
76         ASSERT(queue->pendingIrp == NULL);
77         NdisFreeSpinLock(&queue->queueLock);
78     }
79 }
80
81 static VOID
82 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
83                     POVS_OPEN_INSTANCE instance)
84 {
85     PLIST_ENTRY link, next;
86     LIST_ENTRY tmp;
87     POVS_PACKET_QUEUE_ELEM elem;
88
89     InitializeListHead(&tmp);
90     NdisAcquireSpinLock(&queue->queueLock);
91     if (queue->instance != instance) {
92         NdisReleaseSpinLock(&queue->queueLock);
93         return;
94     }
95
96     if (queue->numPackets) {
97         OvsAppendList(&tmp, &queue->packetList);
98         queue->numPackets = 0;
99     }
100     NdisReleaseSpinLock(&queue->queueLock);
101     LIST_FORALL_SAFE(&tmp, link, next) {
102         RemoveEntryList(link);
103         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
104         OvsFreeMemory(elem);
105     }
106 }
107
108
109 VOID
110 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
111 {
112     POVS_USER_PACKET_QUEUE queue;
113     POVS_PACKET_QUEUE_ELEM elem;
114     PLIST_ENTRY link, next;
115     LIST_ENTRY tmp;
116     PIRP irp = NULL;
117
118     InitializeListHead(&tmp);
119     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
120     if (queue) {
121         PDRIVER_CANCEL cancelRoutine;
122         NdisAcquireSpinLock(&queue->queueLock);
123         if (queue->instance != instance) {
124             NdisReleaseSpinLock(&queue->queueLock);
125             return;
126         }
127
128         if (queue->numPackets) {
129             OvsAppendList(&tmp, &queue->packetList);
130             queue->numPackets = 0;
131         }
132         queue->instance = NULL;
133         queue->queueId = OVS_MAX_NUM_PACKET_QUEUES;
134         instance->packetQueue = NULL;
135         irp = queue->pendingIrp;
136         queue->pendingIrp = NULL;
137         if (irp) {
138             cancelRoutine = IoSetCancelRoutine(irp, NULL);
139             if (cancelRoutine == NULL) {
140                 irp = NULL;
141             }
142         }
143         NdisReleaseSpinLock(&queue->queueLock);
144     }
145     LIST_FORALL_SAFE(&tmp, link, next) {
146         RemoveEntryList(link);
147         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
148         OvsFreeMemory(elem);
149     }
150     if (irp) {
151         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
152     }
153 }
154
155 NTSTATUS
156 OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
157                     PVOID inputBuffer,
158                     UINT32 inputLength)
159 {
160     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
161     UINT32 queueId;
162     POVS_USER_PACKET_QUEUE queue;
163     if (inputLength < sizeof (UINT32)) {
164         return STATUS_INVALID_PARAMETER;
165     }
166     queueId = *(UINT32 *)inputBuffer;
167     if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
168         /*
169          * unsubscribe
170          */
171         OvsCleanupPacketQueue(instance);
172     } else if (instance->packetQueue == NULL &&
173                queueId < OVS_MAX_NUM_PACKET_QUEUES) {
174         queue = &ovsPacketQueues[queueId];
175         NdisAcquireSpinLock(&queue->queueLock);
176         if (ovsPacketQueues[queueId].instance) {
177              if (ovsPacketQueues[queueId].instance != instance) {
178                  NdisReleaseSpinLock(&queue->queueLock);
179                  return STATUS_INSUFFICIENT_RESOURCES;
180              } else {
181                  NdisReleaseSpinLock(&queue->queueLock);
182                  return STATUS_SUCCESS;
183              }
184         }
185         queue->queueId = queueId;
186         queue->instance = instance;
187         instance->packetQueue = queue;
188         ASSERT(IsListEmpty(&queue->packetList));
189         NdisReleaseSpinLock(&queue->queueLock);
190     } else {
191         return STATUS_INVALID_PARAMETER;
192     }
193     return STATUS_SUCCESS;
194 }
195
196
197 NTSTATUS
198 OvsReadDpIoctl(PFILE_OBJECT fileObject,
199                PVOID outputBuffer,
200                UINT32 outputLength,
201                UINT32 *replyLen)
202 {
203     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
204     POVS_PACKET_QUEUE_ELEM elem;
205     UINT32 len;
206
207 #define TCP_CSUM_OFFSET  16
208 #define UDP_CSUM_OFFSET  6
209     ASSERT(instance);
210
211     if (instance->packetQueue == NULL) {
212         return STATUS_INVALID_PARAMETER;
213     }
214     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
215         return STATUS_BUFFER_TOO_SMALL;
216     }
217
218     elem = OvsGetNextPacket(instance);
219     if (elem) {
220         /*
221          * XXX revisit this later
222          */
223         len = elem->packet.totalLen > outputLength ? outputLength :
224                  elem->packet.totalLen;
225
226         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
227             len == elem->packet.totalLen) {
228             UINT16 sum, *ptr;
229             UINT16 size = (UINT16)(elem->packet.userDataLen +
230                                    elem->hdrInfo.l4Offset +
231                                    (UINT16)sizeof (OVS_PACKET_INFO));
232             RtlCopyMemory(outputBuffer, &elem->packet, size);
233             ASSERT(len - size >=  elem->hdrInfo.l4PayLoad);
234             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
235                                            (UINT8 *)&elem->packet + size,
236                                            elem->hdrInfo.l4PayLoad, 0);
237             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
238                             (elem->hdrInfo.tcpCsumNeeded ?
239                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
240             *ptr = sum;
241             ovsUserStats.l4Csum++;
242         } else {
243             RtlCopyMemory(outputBuffer, &elem->packet, len);
244         }
245
246         *replyLen = len;
247         OvsFreeMemory(elem);
248     }
249     return STATUS_SUCCESS;
250 }
251
252 /* Helper function to allocate a Forwarding Context for an NBL */
253 NTSTATUS
254 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
255                                    PNET_BUFFER_LIST nbl)
256 {
257     return switchContext->NdisSwitchHandlers.
258         AllocateNetBufferListForwardingContext(
259             switchContext->NdisSwitchContext, nbl);
260 }
261
262 /*
263  * --------------------------------------------------------------------------
264  * This function allocates all the stuff necessary for creating an NBL from the
265  * input buffer of specified length, namely, a nonpaged data buffer of size
266  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
267  * context yet. It also copies data from the specified buffer to the NBL.
268  * --------------------------------------------------------------------------
269  */
270 PNET_BUFFER_LIST
271 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
272                             PVOID userBuffer,
273                             ULONG length)
274 {
275     UINT8 *data = NULL;
276     PNET_BUFFER_LIST nbl = NULL;
277     PNET_BUFFER nb;
278     PMDL mdl;
279
280     if (length > OVS_DEFAULT_DATA_SIZE) {
281         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
282                                          OVS_DEFAULT_HEADROOM_SIZE);
283
284     } else {
285         nbl = OvsAllocateFixSizeNBL(switchContext, length,
286                                     OVS_DEFAULT_HEADROOM_SIZE);
287     }
288     if (nbl == NULL) {
289         return NULL;
290     }
291
292     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
293     mdl = NET_BUFFER_CURRENT_MDL(nb);
294     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
295                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
296     if (!data) {
297         OvsCompleteNBL(switchContext, nbl, TRUE);
298         return NULL;
299     }
300
301     NdisMoveMemory(data, userBuffer, length);
302
303     return nbl;
304 }
305
306 /*
307  *----------------------------------------------------------------------------
308  *  OvsNlExecuteCmdHandler --
309  *    Handler for OVS_PACKET_CMD_EXECUTE command.
310  *----------------------------------------------------------------------------
311  */
312 NTSTATUS
313 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
314                        UINT32 *replyLen)
315 {
316     NTSTATUS status = STATUS_SUCCESS;
317     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
318     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
319     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
320     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
321     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
322
323     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
324     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
325
326     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
327     UINT32 keyAttrOffset = 0;
328     OvsPacketExecute execute;
329     NL_ERROR nlError = NL_ERROR_SUCCESS;
330     NL_BUFFER nlBuf;
331
332     static const NL_POLICY nlPktExecPolicy[] = {
333         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
334         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
335         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
336         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
337         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
338                                             .optional = TRUE}
339     };
340
341     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
342
343     /* Get all the top level Flow attributes */
344     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
345                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
346                      != TRUE) {
347         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
348                        nlMsgHdr);
349         status = STATUS_UNSUCCESSFUL;
350         goto done;
351     }
352
353     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
354                     (PCHAR)nlMsgHdr);
355
356     /* Get flow keys attributes */
357     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
358                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
359                            nlFlowKeyPolicy, keyAttrs,
360                            ARRAY_SIZE(keyAttrs))) != TRUE) {
361         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
362         status = STATUS_UNSUCCESSFUL;
363         goto done;
364     }
365
366     execute.dpNo = ovsHdr->dp_ifindex;
367
368     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
369
370     status = OvsExecuteDpIoctl(&execute);
371
372     /* Default reply that we want to send */
373     if (status == STATUS_SUCCESS) {
374         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
375                   usrParamsCtx->outputLength);
376
377         /* Prepare nl Msg headers */
378         status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
379                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
380                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
381                  ovsHdr->dp_ifindex);
382
383         if (status == STATUS_SUCCESS) {
384             *replyLen = msgOut->nlMsg.nlmsgLen;
385         }
386     } else {
387         /* Map NTSTATUS to NL_ERROR */
388         nlError = NlMapStatusToNlErr(status);
389
390         /* As of now there are no transactional errors in the implementation.
391          * Once we have them then we need to map status to correct
392          * nlError value, so that below mentioned code gets hit. */
393         if ((nlError != NL_ERROR_SUCCESS) &&
394             (usrParamsCtx->outputBuffer)) {
395
396             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
397                                            usrParamsCtx->outputBuffer;
398             BuildErrorMsg(msgIn, msgError, nlError);
399             *replyLen = msgError->nlMsg.nlmsgLen;
400             status = STATUS_SUCCESS;
401             goto done;
402         }
403     }
404
405 done:
406     return status;
407 }
408
409 /*
410  *----------------------------------------------------------------------------
411  *  _MapNlAttrToOvsPktExec --
412  *    Maps input Netlink attributes to OvsPacketExecute.
413  *----------------------------------------------------------------------------
414  */
415 static VOID
416 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
417                        OvsPacketExecute *execute)
418 {
419     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
420     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
421
422     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
423     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
424
425     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
426 }
427
428 NTSTATUS
429 OvsExecuteDpIoctl(OvsPacketExecute *execute)
430 {
431     NTSTATUS                    status = STATUS_SUCCESS;
432     NTSTATUS                    ndisStatus;
433     LOCK_STATE_EX               lockState;
434     PNET_BUFFER_LIST pNbl;
435     PNL_ATTR actions;
436     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
437     OvsFlowKey key;
438     OVS_PACKET_HDR_INFO layers;
439     POVS_VPORT_ENTRY vport;
440
441     NdisAcquireSpinLock(gOvsCtrlLock);
442     if (gOvsSwitchContext == NULL) {
443         status = STATUS_INVALID_PARAMETER;
444         goto unlock;
445     }
446
447     if (execute->packetLen == 0) {
448         status = STATUS_INVALID_PARAMETER;
449         goto unlock;
450     }
451
452     actions = execute->actions;
453
454     ASSERT(actions);
455
456     /*
457      * Allocate the NBL, copy the data from the userspace buffer. Allocate
458      * also, the forwarding context for the packet.
459      */
460     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
461                                        execute->packetLen);
462     if (pNbl == NULL) {
463         status = STATUS_NO_MEMORY;
464         goto unlock;
465     }
466
467     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
468     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
469     if (vport) {
470         fwdDetail->SourcePortId = vport->portId;
471         fwdDetail->SourceNicIndex = vport->nicIndex;
472     } else {
473         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
474         fwdDetail->SourceNicIndex = 0;
475     }
476     // XXX: Figure out if any of the other members of fwdDetail need to be set.
477
478     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
479                               NULL);
480     if (ndisStatus == NDIS_STATUS_SUCCESS) {
481         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
482         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
483                               NDIS_RWL_AT_DISPATCH_LEVEL);
484         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
485                                        vport ? vport->portNo :
486                                                OVS_DEFAULT_PORT_NO,
487                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
488                                        &key, NULL, &layers, actions,
489                                        execute->actionsLen);
490         pNbl = NULL;
491         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
492     }
493     if (ndisStatus != NDIS_STATUS_SUCCESS) {
494         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
495             status = STATUS_NOT_SUPPORTED;
496         } else {
497             status = STATUS_UNSUCCESSFUL;
498         }
499     }
500
501     if (pNbl) {
502         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
503     }
504 unlock:
505     NdisReleaseSpinLock(gOvsCtrlLock);
506     return status;
507 }
508
509
510 NTSTATUS
511 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
512 {
513     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
514     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
515
516     if (queue == NULL) {
517         return STATUS_INVALID_PARAMETER;
518     }
519     OvsPurgePacketQueue(queue, instance);
520     return STATUS_SUCCESS;
521 }
522
523 VOID
524 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
525                      PIRP irp)
526 {
527     PIO_STACK_LOCATION irpSp;
528     PFILE_OBJECT fileObject;
529     POVS_OPEN_INSTANCE instance;
530     POVS_USER_PACKET_QUEUE queue = NULL;
531
532     UNREFERENCED_PARAMETER(deviceObject);
533
534     IoReleaseCancelSpinLock(irp->CancelIrql);
535     irpSp = IoGetCurrentIrpStackLocation(irp);
536     fileObject = irpSp->FileObject;
537
538     if (fileObject == NULL) {
539         goto done;
540     }
541     NdisAcquireSpinLock(gOvsCtrlLock);
542     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
543     if (instance) {
544         queue = instance->packetQueue;
545     }
546     if (instance == NULL || queue == NULL) {
547         NdisReleaseSpinLock(gOvsCtrlLock);
548         goto done;
549     }
550     NdisReleaseSpinLock(gOvsCtrlLock);
551     NdisAcquireSpinLock(&queue->queueLock);
552     if (queue->pendingIrp == irp) {
553         queue->pendingIrp = NULL;
554     }
555     NdisReleaseSpinLock(&queue->queueLock);
556 done:
557     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
558 }
559
560
561 NTSTATUS
562 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
563 {
564     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
565     POVS_USER_PACKET_QUEUE queue =
566                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
567     NTSTATUS status = STATUS_SUCCESS;
568     BOOLEAN cancelled = FALSE;
569
570     if (queue == NULL) {
571         return STATUS_INVALID_PARAMETER;
572     }
573     NdisAcquireSpinLock(&queue->queueLock);
574     if (queue->instance != instance) {
575         NdisReleaseSpinLock(&queue->queueLock);
576         return STATUS_INVALID_PARAMETER;
577     }
578     if (queue->pendingIrp) {
579         NdisReleaseSpinLock(&queue->queueLock);
580         return STATUS_DEVICE_BUSY;
581     }
582     if (queue->numPackets == 0) {
583         PDRIVER_CANCEL cancelRoutine;
584         IoMarkIrpPending(irp);
585         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
586         if (irp->Cancel) {
587             cancelRoutine = IoSetCancelRoutine(irp, NULL);
588             if (cancelRoutine) {
589                 cancelled = TRUE;
590             }
591         } else {
592             queue->pendingIrp = irp;
593         }
594         status = STATUS_PENDING;
595     }
596     NdisReleaseSpinLock(&queue->queueLock);
597     if (cancelled) {
598         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
599         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
600     }
601     return status;
602 }
603
604
605 POVS_PACKET_QUEUE_ELEM
606 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
607 {
608     POVS_USER_PACKET_QUEUE queue;
609     PLIST_ENTRY link;
610     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
611     if (queue == NULL) {
612         return NULL;
613     }
614     NdisAcquireSpinLock(&queue->queueLock);
615     if (queue->instance != instance || queue->numPackets == 0) {
616         NdisReleaseSpinLock(&queue->queueLock);
617         return NULL;
618     }
619     link = RemoveHeadList(&queue->packetList);
620     queue->numPackets--;
621     NdisReleaseSpinLock(&queue->queueLock);
622     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
623 }
624
625
626 POVS_USER_PACKET_QUEUE
627 OvsGetQueue(UINT32 queueId)
628 {
629     POVS_USER_PACKET_QUEUE queue;
630     if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
631         return NULL;
632     }
633     queue = &ovsPacketQueues[queueId];
634     return queue->instance != NULL ? queue : NULL;
635 }
636
637 VOID
638 OvsQueuePackets(UINT32 queueId,
639                 PLIST_ENTRY packetList,
640                 UINT32 numElems)
641 {
642     POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
643     POVS_PACKET_QUEUE_ELEM elem;
644     PIRP irp = NULL;
645     PLIST_ENTRY  link;
646     UINT32 num = 0;
647
648     OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
649                   queueId, numElems);
650     if (queue == NULL) {
651         goto cleanup;
652     }
653
654     NdisAcquireSpinLock(&queue->queueLock);
655     if (queue->instance == NULL) {
656         NdisReleaseSpinLock(&queue->queueLock);
657         goto cleanup;
658     } else {
659         OvsAppendList(&queue->packetList, packetList);
660         queue->numPackets += numElems;
661     }
662     if (queue->pendingIrp) {
663         PDRIVER_CANCEL cancelRoutine;
664         irp = queue->pendingIrp;
665         queue->pendingIrp = NULL;
666         cancelRoutine = IoSetCancelRoutine(irp, NULL);
667         if (cancelRoutine == NULL) {
668             irp = NULL;
669         }
670     }
671     NdisReleaseSpinLock(&queue->queueLock);
672     if (irp) {
673         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
674     }
675
676 cleanup:
677     while (!IsListEmpty(packetList)) {
678         link = RemoveHeadList(packetList);
679         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
680         OvsFreeMemory(elem);
681         num++;
682     }
683     OVS_LOG_LOUD("Exit: drop %u packets", num);
684 }
685
686
687 /*
688  *----------------------------------------------------------------------------
689  * OvsCreateAndAddPackets --
690  *
691  *  Create a packet and forwarded to user space.
692  *
693  *  This function would fragment packet if needed, and queue
694  *  each segment to user space.
695  *----------------------------------------------------------------------------
696  */
697 NTSTATUS
698 OvsCreateAndAddPackets(PVOID userData,
699                        UINT32 userDataLen,
700                        UINT32 cmd,
701                        UINT32 inPort,
702                        OvsFlowKey *key,
703                        PNET_BUFFER_LIST nbl,
704                        BOOLEAN isRecv,
705                        POVS_PACKET_HDR_INFO hdrInfo,
706                        POVS_SWITCH_CONTEXT switchContext,
707                        LIST_ENTRY *list,
708                        UINT32 *num)
709 {
710     POVS_PACKET_QUEUE_ELEM elem;
711     PNET_BUFFER_LIST newNbl = NULL;
712     PNET_BUFFER nb;
713
714     if (hdrInfo->isTcp) {
715         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
716         UINT32 packetLength;
717
718         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
719         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
720         packetLength = NET_BUFFER_DATA_LENGTH(nb);
721
722         OVS_LOG_TRACE("MSS %u packet len %u",
723                 tsoInfo.LsoV1Transmit.MSS, packetLength);
724         if (tsoInfo.LsoV1Transmit.MSS) {
725             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
726             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
727                     tsoInfo.LsoV1Transmit.MSS , 0);
728             if (newNbl == NULL) {
729                 return NDIS_STATUS_FAILURE;
730             }
731             nbl = newNbl;
732         }
733     }
734
735     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
736     while (nb) {
737         elem = OvsCreateQueueNlPacket(userData, userDataLen,
738                                     cmd, inPort, key, nbl, nb,
739                                     isRecv, hdrInfo);
740         if (elem) {
741             InsertTailList(list, &elem->link);
742             (*num)++;
743         }
744         nb = NET_BUFFER_NEXT_NB(nb);
745     }
746     if (newNbl) {
747         OvsCompleteNBL(switchContext, newNbl, TRUE);
748     }
749     return NDIS_STATUS_SUCCESS;
750 }
751
752 static __inline UINT32
753 OvsGetUpcallMsgSize(PVOID userData,
754                     UINT32 userDataLen,
755                     OvsIPv4TunnelKey *tunnelKey,
756                     UINT32 payload)
757 {
758     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
759                   NlAttrSize(payload) +
760                   NlAttrSize(OvsFlowKeyAttrSize());
761
762     /* OVS_PACKET_ATTR_USERDATA */
763     if (userData) {
764         size += NlAttrTotalSize(userDataLen);
765     }
766     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
767     /* Is it included in the the flwo key attr XXX */
768     if (tunnelKey) {
769         size += NlAttrTotalSize(OvsTunKeyAttrSize());
770     }
771     return size;
772 }
773
774 /*
775  *----------------------------------------------------------------------------
776  * This function completes the IP Header csum. record the L4 payload offset and
777  * if there is a need to calculate the TCP or UDP csum. The actual csum will be
778  * caluculated simopultaneossly with the copy of the payload to the destination
779  * buffer when the packet is read.
780  *----------------------------------------------------------------------------
781  */
782 static VOID
783 OvsCompletePacketHeader(UINT8 *packet,
784                         BOOLEAN isRecv,
785                         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
786                         POVS_PACKET_HDR_INFO hdrInfoIn,
787                         POVS_PACKET_HDR_INFO hdrInfoOut)
788 {
789     if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
790         (!isRecv && csumInfo.Transmit.IsIPv4 &&
791         csumInfo.Transmit.IpHeaderChecksum)) {
792         PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
793         ASSERT(hdrInfoIn->isIPv4);
794         ASSERT(ipHdr->Version == 4);
795         ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
796             ipHdr->HeaderLength << 2,
797             (UINT16)~ipHdr->HeaderChecksum);
798         ovsUserStats.ipCsum++;
799     }
800     ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
801     /*
802      * calculate TCP/UDP pseudo checksum
803      */
804     if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
805         /*
806          * Only this case, we need to reclaculate pseudo checksum
807          * all other cases, it is assumed the pseudo checksum is
808          * filled already.
809          *
810          */
811         PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
812         if (hdrInfoIn->isIPv4) {
813             PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
814             hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
815                                     (ipHdr->HeaderLength << 2));
816             tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
817                                          (UINT32 *)&ipHdr->DestinationAddress,
818                                          IPPROTO_TCP, hdrInfoOut->l4PayLoad);
819         } else {
820             PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
821             hdrInfoOut->l4PayLoad =
822                 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
823                 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
824                 hdrInfoIn->l4Offset);
825             ASSERT(hdrInfoIn->isIPv6);
826             tcpHdr->th_sum =
827                 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
828                 (UINT32 *)&ipv6Hdr->DestinationAddress,
829                 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
830         }
831         hdrInfoOut->tcpCsumNeeded = 1;
832         ovsUserStats.recalTcpCsum++;
833     } else if (!isRecv) {
834         if (csumInfo.Transmit.TcpChecksum) {
835             hdrInfoOut->tcpCsumNeeded = 1;
836         } else if (csumInfo.Transmit.UdpChecksum) {
837             hdrInfoOut->udpCsumNeeded = 1;
838         }
839         if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
840 #ifdef DBG
841             UINT16 sum, *ptr;
842             UINT8 proto =
843                 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
844 #endif
845             if (hdrInfoIn->isIPv4) {
846                 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
847                 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
848                     (ipHdr->HeaderLength << 2));
849 #ifdef DBG
850                 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
851                     (UINT32 *)&ipHdr->DestinationAddress,
852                     proto, hdrInfoOut->l4PayLoad);
853 #endif
854             } else {
855                 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
856                     hdrInfoIn->l3Offset);
857                 hdrInfoOut->l4PayLoad =
858                     (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
859                     hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
860                     hdrInfoIn->l4Offset);
861                 ASSERT(hdrInfoIn->isIPv6);
862 #ifdef DBG
863                 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
864                     (UINT32 *)&ipv6Hdr->DestinationAddress,
865                     proto, hdrInfoOut->l4PayLoad);
866 #endif
867             }
868 #ifdef DBG
869             ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
870                 (hdrInfoOut->tcpCsumNeeded ?
871             TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
872             ASSERT(*ptr == sum);
873 #endif
874         }
875     }
876 }
877
878 static NTSTATUS
879 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
880 {
881     UNREFERENCED_PARAMETER(nb);
882
883     /* XXX select a pid from an array of pids using a flow based hash */
884     *pid = vport->upcallPid;
885     return STATUS_SUCCESS;
886 }
887
888 /*
889  *----------------------------------------------------------------------------
890  * OvsCreateQueueNlPacket --
891  *
892  *  Create a packet which will be forwarded to user space.
893  *
894  * InputParameter:
895  *   userData: when cmd is user action, this field contain
896  *      user action data.
897  *   userDataLen: as name indicated
898  *   cmd: either miss or user action
899  *   inPort: datapath port id from which the packet is received.
900  *   key: flow Key with a tunnel key if available
901  *   nbl:  the NET_BUFFER_LIST which contain the packet
902  *   nb: the packet
903  *   isRecv: This is used to decide how to interprete the csum info
904  *   hdrInfo: include hdr info initialized during flow extraction.
905  *
906  * Results:
907  *    NULL if fail to create the packet
908  *    The packet element otherwise
909  *----------------------------------------------------------------------------
910  */
911 POVS_PACKET_QUEUE_ELEM
912 OvsCreateQueueNlPacket(PVOID userData,
913                        UINT32 userDataLen,
914                        UINT32 cmd,
915                        UINT32 inPort,
916                        OvsFlowKey *key,
917                        PNET_BUFFER_LIST nbl,
918                        PNET_BUFFER nb,
919                        BOOLEAN isRecv,
920                        POVS_PACKET_HDR_INFO hdrInfo)
921 {
922 #define VLAN_TAG_SIZE 4
923     UINT32 allocLen, dataLen, extraLen;
924     POVS_PACKET_QUEUE_ELEM elem;
925     UINT8 *src, *dst;
926     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
927     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
928     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
929     UINT32 pid;
930     UINT32 nlMsgSize;
931     NL_BUFFER nlBuf;
932
933     /* XXX pass vport in the stack rather than portNo */
934     POVS_VPORT_ENTRY vport =
935         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
936
937     if (vport == NULL){
938         /* Should never happen as dispatch lock is held */
939         ASSERT(vport);
940         return NULL;
941     }
942
943     if (!OvsGetPid(vport, nb, &pid)) {
944         /*
945          * There is no userspace queue created yet, so there is no point for
946          * creating a new packet to be queued.
947          */
948         return NULL;
949     }
950
951     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
952
953     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
954                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
955                   csumInfo.Receive.IpChecksumFailed)) {
956         OVS_LOG_INFO("Packet dropped due to checksum failure.");
957         ovsUserStats.dropDuetoChecksum++;
958         return NULL;
959     }
960
961     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
962     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
963
964     dataLen = NET_BUFFER_DATA_LENGTH(nb);
965
966     if (NlAttrSize(dataLen) > MAXUINT16) {
967         return NULL;
968     }
969
970     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
971                                     dataLen + extraLen);
972
973     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
974     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
975     if (elem == NULL) {
976         ovsUserStats.dropDuetoResource++;
977         return NULL;
978     }
979     elem->hdrInfo.value = hdrInfo->value;
980     elem->packet.totalLen = nlMsgSize;
981     /* XXX remove queueid */
982     elem->packet.queue = 0;
983     /* XXX  no need as the length is already in the NL attrib */
984     elem->packet.userDataLen = userDataLen;
985     elem->packet.inPort = inPort;
986     elem->packet.cmd = cmd;
987     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
988         ovsUserStats.miss++;
989     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
990         ovsUserStats.action++;
991     } else {
992         ASSERT(FALSE);
993         goto fail;
994     }
995     /* XXX Should we have both packetLen and TotalLen*/
996     elem->packet.packetLen = dataLen + extraLen;
997
998     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
999
1000     /*
1001      * Initialize the OVS header
1002      * Since we are pre allocating memory for the NL buffer
1003      * the attribute settings should not fail
1004      */
1005     if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1006                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1007                       gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1008         goto fail;
1009     }
1010
1011     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1012                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1013         goto fail;
1014     }
1015
1016     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1017     if (userData){
1018         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1019                                 userData, (UINT16)userDataLen)) {
1020             goto fail;
1021         }
1022     }
1023
1024     /*
1025      * Make space for the payload to be copied and set the attribute
1026      * XXX Uninit set initilizes the buffer with xero, we don't actually need
1027      * that the payload to be initailized
1028      */
1029     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1030                                             (UINT16)(dataLen + extraLen));
1031     if (!dst) {
1032         goto fail;
1033     }
1034
1035     /* Store the payload for csum calculation when packet is read */
1036     elem->packet.payload = dst;
1037     dst += extraLen;
1038
1039     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1040     if (src == NULL) {
1041         ovsUserStats.dropDuetoResource++;
1042         goto fail;
1043     }    else if (src != dst) {
1044         /* Copy the data from the NDIS buffer to dst. */
1045         RtlCopyMemory(dst, src, dataLen);
1046     }
1047
1048     /* Set csum if was offloaded */
1049     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1050
1051     /*
1052      * Finally insert VLAN tag
1053      */
1054     if (extraLen) {
1055         dst = elem->packet.payload;
1056         src = dst + extraLen;
1057         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1058         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1059         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1060         dst += 12;
1061         ((UINT16 *)dst)[0] = htons(0x8100);
1062         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1063             (vlanInfo.TagHeader.UserPriority << 13));
1064         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1065         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1066         ovsUserStats.vlanInsert++;
1067     }
1068     return elem;
1069 fail:
1070     OvsFreeMemory(elem);
1071     return NULL;
1072 }