treewide: Fix doubled "the".
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Switch.h"
26 #include "Vport.h"
27 #include "Event.h"
28 #include "User.h"
29 #include "Datapath.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35 #include "Jhash.h"
36
37 #ifdef OVS_DBG_MOD
38 #undef OVS_DBG_MOD
39 #endif
40 #define OVS_DBG_MOD OVS_DBG_USER
41 #include "Debug.h"
42
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
46 OVS_USER_STATS ovsUserStats;
47
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49                                    OvsPacketExecute  *execute);
50 extern NL_POLICY nlFlowKeyPolicy[];
51
52 static __inline VOID
53 OvsAcquirePidHashLock()
54 {
55     NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
56 }
57
58 static __inline VOID
59 OvsReleasePidHashLock()
60 {
61     NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
62 }
63
64
65 static VOID
66 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
67                     POVS_OPEN_INSTANCE instance)
68 {
69     PLIST_ENTRY link, next;
70     LIST_ENTRY tmp;
71     POVS_PACKET_QUEUE_ELEM elem;
72
73     InitializeListHead(&tmp);
74     NdisAcquireSpinLock(&queue->queueLock);
75     if (queue->instance != instance) {
76         NdisReleaseSpinLock(&queue->queueLock);
77         return;
78     }
79
80     if (queue->numPackets) {
81         OvsAppendList(&tmp, &queue->packetList);
82         queue->numPackets = 0;
83     }
84     NdisReleaseSpinLock(&queue->queueLock);
85     LIST_FORALL_SAFE(&tmp, link, next) {
86         RemoveEntryList(link);
87         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
88         OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
89     }
90 }
91
92 VOID
93 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
94 {
95     POVS_USER_PACKET_QUEUE queue;
96     POVS_PACKET_QUEUE_ELEM elem;
97     PLIST_ENTRY link, next;
98     LIST_ENTRY tmp;
99     PIRP irp = NULL;
100
101     ASSERT(instance);
102     InitializeListHead(&tmp);
103     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
104     if (queue) {
105         PDRIVER_CANCEL cancelRoutine;
106         NdisAcquireSpinLock(&queue->queueLock);
107         ASSERT(queue->instance == instance);
108         /* XXX Should not happen */
109         if (queue->instance != instance) {
110             NdisReleaseSpinLock(&queue->queueLock);
111             NdisFreeSpinLock(&queue->queueLock);
112             return;
113         }
114
115         if (queue->numPackets) {
116             OvsAppendList(&tmp, &queue->packetList);
117             queue->numPackets = 0;
118         }
119         queue->instance = NULL;
120         instance->packetQueue = NULL;
121         irp = queue->pendingIrp;
122         queue->pendingIrp = NULL;
123         if (irp) {
124             cancelRoutine = IoSetCancelRoutine(irp, NULL);
125             if (cancelRoutine == NULL) {
126                 irp = NULL;
127             }
128         }
129         NdisReleaseSpinLock(&queue->queueLock);
130         NdisFreeSpinLock(&queue->queueLock);
131     }
132     LIST_FORALL_SAFE(&tmp, link, next) {
133         RemoveEntryList(link);
134         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
135         OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
136     }
137     if (irp) {
138         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
139     }
140     if (queue) {
141         OvsFreeMemoryWithTag(queue, OVS_USER_POOL_TAG);
142     }
143
144     /* Verify if gOvsSwitchContext exists. */
145     if (gOvsSwitchContext) {
146         /* Remove the instance from pidHashArray */
147         OvsAcquirePidHashLock();
148         OvsDelPidInstance(gOvsSwitchContext, instance->pid);
149         OvsReleasePidHashLock();
150     }
151 }
152
153 NTSTATUS
154 OvsSubscribeDpIoctl(PVOID instanceP,
155                     UINT32 pid,
156                     UINT8 join)
157 {
158     POVS_USER_PACKET_QUEUE queue;
159     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
160
161     if (instance->packetQueue && !join) {
162         /* unsubscribe */
163         OvsCleanupPacketQueue(instance);
164     } else if (instance->packetQueue == NULL && join) {
165         queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemoryWithTag(
166             sizeof *queue, OVS_USER_POOL_TAG);
167         if (queue == NULL) {
168             return STATUS_NO_MEMORY;
169         }
170         InitializeListHead(&(instance->pidLink));
171         instance->packetQueue = queue;
172         RtlZeroMemory(queue, sizeof (*queue));
173         NdisAllocateSpinLock(&queue->queueLock);
174         NdisAcquireSpinLock(&queue->queueLock);
175         InitializeListHead(&queue->packetList);
176         queue->pid = pid;
177         queue->instance = instance;
178         instance->packetQueue = queue;
179         NdisReleaseSpinLock(&queue->queueLock);
180
181         OvsAcquirePidHashLock();
182         /* Insert the instance to pidHashArray */
183         OvsAddPidInstance(gOvsSwitchContext, pid, instance);
184         OvsReleasePidHashLock();
185
186     } else {
187         /* user mode should call only once for subscribe */
188         return STATUS_INVALID_PARAMETER;
189     }
190
191     return STATUS_SUCCESS;
192 }
193
194
195 NTSTATUS
196 OvsReadDpIoctl(PFILE_OBJECT fileObject,
197                PVOID outputBuffer,
198                UINT32 outputLength,
199                UINT32 *replyLen)
200 {
201     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
202     POVS_PACKET_QUEUE_ELEM elem;
203     UINT32 len;
204
205 #define TCP_CSUM_OFFSET  16
206 #define UDP_CSUM_OFFSET  6
207     ASSERT(instance);
208
209     if (instance->packetQueue == NULL) {
210         return STATUS_INVALID_PARAMETER;
211     }
212     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
213         return STATUS_BUFFER_TOO_SMALL;
214     }
215
216     elem = OvsGetNextPacket(instance);
217     if (elem) {
218         /*
219          * XXX revisit this later
220          */
221         len = elem->packet.totalLen > outputLength ? outputLength :
222                  elem->packet.totalLen;
223
224         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
225             len == elem->packet.totalLen) {
226             UINT16 sum, *ptr;
227             UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
228                                   elem->hdrInfo.l4Offset);
229             RtlCopyMemory(outputBuffer, &elem->packet.data, size);
230             ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
231             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
232                                            (UINT8 *)&elem->packet.data + size,
233                                            elem->hdrInfo.l4PayLoad, 0);
234             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
235                             (elem->hdrInfo.tcpCsumNeeded ?
236                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
237             *ptr = sum;
238             ovsUserStats.l4Csum++;
239         } else {
240             RtlCopyMemory(outputBuffer, &elem->packet.data, len);
241         }
242
243         *replyLen = len;
244         OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
245     }
246     return STATUS_SUCCESS;
247 }
248
249 /* Helper function to allocate a Forwarding Context for an NBL */
250 NTSTATUS
251 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
252                                    PNET_BUFFER_LIST nbl)
253 {
254     return switchContext->NdisSwitchHandlers.
255         AllocateNetBufferListForwardingContext(
256             switchContext->NdisSwitchContext, nbl);
257 }
258
259 /*
260  * --------------------------------------------------------------------------
261  * This function allocates all the stuff necessary for creating an NBL from the
262  * input buffer of specified length, namely, a nonpaged data buffer of size
263  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
264  * context yet. It also copies data from the specified buffer to the NBL.
265  * --------------------------------------------------------------------------
266  */
267 PNET_BUFFER_LIST
268 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
269                             PVOID userBuffer,
270                             ULONG length)
271 {
272     UINT8 *data = NULL;
273     PNET_BUFFER_LIST nbl = NULL;
274     PNET_BUFFER nb;
275     PMDL mdl;
276
277     if (length > OVS_DEFAULT_DATA_SIZE) {
278         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
279                                          OVS_DEFAULT_HEADROOM_SIZE);
280
281     } else {
282         nbl = OvsAllocateFixSizeNBL(switchContext, length,
283                                     OVS_DEFAULT_HEADROOM_SIZE);
284     }
285     if (nbl == NULL) {
286         return NULL;
287     }
288
289     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
290     mdl = NET_BUFFER_CURRENT_MDL(nb);
291     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
292                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
293     if (!data) {
294         OvsCompleteNBL(switchContext, nbl, TRUE);
295         return NULL;
296     }
297
298     NdisMoveMemory(data, userBuffer, length);
299
300     return nbl;
301 }
302
303 /*
304  *----------------------------------------------------------------------------
305  *  OvsNlExecuteCmdHandler --
306  *    Handler for OVS_PACKET_CMD_EXECUTE command.
307  *----------------------------------------------------------------------------
308  */
309 NTSTATUS
310 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
311                        UINT32 *replyLen)
312 {
313     NTSTATUS status = STATUS_SUCCESS;
314     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
315     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
316     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
317     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
318     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
319
320     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
321     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
322
323     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
324     UINT32 keyAttrOffset = 0;
325     OvsPacketExecute execute;
326     NL_ERROR nlError = NL_ERROR_SUCCESS;
327     NL_BUFFER nlBuf;
328
329     static const NL_POLICY nlPktExecPolicy[] = {
330         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
331         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
332         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
333         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
334         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
335                                             .optional = TRUE}
336     };
337
338     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
339
340     /* Get all the top level Flow attributes */
341     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
342                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
343                      != TRUE) {
344         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
345                        nlMsgHdr);
346         status = STATUS_UNSUCCESSFUL;
347         goto done;
348     }
349
350     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
351                     (PCHAR)nlMsgHdr);
352
353     /* Get flow keys attributes */
354     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
355                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
356                            nlFlowKeyPolicy, keyAttrs,
357                            ARRAY_SIZE(keyAttrs))) != TRUE) {
358         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
359         status = STATUS_UNSUCCESSFUL;
360         goto done;
361     }
362
363     execute.dpNo = ovsHdr->dp_ifindex;
364
365     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
366
367     status = OvsExecuteDpIoctl(&execute);
368
369     /* Default reply that we want to send */
370     if (status == STATUS_SUCCESS) {
371         BOOLEAN ok;
372
373         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
374                   usrParamsCtx->outputLength);
375
376         /* Prepare nl Msg headers */
377         ok = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
378                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
379                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
380                  ovsHdr->dp_ifindex);
381
382         if (ok) {
383             *replyLen = msgOut->nlMsg.nlmsgLen;
384         } else {
385             status = STATUS_INVALID_BUFFER_SIZE;
386         }
387     } else {
388         /* Map NTSTATUS to NL_ERROR */
389         nlError = NlMapStatusToNlErr(status);
390
391         /* As of now there are no transactional errors in the implementation.
392          * Once we have them then we need to map status to correct
393          * nlError value, so that below mentioned code gets hit. */
394         if ((nlError != NL_ERROR_SUCCESS) &&
395             (usrParamsCtx->outputBuffer)) {
396
397             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
398                                            usrParamsCtx->outputBuffer;
399             NlBuildErrorMsg(msgIn, msgError, nlError);
400             *replyLen = msgError->nlMsg.nlmsgLen;
401             status = STATUS_SUCCESS;
402             goto done;
403         }
404     }
405
406 done:
407     return status;
408 }
409
410 /*
411  *----------------------------------------------------------------------------
412  *  _MapNlAttrToOvsPktExec --
413  *    Maps input Netlink attributes to OvsPacketExecute.
414  *----------------------------------------------------------------------------
415  */
416 static VOID
417 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
418                        OvsPacketExecute *execute)
419 {
420     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
421     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
422
423     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
424     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
425
426     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
427 }
428
429 NTSTATUS
430 OvsExecuteDpIoctl(OvsPacketExecute *execute)
431 {
432     NTSTATUS                    status = STATUS_SUCCESS;
433     NTSTATUS                    ndisStatus;
434     LOCK_STATE_EX               lockState;
435     PNET_BUFFER_LIST pNbl;
436     PNL_ATTR actions;
437     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
438     OvsFlowKey key;
439     OVS_PACKET_HDR_INFO layers;
440     POVS_VPORT_ENTRY vport;
441
442     if (execute->packetLen == 0) {
443         status = STATUS_INVALID_PARAMETER;
444         goto exit;
445     }
446
447     actions = execute->actions;
448
449     ASSERT(actions);
450
451     /*
452      * Allocate the NBL, copy the data from the userspace buffer. Allocate
453      * also, the forwarding context for the packet.
454      */
455     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
456                                        execute->packetLen);
457     if (pNbl == NULL) {
458         status = STATUS_NO_MEMORY;
459         goto exit;
460     }
461
462     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
463     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
464     if (vport) {
465         fwdDetail->SourcePortId = vport->portId;
466         fwdDetail->SourceNicIndex = vport->nicIndex;
467     } else {
468         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
469         fwdDetail->SourceNicIndex = 0;
470     }
471     // XXX: Figure out if any of the other members of fwdDetail need to be set.
472
473     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
474                                 NULL);
475     if (ndisStatus == NDIS_STATUS_SUCCESS) {
476         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, 0);
477         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
478                                        vport ? vport->portNo :
479                                                OVS_DEFAULT_PORT_NO,
480                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
481                                        &key, NULL, &layers, actions,
482                                        execute->actionsLen);
483         pNbl = NULL;
484         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
485     }
486     if (ndisStatus != NDIS_STATUS_SUCCESS) {
487         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
488             status = STATUS_NOT_SUPPORTED;
489         } else {
490             status = STATUS_UNSUCCESSFUL;
491         }
492     }
493
494     if (pNbl) {
495         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
496     }
497 exit:
498     return status;
499 }
500
501
502 NTSTATUS
503 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
504 {
505     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
506     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
507
508     if (queue == NULL) {
509         return STATUS_INVALID_PARAMETER;
510     }
511     OvsPurgePacketQueue(queue, instance);
512     return STATUS_SUCCESS;
513 }
514
515 VOID
516 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
517                      PIRP irp)
518 {
519     PIO_STACK_LOCATION irpSp;
520     PFILE_OBJECT fileObject;
521     POVS_OPEN_INSTANCE instance;
522     POVS_USER_PACKET_QUEUE queue = NULL;
523
524     UNREFERENCED_PARAMETER(deviceObject);
525
526     IoReleaseCancelSpinLock(irp->CancelIrql);
527     irpSp = IoGetCurrentIrpStackLocation(irp);
528     fileObject = irpSp->FileObject;
529
530     if (fileObject == NULL) {
531         goto done;
532     }
533     NdisAcquireSpinLock(gOvsCtrlLock);
534     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
535     if (instance) {
536         queue = instance->packetQueue;
537     }
538     if (instance == NULL || queue == NULL) {
539         NdisReleaseSpinLock(gOvsCtrlLock);
540         goto done;
541     }
542     NdisReleaseSpinLock(gOvsCtrlLock);
543     NdisAcquireSpinLock(&queue->queueLock);
544     if (queue->pendingIrp == irp) {
545         queue->pendingIrp = NULL;
546     }
547     NdisReleaseSpinLock(&queue->queueLock);
548 done:
549     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
550 }
551
552
553 NTSTATUS
554 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
555 {
556     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
557     POVS_USER_PACKET_QUEUE queue =
558                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
559     NTSTATUS status = STATUS_SUCCESS;
560     BOOLEAN cancelled = FALSE;
561
562     if (queue == NULL) {
563         return STATUS_INVALID_PARAMETER;
564     }
565     NdisAcquireSpinLock(&queue->queueLock);
566     if (queue->instance != instance) {
567         NdisReleaseSpinLock(&queue->queueLock);
568         return STATUS_INVALID_PARAMETER;
569     }
570     if (queue->pendingIrp) {
571         NdisReleaseSpinLock(&queue->queueLock);
572         return STATUS_DEVICE_BUSY;
573     }
574     if (queue->numPackets == 0) {
575         PDRIVER_CANCEL cancelRoutine;
576         IoMarkIrpPending(irp);
577         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
578         if (irp->Cancel) {
579             cancelRoutine = IoSetCancelRoutine(irp, NULL);
580             if (cancelRoutine) {
581                 cancelled = TRUE;
582             }
583         } else {
584             queue->pendingIrp = irp;
585         }
586         status = STATUS_PENDING;
587     }
588     NdisReleaseSpinLock(&queue->queueLock);
589     if (cancelled) {
590         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
591         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
592     }
593     return status;
594 }
595
596
597 POVS_PACKET_QUEUE_ELEM
598 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
599 {
600     POVS_USER_PACKET_QUEUE queue;
601     PLIST_ENTRY link;
602     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
603     if (queue == NULL) {
604         return NULL;
605     }
606     NdisAcquireSpinLock(&queue->queueLock);
607     if (queue->instance != instance || queue->numPackets == 0) {
608         NdisReleaseSpinLock(&queue->queueLock);
609         return NULL;
610     }
611     link = RemoveHeadList(&queue->packetList);
612     queue->numPackets--;
613     NdisReleaseSpinLock(&queue->queueLock);
614     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
615 }
616
617 /*
618  * ---------------------------------------------------------------------------
619  * Given a pid, returns the corresponding USER_PACKET_QUEUE.
620  * ---------------------------------------------------------------------------
621  */
622 POVS_USER_PACKET_QUEUE
623 OvsGetQueue(UINT32 pid)
624 {
625     POVS_OPEN_INSTANCE instance;
626     POVS_USER_PACKET_QUEUE ret = NULL;
627
628     instance = OvsGetPidInstance(gOvsSwitchContext, pid);
629
630     if (instance) {
631         ret = instance->packetQueue;
632     }
633
634     return ret;
635 }
636
637 /*
638  * ---------------------------------------------------------------------------
639  * Given a pid, returns the corresponding instance.
640  * pidHashLock must be acquired before calling this API.
641  * ---------------------------------------------------------------------------
642  */
643 POVS_OPEN_INSTANCE
644 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
645 {
646     POVS_OPEN_INSTANCE instance;
647     PLIST_ENTRY head, link;
648     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
649                                 OVS_HASH_BASIS);
650     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
651     LIST_FORALL(head, link) {
652         instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
653         if (instance->pid == pid) {
654             return instance;
655         }
656     }
657     return NULL;
658 }
659
660 /*
661  * ---------------------------------------------------------------------------
662  * Given a pid and an instance. This API adds instance to pidHashArray.
663  * pidHashLock must be acquired before calling this API.
664  * ---------------------------------------------------------------------------
665  */
666 VOID
667 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
668                   POVS_OPEN_INSTANCE instance)
669 {
670     PLIST_ENTRY head;
671     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
672                                 OVS_HASH_BASIS);
673     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
674     InsertHeadList(head, &(instance->pidLink));
675 }
676
677 /*
678  * ---------------------------------------------------------------------------
679  * Given a pid and an instance. This API removes instance from pidHashArray.
680  * pidHashLock must be acquired before calling this API.
681  * ---------------------------------------------------------------------------
682  */
683 VOID
684 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
685 {
686     POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
687
688     if (instance) {
689         RemoveEntryList(&(instance->pidLink));
690     }
691 }
692
693 VOID
694 OvsQueuePackets(PLIST_ENTRY packetList,
695                 UINT32 numElems)
696 {
697     POVS_USER_PACKET_QUEUE upcallQueue = NULL;
698     POVS_PACKET_QUEUE_ELEM elem;
699     PIRP irp = NULL;
700     PLIST_ENTRY  link;
701     UINT32 num = 0;
702     LIST_ENTRY dropPackets;
703
704     OVS_LOG_LOUD("Enter: numELems: %u", numElems);
705
706     InitializeListHead(&dropPackets);
707
708     while (!IsListEmpty(packetList)) {
709         link = RemoveHeadList(packetList);
710         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
711
712         ASSERT(elem);
713
714         OvsAcquirePidHashLock();
715
716         upcallQueue = OvsGetQueue(elem->upcallPid);
717         if (!upcallQueue) {
718             /* No upcall queue found, drop this packet. */
719             InsertTailList(&dropPackets, &elem->link);
720         } else {
721             NdisAcquireSpinLock(&upcallQueue->queueLock);
722
723             if (upcallQueue->instance == NULL) {
724                 InsertTailList(&dropPackets, &elem->link);
725             } else {
726                 InsertTailList(&upcallQueue->packetList, &elem->link);
727                 upcallQueue->numPackets++;
728                 if (upcallQueue->pendingIrp) {
729                     PDRIVER_CANCEL cancelRoutine;
730                     irp = upcallQueue->pendingIrp;
731                     upcallQueue->pendingIrp = NULL;
732                     cancelRoutine = IoSetCancelRoutine(irp, NULL);
733                     if (cancelRoutine == NULL) {
734                         irp = NULL;
735                     }
736                 }
737             }
738
739             if (irp) {
740                 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
741             }
742
743             NdisReleaseSpinLock(&upcallQueue->queueLock);
744         }
745
746         OvsReleasePidHashLock();
747     }
748
749     while (!IsListEmpty(&dropPackets)) {
750         link = RemoveHeadList(&dropPackets);
751         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
752         OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
753         num++;
754     }
755
756     OVS_LOG_LOUD("Exit: drop %u packets", num);
757 }
758
759 /*
760  *----------------------------------------------------------------------------
761  * OvsCreateAndAddPackets --
762  *
763  *  Create a packet and forwarded to user space.
764  *
765  *  This function would fragment packet if needed, and queue
766  *  each segment to user space.
767  *----------------------------------------------------------------------------
768  */
769 NTSTATUS
770 OvsCreateAndAddPackets(PVOID userData,
771                        UINT32 userDataLen,
772                        UINT32 cmd,
773                        UINT32 inPort,
774                        OvsFlowKey *key,
775                        PNET_BUFFER_LIST nbl,
776                        BOOLEAN isRecv,
777                        POVS_PACKET_HDR_INFO hdrInfo,
778                        POVS_SWITCH_CONTEXT switchContext,
779                        LIST_ENTRY *list,
780                        UINT32 *num)
781 {
782     POVS_PACKET_QUEUE_ELEM elem;
783     PNET_BUFFER_LIST newNbl = NULL;
784     PNET_BUFFER nb;
785
786     if (hdrInfo->isTcp) {
787         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
788         UINT32 packetLength;
789
790         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
791         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
792         packetLength = NET_BUFFER_DATA_LENGTH(nb);
793
794         OVS_LOG_TRACE("MSS %u packet len %u",
795                 tsoInfo.LsoV1Transmit.MSS, packetLength);
796         if (tsoInfo.LsoV1Transmit.MSS) {
797             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
798             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
799                     tsoInfo.LsoV1Transmit.MSS , 0);
800             if (newNbl == NULL) {
801                 return NDIS_STATUS_FAILURE;
802             }
803             nbl = newNbl;
804         }
805     }
806
807     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
808     while (nb) {
809         elem = OvsCreateQueueNlPacket(userData, userDataLen,
810                                     cmd, inPort, key, nbl, nb,
811                                     isRecv, hdrInfo);
812         if (elem) {
813             InsertTailList(list, &elem->link);
814             (*num)++;
815         }
816         nb = NET_BUFFER_NEXT_NB(nb);
817     }
818     if (newNbl) {
819         OvsCompleteNBL(switchContext, newNbl, TRUE);
820     }
821     return NDIS_STATUS_SUCCESS;
822 }
823
824 static __inline UINT32
825 OvsGetUpcallMsgSize(PVOID userData,
826                     UINT32 userDataLen,
827                     OvsIPv4TunnelKey *tunnelKey,
828                     UINT32 payload)
829 {
830     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
831                   NlAttrSize(payload) +
832                   NlAttrSize(OvsFlowKeyAttrSize());
833
834     /* OVS_PACKET_ATTR_USERDATA */
835     if (userData) {
836         size += NlAttrTotalSize(userDataLen);
837     }
838     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
839     /* Is it included in the flow key attr XXX */
840     if (tunnelKey) {
841         size += NlAttrTotalSize(OvsTunKeyAttrSize());
842     }
843     return size;
844 }
845
846 /*
847  *----------------------------------------------------------------------------
848  * This function completes the IP header csum. It also records the L4 payload
849  * offset and whether the TCP or UDP csum needs to be calculated. The actual
850  * csum will be calculated simultaneously with the copy of the payload to the
851  * destination buffer when the packet is read.
852  *----------------------------------------------------------------------------
853  */
854 static VOID
855 OvsCompletePacketHeader(UINT8 *packet,
856                         BOOLEAN isRecv,
857                         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
858                         POVS_PACKET_HDR_INFO hdrInfoIn,
859                         POVS_PACKET_HDR_INFO hdrInfoOut)
860 {
861     if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
862         (!isRecv && csumInfo.Transmit.IsIPv4 &&
863         csumInfo.Transmit.IpHeaderChecksum)) {
864         PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
865         ASSERT(hdrInfoIn->isIPv4);
866         ASSERT(ipHdr->Version == 4);
867         ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
868             ipHdr->HeaderLength << 2,
869             (UINT16)~ipHdr->HeaderChecksum);
870         ovsUserStats.ipCsum++;
871     }
872     ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
873     /*
874      * calculate TCP/UDP pseudo checksum
875      */
876     if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
877         /*
878          * Only this case, we need to reclaculate pseudo checksum
879          * all other cases, it is assumed the pseudo checksum is
880          * filled already.
881          *
882          */
883         PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
884         if (hdrInfoIn->isIPv4) {
885             PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
886             hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
887                                     (ipHdr->HeaderLength << 2));
888             tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
889                                          (UINT32 *)&ipHdr->DestinationAddress,
890                                          IPPROTO_TCP, hdrInfoOut->l4PayLoad);
891         } else {
892             PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
893             hdrInfoOut->l4PayLoad =
894                 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
895                 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
896                 hdrInfoIn->l4Offset);
897             ASSERT(hdrInfoIn->isIPv6);
898             tcpHdr->th_sum =
899                 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
900                 (UINT32 *)&ipv6Hdr->DestinationAddress,
901                 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
902         }
903         hdrInfoOut->tcpCsumNeeded = 1;
904         ovsUserStats.recalTcpCsum++;
905     } else if (!isRecv) {
906         if (csumInfo.Transmit.TcpChecksum) {
907             hdrInfoOut->tcpCsumNeeded = 1;
908         } else if (csumInfo.Transmit.UdpChecksum) {
909             hdrInfoOut->udpCsumNeeded = 1;
910         }
911         if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
912 #ifdef DBG
913             UINT16 sum, *ptr;
914             UINT8 proto =
915                 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
916 #endif
917             if (hdrInfoIn->isIPv4) {
918                 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
919                 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
920                     (ipHdr->HeaderLength << 2));
921 #ifdef DBG
922                 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
923                     (UINT32 *)&ipHdr->DestinationAddress,
924                     proto, hdrInfoOut->l4PayLoad);
925 #endif
926             } else {
927                 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
928                     hdrInfoIn->l3Offset);
929                 hdrInfoOut->l4PayLoad =
930                     (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
931                     hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
932                     hdrInfoIn->l4Offset);
933                 ASSERT(hdrInfoIn->isIPv6);
934 #ifdef DBG
935                 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
936                     (UINT32 *)&ipv6Hdr->DestinationAddress,
937                     proto, hdrInfoOut->l4PayLoad);
938 #endif
939             }
940 #ifdef DBG
941             ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
942                 (hdrInfoOut->tcpCsumNeeded ?
943             TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
944             ASSERT(*ptr == sum);
945 #endif
946         }
947     }
948 }
949
950 static NTSTATUS
951 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
952 {
953     UNREFERENCED_PARAMETER(nb);
954
955     ASSERT(vport);
956
957     /* XXX select a pid from an array of pids using a flow based hash */
958     *pid = vport->upcallPid;
959     return STATUS_SUCCESS;
960 }
961
962 /*
963  *----------------------------------------------------------------------------
964  * OvsCreateQueueNlPacket --
965  *
966  *  Create a packet which will be forwarded to user space.
967  *
968  * InputParameter:
969  *   userData: when cmd is user action, this field contain
970  *      user action data.
971  *   userDataLen: as name indicated
972  *   cmd: either miss or user action
973  *   inPort: datapath port id from which the packet is received.
974  *   key: flow Key with a tunnel key if available
975  *   nbl:  the NET_BUFFER_LIST which contain the packet
976  *   nb: the packet
977  *   isRecv: This is used to decide how to interprete the csum info
978  *   hdrInfo: include hdr info initialized during flow extraction.
979  *
980  * Results:
981  *    NULL if fail to create the packet
982  *    The packet element otherwise
983  *----------------------------------------------------------------------------
984  */
985 POVS_PACKET_QUEUE_ELEM
986 OvsCreateQueueNlPacket(PVOID userData,
987                        UINT32 userDataLen,
988                        UINT32 cmd,
989                        UINT32 inPort,
990                        OvsFlowKey *key,
991                        PNET_BUFFER_LIST nbl,
992                        PNET_BUFFER nb,
993                        BOOLEAN isRecv,
994                        POVS_PACKET_HDR_INFO hdrInfo)
995 {
996 #define VLAN_TAG_SIZE 4
997     UINT32 allocLen, dataLen, extraLen;
998     POVS_PACKET_QUEUE_ELEM elem;
999     UINT8 *src, *dst;
1000     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
1001     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
1002     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
1003     UINT32 pid;
1004     UINT32 nlMsgSize;
1005     NL_BUFFER nlBuf;
1006     PNL_MSG_HDR nlMsg;
1007
1008     /* XXX pass vport in the stack rather than portNo */
1009     POVS_VPORT_ENTRY vport =
1010         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
1011
1012     if (vport == NULL){
1013         /* No vport is not fatal. */
1014         return NULL;
1015     }
1016
1017     OvsGetPid(vport, nb, &pid);
1018
1019     if (!pid) {
1020         /*
1021          * There is no userspace queue created yet, so there is no point for
1022          * creating a new packet to be queued.
1023          */
1024         return NULL;
1025     }
1026
1027     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
1028
1029     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1030                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1031                   csumInfo.Receive.IpChecksumFailed)) {
1032         OVS_LOG_INFO("Packet dropped due to checksum failure.");
1033         ovsUserStats.dropDuetoChecksum++;
1034         return NULL;
1035     }
1036
1037     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1038     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1039
1040     dataLen = NET_BUFFER_DATA_LENGTH(nb);
1041
1042     if (NlAttrSize(dataLen) > MAXUINT16) {
1043         return NULL;
1044     }
1045
1046     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1047                                     dataLen + extraLen);
1048
1049     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1050     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemoryWithTag(allocLen,
1051                                                             OVS_USER_POOL_TAG);
1052     if (elem == NULL) {
1053         ovsUserStats.dropDuetoResource++;
1054         return NULL;
1055     }
1056     elem->hdrInfo.value = hdrInfo->value;
1057     elem->upcallPid = pid;
1058     elem->packet.totalLen = nlMsgSize;
1059     /* XXX remove queueid */
1060     elem->packet.queue = 0;
1061     /* XXX  no need as the length is already in the NL attrib */
1062     elem->packet.userDataLen = userDataLen;
1063     elem->packet.inPort = inPort;
1064     elem->packet.cmd = cmd;
1065     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1066         ovsUserStats.miss++;
1067     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1068         ovsUserStats.action++;
1069     } else {
1070         ASSERT(FALSE);
1071         goto fail;
1072     }
1073     /* XXX Should we have both packetLen and TotalLen*/
1074     elem->packet.packetLen = dataLen + extraLen;
1075
1076     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1077
1078     /*
1079      * Initialize the OVS header
1080      * Since we are pre allocating memory for the NL buffer
1081      * the attribute settings should not fail
1082      */
1083     if (!NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1084                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1085                       gOvsSwitchContext->dpNo)) {
1086         goto fail;
1087     }
1088
1089     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1090                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1091         goto fail;
1092     }
1093
1094     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1095     if (userData){
1096         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1097                                 userData, (UINT16)userDataLen)) {
1098             goto fail;
1099         }
1100     }
1101
1102     /*
1103      * Make space for the payload to be copied and set the attribute
1104      * XXX Uninit set initilizes the buffer with xero, we don't actually need
1105      * that the payload to be initailized
1106      */
1107     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1108                                             (UINT16)(dataLen + extraLen));
1109     if (!dst) {
1110         goto fail;
1111     }
1112
1113     /* Store the payload for csum calculation when packet is read */
1114     elem->packet.payload = dst;
1115     dst += extraLen;
1116
1117     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1118     if (src == NULL) {
1119         ovsUserStats.dropDuetoResource++;
1120         goto fail;
1121     }    else if (src != dst) {
1122         /* Copy the data from the NDIS buffer to dst. */
1123         RtlCopyMemory(dst, src, dataLen);
1124     }
1125
1126     /* Set csum if was offloaded */
1127     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1128
1129     /*
1130      * Finally insert VLAN tag
1131      */
1132     if (extraLen) {
1133         dst = elem->packet.payload;
1134         src = dst + extraLen;
1135         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1136         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1137         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1138         dst += 12;
1139         ((UINT16 *)dst)[0] = htons(0x8100);
1140         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1141             (vlanInfo.TagHeader.UserPriority << 13));
1142         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1143         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1144         ovsUserStats.vlanInsert++;
1145     }
1146
1147     nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1148     nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1149     /* 'totalLen' should be size of valid data. */
1150     elem->packet.totalLen = nlMsg->nlmsgLen;
1151
1152     return elem;
1153 fail:
1154     OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
1155     return NULL;
1156 }