datapath-windows: refactor BuildReplyMsgFromMsgIn & BuildErrorMsg
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Switch.h"
26 #include "Vport.h"
27 #include "Event.h"
28 #include "User.h"
29 #include "Datapath.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35 #include "Jhash.h"
36
37 #ifdef OVS_DBG_MOD
38 #undef OVS_DBG_MOD
39 #endif
40 #define OVS_DBG_MOD OVS_DBG_USER
41 #include "Debug.h"
42
/* Dequeues the next queued packet of an open instance; defined later in this
 * file. */
POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
/* Control lock and switch context; both are defined outside this file. */
extern PNDIS_SPIN_LOCK gOvsCtrlLock;
extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
/* Counters bumped by this file (e.g. l4Csum, ipCsum, recalTcpCsum). */
OVS_USER_STATS ovsUserStats;

/* Copies parsed Netlink attributes into an OvsPacketExecute; defined later in
 * this file. */
static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
                                   OvsPacketExecute  *execute);
/* Policy used to parse the nested flow key attributes; defined outside this
 * file. */
extern NL_POLICY nlFlowKeyPolicy[];
51
52 static __inline VOID
53 OvsAcquirePidHashLock()
54 {
55     NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
56 }
57
58 static __inline VOID
59 OvsReleasePidHashLock()
60 {
61     NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
62 }
63
64
65 static VOID
66 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
67                     POVS_OPEN_INSTANCE instance)
68 {
69     PLIST_ENTRY link, next;
70     LIST_ENTRY tmp;
71     POVS_PACKET_QUEUE_ELEM elem;
72
73     InitializeListHead(&tmp);
74     NdisAcquireSpinLock(&queue->queueLock);
75     if (queue->instance != instance) {
76         NdisReleaseSpinLock(&queue->queueLock);
77         return;
78     }
79
80     if (queue->numPackets) {
81         OvsAppendList(&tmp, &queue->packetList);
82         queue->numPackets = 0;
83     }
84     NdisReleaseSpinLock(&queue->queueLock);
85     LIST_FORALL_SAFE(&tmp, link, next) {
86         RemoveEntryList(link);
87         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
88         OvsFreeMemory(elem);
89     }
90 }
91
92 VOID
93 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
94 {
95     POVS_USER_PACKET_QUEUE queue;
96     POVS_PACKET_QUEUE_ELEM elem;
97     PLIST_ENTRY link, next;
98     LIST_ENTRY tmp;
99     PIRP irp = NULL;
100
101     ASSERT(instance);
102     InitializeListHead(&tmp);
103     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
104     if (queue) {
105         PDRIVER_CANCEL cancelRoutine;
106         NdisAcquireSpinLock(&queue->queueLock);
107         ASSERT(queue->instance == instance);
108         /* XXX Should not happen */
109         if (queue->instance != instance) {
110             NdisReleaseSpinLock(&queue->queueLock);
111             NdisFreeSpinLock(&queue->queueLock);
112             return;
113         }
114
115         if (queue->numPackets) {
116             OvsAppendList(&tmp, &queue->packetList);
117             queue->numPackets = 0;
118         }
119         queue->instance = NULL;
120         instance->packetQueue = NULL;
121         irp = queue->pendingIrp;
122         queue->pendingIrp = NULL;
123         if (irp) {
124             cancelRoutine = IoSetCancelRoutine(irp, NULL);
125             if (cancelRoutine == NULL) {
126                 irp = NULL;
127             }
128         }
129         NdisReleaseSpinLock(&queue->queueLock);
130         NdisFreeSpinLock(&queue->queueLock);
131     }
132     LIST_FORALL_SAFE(&tmp, link, next) {
133         RemoveEntryList(link);
134         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
135         OvsFreeMemory(elem);
136     }
137     if (irp) {
138         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
139     }
140     if (queue) {
141         OvsFreeMemory(queue);
142     }
143
144     /* Verify if gOvsSwitchContext exists. */
145     OvsAcquireCtrlLock();
146     if (gOvsSwitchContext) {
147         /* Remove the instance from pidHashArray */
148         OvsAcquirePidHashLock();
149         OvsDelPidInstance(gOvsSwitchContext, instance->pid);
150         OvsReleasePidHashLock();
151     }
152     OvsReleaseCtrlLock();
153 }
154
155 NTSTATUS
156 OvsSubscribeDpIoctl(PVOID instanceP,
157                     UINT32 pid,
158                     UINT8 join)
159 {
160     POVS_USER_PACKET_QUEUE queue;
161     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
162
163     if (instance->packetQueue && !join) {
164         /* unsubscribe */
165         OvsCleanupPacketQueue(instance);
166
167         OvsAcquirePidHashLock();
168         /* Remove the instance from pidHashArray */
169         OvsDelPidInstance(gOvsSwitchContext, pid);
170         OvsReleasePidHashLock();
171
172     } else if (instance->packetQueue == NULL && join) {
173         queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
174         if (queue == NULL) {
175             return STATUS_NO_MEMORY;
176         }
177         InitializeListHead(&(instance->pidLink));
178         instance->packetQueue = queue;
179         RtlZeroMemory(queue, sizeof (*queue));
180         NdisAllocateSpinLock(&queue->queueLock);
181         NdisAcquireSpinLock(&queue->queueLock);
182         InitializeListHead(&queue->packetList);
183         queue->pid = pid;
184         queue->instance = instance;
185         instance->packetQueue = queue;
186         NdisReleaseSpinLock(&queue->queueLock);
187
188         OvsAcquirePidHashLock();
189         /* Insert the instance to pidHashArray */
190         OvsAddPidInstance(gOvsSwitchContext, pid, instance);
191         OvsReleasePidHashLock();
192
193     } else {
194         /* user mode should call only once for subscribe */
195         return STATUS_INVALID_PARAMETER;
196     }
197
198     return STATUS_SUCCESS;
199 }
200
201
202 NTSTATUS
203 OvsReadDpIoctl(PFILE_OBJECT fileObject,
204                PVOID outputBuffer,
205                UINT32 outputLength,
206                UINT32 *replyLen)
207 {
208     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
209     POVS_PACKET_QUEUE_ELEM elem;
210     UINT32 len;
211
212 #define TCP_CSUM_OFFSET  16
213 #define UDP_CSUM_OFFSET  6
214     ASSERT(instance);
215
216     if (instance->packetQueue == NULL) {
217         return STATUS_INVALID_PARAMETER;
218     }
219     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
220         return STATUS_BUFFER_TOO_SMALL;
221     }
222
223     elem = OvsGetNextPacket(instance);
224     if (elem) {
225         /*
226          * XXX revisit this later
227          */
228         len = elem->packet.totalLen > outputLength ? outputLength :
229                  elem->packet.totalLen;
230
231         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
232             len == elem->packet.totalLen) {
233             UINT16 sum, *ptr;
234             UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
235                                   elem->hdrInfo.l4Offset);
236             RtlCopyMemory(outputBuffer, &elem->packet.data, size);
237             ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
238             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
239                                            (UINT8 *)&elem->packet.data + size,
240                                            elem->hdrInfo.l4PayLoad, 0);
241             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
242                             (elem->hdrInfo.tcpCsumNeeded ?
243                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
244             *ptr = sum;
245             ovsUserStats.l4Csum++;
246         } else {
247             RtlCopyMemory(outputBuffer, &elem->packet.data, len);
248         }
249
250         *replyLen = len;
251         OvsFreeMemory(elem);
252     }
253     return STATUS_SUCCESS;
254 }
255
256 /* Helper function to allocate a Forwarding Context for an NBL */
257 NTSTATUS
258 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
259                                    PNET_BUFFER_LIST nbl)
260 {
261     return switchContext->NdisSwitchHandlers.
262         AllocateNetBufferListForwardingContext(
263             switchContext->NdisSwitchContext, nbl);
264 }
265
266 /*
267  * --------------------------------------------------------------------------
268  * This function allocates all the stuff necessary for creating an NBL from the
269  * input buffer of specified length, namely, a nonpaged data buffer of size
270  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
271  * context yet. It also copies data from the specified buffer to the NBL.
272  * --------------------------------------------------------------------------
273  */
274 PNET_BUFFER_LIST
275 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
276                             PVOID userBuffer,
277                             ULONG length)
278 {
279     UINT8 *data = NULL;
280     PNET_BUFFER_LIST nbl = NULL;
281     PNET_BUFFER nb;
282     PMDL mdl;
283
284     if (length > OVS_DEFAULT_DATA_SIZE) {
285         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
286                                          OVS_DEFAULT_HEADROOM_SIZE);
287
288     } else {
289         nbl = OvsAllocateFixSizeNBL(switchContext, length,
290                                     OVS_DEFAULT_HEADROOM_SIZE);
291     }
292     if (nbl == NULL) {
293         return NULL;
294     }
295
296     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
297     mdl = NET_BUFFER_CURRENT_MDL(nb);
298     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
299                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
300     if (!data) {
301         OvsCompleteNBL(switchContext, nbl, TRUE);
302         return NULL;
303     }
304
305     NdisMoveMemory(data, userBuffer, length);
306
307     return nbl;
308 }
309
310 /*
311  *----------------------------------------------------------------------------
312  *  OvsNlExecuteCmdHandler --
313  *    Handler for OVS_PACKET_CMD_EXECUTE command.
314  *----------------------------------------------------------------------------
315  */
316 NTSTATUS
317 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
318                        UINT32 *replyLen)
319 {
320     NTSTATUS status = STATUS_SUCCESS;
321     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
322     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
323     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
324     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
325     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
326
327     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
328     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
329
330     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
331     UINT32 keyAttrOffset = 0;
332     OvsPacketExecute execute;
333     NL_ERROR nlError = NL_ERROR_SUCCESS;
334     NL_BUFFER nlBuf;
335
336     static const NL_POLICY nlPktExecPolicy[] = {
337         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
338         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
339         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
340         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
341         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
342                                             .optional = TRUE}
343     };
344
345     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
346
347     /* Get all the top level Flow attributes */
348     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
349                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
350                      != TRUE) {
351         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
352                        nlMsgHdr);
353         status = STATUS_UNSUCCESSFUL;
354         goto done;
355     }
356
357     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
358                     (PCHAR)nlMsgHdr);
359
360     /* Get flow keys attributes */
361     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
362                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
363                            nlFlowKeyPolicy, keyAttrs,
364                            ARRAY_SIZE(keyAttrs))) != TRUE) {
365         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
366         status = STATUS_UNSUCCESSFUL;
367         goto done;
368     }
369
370     execute.dpNo = ovsHdr->dp_ifindex;
371
372     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
373
374     status = OvsExecuteDpIoctl(&execute);
375
376     /* Default reply that we want to send */
377     if (status == STATUS_SUCCESS) {
378         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
379                   usrParamsCtx->outputLength);
380
381         /* Prepare nl Msg headers */
382         status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
383                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
384                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
385                  ovsHdr->dp_ifindex);
386
387         if (status == STATUS_SUCCESS) {
388             *replyLen = msgOut->nlMsg.nlmsgLen;
389         }
390     } else {
391         /* Map NTSTATUS to NL_ERROR */
392         nlError = NlMapStatusToNlErr(status);
393
394         /* As of now there are no transactional errors in the implementation.
395          * Once we have them then we need to map status to correct
396          * nlError value, so that below mentioned code gets hit. */
397         if ((nlError != NL_ERROR_SUCCESS) &&
398             (usrParamsCtx->outputBuffer)) {
399
400             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
401                                            usrParamsCtx->outputBuffer;
402             NlBuildErrorMsg(msgIn, msgError, nlError);
403             *replyLen = msgError->nlMsg.nlmsgLen;
404             status = STATUS_SUCCESS;
405             goto done;
406         }
407     }
408
409 done:
410     return status;
411 }
412
413 /*
414  *----------------------------------------------------------------------------
415  *  _MapNlAttrToOvsPktExec --
416  *    Maps input Netlink attributes to OvsPacketExecute.
417  *----------------------------------------------------------------------------
418  */
419 static VOID
420 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
421                        OvsPacketExecute *execute)
422 {
423     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
424     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
425
426     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
427     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
428
429     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
430 }
431
432 NTSTATUS
433 OvsExecuteDpIoctl(OvsPacketExecute *execute)
434 {
435     NTSTATUS                    status = STATUS_SUCCESS;
436     NTSTATUS                    ndisStatus;
437     LOCK_STATE_EX               lockState;
438     PNET_BUFFER_LIST pNbl;
439     PNL_ATTR actions;
440     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
441     OvsFlowKey key;
442     OVS_PACKET_HDR_INFO layers;
443     POVS_VPORT_ENTRY vport;
444
445     NdisAcquireSpinLock(gOvsCtrlLock);
446
447     if (execute->packetLen == 0) {
448         status = STATUS_INVALID_PARAMETER;
449         goto unlock;
450     }
451
452     actions = execute->actions;
453
454     ASSERT(actions);
455
456     /*
457      * Allocate the NBL, copy the data from the userspace buffer. Allocate
458      * also, the forwarding context for the packet.
459      */
460     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
461                                        execute->packetLen);
462     if (pNbl == NULL) {
463         status = STATUS_NO_MEMORY;
464         goto unlock;
465     }
466
467     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
468     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
469     if (vport) {
470         fwdDetail->SourcePortId = vport->portId;
471         fwdDetail->SourceNicIndex = vport->nicIndex;
472     } else {
473         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
474         fwdDetail->SourceNicIndex = 0;
475     }
476     // XXX: Figure out if any of the other members of fwdDetail need to be set.
477
478     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
479                               NULL);
480     if (ndisStatus == NDIS_STATUS_SUCCESS) {
481         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
482         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
483                               NDIS_RWL_AT_DISPATCH_LEVEL);
484         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
485                                        vport ? vport->portNo :
486                                                OVS_DEFAULT_PORT_NO,
487                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
488                                        &key, NULL, &layers, actions,
489                                        execute->actionsLen);
490         pNbl = NULL;
491         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
492     }
493     if (ndisStatus != NDIS_STATUS_SUCCESS) {
494         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
495             status = STATUS_NOT_SUPPORTED;
496         } else {
497             status = STATUS_UNSUCCESSFUL;
498         }
499     }
500
501     if (pNbl) {
502         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
503     }
504 unlock:
505     NdisReleaseSpinLock(gOvsCtrlLock);
506     return status;
507 }
508
509
510 NTSTATUS
511 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
512 {
513     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
514     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
515
516     if (queue == NULL) {
517         return STATUS_INVALID_PARAMETER;
518     }
519     OvsPurgePacketQueue(queue, instance);
520     return STATUS_SUCCESS;
521 }
522
523 VOID
524 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
525                      PIRP irp)
526 {
527     PIO_STACK_LOCATION irpSp;
528     PFILE_OBJECT fileObject;
529     POVS_OPEN_INSTANCE instance;
530     POVS_USER_PACKET_QUEUE queue = NULL;
531
532     UNREFERENCED_PARAMETER(deviceObject);
533
534     IoReleaseCancelSpinLock(irp->CancelIrql);
535     irpSp = IoGetCurrentIrpStackLocation(irp);
536     fileObject = irpSp->FileObject;
537
538     if (fileObject == NULL) {
539         goto done;
540     }
541     NdisAcquireSpinLock(gOvsCtrlLock);
542     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
543     if (instance) {
544         queue = instance->packetQueue;
545     }
546     if (instance == NULL || queue == NULL) {
547         NdisReleaseSpinLock(gOvsCtrlLock);
548         goto done;
549     }
550     NdisReleaseSpinLock(gOvsCtrlLock);
551     NdisAcquireSpinLock(&queue->queueLock);
552     if (queue->pendingIrp == irp) {
553         queue->pendingIrp = NULL;
554     }
555     NdisReleaseSpinLock(&queue->queueLock);
556 done:
557     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
558 }
559
560
561 NTSTATUS
562 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
563 {
564     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
565     POVS_USER_PACKET_QUEUE queue =
566                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
567     NTSTATUS status = STATUS_SUCCESS;
568     BOOLEAN cancelled = FALSE;
569
570     if (queue == NULL) {
571         return STATUS_INVALID_PARAMETER;
572     }
573     NdisAcquireSpinLock(&queue->queueLock);
574     if (queue->instance != instance) {
575         NdisReleaseSpinLock(&queue->queueLock);
576         return STATUS_INVALID_PARAMETER;
577     }
578     if (queue->pendingIrp) {
579         NdisReleaseSpinLock(&queue->queueLock);
580         return STATUS_DEVICE_BUSY;
581     }
582     if (queue->numPackets == 0) {
583         PDRIVER_CANCEL cancelRoutine;
584         IoMarkIrpPending(irp);
585         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
586         if (irp->Cancel) {
587             cancelRoutine = IoSetCancelRoutine(irp, NULL);
588             if (cancelRoutine) {
589                 cancelled = TRUE;
590             }
591         } else {
592             queue->pendingIrp = irp;
593         }
594         status = STATUS_PENDING;
595     }
596     NdisReleaseSpinLock(&queue->queueLock);
597     if (cancelled) {
598         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
599         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
600     }
601     return status;
602 }
603
604
605 POVS_PACKET_QUEUE_ELEM
606 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
607 {
608     POVS_USER_PACKET_QUEUE queue;
609     PLIST_ENTRY link;
610     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
611     if (queue == NULL) {
612         return NULL;
613     }
614     NdisAcquireSpinLock(&queue->queueLock);
615     if (queue->instance != instance || queue->numPackets == 0) {
616         NdisReleaseSpinLock(&queue->queueLock);
617         return NULL;
618     }
619     link = RemoveHeadList(&queue->packetList);
620     queue->numPackets--;
621     NdisReleaseSpinLock(&queue->queueLock);
622     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
623 }
624
625 /*
626  * ---------------------------------------------------------------------------
627  * Given a pid, returns the corresponding USER_PACKET_QUEUE.
628  * gOvsCtrlLock must be acquired before calling this API.
629  * ---------------------------------------------------------------------------
630  */
631 POVS_USER_PACKET_QUEUE
632 OvsGetQueue(UINT32 pid)
633 {
634     POVS_OPEN_INSTANCE instance;
635     POVS_USER_PACKET_QUEUE ret = NULL;
636
637     instance = OvsGetPidInstance(gOvsSwitchContext, pid);
638
639     if (instance) {
640         ret = instance->packetQueue;
641     }
642
643     return ret;
644 }
645
646 /*
647  * ---------------------------------------------------------------------------
648  * Given a pid, returns the corresponding instance.
649  * pidHashLock must be acquired before calling this API.
650  * ---------------------------------------------------------------------------
651  */
652 POVS_OPEN_INSTANCE
653 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
654 {
655     POVS_OPEN_INSTANCE instance;
656     PLIST_ENTRY head, link;
657     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
658                                 OVS_HASH_BASIS);
659     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
660     LIST_FORALL(head, link) {
661         instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
662         if (instance->pid == pid) {
663             return instance;
664         }
665     }
666     return NULL;
667 }
668
669 /*
670  * ---------------------------------------------------------------------------
671  * Given a pid and an instance. This API adds instance to pidHashArray.
672  * pidHashLock must be acquired before calling this API.
673  * ---------------------------------------------------------------------------
674  */
675 VOID
676 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
677                   POVS_OPEN_INSTANCE instance)
678 {
679     PLIST_ENTRY head;
680     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
681                                 OVS_HASH_BASIS);
682     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
683     InsertHeadList(head, &(instance->pidLink));
684 }
685
686 /*
687  * ---------------------------------------------------------------------------
688  * Given a pid and an instance. This API removes instance from pidHashArray.
689  * pidHashLock must be acquired before calling this API.
690  * ---------------------------------------------------------------------------
691  */
692 VOID
693 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
694 {
695     POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
696
697     if (instance) {
698         RemoveEntryList(&(instance->pidLink));
699     }
700 }
701
702 VOID
703 OvsQueuePackets(PLIST_ENTRY packetList,
704                 UINT32 numElems)
705 {
706     POVS_USER_PACKET_QUEUE upcallQueue = NULL;
707     POVS_PACKET_QUEUE_ELEM elem;
708     PIRP irp = NULL;
709     PLIST_ENTRY  link;
710     UINT32 num = 0;
711     LIST_ENTRY dropPackets;
712
713     OVS_LOG_LOUD("Enter: numELems: %u", numElems);
714
715     InitializeListHead(&dropPackets);
716
717     while (!IsListEmpty(packetList)) {
718         link = RemoveHeadList(packetList);
719         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
720
721         ASSERT(elem);
722
723         OvsAcquirePidHashLock();
724
725         upcallQueue = OvsGetQueue(elem->upcallPid);
726         if (!upcallQueue) {
727             /* No upcall queue found, drop this packet. */
728             InsertTailList(&dropPackets, &elem->link);
729         } else {
730             NdisAcquireSpinLock(&upcallQueue->queueLock);
731
732             if (upcallQueue->instance == NULL) {
733                 InsertTailList(&dropPackets, &elem->link);
734             } else {
735                 InsertTailList(&upcallQueue->packetList, &elem->link);
736                 upcallQueue->numPackets++;
737                 if (upcallQueue->pendingIrp) {
738                     PDRIVER_CANCEL cancelRoutine;
739                     irp = upcallQueue->pendingIrp;
740                     upcallQueue->pendingIrp = NULL;
741                     cancelRoutine = IoSetCancelRoutine(irp, NULL);
742                     if (cancelRoutine == NULL) {
743                         irp = NULL;
744                     }
745                 }
746             }
747
748             if (irp) {
749                 OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
750             }
751
752             NdisReleaseSpinLock(&upcallQueue->queueLock);
753         }
754
755         OvsReleasePidHashLock();
756     }
757
758     while (!IsListEmpty(&dropPackets)) {
759         link = RemoveHeadList(&dropPackets);
760         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
761         OvsFreeMemory(elem);
762         num++;
763     }
764
765     OVS_LOG_LOUD("Exit: drop %u packets", num);
766 }
767
768 /*
769  *----------------------------------------------------------------------------
770  * OvsCreateAndAddPackets --
771  *
772  *  Create a packet and forwarded to user space.
773  *
774  *  This function would fragment packet if needed, and queue
775  *  each segment to user space.
776  *----------------------------------------------------------------------------
777  */
778 NTSTATUS
779 OvsCreateAndAddPackets(PVOID userData,
780                        UINT32 userDataLen,
781                        UINT32 cmd,
782                        UINT32 inPort,
783                        OvsFlowKey *key,
784                        PNET_BUFFER_LIST nbl,
785                        BOOLEAN isRecv,
786                        POVS_PACKET_HDR_INFO hdrInfo,
787                        POVS_SWITCH_CONTEXT switchContext,
788                        LIST_ENTRY *list,
789                        UINT32 *num)
790 {
791     POVS_PACKET_QUEUE_ELEM elem;
792     PNET_BUFFER_LIST newNbl = NULL;
793     PNET_BUFFER nb;
794
795     if (hdrInfo->isTcp) {
796         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
797         UINT32 packetLength;
798
799         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
800         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
801         packetLength = NET_BUFFER_DATA_LENGTH(nb);
802
803         OVS_LOG_TRACE("MSS %u packet len %u",
804                 tsoInfo.LsoV1Transmit.MSS, packetLength);
805         if (tsoInfo.LsoV1Transmit.MSS) {
806             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
807             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
808                     tsoInfo.LsoV1Transmit.MSS , 0);
809             if (newNbl == NULL) {
810                 return NDIS_STATUS_FAILURE;
811             }
812             nbl = newNbl;
813         }
814     }
815
816     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
817     while (nb) {
818         elem = OvsCreateQueueNlPacket(userData, userDataLen,
819                                     cmd, inPort, key, nbl, nb,
820                                     isRecv, hdrInfo);
821         if (elem) {
822             InsertTailList(list, &elem->link);
823             (*num)++;
824         }
825         nb = NET_BUFFER_NEXT_NB(nb);
826     }
827     if (newNbl) {
828         OvsCompleteNBL(switchContext, newNbl, TRUE);
829     }
830     return NDIS_STATUS_SUCCESS;
831 }
832
833 static __inline UINT32
834 OvsGetUpcallMsgSize(PVOID userData,
835                     UINT32 userDataLen,
836                     OvsIPv4TunnelKey *tunnelKey,
837                     UINT32 payload)
838 {
839     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
840                   NlAttrSize(payload) +
841                   NlAttrSize(OvsFlowKeyAttrSize());
842
843     /* OVS_PACKET_ATTR_USERDATA */
844     if (userData) {
845         size += NlAttrTotalSize(userDataLen);
846     }
847     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
848     /* Is it included in the the flwo key attr XXX */
849     if (tunnelKey) {
850         size += NlAttrTotalSize(OvsTunKeyAttrSize());
851     }
852     return size;
853 }
854
855 /*
856  *----------------------------------------------------------------------------
 * This function completes the IP header csum, records the L4 payload offset,
 * and notes whether the TCP or UDP csum still needs to be calculated. The
 * actual csum will be calculated simultaneously with the copy of the payload
 * to the destination buffer when the packet is read.
861  *----------------------------------------------------------------------------
862  */
863 static VOID
864 OvsCompletePacketHeader(UINT8 *packet,
865                         BOOLEAN isRecv,
866                         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
867                         POVS_PACKET_HDR_INFO hdrInfoIn,
868                         POVS_PACKET_HDR_INFO hdrInfoOut)
869 {
870     if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
871         (!isRecv && csumInfo.Transmit.IsIPv4 &&
872         csumInfo.Transmit.IpHeaderChecksum)) {
873         PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
874         ASSERT(hdrInfoIn->isIPv4);
875         ASSERT(ipHdr->Version == 4);
876         ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
877             ipHdr->HeaderLength << 2,
878             (UINT16)~ipHdr->HeaderChecksum);
879         ovsUserStats.ipCsum++;
880     }
881     ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
882     /*
883      * calculate TCP/UDP pseudo checksum
884      */
885     if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
886         /*
887          * Only this case, we need to reclaculate pseudo checksum
888          * all other cases, it is assumed the pseudo checksum is
889          * filled already.
890          *
891          */
892         PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
893         if (hdrInfoIn->isIPv4) {
894             PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
895             hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
896                                     (ipHdr->HeaderLength << 2));
897             tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
898                                          (UINT32 *)&ipHdr->DestinationAddress,
899                                          IPPROTO_TCP, hdrInfoOut->l4PayLoad);
900         } else {
901             PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
902             hdrInfoOut->l4PayLoad =
903                 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
904                 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
905                 hdrInfoIn->l4Offset);
906             ASSERT(hdrInfoIn->isIPv6);
907             tcpHdr->th_sum =
908                 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
909                 (UINT32 *)&ipv6Hdr->DestinationAddress,
910                 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
911         }
912         hdrInfoOut->tcpCsumNeeded = 1;
913         ovsUserStats.recalTcpCsum++;
914     } else if (!isRecv) {
915         if (csumInfo.Transmit.TcpChecksum) {
916             hdrInfoOut->tcpCsumNeeded = 1;
917         } else if (csumInfo.Transmit.UdpChecksum) {
918             hdrInfoOut->udpCsumNeeded = 1;
919         }
920         if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
921 #ifdef DBG
922             UINT16 sum, *ptr;
923             UINT8 proto =
924                 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
925 #endif
926             if (hdrInfoIn->isIPv4) {
927                 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
928                 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
929                     (ipHdr->HeaderLength << 2));
930 #ifdef DBG
931                 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
932                     (UINT32 *)&ipHdr->DestinationAddress,
933                     proto, hdrInfoOut->l4PayLoad);
934 #endif
935             } else {
936                 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
937                     hdrInfoIn->l3Offset);
938                 hdrInfoOut->l4PayLoad =
939                     (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
940                     hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
941                     hdrInfoIn->l4Offset);
942                 ASSERT(hdrInfoIn->isIPv6);
943 #ifdef DBG
944                 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
945                     (UINT32 *)&ipv6Hdr->DestinationAddress,
946                     proto, hdrInfoOut->l4PayLoad);
947 #endif
948             }
949 #ifdef DBG
950             ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
951                 (hdrInfoOut->tcpCsumNeeded ?
952             TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
953             ASSERT(*ptr == sum);
954 #endif
955         }
956     }
957 }
958
959 static NTSTATUS
960 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
961 {
962     UNREFERENCED_PARAMETER(nb);
963
964     ASSERT(vport);
965
966     /* XXX select a pid from an array of pids using a flow based hash */
967     *pid = vport->upcallPid;
968     return STATUS_SUCCESS;
969 }
970
971 /*
972  *----------------------------------------------------------------------------
973  * OvsCreateQueueNlPacket --
974  *
975  *  Create a packet which will be forwarded to user space.
976  *
977  * InputParameter:
978  *   userData: when cmd is user action, this field contain
979  *      user action data.
980  *   userDataLen: as name indicated
981  *   cmd: either miss or user action
982  *   inPort: datapath port id from which the packet is received.
983  *   key: flow Key with a tunnel key if available
984  *   nbl:  the NET_BUFFER_LIST which contain the packet
985  *   nb: the packet
986  *   isRecv: This is used to decide how to interprete the csum info
987  *   hdrInfo: include hdr info initialized during flow extraction.
988  *
989  * Results:
990  *    NULL if fail to create the packet
991  *    The packet element otherwise
992  *----------------------------------------------------------------------------
993  */
994 POVS_PACKET_QUEUE_ELEM
995 OvsCreateQueueNlPacket(PVOID userData,
996                        UINT32 userDataLen,
997                        UINT32 cmd,
998                        UINT32 inPort,
999                        OvsFlowKey *key,
1000                        PNET_BUFFER_LIST nbl,
1001                        PNET_BUFFER nb,
1002                        BOOLEAN isRecv,
1003                        POVS_PACKET_HDR_INFO hdrInfo)
1004 {
1005 #define VLAN_TAG_SIZE 4
1006     UINT32 allocLen, dataLen, extraLen;
1007     POVS_PACKET_QUEUE_ELEM elem;
1008     UINT8 *src, *dst;
1009     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
1010     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
1011     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
1012     UINT32 pid;
1013     UINT32 nlMsgSize;
1014     NL_BUFFER nlBuf;
1015     PNL_MSG_HDR nlMsg;
1016
1017     /* XXX pass vport in the stack rather than portNo */
1018     POVS_VPORT_ENTRY vport =
1019         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
1020
1021     if (vport == NULL){
1022         /* No vport is not fatal. */
1023         return NULL;
1024     }
1025
1026     OvsGetPid(vport, nb, &pid);
1027
1028     if (!pid) {
1029         /*
1030          * There is no userspace queue created yet, so there is no point for
1031          * creating a new packet to be queued.
1032          */
1033         return NULL;
1034     }
1035
1036     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
1037
1038     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1039                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1040                   csumInfo.Receive.IpChecksumFailed)) {
1041         OVS_LOG_INFO("Packet dropped due to checksum failure.");
1042         ovsUserStats.dropDuetoChecksum++;
1043         return NULL;
1044     }
1045
1046     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1047     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1048
1049     dataLen = NET_BUFFER_DATA_LENGTH(nb);
1050
1051     if (NlAttrSize(dataLen) > MAXUINT16) {
1052         return NULL;
1053     }
1054
1055     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1056                                     dataLen + extraLen);
1057
1058     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1059     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
1060     if (elem == NULL) {
1061         ovsUserStats.dropDuetoResource++;
1062         return NULL;
1063     }
1064     elem->hdrInfo.value = hdrInfo->value;
1065     elem->upcallPid = pid;
1066     elem->packet.totalLen = nlMsgSize;
1067     /* XXX remove queueid */
1068     elem->packet.queue = 0;
1069     /* XXX  no need as the length is already in the NL attrib */
1070     elem->packet.userDataLen = userDataLen;
1071     elem->packet.inPort = inPort;
1072     elem->packet.cmd = cmd;
1073     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1074         ovsUserStats.miss++;
1075     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1076         ovsUserStats.action++;
1077     } else {
1078         ASSERT(FALSE);
1079         goto fail;
1080     }
1081     /* XXX Should we have both packetLen and TotalLen*/
1082     elem->packet.packetLen = dataLen + extraLen;
1083
1084     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1085
1086     /*
1087      * Initialize the OVS header
1088      * Since we are pre allocating memory for the NL buffer
1089      * the attribute settings should not fail
1090      */
1091     if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1092                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1093                       gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1094         goto fail;
1095     }
1096
1097     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1098                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1099         goto fail;
1100     }
1101
1102     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1103     if (userData){
1104         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1105                                 userData, (UINT16)userDataLen)) {
1106             goto fail;
1107         }
1108     }
1109
1110     /*
1111      * Make space for the payload to be copied and set the attribute
1112      * XXX Uninit set initilizes the buffer with xero, we don't actually need
1113      * that the payload to be initailized
1114      */
1115     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1116                                             (UINT16)(dataLen + extraLen));
1117     if (!dst) {
1118         goto fail;
1119     }
1120
1121     /* Store the payload for csum calculation when packet is read */
1122     elem->packet.payload = dst;
1123     dst += extraLen;
1124
1125     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1126     if (src == NULL) {
1127         ovsUserStats.dropDuetoResource++;
1128         goto fail;
1129     }    else if (src != dst) {
1130         /* Copy the data from the NDIS buffer to dst. */
1131         RtlCopyMemory(dst, src, dataLen);
1132     }
1133
1134     /* Set csum if was offloaded */
1135     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1136
1137     /*
1138      * Finally insert VLAN tag
1139      */
1140     if (extraLen) {
1141         dst = elem->packet.payload;
1142         src = dst + extraLen;
1143         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1144         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1145         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1146         dst += 12;
1147         ((UINT16 *)dst)[0] = htons(0x8100);
1148         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1149             (vlanInfo.TagHeader.UserPriority << 13));
1150         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1151         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1152         ovsUserStats.vlanInsert++;
1153     }
1154
1155     nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1156     nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1157     /* 'totalLen' should be size of valid data. */
1158     elem->packet.totalLen = nlMsg->nlmsgLen;
1159
1160     return elem;
1161 fail:
1162     OvsFreeMemory(elem);
1163     return NULL;
1164 }