datapath-windows: Add code for OvsGetQueue.
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * OvsUser.c
19  *      Manage packet queue for packet miss for userAction.
20  */
21
22
23 #include "precomp.h"
24
25 #include "Switch.h"
26 #include "Vport.h"
27 #include "Event.h"
28 #include "User.h"
29 #include "Datapath.h"
30 #include "PacketIO.h"
31 #include "Checksum.h"
32 #include "NetProto.h"
33 #include "Flow.h"
34 #include "TunnelIntf.h"
35 #include "Jhash.h"
36
37 #ifdef OVS_DBG_MOD
38 #undef OVS_DBG_MOD
39 #endif
40 #define OVS_DBG_MOD OVS_DBG_USER
41 #include "Debug.h"
42
43 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
44 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
45 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
46 OVS_USER_STATS ovsUserStats;
47
48 static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
49                                    OvsPacketExecute  *execute);
50 extern NL_POLICY nlFlowKeyPolicy[];
51
52 static VOID
53 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
54                     POVS_OPEN_INSTANCE instance)
55 {
56     PLIST_ENTRY link, next;
57     LIST_ENTRY tmp;
58     POVS_PACKET_QUEUE_ELEM elem;
59
60     InitializeListHead(&tmp);
61     NdisAcquireSpinLock(&queue->queueLock);
62     if (queue->instance != instance) {
63         NdisReleaseSpinLock(&queue->queueLock);
64         return;
65     }
66
67     if (queue->numPackets) {
68         OvsAppendList(&tmp, &queue->packetList);
69         queue->numPackets = 0;
70     }
71     NdisReleaseSpinLock(&queue->queueLock);
72     LIST_FORALL_SAFE(&tmp, link, next) {
73         RemoveEntryList(link);
74         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
75         OvsFreeMemory(elem);
76     }
77 }
78
79 VOID
80 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
81 {
82     POVS_USER_PACKET_QUEUE queue;
83     POVS_PACKET_QUEUE_ELEM elem;
84     PLIST_ENTRY link, next;
85     LIST_ENTRY tmp;
86     PIRP irp = NULL;
87
88     InitializeListHead(&tmp);
89     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
90     if (queue) {
91         PDRIVER_CANCEL cancelRoutine;
92         NdisAcquireSpinLock(&queue->queueLock);
93         ASSERT(queue->instance == instance);
94         /* XXX Should not happen */
95         if (queue->instance != instance) {
96             NdisReleaseSpinLock(&queue->queueLock);
97             NdisFreeSpinLock(&queue->queueLock);
98             return;
99         }
100
101         if (queue->numPackets) {
102             OvsAppendList(&tmp, &queue->packetList);
103             queue->numPackets = 0;
104         }
105         queue->instance = NULL;
106         instance->packetQueue = NULL;
107         irp = queue->pendingIrp;
108         queue->pendingIrp = NULL;
109         if (irp) {
110             cancelRoutine = IoSetCancelRoutine(irp, NULL);
111             if (cancelRoutine == NULL) {
112                 irp = NULL;
113             }
114         }
115         NdisReleaseSpinLock(&queue->queueLock);
116         NdisFreeSpinLock(&queue->queueLock);
117     }
118     LIST_FORALL_SAFE(&tmp, link, next) {
119         RemoveEntryList(link);
120         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
121         OvsFreeMemory(elem);
122     }
123     if (irp) {
124         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
125     }
126     if (queue) {
127         OvsFreeMemory(queue);
128     }
129 }
130
131 NTSTATUS
132 OvsSubscribeDpIoctl(PVOID instanceP,
133                     UINT32 pid,
134                     UINT8 join)
135 {
136     POVS_USER_PACKET_QUEUE queue;
137     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
138
139     OvsAcquireCtrlLock();
140     if (!gOvsSwitchContext) {
141         OvsReleaseCtrlLock();
142         return STATUS_INVALID_PARAMETER;
143     }
144     OvsReleaseCtrlLock();
145
146     if (instance->packetQueue && !join) {
147         /* unsubscribe */
148         OvsCleanupPacketQueue(instance);
149
150         OvsAcquireCtrlLock();
151         /* Remove the instance from pidHashArray */
152         OvsDelPidInstance(gOvsSwitchContext, pid);
153         OvsReleaseCtrlLock();
154
155     } else if (instance->packetQueue == NULL && join) {
156         queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemory(sizeof *queue);
157         if (queue == NULL) {
158             return STATUS_NO_MEMORY;
159         }
160         InitializeListHead(&(instance->pidLink));
161         instance->packetQueue = queue;
162         RtlZeroMemory(queue, sizeof (*queue));
163         NdisAllocateSpinLock(&queue->queueLock);
164         NdisAcquireSpinLock(&queue->queueLock);
165         InitializeListHead(&queue->packetList);
166         queue->pid = pid;
167         queue->instance = instance;
168         instance->packetQueue = queue;
169         NdisReleaseSpinLock(&queue->queueLock);
170
171         OvsAcquireCtrlLock();
172         /* Insert the instance to pidHashArray */
173         OvsAddPidInstance(gOvsSwitchContext, pid, instance);
174         OvsReleaseCtrlLock();
175
176     } else {
177         /* user mode should call only once for subscribe */
178         return STATUS_INVALID_PARAMETER;
179     }
180
181     return STATUS_SUCCESS;
182 }
183
184
185 NTSTATUS
186 OvsReadDpIoctl(PFILE_OBJECT fileObject,
187                PVOID outputBuffer,
188                UINT32 outputLength,
189                UINT32 *replyLen)
190 {
191     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
192     POVS_PACKET_QUEUE_ELEM elem;
193     UINT32 len;
194
195 #define TCP_CSUM_OFFSET  16
196 #define UDP_CSUM_OFFSET  6
197     ASSERT(instance);
198
199     if (instance->packetQueue == NULL) {
200         return STATUS_INVALID_PARAMETER;
201     }
202     if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) {
203         return STATUS_BUFFER_TOO_SMALL;
204     }
205
206     elem = OvsGetNextPacket(instance);
207     if (elem) {
208         /*
209          * XXX revisit this later
210          */
211         len = elem->packet.totalLen > outputLength ? outputLength :
212                  elem->packet.totalLen;
213
214         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
215             len == elem->packet.totalLen) {
216             UINT16 sum, *ptr;
217             UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
218                                   elem->hdrInfo.l4Offset);
219             RtlCopyMemory(outputBuffer, &elem->packet.data, size);
220             ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
221             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
222                                            (UINT8 *)&elem->packet.data + size,
223                                            elem->hdrInfo.l4PayLoad, 0);
224             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
225                             (elem->hdrInfo.tcpCsumNeeded ?
226                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
227             *ptr = sum;
228             ovsUserStats.l4Csum++;
229         } else {
230             RtlCopyMemory(outputBuffer, &elem->packet.data, len);
231         }
232
233         *replyLen = len;
234         OvsFreeMemory(elem);
235     }
236     return STATUS_SUCCESS;
237 }
238
239 /* Helper function to allocate a Forwarding Context for an NBL */
240 NTSTATUS
241 OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
242                                    PNET_BUFFER_LIST nbl)
243 {
244     return switchContext->NdisSwitchHandlers.
245         AllocateNetBufferListForwardingContext(
246             switchContext->NdisSwitchContext, nbl);
247 }
248
249 /*
250  * --------------------------------------------------------------------------
251  * This function allocates all the stuff necessary for creating an NBL from the
252  * input buffer of specified length, namely, a nonpaged data buffer of size
253  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
254  * context yet. It also copies data from the specified buffer to the NBL.
255  * --------------------------------------------------------------------------
256  */
257 PNET_BUFFER_LIST
258 OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
259                             PVOID userBuffer,
260                             ULONG length)
261 {
262     UINT8 *data = NULL;
263     PNET_BUFFER_LIST nbl = NULL;
264     PNET_BUFFER nb;
265     PMDL mdl;
266
267     if (length > OVS_DEFAULT_DATA_SIZE) {
268         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
269                                          OVS_DEFAULT_HEADROOM_SIZE);
270
271     } else {
272         nbl = OvsAllocateFixSizeNBL(switchContext, length,
273                                     OVS_DEFAULT_HEADROOM_SIZE);
274     }
275     if (nbl == NULL) {
276         return NULL;
277     }
278
279     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
280     mdl = NET_BUFFER_CURRENT_MDL(nb);
281     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
282                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
283     if (!data) {
284         OvsCompleteNBL(switchContext, nbl, TRUE);
285         return NULL;
286     }
287
288     NdisMoveMemory(data, userBuffer, length);
289
290     return nbl;
291 }
292
293 /*
294  *----------------------------------------------------------------------------
295  *  OvsNlExecuteCmdHandler --
296  *    Handler for OVS_PACKET_CMD_EXECUTE command.
297  *----------------------------------------------------------------------------
298  */
299 NTSTATUS
300 OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
301                        UINT32 *replyLen)
302 {
303     NTSTATUS status = STATUS_SUCCESS;
304     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
305     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
306     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
307     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
308     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
309
310     PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
311     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
312
313     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
314     UINT32 keyAttrOffset = 0;
315     OvsPacketExecute execute;
316     NL_ERROR nlError = NL_ERROR_SUCCESS;
317     NL_BUFFER nlBuf;
318
319     static const NL_POLICY nlPktExecPolicy[] = {
320         [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
321         [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
322         [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
323         [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
324         [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
325                                             .optional = TRUE}
326     };
327
328     RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
329
330     /* Get all the top level Flow attributes */
331     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
332                      nlPktExecPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
333                      != TRUE) {
334         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
335                        nlMsgHdr);
336         status = STATUS_UNSUCCESSFUL;
337         goto done;
338     }
339
340     keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
341                     (PCHAR)nlMsgHdr);
342
343     /* Get flow keys attributes */
344     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
345                            NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
346                            nlFlowKeyPolicy, keyAttrs,
347                            ARRAY_SIZE(keyAttrs))) != TRUE) {
348         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
349         status = STATUS_UNSUCCESSFUL;
350         goto done;
351     }
352
353     execute.dpNo = ovsHdr->dp_ifindex;
354
355     _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
356
357     status = OvsExecuteDpIoctl(&execute);
358
359     /* Default reply that we want to send */
360     if (status == STATUS_SUCCESS) {
361         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
362                   usrParamsCtx->outputLength);
363
364         /* Prepare nl Msg headers */
365         status = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
366                  nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
367                  genlMsgHdr->cmd, OVS_PACKET_VERSION,
368                  ovsHdr->dp_ifindex);
369
370         if (status == STATUS_SUCCESS) {
371             *replyLen = msgOut->nlMsg.nlmsgLen;
372         }
373     } else {
374         /* Map NTSTATUS to NL_ERROR */
375         nlError = NlMapStatusToNlErr(status);
376
377         /* As of now there are no transactional errors in the implementation.
378          * Once we have them then we need to map status to correct
379          * nlError value, so that below mentioned code gets hit. */
380         if ((nlError != NL_ERROR_SUCCESS) &&
381             (usrParamsCtx->outputBuffer)) {
382
383             POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
384                                            usrParamsCtx->outputBuffer;
385             BuildErrorMsg(msgIn, msgError, nlError);
386             *replyLen = msgError->nlMsg.nlmsgLen;
387             status = STATUS_SUCCESS;
388             goto done;
389         }
390     }
391
392 done:
393     return status;
394 }
395
396 /*
397  *----------------------------------------------------------------------------
398  *  _MapNlAttrToOvsPktExec --
399  *    Maps input Netlink attributes to OvsPacketExecute.
400  *----------------------------------------------------------------------------
401  */
402 static VOID
403 _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
404                        OvsPacketExecute *execute)
405 {
406     execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
407     execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
408
409     execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
410     execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
411
412     execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
413 }
414
415 NTSTATUS
416 OvsExecuteDpIoctl(OvsPacketExecute *execute)
417 {
418     NTSTATUS                    status = STATUS_SUCCESS;
419     NTSTATUS                    ndisStatus;
420     LOCK_STATE_EX               lockState;
421     PNET_BUFFER_LIST pNbl;
422     PNL_ATTR actions;
423     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
424     OvsFlowKey key;
425     OVS_PACKET_HDR_INFO layers;
426     POVS_VPORT_ENTRY vport;
427
428     NdisAcquireSpinLock(gOvsCtrlLock);
429     if (gOvsSwitchContext == NULL) {
430         status = STATUS_INVALID_PARAMETER;
431         goto unlock;
432     }
433
434     if (execute->packetLen == 0) {
435         status = STATUS_INVALID_PARAMETER;
436         goto unlock;
437     }
438
439     actions = execute->actions;
440
441     ASSERT(actions);
442
443     /*
444      * Allocate the NBL, copy the data from the userspace buffer. Allocate
445      * also, the forwarding context for the packet.
446      */
447     pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, execute->packetBuf,
448                                        execute->packetLen);
449     if (pNbl == NULL) {
450         status = STATUS_NO_MEMORY;
451         goto unlock;
452     }
453
454     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
455     vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort);
456     if (vport) {
457         fwdDetail->SourcePortId = vport->portId;
458         fwdDetail->SourceNicIndex = vport->nicIndex;
459     } else {
460         fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
461         fwdDetail->SourceNicIndex = 0;
462     }
463     // XXX: Figure out if any of the other members of fwdDetail need to be set.
464
465     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
466                               NULL);
467     if (ndisStatus == NDIS_STATUS_SUCCESS) {
468         ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
469         NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
470                               NDIS_RWL_AT_DISPATCH_LEVEL);
471         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
472                                        vport ? vport->portNo :
473                                                OVS_DEFAULT_PORT_NO,
474                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
475                                        &key, NULL, &layers, actions,
476                                        execute->actionsLen);
477         pNbl = NULL;
478         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
479     }
480     if (ndisStatus != NDIS_STATUS_SUCCESS) {
481         if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
482             status = STATUS_NOT_SUPPORTED;
483         } else {
484             status = STATUS_UNSUCCESSFUL;
485         }
486     }
487
488     if (pNbl) {
489         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
490     }
491 unlock:
492     NdisReleaseSpinLock(gOvsCtrlLock);
493     return status;
494 }
495
496
497 NTSTATUS
498 OvsPurgeDpIoctl(PFILE_OBJECT fileObject)
499 {
500     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
501     POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
502
503     if (queue == NULL) {
504         return STATUS_INVALID_PARAMETER;
505     }
506     OvsPurgePacketQueue(queue, instance);
507     return STATUS_SUCCESS;
508 }
509
510 VOID
511 OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject,
512                      PIRP irp)
513 {
514     PIO_STACK_LOCATION irpSp;
515     PFILE_OBJECT fileObject;
516     POVS_OPEN_INSTANCE instance;
517     POVS_USER_PACKET_QUEUE queue = NULL;
518
519     UNREFERENCED_PARAMETER(deviceObject);
520
521     IoReleaseCancelSpinLock(irp->CancelIrql);
522     irpSp = IoGetCurrentIrpStackLocation(irp);
523     fileObject = irpSp->FileObject;
524
525     if (fileObject == NULL) {
526         goto done;
527     }
528     NdisAcquireSpinLock(gOvsCtrlLock);
529     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
530     if (instance) {
531         queue = instance->packetQueue;
532     }
533     if (instance == NULL || queue == NULL) {
534         NdisReleaseSpinLock(gOvsCtrlLock);
535         goto done;
536     }
537     NdisReleaseSpinLock(gOvsCtrlLock);
538     NdisAcquireSpinLock(&queue->queueLock);
539     if (queue->pendingIrp == irp) {
540         queue->pendingIrp = NULL;
541     }
542     NdisReleaseSpinLock(&queue->queueLock);
543 done:
544     OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
545 }
546
547
548 NTSTATUS
549 OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject)
550 {
551     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
552     POVS_USER_PACKET_QUEUE queue =
553                (POVS_USER_PACKET_QUEUE)instance->packetQueue;
554     NTSTATUS status = STATUS_SUCCESS;
555     BOOLEAN cancelled = FALSE;
556
557     if (queue == NULL) {
558         return STATUS_INVALID_PARAMETER;
559     }
560     NdisAcquireSpinLock(&queue->queueLock);
561     if (queue->instance != instance) {
562         NdisReleaseSpinLock(&queue->queueLock);
563         return STATUS_INVALID_PARAMETER;
564     }
565     if (queue->pendingIrp) {
566         NdisReleaseSpinLock(&queue->queueLock);
567         return STATUS_DEVICE_BUSY;
568     }
569     if (queue->numPackets == 0) {
570         PDRIVER_CANCEL cancelRoutine;
571         IoMarkIrpPending(irp);
572         IoSetCancelRoutine(irp, OvsCancelIrpDatapath);
573         if (irp->Cancel) {
574             cancelRoutine = IoSetCancelRoutine(irp, NULL);
575             if (cancelRoutine) {
576                 cancelled = TRUE;
577             }
578         } else {
579             queue->pendingIrp = irp;
580         }
581         status = STATUS_PENDING;
582     }
583     NdisReleaseSpinLock(&queue->queueLock);
584     if (cancelled) {
585         OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
586         OVS_LOG_INFO("Datapath IRP cancelled: %p", irp);
587     }
588     return status;
589 }
590
591
592 POVS_PACKET_QUEUE_ELEM
593 OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
594 {
595     POVS_USER_PACKET_QUEUE queue;
596     PLIST_ENTRY link;
597     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
598     if (queue == NULL) {
599         return NULL;
600     }
601     NdisAcquireSpinLock(&queue->queueLock);
602     if (queue->instance != instance || queue->numPackets == 0) {
603         NdisReleaseSpinLock(&queue->queueLock);
604         return NULL;
605     }
606     link = RemoveHeadList(&queue->packetList);
607     queue->numPackets--;
608     NdisReleaseSpinLock(&queue->queueLock);
609     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
610 }
611
612 /*
613  * ---------------------------------------------------------------------------
614  * Given a pid, returns the corresponding USER_PACKET_QUEUE.
615  * gOvsCtrlLock must be acquired before calling this API.
616  * ---------------------------------------------------------------------------
617  */
618 POVS_USER_PACKET_QUEUE
619 OvsGetQueue(UINT32 pid)
620 {
621     POVS_OPEN_INSTANCE instance;
622     POVS_USER_PACKET_QUEUE ret = NULL;
623
624     instance = OvsGetPidInstance(gOvsSwitchContext, pid);
625
626     if (instance) {
627         ret = instance->packetQueue;
628     }
629
630     return ret;
631 }
632
633 /*
634  * ---------------------------------------------------------------------------
635  * Given a pid, returns the corresponding instance.
636  * gOvsCtrlLock must be acquired before calling this API.
637  * ---------------------------------------------------------------------------
638  */
639 POVS_OPEN_INSTANCE
640 OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
641 {
642     POVS_OPEN_INSTANCE instance;
643     PLIST_ENTRY head, link;
644     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
645                                 OVS_HASH_BASIS);
646     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
647     LIST_FORALL(head, link) {
648         instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
649         if (instance->pid == pid) {
650             return instance;
651         }
652     }
653     return NULL;
654 }
655
656 /*
657  * ---------------------------------------------------------------------------
658  * Given a pid and an instance. This API adds instance to pidHashArray.
659  * gOvsCtrlLock must be acquired before calling this API.
660  * ---------------------------------------------------------------------------
661  */
662 VOID
663 OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
664                   POVS_OPEN_INSTANCE instance)
665 {
666     PLIST_ENTRY head;
667     UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
668                                 OVS_HASH_BASIS);
669     head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
670     InsertHeadList(head, &(instance->pidLink));
671 }
672
673 /*
674  * ---------------------------------------------------------------------------
675  * Given a pid and an instance. This API removes instance from pidHashArray.
676  * gOvsCtrlLock must be acquired before calling this API.
677  * ---------------------------------------------------------------------------
678  */
679 VOID
680 OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
681 {
682     POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
683
684     if (instance) {
685         RemoveEntryList(&(instance->pidLink));
686     }
687 }
688
689 VOID
690 OvsQueuePackets(UINT32 queueId,
691                 PLIST_ENTRY packetList,
692                 UINT32 numElems)
693 {
694     POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
695     POVS_PACKET_QUEUE_ELEM elem;
696     PIRP irp = NULL;
697     PLIST_ENTRY  link;
698     UINT32 num = 0;
699
700     OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
701                   queueId, numElems);
702     if (queue == NULL) {
703         goto cleanup;
704     }
705
706     NdisAcquireSpinLock(&queue->queueLock);
707     if (queue->instance == NULL) {
708         NdisReleaseSpinLock(&queue->queueLock);
709         goto cleanup;
710     } else {
711         OvsAppendList(&queue->packetList, packetList);
712         queue->numPackets += numElems;
713     }
714     if (queue->pendingIrp) {
715         PDRIVER_CANCEL cancelRoutine;
716         irp = queue->pendingIrp;
717         queue->pendingIrp = NULL;
718         cancelRoutine = IoSetCancelRoutine(irp, NULL);
719         if (cancelRoutine == NULL) {
720             irp = NULL;
721         }
722     }
723     NdisReleaseSpinLock(&queue->queueLock);
724     if (irp) {
725         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
726     }
727
728 cleanup:
729     while (!IsListEmpty(packetList)) {
730         link = RemoveHeadList(packetList);
731         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
732         OvsFreeMemory(elem);
733         num++;
734     }
735     OVS_LOG_LOUD("Exit: drop %u packets", num);
736 }
737
738
739 /*
740  *----------------------------------------------------------------------------
741  * OvsCreateAndAddPackets --
742  *
743  *  Create a packet and forwarded to user space.
744  *
745  *  This function would fragment packet if needed, and queue
746  *  each segment to user space.
747  *----------------------------------------------------------------------------
748  */
749 NTSTATUS
750 OvsCreateAndAddPackets(PVOID userData,
751                        UINT32 userDataLen,
752                        UINT32 cmd,
753                        UINT32 inPort,
754                        OvsFlowKey *key,
755                        PNET_BUFFER_LIST nbl,
756                        BOOLEAN isRecv,
757                        POVS_PACKET_HDR_INFO hdrInfo,
758                        POVS_SWITCH_CONTEXT switchContext,
759                        LIST_ENTRY *list,
760                        UINT32 *num)
761 {
762     POVS_PACKET_QUEUE_ELEM elem;
763     PNET_BUFFER_LIST newNbl = NULL;
764     PNET_BUFFER nb;
765
766     if (hdrInfo->isTcp) {
767         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
768         UINT32 packetLength;
769
770         tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
771         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
772         packetLength = NET_BUFFER_DATA_LENGTH(nb);
773
774         OVS_LOG_TRACE("MSS %u packet len %u",
775                 tsoInfo.LsoV1Transmit.MSS, packetLength);
776         if (tsoInfo.LsoV1Transmit.MSS) {
777             OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
778             newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
779                     tsoInfo.LsoV1Transmit.MSS , 0);
780             if (newNbl == NULL) {
781                 return NDIS_STATUS_FAILURE;
782             }
783             nbl = newNbl;
784         }
785     }
786
787     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
788     while (nb) {
789         elem = OvsCreateQueueNlPacket(userData, userDataLen,
790                                     cmd, inPort, key, nbl, nb,
791                                     isRecv, hdrInfo);
792         if (elem) {
793             InsertTailList(list, &elem->link);
794             (*num)++;
795         }
796         nb = NET_BUFFER_NEXT_NB(nb);
797     }
798     if (newNbl) {
799         OvsCompleteNBL(switchContext, newNbl, TRUE);
800     }
801     return NDIS_STATUS_SUCCESS;
802 }
803
804 static __inline UINT32
805 OvsGetUpcallMsgSize(PVOID userData,
806                     UINT32 userDataLen,
807                     OvsIPv4TunnelKey *tunnelKey,
808                     UINT32 payload)
809 {
810     UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
811                   NlAttrSize(payload) +
812                   NlAttrSize(OvsFlowKeyAttrSize());
813
814     /* OVS_PACKET_ATTR_USERDATA */
815     if (userData) {
816         size += NlAttrTotalSize(userDataLen);
817     }
818     /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
819     /* Is it included in the the flwo key attr XXX */
820     if (tunnelKey) {
821         size += NlAttrTotalSize(OvsTunKeyAttrSize());
822     }
823     return size;
824 }
825
/*
 *----------------------------------------------------------------------------
 * This function completes the IP header checksum, records the L4 payload
 * length, and records whether the TCP or UDP checksum still needs to be
 * calculated. The actual L4 checksum will be calculated simultaneously with
 * the copy of the payload to the destination buffer when the packet is read.
 *----------------------------------------------------------------------------
 */
834 static VOID
835 OvsCompletePacketHeader(UINT8 *packet,
836                         BOOLEAN isRecv,
837                         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
838                         POVS_PACKET_HDR_INFO hdrInfoIn,
839                         POVS_PACKET_HDR_INFO hdrInfoOut)
840 {
841     if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
842         (!isRecv && csumInfo.Transmit.IsIPv4 &&
843         csumInfo.Transmit.IpHeaderChecksum)) {
844         PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
845         ASSERT(hdrInfoIn->isIPv4);
846         ASSERT(ipHdr->Version == 4);
847         ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
848             ipHdr->HeaderLength << 2,
849             (UINT16)~ipHdr->HeaderChecksum);
850         ovsUserStats.ipCsum++;
851     }
852     ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
853     /*
854      * calculate TCP/UDP pseudo checksum
855      */
856     if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
857         /*
858          * Only this case, we need to reclaculate pseudo checksum
859          * all other cases, it is assumed the pseudo checksum is
860          * filled already.
861          *
862          */
863         PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
864         if (hdrInfoIn->isIPv4) {
865             PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
866             hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
867                                     (ipHdr->HeaderLength << 2));
868             tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
869                                          (UINT32 *)&ipHdr->DestinationAddress,
870                                          IPPROTO_TCP, hdrInfoOut->l4PayLoad);
871         } else {
872             PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
873             hdrInfoOut->l4PayLoad =
874                 (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
875                 hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
876                 hdrInfoIn->l4Offset);
877             ASSERT(hdrInfoIn->isIPv6);
878             tcpHdr->th_sum =
879                 IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
880                 (UINT32 *)&ipv6Hdr->DestinationAddress,
881                 IPPROTO_TCP, hdrInfoOut->l4PayLoad);
882         }
883         hdrInfoOut->tcpCsumNeeded = 1;
884         ovsUserStats.recalTcpCsum++;
885     } else if (!isRecv) {
886         if (csumInfo.Transmit.TcpChecksum) {
887             hdrInfoOut->tcpCsumNeeded = 1;
888         } else if (csumInfo.Transmit.UdpChecksum) {
889             hdrInfoOut->udpCsumNeeded = 1;
890         }
891         if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
892 #ifdef DBG
893             UINT16 sum, *ptr;
894             UINT8 proto =
895                 hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
896 #endif
897             if (hdrInfoIn->isIPv4) {
898                 PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
899                 hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
900                     (ipHdr->HeaderLength << 2));
901 #ifdef DBG
902                 sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
903                     (UINT32 *)&ipHdr->DestinationAddress,
904                     proto, hdrInfoOut->l4PayLoad);
905 #endif
906             } else {
907                 PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
908                     hdrInfoIn->l3Offset);
909                 hdrInfoOut->l4PayLoad =
910                     (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
911                     hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
912                     hdrInfoIn->l4Offset);
913                 ASSERT(hdrInfoIn->isIPv6);
914 #ifdef DBG
915                 sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
916                     (UINT32 *)&ipv6Hdr->DestinationAddress,
917                     proto, hdrInfoOut->l4PayLoad);
918 #endif
919             }
920 #ifdef DBG
921             ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
922                 (hdrInfoOut->tcpCsumNeeded ?
923             TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
924             ASSERT(*ptr == sum);
925 #endif
926         }
927     }
928 }
929
930 static NTSTATUS
931 OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
932 {
933     UNREFERENCED_PARAMETER(nb);
934
935     /* XXX select a pid from an array of pids using a flow based hash */
936     *pid = vport->upcallPid;
937     return STATUS_SUCCESS;
938 }
939
940 /*
941  *----------------------------------------------------------------------------
942  * OvsCreateQueueNlPacket --
943  *
944  *  Create a packet which will be forwarded to user space.
945  *
946  * InputParameter:
947  *   userData: when cmd is user action, this field contain
948  *      user action data.
949  *   userDataLen: as name indicated
950  *   cmd: either miss or user action
951  *   inPort: datapath port id from which the packet is received.
952  *   key: flow Key with a tunnel key if available
953  *   nbl:  the NET_BUFFER_LIST which contain the packet
954  *   nb: the packet
955  *   isRecv: This is used to decide how to interprete the csum info
956  *   hdrInfo: include hdr info initialized during flow extraction.
957  *
958  * Results:
959  *    NULL if fail to create the packet
960  *    The packet element otherwise
961  *----------------------------------------------------------------------------
962  */
963 POVS_PACKET_QUEUE_ELEM
964 OvsCreateQueueNlPacket(PVOID userData,
965                        UINT32 userDataLen,
966                        UINT32 cmd,
967                        UINT32 inPort,
968                        OvsFlowKey *key,
969                        PNET_BUFFER_LIST nbl,
970                        PNET_BUFFER nb,
971                        BOOLEAN isRecv,
972                        POVS_PACKET_HDR_INFO hdrInfo)
973 {
974 #define VLAN_TAG_SIZE 4
975     UINT32 allocLen, dataLen, extraLen;
976     POVS_PACKET_QUEUE_ELEM elem;
977     UINT8 *src, *dst;
978     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
979     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
980     OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
981     UINT32 pid;
982     UINT32 nlMsgSize;
983     NL_BUFFER nlBuf;
984     PNL_MSG_HDR nlMsg;
985
986     /* XXX pass vport in the stack rather than portNo */
987     POVS_VPORT_ENTRY vport =
988         OvsFindVportByPortNo(gOvsSwitchContext, inPort);
989
990     if (vport == NULL){
991         /* No vport is not fatal. */
992         return NULL;
993     }
994
995     OvsGetPid(vport, nb, &pid);
996
997     if (!pid) {
998         /*
999          * There is no userspace queue created yet, so there is no point for
1000          * creating a new packet to be queued.
1001          */
1002         return NULL;
1003     }
1004
1005     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
1006
1007     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
1008                   (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
1009                   csumInfo.Receive.IpChecksumFailed)) {
1010         OVS_LOG_INFO("Packet dropped due to checksum failure.");
1011         ovsUserStats.dropDuetoChecksum++;
1012         return NULL;
1013     }
1014
1015     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
1016     extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
1017
1018     dataLen = NET_BUFFER_DATA_LENGTH(nb);
1019
1020     if (NlAttrSize(dataLen) > MAXUINT16) {
1021         return NULL;
1022     }
1023
1024     nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
1025                                     dataLen + extraLen);
1026
1027     allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
1028     elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
1029     if (elem == NULL) {
1030         ovsUserStats.dropDuetoResource++;
1031         return NULL;
1032     }
1033     elem->hdrInfo.value = hdrInfo->value;
1034     elem->packet.totalLen = nlMsgSize;
1035     /* XXX remove queueid */
1036     elem->packet.queue = 0;
1037     /* XXX  no need as the length is already in the NL attrib */
1038     elem->packet.userDataLen = userDataLen;
1039     elem->packet.inPort = inPort;
1040     elem->packet.cmd = cmd;
1041     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
1042         ovsUserStats.miss++;
1043     } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
1044         ovsUserStats.action++;
1045     } else {
1046         ASSERT(FALSE);
1047         goto fail;
1048     }
1049     /* XXX Should we have both packetLen and TotalLen*/
1050     elem->packet.packetLen = dataLen + extraLen;
1051
1052     NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
1053
1054     /*
1055      * Initialize the OVS header
1056      * Since we are pre allocating memory for the NL buffer
1057      * the attribute settings should not fail
1058      */
1059     if (NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
1060                       0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
1061                       gOvsSwitchContext->dpNo) != STATUS_SUCCESS) {
1062         goto fail;
1063     }
1064
1065     if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
1066                           OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
1067         goto fail;
1068     }
1069
1070     /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
1071     if (userData){
1072         if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
1073                                 userData, (UINT16)userDataLen)) {
1074             goto fail;
1075         }
1076     }
1077
1078     /*
1079      * Make space for the payload to be copied and set the attribute
1080      * XXX Uninit set initilizes the buffer with xero, we don't actually need
1081      * that the payload to be initailized
1082      */
1083     dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
1084                                             (UINT16)(dataLen + extraLen));
1085     if (!dst) {
1086         goto fail;
1087     }
1088
1089     /* Store the payload for csum calculation when packet is read */
1090     elem->packet.payload = dst;
1091     dst += extraLen;
1092
1093     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
1094     if (src == NULL) {
1095         ovsUserStats.dropDuetoResource++;
1096         goto fail;
1097     }    else if (src != dst) {
1098         /* Copy the data from the NDIS buffer to dst. */
1099         RtlCopyMemory(dst, src, dataLen);
1100     }
1101
1102     /* Set csum if was offloaded */
1103     OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
1104
1105     /*
1106      * Finally insert VLAN tag
1107      */
1108     if (extraLen) {
1109         dst = elem->packet.payload;
1110         src = dst + extraLen;
1111         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
1112         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
1113         ((UINT32 *)dst)[2] = ((UINT32 *)src)[2];
1114         dst += 12;
1115         ((UINT16 *)dst)[0] = htons(0x8100);
1116         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
1117             (vlanInfo.TagHeader.UserPriority << 13));
1118         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
1119         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
1120         ovsUserStats.vlanInsert++;
1121     }
1122
1123     nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
1124     nlMsg->nlmsgLen = NlBufSize(&nlBuf);
1125     /* 'totalLen' should be size of valid data. */
1126     elem->packet.totalLen = nlMsg->nlmsgLen;
1127
1128     return elem;
1129 fail:
1130     OvsFreeMemory(elem);
1131     return NULL;
1132 }