Merge remote-tracking branch 'origin/master' into ovn
[cascardo/ovs.git] / datapath-windows / ovsext / Datapath.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
/*
 * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface
 * alive while we transition over to the netlink based interface.
 * OVS_USE_NL_INTERFACE = 0 => legacy interface to use with dpif-windows.c
 * OVS_USE_NL_INTERFACE = 1 => netlink interface to use with ported dpif-linux.c
 */
23
24 #include "precomp.h"
25 #include "Switch.h"
26 #include "User.h"
27 #include "Datapath.h"
28 #include "Jhash.h"
29 #include "Vport.h"
30 #include "Event.h"
31 #include "User.h"
32 #include "PacketIO.h"
33 #include "NetProto.h"
34 #include "Flow.h"
35 #include "User.h"
36 #include "Vxlan.h"
37
38 #ifdef OVS_DBG_MOD
39 #undef OVS_DBG_MOD
40 #endif
41 #define OVS_DBG_MOD OVS_DBG_DATAPATH
42 #include "Debug.h"
43
44 #define NETLINK_FAMILY_NAME_LEN 48
45
46
47 /*
48  * Netlink messages are grouped by family (aka type), and each family supports
49  * a set of commands, and can be passed both from kernel -> userspace or
50  * vice-versa. To call into the kernel, userspace uses a device operation which
51  * is outside of a netlink message.
52  *
53  * Each command results in the invocation of a handler function to implement the
54  * request functionality.
55  *
56  * Expectedly, only certain combinations of (device operation, netlink family,
57  * command) are valid.
58  *
59  * Here, we implement the basic infrastructure to perform validation on the
60  * incoming message, version checking, and also to invoke the corresponding
61  * handler to do the heavy-lifting.
62  */
63
/*
 * Handler for a given netlink command. Not all the parameters are used by all
 * the handlers.
 *
 * 'usrParamsCtx' carries the per-request context built by the IOCTL dispatcher
 * and 'replyLen' is an out parameter; the dispatcher forwards its value as the
 * IRP's information field when it completes the request.
 */
typedef NTSTATUS(NetlinkCmdHandler)(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                                    UINT32 *replyLen);

/* One entry of a netlink family's command table. */
typedef struct _NETLINK_CMD {
    UINT16 cmd;                 /* Command identifier, compared against the
                                 * generic netlink header's 'cmd'. */
    NetlinkCmdHandler *handler; /* Function that implements the command. */
    UINT32 supportedDevOp;      /* Supported device operations (bitmask). */
    BOOLEAN validateDpIndex;    /* Does command require a valid DP argument. */
} NETLINK_CMD, *PNETLINK_CMD;
77
/* A netlink family is a group of commands. */
typedef struct _NETLINK_FAMILY {
    CHAR *name;                 /* Family name. */
    UINT32 id;                  /* Family identifier. */
    UINT8 version;              /* Family version; command validation rejects
                                 * messages that carry a lower version. */
    UINT8 pad;
    UINT16 maxAttr;             /* Presumably the highest attribute type of the
                                 * family -- confirm against the users. */
    NETLINK_CMD *cmds;          /* Array of netlink commands and handlers. */
    UINT16 opsCount;            /* Number of entries in 'cmds'. */
} NETLINK_FAMILY, *PNETLINK_FAMILY;
88
89 /* Handlers for the various netlink commands. */
90 static NetlinkCmdHandler OvsPendEventCmdHandler,
91                          OvsPendPacketCmdHandler,
92                          OvsSubscribeEventCmdHandler,
93                          OvsSubscribePacketCmdHandler,
94                          OvsReadEventCmdHandler,
95                          OvsReadPacketCmdHandler,
96                          OvsNewDpCmdHandler,
97                          OvsGetDpCmdHandler,
98                          OvsSetDpCmdHandler;
99
100 NetlinkCmdHandler        OvsGetNetdevCmdHandler,
101                          OvsGetVportCmdHandler,
102                          OvsSetVportCmdHandler,
103                          OvsNewVportCmdHandler,
104                          OvsDeleteVportCmdHandler;
105
106 static NTSTATUS HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
107                                        UINT32 *replyLen);
108 static NTSTATUS HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
109                                 UINT32 *replyLen);
110 static NTSTATUS HandleDpTransactionCommon(
111                     POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen);
112 static NTSTATUS OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
113                                     UINT32 *replyLen);
114
115 /*
116  * The various netlink families, along with the supported commands. Most of
117  * these families and commands are part of the openvswitch specification for a
118  * netlink datapath. In addition, each platform can implement a few families
119  * and commands as extensions.
120  */
121
122 /* Netlink control family: this is a Windows specific family. */
123 NETLINK_CMD nlControlFamilyCmdOps[] = {
124     { .cmd = OVS_CTRL_CMD_WIN_PEND_REQ,
125       .handler = OvsPendEventCmdHandler,
126       .supportedDevOp = OVS_WRITE_DEV_OP,
127       .validateDpIndex = TRUE,
128     },
129     { .cmd = OVS_CTRL_CMD_WIN_PEND_PACKET_REQ,
130       .handler = OvsPendPacketCmdHandler,
131       .supportedDevOp = OVS_WRITE_DEV_OP,
132       .validateDpIndex = TRUE,
133     },
134     { .cmd = OVS_CTRL_CMD_MC_SUBSCRIBE_REQ,
135       .handler = OvsSubscribeEventCmdHandler,
136       .supportedDevOp = OVS_WRITE_DEV_OP,
137       .validateDpIndex = TRUE,
138     },
139     { .cmd = OVS_CTRL_CMD_PACKET_SUBSCRIBE_REQ,
140       .handler = OvsSubscribePacketCmdHandler,
141       .supportedDevOp = OVS_WRITE_DEV_OP,
142       .validateDpIndex = TRUE,
143     },
144     { .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
145       .handler = OvsReadEventCmdHandler,
146       .supportedDevOp = OVS_READ_EVENT_DEV_OP,
147       .validateDpIndex = FALSE,
148     },
149     { .cmd = OVS_CTRL_CMD_READ_NOTIFY,
150       .handler = OvsReadPacketCmdHandler,
151       .supportedDevOp = OVS_READ_PACKET_DEV_OP,
152       .validateDpIndex = FALSE,
153     }
154 };
155
156 NETLINK_FAMILY nlControlFamilyOps = {
157     .name     = OVS_WIN_CONTROL_FAMILY,
158     .id       = OVS_WIN_NL_CTRL_FAMILY_ID,
159     .version  = OVS_WIN_CONTROL_VERSION,
160     .maxAttr  = OVS_WIN_CONTROL_ATTR_MAX,
161     .cmds     = nlControlFamilyCmdOps,
162     .opsCount = ARRAY_SIZE(nlControlFamilyCmdOps)
163 };
164
165 /* Netlink datapath family. */
166 NETLINK_CMD nlDatapathFamilyCmdOps[] = {
167     { .cmd             = OVS_DP_CMD_NEW,
168       .handler         = OvsNewDpCmdHandler,
169       .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
170       .validateDpIndex = FALSE
171     },
172     { .cmd             = OVS_DP_CMD_GET,
173       .handler         = OvsGetDpCmdHandler,
174       .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
175                          OVS_TRANSACTION_DEV_OP,
176       .validateDpIndex = FALSE
177     },
178     { .cmd             = OVS_DP_CMD_SET,
179       .handler         = OvsSetDpCmdHandler,
180       .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
181                          OVS_TRANSACTION_DEV_OP,
182       .validateDpIndex = TRUE
183     }
184 };
185
186 NETLINK_FAMILY nlDatapathFamilyOps = {
187     .name     = OVS_DATAPATH_FAMILY,
188     .id       = OVS_WIN_NL_DATAPATH_FAMILY_ID,
189     .version  = OVS_DATAPATH_VERSION,
190     .maxAttr  = OVS_DP_ATTR_MAX,
191     .cmds     = nlDatapathFamilyCmdOps,
192     .opsCount = ARRAY_SIZE(nlDatapathFamilyCmdOps)
193 };
194
195 /* Netlink packet family. */
196
197 NETLINK_CMD nlPacketFamilyCmdOps[] = {
198     { .cmd             = OVS_PACKET_CMD_EXECUTE,
199       .handler         = OvsNlExecuteCmdHandler,
200       .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
201       .validateDpIndex = TRUE
202     }
203 };
204
205 NETLINK_FAMILY nlPacketFamilyOps = {
206     .name     = OVS_PACKET_FAMILY,
207     .id       = OVS_WIN_NL_PACKET_FAMILY_ID,
208     .version  = OVS_PACKET_VERSION,
209     .maxAttr  = OVS_PACKET_ATTR_MAX,
210     .cmds     = nlPacketFamilyCmdOps,
211     .opsCount = ARRAY_SIZE(nlPacketFamilyCmdOps)
212 };
213
214 /* Netlink vport family. */
215 NETLINK_CMD nlVportFamilyCmdOps[] = {
216     { .cmd = OVS_VPORT_CMD_GET,
217       .handler = OvsGetVportCmdHandler,
218       .supportedDevOp = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
219                         OVS_TRANSACTION_DEV_OP,
220       .validateDpIndex = TRUE
221     },
222     { .cmd = OVS_VPORT_CMD_NEW,
223       .handler = OvsNewVportCmdHandler,
224       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
225       .validateDpIndex = TRUE
226     },
227     { .cmd = OVS_VPORT_CMD_SET,
228       .handler = OvsSetVportCmdHandler,
229       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
230       .validateDpIndex = TRUE
231     },
232     { .cmd = OVS_VPORT_CMD_DEL,
233       .handler = OvsDeleteVportCmdHandler,
234       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
235       .validateDpIndex = TRUE
236     },
237 };
238
239 NETLINK_FAMILY nlVportFamilyOps = {
240     .name     = OVS_VPORT_FAMILY,
241     .id       = OVS_WIN_NL_VPORT_FAMILY_ID,
242     .version  = OVS_VPORT_VERSION,
243     .maxAttr  = OVS_VPORT_ATTR_MAX,
244     .cmds     = nlVportFamilyCmdOps,
245     .opsCount = ARRAY_SIZE(nlVportFamilyCmdOps)
246 };
247
248 /* Netlink flow family. */
249
250 NETLINK_CMD nlFlowFamilyCmdOps[] = {
251     { .cmd              = OVS_FLOW_CMD_NEW,
252       .handler          = OvsFlowNlCmdHandler,
253       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
254       .validateDpIndex  = TRUE
255     },
256     { .cmd              = OVS_FLOW_CMD_SET,
257       .handler          = OvsFlowNlCmdHandler,
258       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
259       .validateDpIndex  = TRUE
260     },
261     { .cmd              = OVS_FLOW_CMD_DEL,
262       .handler          = OvsFlowNlCmdHandler,
263       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
264       .validateDpIndex  = TRUE
265     },
266     { .cmd              = OVS_FLOW_CMD_GET,
267       .handler          = OvsFlowNlGetCmdHandler,
268       .supportedDevOp   = OVS_TRANSACTION_DEV_OP |
269                           OVS_WRITE_DEV_OP | OVS_READ_DEV_OP,
270       .validateDpIndex  = TRUE
271     },
272 };
273
274 NETLINK_FAMILY nlFLowFamilyOps = {
275     .name     = OVS_FLOW_FAMILY,
276     .id       = OVS_WIN_NL_FLOW_FAMILY_ID,
277     .version  = OVS_FLOW_VERSION,
278     .maxAttr  = OVS_FLOW_ATTR_MAX,
279     .cmds     = nlFlowFamilyCmdOps,
280     .opsCount = ARRAY_SIZE(nlFlowFamilyCmdOps)
281 };
282
283 /* Netlink netdev family. */
284 NETLINK_CMD nlNetdevFamilyCmdOps[] = {
285     { .cmd = OVS_WIN_NETDEV_CMD_GET,
286       .handler = OvsGetNetdevCmdHandler,
287       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
288       .validateDpIndex = FALSE
289     },
290 };
291
292 NETLINK_FAMILY nlNetdevFamilyOps = {
293     .name     = OVS_WIN_NETDEV_FAMILY,
294     .id       = OVS_WIN_NL_NETDEV_FAMILY_ID,
295     .version  = OVS_WIN_NETDEV_VERSION,
296     .maxAttr  = OVS_WIN_NETDEV_ATTR_MAX,
297     .cmds     = nlNetdevFamilyCmdOps,
298     .opsCount = ARRAY_SIZE(nlNetdevFamilyCmdOps)
299 };
300
301 static NTSTATUS MapIrpOutputBuffer(PIRP irp,
302                                    UINT32 bufferLength,
303                                    UINT32 requiredLength,
304                                    PVOID *buffer);
305 static NTSTATUS ValidateNetlinkCmd(UINT32 devOp,
306                                    POVS_OPEN_INSTANCE instance,
307                                    POVS_MESSAGE ovsMsg,
308                                    NETLINK_FAMILY *nlFamilyOps);
309 static NTSTATUS InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
310                                         NETLINK_FAMILY *nlFamilyOps,
311                                         UINT32 *replyLen);
312
/*
 * Handles to the device object for communication with userspace. Both are
 * filled in when the device is registered and reset to NULL when it is
 * deregistered.
 */
NDIS_HANDLE gOvsDeviceHandle;
PDEVICE_OBJECT gOvsDeviceObject;

/* Dispatch routine shared by IRP_MJ_CREATE and IRP_MJ_CLOSE. */
_Dispatch_type_(IRP_MJ_CREATE)
_Dispatch_type_(IRP_MJ_CLOSE)
DRIVER_DISPATCH OvsOpenCloseDevice;

/* Dispatch routine for IRP_MJ_CLEANUP. */
_Dispatch_type_(IRP_MJ_CLEANUP)
DRIVER_DISPATCH OvsCleanupDevice;

/* Dispatch routine for IRP_MJ_DEVICE_CONTROL (the IOCTL entry point). */
_Dispatch_type_(IRP_MJ_DEVICE_CONTROL)
DRIVER_DISPATCH OvsDeviceControl;

/* Section placement of the routines above when ALLOC_PRAGMA is defined. */
#ifdef ALLOC_PRAGMA
#pragma alloc_text(INIT, OvsCreateDeviceObject)
#pragma alloc_text(PAGE, OvsOpenCloseDevice)
#pragma alloc_text(PAGE, OvsCleanupDevice)
#pragma alloc_text(PAGE, OvsDeviceControl)
#endif // ALLOC_PRAGMA
333
/*
 * We might hit this limit easily since userspace opens a netlink descriptor for
 * each thread, and at least one descriptor per vport. Revisit this later.
 */
#define OVS_MAX_OPEN_INSTANCES 512
#define OVS_SYSTEM_DP_NAME     "ovs-system"

/*
 * Table of open instances. An instance lives in the slot whose index is stored
 * in its 'cookie'; free slots are NULL. 'ovsNumberOfOpenInstances' counts the
 * occupied slots.
 */
POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES];
UINT32 ovsNumberOfOpenInstances;
extern POVS_SWITCH_CONTEXT gOvsSwitchContext;

/*
 * Control lock: held while the open instance table is modified. 'gOvsCtrlLock'
 * points at 'ovsCtrlLockObj' between OvsInit() and OvsCleanup().
 */
NDIS_SPIN_LOCK ovsCtrlLockObj;
PNDIS_SPIN_LOCK gOvsCtrlLock;
347
348 NTSTATUS
349 InitUserDumpState(POVS_OPEN_INSTANCE instance,
350                   POVS_MESSAGE ovsMsg)
351 {
352     /* Clear the dumpState from a previous dump sequence. */
353     ASSERT(instance->dumpState.ovsMsg == NULL);
354     ASSERT(ovsMsg);
355
356     instance->dumpState.ovsMsg =
357         (POVS_MESSAGE)OvsAllocateMemoryWithTag(sizeof(OVS_MESSAGE),
358                                                OVS_DATAPATH_POOL_TAG);
359     if (instance->dumpState.ovsMsg == NULL) {
360         return STATUS_NO_MEMORY;
361     }
362     RtlCopyMemory(instance->dumpState.ovsMsg, ovsMsg,
363                   sizeof *instance->dumpState.ovsMsg);
364     RtlZeroMemory(instance->dumpState.index,
365                   sizeof instance->dumpState.index);
366
367     return STATUS_SUCCESS;
368 }
369
370 VOID
371 FreeUserDumpState(POVS_OPEN_INSTANCE instance)
372 {
373     if (instance->dumpState.ovsMsg != NULL) {
374         OvsFreeMemoryWithTag(instance->dumpState.ovsMsg,
375                              OVS_DATAPATH_POOL_TAG);
376         RtlZeroMemory(&instance->dumpState, sizeof instance->dumpState);
377     }
378 }
379
/*
 * Sets up the control lock (pointing 'gOvsCtrlLock' at its backing object) and
 * then initializes the event queue.
 */
VOID
OvsInit()
{
    gOvsCtrlLock = &ovsCtrlLockObj;
    NdisAllocateSpinLock(gOvsCtrlLock);
    OvsInitEventQueue();
}
387
388 VOID
389 OvsCleanup()
390 {
391     OvsCleanupEventQueue();
392     if (gOvsCtrlLock) {
393         NdisFreeSpinLock(gOvsCtrlLock);
394         gOvsCtrlLock = NULL;
395     }
396 }
397
/* Acquires the control spin lock; pair with OvsReleaseCtrlLock(). */
VOID
OvsAcquireCtrlLock()
{
    NdisAcquireSpinLock(gOvsCtrlLock);
}
403
/* Releases the control spin lock taken by OvsAcquireCtrlLock(). */
VOID
OvsReleaseCtrlLock()
{
    NdisReleaseSpinLock(gOvsCtrlLock);
}
409
410
411 /*
412  * --------------------------------------------------------------------------
413  * Creates the communication device between user and kernel, and also
414  * initializes the data associated data structures.
415  * --------------------------------------------------------------------------
416  */
417 NDIS_STATUS
418 OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
419 {
420     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
421     UNICODE_STRING deviceName;
422     UNICODE_STRING symbolicDeviceName;
423     PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];
424     NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes;
425     OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle);
426
427     RtlZeroMemory(dispatchTable,
428                   (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH));
429     dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice;
430     dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice;
431     dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice;
432     dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl;
433
434     NdisInitUnicodeString(&deviceName, OVS_DEVICE_NAME_NT);
435     NdisInitUnicodeString(&symbolicDeviceName, OVS_DEVICE_NAME_DOS);
436
437     RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
438
439     OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header,
440                            NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES,
441                            NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1,
442                            sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
443
444     deviceAttributes.DeviceName = &deviceName;
445     deviceAttributes.SymbolicName = &symbolicDeviceName;
446     deviceAttributes.MajorFunctions = dispatchTable;
447     deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION);
448
449     status = NdisRegisterDeviceEx(ovsExtDriverHandle,
450                                   &deviceAttributes,
451                                   &gOvsDeviceObject,
452                                   &gOvsDeviceHandle);
453     if (status != NDIS_STATUS_SUCCESS) {
454         POVS_DEVICE_EXTENSION ovsExt =
455             (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject);
456         ASSERT(gOvsDeviceObject != NULL);
457         ASSERT(gOvsDeviceHandle != NULL);
458
459         if (ovsExt) {
460             ovsExt->numberOpenInstance = 0;
461         }
462     } else {
463         OvsRegisterSystemProvider((PVOID)gOvsDeviceObject);
464     }
465
466     OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
467     return status;
468 }
469
470
471 VOID
472 OvsDeleteDeviceObject()
473 {
474     if (gOvsDeviceHandle) {
475 #ifdef DBG
476         POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION)
477                     NdisGetDeviceReservedExtension(gOvsDeviceObject);
478         if (ovsExt) {
479             ASSERT(ovsExt->numberOpenInstance == 0);
480         }
481 #endif
482
483         ASSERT(gOvsDeviceObject);
484         NdisDeregisterDeviceEx(gOvsDeviceHandle);
485         gOvsDeviceHandle = NULL;
486         gOvsDeviceObject = NULL;
487
488         OvsUnregisterSystemProvider();
489     }
490 }
491
492 POVS_OPEN_INSTANCE
493 OvsGetOpenInstance(PFILE_OBJECT fileObject,
494                    UINT32 dpNo)
495 {
496     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
497     ASSERT(instance);
498     ASSERT(instance->fileObject == fileObject);
499     if (gOvsSwitchContext->dpNo != dpNo) {
500         return NULL;
501     }
502     return instance;
503 }
504
505
506 POVS_OPEN_INSTANCE
507 OvsFindOpenInstance(PFILE_OBJECT fileObject)
508 {
509     UINT32 i, j;
510     for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES &&
511                        j < ovsNumberOfOpenInstances; i++) {
512         if (ovsOpenInstanceArray[i]) {
513             if (ovsOpenInstanceArray[i]->fileObject == fileObject) {
514                 return ovsOpenInstanceArray[i];
515             }
516             j++;
517         }
518     }
519     return NULL;
520 }
521
522 NTSTATUS
523 OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
524                    PFILE_OBJECT fileObject)
525 {
526     POVS_OPEN_INSTANCE instance =
527         (POVS_OPEN_INSTANCE)OvsAllocateMemoryWithTag(sizeof(OVS_OPEN_INSTANCE),
528                                                      OVS_DATAPATH_POOL_TAG);
529     UINT32 i;
530
531     if (instance == NULL) {
532         return STATUS_NO_MEMORY;
533     }
534     OvsAcquireCtrlLock();
535     ASSERT(OvsFindOpenInstance(fileObject) == NULL);
536
537     if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
538         OvsReleaseCtrlLock();
539         OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
540         return STATUS_INSUFFICIENT_RESOURCES;
541     }
542     RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
543
544     for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) {
545         if (ovsOpenInstanceArray[i] == NULL) {
546             ovsOpenInstanceArray[i] = instance;
547             ovsNumberOfOpenInstances++;
548             instance->cookie = i;
549             break;
550         }
551     }
552     ASSERT(i < OVS_MAX_OPEN_INSTANCES);
553     instance->fileObject = fileObject;
554     ASSERT(fileObject->FsContext == NULL);
555     instance->pid = (UINT32)InterlockedIncrement((LONG volatile *)&ovsExt->pidCount);
556     if (instance->pid == 0) {
557         /* XXX: check for rollover. */
558     }
559     fileObject->FsContext = instance;
560     OvsReleaseCtrlLock();
561     return STATUS_SUCCESS;
562 }
563
564 static VOID
565 OvsCleanupOpenInstance(PFILE_OBJECT fileObject)
566 {
567     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
568     ASSERT(instance);
569     ASSERT(fileObject == instance->fileObject);
570     OvsCleanupEvent(instance);
571     OvsCleanupPacketQueue(instance);
572 }
573
574 VOID
575 OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
576 {
577     POVS_OPEN_INSTANCE instance;
578     ASSERT(fileObject->FsContext);
579     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
580     ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES);
581
582     OvsAcquireCtrlLock();
583     fileObject->FsContext = NULL;
584     ASSERT(ovsOpenInstanceArray[instance->cookie] == instance);
585     ovsOpenInstanceArray[instance->cookie] = NULL;
586     ovsNumberOfOpenInstances--;
587     OvsReleaseCtrlLock();
588     ASSERT(instance->eventQueue == NULL);
589     ASSERT (instance->packetQueue == NULL);
590     OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
591 }
592
593 NTSTATUS
594 OvsCompleteIrpRequest(PIRP irp,
595                       ULONG_PTR infoPtr,
596                       NTSTATUS status)
597 {
598     irp->IoStatus.Information = infoPtr;
599     irp->IoStatus.Status = status;
600     IoCompleteRequest(irp, IO_NO_INCREMENT);
601     return status;
602 }
603
604
605 NTSTATUS
606 OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject,
607                    PIRP irp)
608 {
609     PIO_STACK_LOCATION irpSp;
610     NTSTATUS status = STATUS_SUCCESS;
611     PFILE_OBJECT fileObject;
612     POVS_DEVICE_EXTENSION ovsExt =
613         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
614
615     ASSERT(deviceObject == gOvsDeviceObject);
616     ASSERT(ovsExt != NULL);
617
618     irpSp = IoGetCurrentIrpStackLocation(irp);
619     fileObject = irpSp->FileObject;
620     OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u",
621                   deviceObject, fileObject,
622                   ovsExt->numberOpenInstance);
623
624     switch (irpSp->MajorFunction) {
625     case IRP_MJ_CREATE:
626         status = OvsAddOpenInstance(ovsExt, fileObject);
627         if (STATUS_SUCCESS == status) {
628             InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance);
629         }
630         break;
631     case IRP_MJ_CLOSE:
632         ASSERT(ovsExt->numberOpenInstance > 0);
633         OvsRemoveOpenInstance(fileObject);
634         InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance);
635         break;
636     default:
637         ASSERT(0);
638     }
639     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
640 }
641
642 _Use_decl_annotations_
643 NTSTATUS
644 OvsCleanupDevice(PDEVICE_OBJECT deviceObject,
645                  PIRP irp)
646 {
647
648     PIO_STACK_LOCATION irpSp;
649     PFILE_OBJECT fileObject;
650
651     NTSTATUS status = STATUS_SUCCESS;
652 #ifdef DBG
653     POVS_DEVICE_EXTENSION ovsExt =
654         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
655     if (ovsExt) {
656         ASSERT(ovsExt->numberOpenInstance > 0);
657     }
658 #else
659     UNREFERENCED_PARAMETER(deviceObject);
660 #endif
661     ASSERT(deviceObject == gOvsDeviceObject);
662     irpSp = IoGetCurrentIrpStackLocation(irp);
663     fileObject = irpSp->FileObject;
664
665     ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP);
666
667     OvsCleanupOpenInstance(fileObject);
668
669     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
670 }
671
672
673 /*
674  * --------------------------------------------------------------------------
675  * IOCTL function handler for the device.
676  * --------------------------------------------------------------------------
677  */
678 NTSTATUS
679 OvsDeviceControl(PDEVICE_OBJECT deviceObject,
680                  PIRP irp)
681 {
682     PIO_STACK_LOCATION irpSp;
683     NTSTATUS status = STATUS_SUCCESS;
684     PFILE_OBJECT fileObject;
685     PVOID inputBuffer = NULL;
686     PVOID outputBuffer = NULL;
687     UINT32 inputBufferLen, outputBufferLen;
688     UINT32 code, replyLen = 0;
689     POVS_OPEN_INSTANCE instance;
690     UINT32 devOp;
691     OVS_MESSAGE ovsMsgReadOp;
692     POVS_MESSAGE ovsMsg;
693     NETLINK_FAMILY *nlFamilyOps;
694     OVS_USER_PARAMS_CONTEXT usrParamsCtx;
695
696 #ifdef DBG
697     POVS_DEVICE_EXTENSION ovsExt =
698         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
699     ASSERT(deviceObject == gOvsDeviceObject);
700     ASSERT(ovsExt);
701     ASSERT(ovsExt->numberOpenInstance > 0);
702 #else
703     UNREFERENCED_PARAMETER(deviceObject);
704 #endif
705
706     irpSp = IoGetCurrentIrpStackLocation(irp);
707
708     ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
709     ASSERT(irpSp->FileObject != NULL);
710
711     fileObject = irpSp->FileObject;
712     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
713     code = irpSp->Parameters.DeviceIoControl.IoControlCode;
714     inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength;
715     outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
716     inputBuffer = irp->AssociatedIrp.SystemBuffer;
717
718     /* Check if the extension is enabled. */
719     if (NULL == gOvsSwitchContext) {
720         status = STATUS_NOT_FOUND;
721         goto exit;
722     }
723
724     if (!OvsAcquireSwitchContext()) {
725         status = STATUS_NOT_FOUND;
726         goto exit;
727     }
728
729     /* Concurrent netlink operations are not supported. */
730     if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) {
731         status = STATUS_RESOURCE_IN_USE;
732         goto done;
733     }
734
735     /*
736      * Validate the input/output buffer arguments depending on the type of the
737      * operation.
738      */
739     switch (code) {
740     case OVS_IOCTL_GET_PID:
741         /* Both input buffer and output buffer use the same location. */
742         outputBuffer = irp->AssociatedIrp.SystemBuffer;
743         if (outputBufferLen != 0) {
744             InitUserParamsCtx(irp, instance, 0, NULL,
745                               inputBuffer, inputBufferLen,
746                               outputBuffer, outputBufferLen,
747                               &usrParamsCtx);
748
749             ASSERT(outputBuffer);
750         } else {
751             status = STATUS_NDIS_INVALID_LENGTH;
752             goto done;
753         }
754
755         status = OvsGetPidHandler(&usrParamsCtx, &replyLen);
756         goto done;
757
758     case OVS_IOCTL_TRANSACT:
759         /* Both input buffer and output buffer are mandatory. */
760         if (outputBufferLen != 0) {
761             status = MapIrpOutputBuffer(irp, outputBufferLen,
762                                         sizeof *ovsMsg, &outputBuffer);
763             if (status != STATUS_SUCCESS) {
764                 goto done;
765             }
766             ASSERT(outputBuffer);
767         } else {
768             status = STATUS_NDIS_INVALID_LENGTH;
769             goto done;
770         }
771
772         if (inputBufferLen < sizeof (*ovsMsg)) {
773             status = STATUS_NDIS_INVALID_LENGTH;
774             goto done;
775         }
776
777         ovsMsg = inputBuffer;
778         devOp = OVS_TRANSACTION_DEV_OP;
779         break;
780
781     case OVS_IOCTL_READ_EVENT:
782     case OVS_IOCTL_READ_PACKET:
783         /*
784          * Output buffer is mandatory. These IOCTLs are used to read events and
785          * packets respectively. It is convenient to have separate ioctls.
786          */
787         if (outputBufferLen != 0) {
788             status = MapIrpOutputBuffer(irp, outputBufferLen,
789                                         sizeof *ovsMsg, &outputBuffer);
790             if (status != STATUS_SUCCESS) {
791                 goto done;
792             }
793             ASSERT(outputBuffer);
794         } else {
795             status = STATUS_NDIS_INVALID_LENGTH;
796             goto done;
797         }
798         inputBuffer = NULL;
799         inputBufferLen = 0;
800
801         ovsMsg = &ovsMsgReadOp;
802         ovsMsg->nlMsg.nlmsgType = OVS_WIN_NL_CTRL_FAMILY_ID;
803         ovsMsg->nlMsg.nlmsgPid = instance->pid;
804         /* An "artificial" command so we can use NL family function table*/
805         ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
806                               OVS_CTRL_CMD_EVENT_NOTIFY :
807                               OVS_CTRL_CMD_READ_NOTIFY;
808         devOp = OVS_READ_DEV_OP;
809         break;
810
811     case OVS_IOCTL_READ:
812         /* Output buffer is mandatory. */
813         if (outputBufferLen != 0) {
814             status = MapIrpOutputBuffer(irp, outputBufferLen,
815                                         sizeof *ovsMsg, &outputBuffer);
816             if (status != STATUS_SUCCESS) {
817                 goto done;
818             }
819             ASSERT(outputBuffer);
820         } else {
821             status = STATUS_NDIS_INVALID_LENGTH;
822             goto done;
823         }
824
825         /*
826          * Operate in the mode that read ioctl is similar to ReadFile(). This
827          * might change as the userspace code gets implemented.
828          */
829         inputBuffer = NULL;
830         inputBufferLen = 0;
831
832         /*
833          * For implementing read (ioctl or otherwise), we need to store some
834          * state in the instance to indicate the command that started the dump
835          * operation. The state can setup 'ovsMsgReadOp' appropriately. Note
836          * that 'ovsMsgReadOp' is needed only in this function to call into the
837          * appropriate handler. The handler itself can access the state in the
838          * instance.
839          *
840          * In the absence of a dump start, return 0 bytes.
841          */
842         if (instance->dumpState.ovsMsg == NULL) {
843             replyLen = 0;
844             status = STATUS_SUCCESS;
845             goto done;
846         }
847         RtlCopyMemory(&ovsMsgReadOp, instance->dumpState.ovsMsg,
848                       sizeof (ovsMsgReadOp));
849
850         /* Create an NL message for consumption. */
851         ovsMsg = &ovsMsgReadOp;
852         devOp = OVS_READ_DEV_OP;
853
854         break;
855
856     case OVS_IOCTL_WRITE:
857         /* Input buffer is mandatory. */
858         if (inputBufferLen < sizeof (*ovsMsg)) {
859             status = STATUS_NDIS_INVALID_LENGTH;
860             goto done;
861         }
862
863         ovsMsg = inputBuffer;
864         devOp = OVS_WRITE_DEV_OP;
865         break;
866
867     default:
868         status = STATUS_INVALID_DEVICE_REQUEST;
869         goto done;
870     }
871
872     ASSERT(ovsMsg);
873     switch (ovsMsg->nlMsg.nlmsgType) {
874     case OVS_WIN_NL_CTRL_FAMILY_ID:
875         nlFamilyOps = &nlControlFamilyOps;
876         break;
877     case OVS_WIN_NL_DATAPATH_FAMILY_ID:
878         nlFamilyOps = &nlDatapathFamilyOps;
879         break;
880     case OVS_WIN_NL_FLOW_FAMILY_ID:
881          nlFamilyOps = &nlFLowFamilyOps;
882          break;
883     case OVS_WIN_NL_PACKET_FAMILY_ID:
884          nlFamilyOps = &nlPacketFamilyOps;
885          break;
886     case OVS_WIN_NL_VPORT_FAMILY_ID:
887         nlFamilyOps = &nlVportFamilyOps;
888         break;
889     case OVS_WIN_NL_NETDEV_FAMILY_ID:
890         nlFamilyOps = &nlNetdevFamilyOps;
891         break;
892     default:
893         status = STATUS_INVALID_PARAMETER;
894         goto done;
895     }
896
897     /*
898      * For read operation, the netlink command has already been validated
899      * previously.
900      */
901     if (devOp != OVS_READ_DEV_OP) {
902         status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps);
903         if (status != STATUS_SUCCESS) {
904             goto done;
905         }
906     }
907
908     InitUserParamsCtx(irp, instance, devOp, ovsMsg,
909                       inputBuffer, inputBufferLen,
910                       outputBuffer, outputBufferLen,
911                       &usrParamsCtx);
912
913     status = InvokeNetlinkCmdHandler(&usrParamsCtx, nlFamilyOps, &replyLen);
914
915 done:
916     OvsReleaseSwitchContext(gOvsSwitchContext);
917
918 exit:
919     KeMemoryBarrier();
920     instance->inUse = 0;
921
922     /* Should not complete a pending IRP unless processing is completed. */
923     if (status == STATUS_PENDING) {
924         return status;
925     }
926     return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
927 }
928
929
930 /*
931  * --------------------------------------------------------------------------
932  * Function to validate a netlink command. Only certain combinations of
933  * (device operation, netlink family, command) are valid.
934  * --------------------------------------------------------------------------
935  */
936 static NTSTATUS
937 ValidateNetlinkCmd(UINT32 devOp,
938                    POVS_OPEN_INSTANCE instance,
939                    POVS_MESSAGE ovsMsg,
940                    NETLINK_FAMILY *nlFamilyOps)
941 {
942     NTSTATUS status = STATUS_INVALID_PARAMETER;
943     UINT16 i;
944
945     for (i = 0; i < nlFamilyOps->opsCount; i++) {
946         if (nlFamilyOps->cmds[i].cmd == ovsMsg->genlMsg.cmd) {
947             /* Validate if the command is valid for the device operation. */
948             if ((devOp & nlFamilyOps->cmds[i].supportedDevOp) == 0) {
949                 status = STATUS_INVALID_PARAMETER;
950                 goto done;
951             }
952
953             /* Validate the version. */
954             if (nlFamilyOps->version > ovsMsg->genlMsg.version) {
955                 status = STATUS_INVALID_PARAMETER;
956                 goto done;
957             }
958
959             /* Validate the DP for commands that require a DP. */
960             if (nlFamilyOps->cmds[i].validateDpIndex == TRUE) {
961                 OvsAcquireCtrlLock();
962                 if (ovsMsg->ovsHdr.dp_ifindex !=
963                                           (INT)gOvsSwitchContext->dpNo) {
964                     status = STATUS_INVALID_PARAMETER;
965                     OvsReleaseCtrlLock();
966                     goto done;
967                 }
968                 OvsReleaseCtrlLock();
969             }
970
971             /* Validate the PID. */
972             if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
973                 status = STATUS_INVALID_PARAMETER;
974                 goto done;
975             }
976
977             status = STATUS_SUCCESS;
978             break;
979         }
980     }
981
982 done:
983     return status;
984 }
985
986 /*
987  * --------------------------------------------------------------------------
988  * Function to invoke the netlink command handler.
989  * --------------------------------------------------------------------------
990  */
991 static NTSTATUS
992 InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
993                         NETLINK_FAMILY *nlFamilyOps,
994                         UINT32 *replyLen)
995 {
996     NTSTATUS status = STATUS_INVALID_PARAMETER;
997     UINT16 i;
998
999     for (i = 0; i < nlFamilyOps->opsCount; i++) {
1000         if (nlFamilyOps->cmds[i].cmd == usrParamsCtx->ovsMsg->genlMsg.cmd) {
1001             NetlinkCmdHandler *handler = nlFamilyOps->cmds[i].handler;
1002             ASSERT(handler);
1003             if (handler) {
1004                 status = handler(usrParamsCtx, replyLen);
1005             }
1006             break;
1007         }
1008     }
1009
1010     return status;
1011 }
1012
1013 /*
1014  * --------------------------------------------------------------------------
1015  *  Handler for 'OVS_IOCTL_GET_PID'.
1016  *
1017  *  Each handle on the device is assigned a unique PID when the handle is
1018  *  created. This function passes the PID to userspace using METHOD_BUFFERED
1019  *  method.
1020  * --------------------------------------------------------------------------
1021  */
1022 static NTSTATUS
1023 OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1024                  UINT32 *replyLen)
1025 {
1026     NTSTATUS status = STATUS_SUCCESS;
1027     PUINT32 msgOut = (PUINT32)usrParamsCtx->outputBuffer;
1028
1029     if (usrParamsCtx->outputLength >= sizeof *msgOut) {
1030         POVS_OPEN_INSTANCE instance =
1031             (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1032
1033         RtlZeroMemory(msgOut, sizeof *msgOut);
1034         RtlCopyMemory(msgOut, &instance->pid, sizeof(*msgOut));
1035         *replyLen = sizeof *msgOut;
1036     } else {
1037         *replyLen = sizeof *msgOut;
1038         status = STATUS_NDIS_INVALID_LENGTH;
1039     }
1040
1041     return status;
1042 }
1043
1044 /*
1045  * --------------------------------------------------------------------------
1046  * Utility function to fill up information about the datapath in a reply to
1047  * userspace.
1048  * Assumes that 'gOvsCtrlLock' lock is acquired.
1049  * --------------------------------------------------------------------------
1050  */
1051 static NTSTATUS
1052 OvsDpFillInfo(POVS_SWITCH_CONTEXT ovsSwitchContext,
1053               POVS_MESSAGE msgIn,
1054               PNL_BUFFER nlBuf)
1055 {
1056     BOOLEAN writeOk;
1057     OVS_MESSAGE msgOutTmp;
1058     OVS_DATAPATH *datapath = &ovsSwitchContext->datapath;
1059     PNL_MSG_HDR nlMsg;
1060
1061     ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && NlBufRemLen(nlBuf) >= sizeof *msgIn);
1062
1063     msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_DATAPATH_FAMILY_ID;
1064     msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
1065     msgOutTmp.nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
1066     msgOutTmp.nlMsg.nlmsgPid = msgIn->nlMsg.nlmsgPid;
1067
1068     msgOutTmp.genlMsg.cmd = OVS_DP_CMD_GET;
1069     msgOutTmp.genlMsg.version = nlDatapathFamilyOps.version;
1070     msgOutTmp.genlMsg.reserved = 0;
1071
1072     msgOutTmp.ovsHdr.dp_ifindex = ovsSwitchContext->dpNo;
1073
1074     writeOk = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
1075     if (writeOk) {
1076         writeOk = NlMsgPutTailString(nlBuf, OVS_DP_ATTR_NAME,
1077                                      OVS_SYSTEM_DP_NAME);
1078     }
1079     if (writeOk) {
1080         OVS_DP_STATS dpStats;
1081
1082         dpStats.n_hit = datapath->hits;
1083         dpStats.n_missed = datapath->misses;
1084         dpStats.n_lost = datapath->lost;
1085         dpStats.n_flows = datapath->nFlows;
1086         writeOk = NlMsgPutTailUnspec(nlBuf, OVS_DP_ATTR_STATS,
1087                                      (PCHAR)&dpStats, sizeof dpStats);
1088     }
1089     nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
1090     nlMsg->nlmsgLen = NlBufSize(nlBuf);
1091
1092     return writeOk ? STATUS_SUCCESS : STATUS_INVALID_BUFFER_SIZE;
1093 }
1094
1095 /*
1096  * --------------------------------------------------------------------------
1097  * Handler for queueing an IRP used for event notification. The IRP is
1098  * completed when a port state changes. STATUS_PENDING is returned on
1099  * success. User mode keep a pending IRP at all times.
1100  * --------------------------------------------------------------------------
1101  */
1102 static NTSTATUS
1103 OvsPendEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1104                        UINT32 *replyLen)
1105 {
1106     NDIS_STATUS status;
1107
1108     UNREFERENCED_PARAMETER(replyLen);
1109
1110     POVS_OPEN_INSTANCE instance =
1111         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1112     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1113     OVS_EVENT_POLL poll;
1114
1115     poll.dpNo = msgIn->ovsHdr.dp_ifindex;
1116     status = OvsWaitEventIoctl(usrParamsCtx->irp, instance->fileObject,
1117                                &poll, sizeof poll);
1118     return status;
1119 }
1120
1121 /*
1122  * --------------------------------------------------------------------------
1123  *  Handler for the subscription for the event queue
1124  * --------------------------------------------------------------------------
1125  */
1126 static NTSTATUS
1127 OvsSubscribeEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1128                             UINT32 *replyLen)
1129 {
1130     NDIS_STATUS status;
1131     OVS_EVENT_SUBSCRIBE request;
1132     BOOLEAN rc;
1133     UINT8 join;
1134     PNL_ATTR attrs[2];
1135     const NL_POLICY policy[] =  {
1136         [OVS_NL_ATTR_MCAST_GRP] = {.type = NL_A_U32 },
1137         [OVS_NL_ATTR_MCAST_JOIN] = {.type = NL_A_U8 },
1138         };
1139
1140     UNREFERENCED_PARAMETER(replyLen);
1141
1142     POVS_OPEN_INSTANCE instance =
1143         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1144     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1145
1146     rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
1147          NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
1148     if (!rc) {
1149         status = STATUS_INVALID_PARAMETER;
1150         goto done;
1151     }
1152
1153     /* XXX Ignore the MC group for now */
1154     join = NlAttrGetU8(attrs[OVS_NL_ATTR_MCAST_JOIN]);
1155     request.dpNo = msgIn->ovsHdr.dp_ifindex;
1156     request.subscribe = join;
1157     request.mask = OVS_EVENT_MASK_ALL;
1158
1159     status = OvsSubscribeEventIoctl(instance->fileObject, &request,
1160                                     sizeof request);
1161 done:
1162     return status;
1163 }
1164
1165 /*
1166  * --------------------------------------------------------------------------
1167  *  Command Handler for 'OVS_DP_CMD_NEW'.
1168  * --------------------------------------------------------------------------
1169  */
1170 static NTSTATUS
1171 OvsNewDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1172                    UINT32 *replyLen)
1173 {
1174     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1175 }
1176
1177 /*
1178  * --------------------------------------------------------------------------
1179  *  Command Handler for 'OVS_DP_CMD_GET'.
1180  *
1181  *  The function handles both the dump based as well as the transaction based
1182  *  'OVS_DP_CMD_GET' command. In the dump command, it handles the initial
1183  *  call to setup dump state, as well as subsequent calls to continue dumping
1184  *  data.
1185  * --------------------------------------------------------------------------
1186  */
1187 static NTSTATUS
1188 OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1189                    UINT32 *replyLen)
1190 {
1191     if (usrParamsCtx->devOp == OVS_TRANSACTION_DEV_OP) {
1192         return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1193     } else {
1194         return HandleGetDpDump(usrParamsCtx, replyLen);
1195     }
1196 }
1197
1198 /*
1199  * --------------------------------------------------------------------------
1200  *  Function for handling the transaction based 'OVS_DP_CMD_GET' command.
1201  * --------------------------------------------------------------------------
1202  */
1203 static NTSTATUS
1204 HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1205                        UINT32 *replyLen)
1206 {
1207     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1208 }
1209
1210
1211 /*
1212  * --------------------------------------------------------------------------
1213  *  Function for handling the dump-based 'OVS_DP_CMD_GET' command.
1214  * --------------------------------------------------------------------------
1215  */
1216 static NTSTATUS
1217 HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1218                 UINT32 *replyLen)
1219 {
1220     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1221     POVS_OPEN_INSTANCE instance =
1222         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1223
1224     if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
1225         *replyLen = 0;
1226         OvsSetupDumpStart(usrParamsCtx);
1227     } else {
1228         NL_BUFFER nlBuf;
1229         NTSTATUS status;
1230         POVS_MESSAGE msgIn = instance->dumpState.ovsMsg;
1231
1232         ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1233
1234         if (instance->dumpState.ovsMsg == NULL) {
1235             ASSERT(FALSE);
1236             return STATUS_INVALID_DEVICE_STATE;
1237         }
1238
1239         /* Dump state must have been deleted after previous dump operation. */
1240         ASSERT(instance->dumpState.index[0] == 0);
1241
1242         /* Output buffer has been validated while validating read dev op. */
1243         ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1244
1245         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
1246                   usrParamsCtx->outputLength);
1247
1248         OvsAcquireCtrlLock();
1249         status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
1250         OvsReleaseCtrlLock();
1251
1252         if (status != STATUS_SUCCESS) {
1253             *replyLen = 0;
1254             FreeUserDumpState(instance);
1255             return status;
1256         }
1257
1258         /* Increment the dump index. */
1259         instance->dumpState.index[0] = 1;
1260         *replyLen = msgOut->nlMsg.nlmsgLen;
1261
1262         /* Free up the dump state, since there's no more data to continue. */
1263         FreeUserDumpState(instance);
1264     }
1265
1266     return STATUS_SUCCESS;
1267 }
1268
1269
1270 /*
1271  * --------------------------------------------------------------------------
1272  *  Command Handler for 'OVS_DP_CMD_SET'.
1273  * --------------------------------------------------------------------------
1274  */
1275 static NTSTATUS
1276 OvsSetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1277                    UINT32 *replyLen)
1278 {
1279     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1280 }
1281
1282 /*
1283  * --------------------------------------------------------------------------
1284  *  Function for handling transaction based 'OVS_DP_CMD_NEW', 'OVS_DP_CMD_GET'
1285  *  and 'OVS_DP_CMD_SET' commands.
1286  *
1287  * 'OVS_DP_CMD_NEW' is implemented to keep userspace code happy. Creation of a
1288  * new datapath is not supported currently.
1289  * --------------------------------------------------------------------------
1290  */
1291 static NTSTATUS
1292 HandleDpTransactionCommon(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1293                           UINT32 *replyLen)
1294 {
1295     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1296     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1297     NTSTATUS status = STATUS_SUCCESS;
1298     NL_BUFFER nlBuf;
1299     NL_ERROR nlError = NL_ERROR_SUCCESS;
1300     static const NL_POLICY ovsDatapathSetPolicy[] = {
1301         [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .maxLen = IFNAMSIZ },
1302         [OVS_DP_ATTR_UPCALL_PID] = { .type = NL_A_U32, .optional = TRUE },
1303         [OVS_DP_ATTR_USER_FEATURES] = { .type = NL_A_U32, .optional = TRUE },
1304     };
1305     PNL_ATTR dpAttrs[ARRAY_SIZE(ovsDatapathSetPolicy)];
1306
1307     UNREFERENCED_PARAMETER(msgOut);
1308
1309     /* input buffer has been validated while validating write dev op. */
1310     ASSERT(msgIn != NULL && usrParamsCtx->inputLength >= sizeof *msgIn);
1311
1312     /* Parse any attributes in the request. */
1313     if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET ||
1314         usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
1315         if (!NlAttrParse((PNL_MSG_HDR)msgIn,
1316                         NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN,
1317                         NlMsgAttrsLen((PNL_MSG_HDR)msgIn),
1318                         ovsDatapathSetPolicy, dpAttrs, ARRAY_SIZE(dpAttrs))) {
1319             return STATUS_INVALID_PARAMETER;
1320         }
1321
1322         /*
1323         * XXX: Not clear at this stage if there's any role for the
1324         * OVS_DP_ATTR_UPCALL_PID and OVS_DP_ATTR_USER_FEATURES attributes passed
1325         * from userspace.
1326         */
1327
1328     } else {
1329         RtlZeroMemory(dpAttrs, sizeof dpAttrs);
1330     }
1331
1332     /* Output buffer has been validated while validating transact dev op. */
1333     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1334
1335     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
1336
1337     OvsAcquireCtrlLock();
1338     if (dpAttrs[OVS_DP_ATTR_NAME] != NULL) {
1339         if (!OvsCompareString(NlAttrGet(dpAttrs[OVS_DP_ATTR_NAME]),
1340                               OVS_SYSTEM_DP_NAME)) {
1341             OvsReleaseCtrlLock();
1342
1343             /* Creation of new datapaths is not supported. */
1344             if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET) {
1345                 nlError = NL_ERROR_NOTSUPP;
1346                 goto cleanup;
1347             }
1348
1349             nlError = NL_ERROR_NODEV;
1350             goto cleanup;
1351         }
1352     } else if ((UINT32)msgIn->ovsHdr.dp_ifindex != gOvsSwitchContext->dpNo) {
1353         OvsReleaseCtrlLock();
1354         nlError = NL_ERROR_NODEV;
1355         goto cleanup;
1356     }
1357
1358     if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
1359         OvsReleaseCtrlLock();
1360         nlError = NL_ERROR_EXIST;
1361         goto cleanup;
1362     }
1363
1364     status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
1365     OvsReleaseCtrlLock();
1366
1367     *replyLen = NlBufSize(&nlBuf);
1368
1369 cleanup:
1370     if (nlError != NL_ERROR_SUCCESS) {
1371         POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
1372             usrParamsCtx->outputBuffer;
1373
1374         NlBuildErrorMsg(msgIn, msgError, nlError);
1375         *replyLen = msgError->nlMsg.nlmsgLen;
1376     }
1377
1378     return STATUS_SUCCESS;
1379 }
1380
1381
1382 NTSTATUS
1383 OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx)
1384 {
1385     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1386     POVS_OPEN_INSTANCE instance =
1387         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1388
1389     /* input buffer has been validated while validating write dev op. */
1390     ASSERT(msgIn != NULL && usrParamsCtx->inputLength >= sizeof *msgIn);
1391
1392     /* A write operation that does not indicate dump start is invalid. */
1393     if ((msgIn->nlMsg.nlmsgFlags & NLM_F_DUMP) != NLM_F_DUMP) {
1394         return STATUS_INVALID_PARAMETER;
1395     }
1396     /* XXX: Handle other NLM_F_* flags in the future. */
1397
1398     /*
1399      * This operation should be setting up the dump state. If there's any
1400      * previous state, clear it up so as to set it up afresh.
1401      */
1402     FreeUserDumpState(instance);
1403
1404     return InitUserDumpState(instance, msgIn);
1405 }
1406
1407
1408 /*
1409  * --------------------------------------------------------------------------
1410  *  Utility function to map the output buffer in an IRP. The buffer is assumed
1411  *  to have been passed down using METHOD_OUT_DIRECT (Direct I/O).
1412  * --------------------------------------------------------------------------
1413  */
1414 static NTSTATUS
1415 MapIrpOutputBuffer(PIRP irp,
1416                    UINT32 bufferLength,
1417                    UINT32 requiredLength,
1418                    PVOID *buffer)
1419 {
1420     ASSERT(irp);
1421     ASSERT(buffer);
1422     ASSERT(bufferLength);
1423     ASSERT(requiredLength);
1424     if (!buffer || !irp || bufferLength == 0 || requiredLength == 0) {
1425         return STATUS_INVALID_PARAMETER;
1426     }
1427
1428     if (bufferLength < requiredLength) {
1429         return STATUS_NDIS_INVALID_LENGTH;
1430     }
1431     if (irp->MdlAddress == NULL) {
1432         return STATUS_INVALID_PARAMETER;
1433     }
1434     *buffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
1435                                            NormalPagePriority);
1436     if (*buffer == NULL) {
1437         return STATUS_INSUFFICIENT_RESOURCES;
1438     }
1439
1440     return STATUS_SUCCESS;
1441 }
1442
1443 /*
1444  * --------------------------------------------------------------------------
1445  * Utility function to fill up information about the state of a port in a reply
1446  * to* userspace.
1447  * Assumes that 'gOvsCtrlLock' lock is acquired.
1448  * --------------------------------------------------------------------------
1449  */
1450 static NTSTATUS
1451 OvsPortFillInfo(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1452                 POVS_EVENT_ENTRY eventEntry,
1453                 PNL_BUFFER nlBuf)
1454 {
1455     NTSTATUS status;
1456     BOOLEAN ok;
1457     OVS_MESSAGE msgOutTmp;
1458     PNL_MSG_HDR nlMsg;
1459     POVS_VPORT_ENTRY vport;
1460
1461     ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && nlBuf->bufRemLen >= sizeof msgOutTmp);
1462
1463     msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_VPORT_FAMILY_ID;
1464     msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
1465
1466     /* driver intiated messages should have zerp seq number*/
1467     msgOutTmp.nlMsg.nlmsgSeq = 0;
1468     msgOutTmp.nlMsg.nlmsgPid = usrParamsCtx->ovsInstance->pid;
1469
1470     msgOutTmp.genlMsg.version = nlVportFamilyOps.version;
1471     msgOutTmp.genlMsg.reserved = 0;
1472
1473     /* we don't have netdev yet, treat link up/down a adding/removing a port*/
1474     if (eventEntry->status & (OVS_EVENT_LINK_UP | OVS_EVENT_CONNECT)) {
1475         msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_NEW;
1476     } else if (eventEntry->status &
1477              (OVS_EVENT_LINK_DOWN | OVS_EVENT_DISCONNECT)) {
1478         msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_DEL;
1479     } else {
1480         ASSERT(FALSE);
1481         return STATUS_UNSUCCESSFUL;
1482     }
1483     msgOutTmp.ovsHdr.dp_ifindex = gOvsSwitchContext->dpNo;
1484
1485     ok = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
1486     if (!ok) {
1487         status = STATUS_INVALID_BUFFER_SIZE;
1488         goto cleanup;
1489     }
1490
1491     vport = OvsFindVportByPortNo(gOvsSwitchContext, eventEntry->portNo);
1492     if (!vport) {
1493         status = STATUS_DEVICE_DOES_NOT_EXIST;
1494         goto cleanup;
1495     }
1496
1497     ok = NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_PORT_NO, eventEntry->portNo) &&
1498          NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_TYPE, vport->ovsType) &&
1499          NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_UPCALL_PID,
1500                          vport->upcallPid) &&
1501          NlMsgPutTailString(nlBuf, OVS_VPORT_ATTR_NAME, vport->ovsName);
1502     if (!ok) {
1503         status = STATUS_INVALID_BUFFER_SIZE;
1504         goto cleanup;
1505     }
1506
1507     /* XXXX Should we add the port stats attributes?*/
1508     nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
1509     nlMsg->nlmsgLen = NlBufSize(nlBuf);
1510     status = STATUS_SUCCESS;
1511
1512 cleanup:
1513     return status;
1514 }
1515
1516
1517 /*
1518  * --------------------------------------------------------------------------
1519  * Handler for reading events from the driver event queue. This handler is
1520  * executed when user modes issues a socket receive on a socket assocaited
1521  * with the MC group for events.
1522  * XXX user mode should read multiple events in one system call
1523  * --------------------------------------------------------------------------
1524  */
1525 static NTSTATUS
1526 OvsReadEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1527                        UINT32 *replyLen)
1528 {
1529 #ifdef DBG
1530     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1531     POVS_OPEN_INSTANCE instance =
1532         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1533 #endif
1534     NL_BUFFER nlBuf;
1535     NTSTATUS status;
1536     OVS_EVENT_ENTRY eventEntry;
1537
1538     ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1539
1540     /* Should never read events with a dump socket */
1541     ASSERT(instance->dumpState.ovsMsg == NULL);
1542
1543     /* Must have an event queue */
1544     ASSERT(instance->eventQueue != NULL);
1545
1546     /* Output buffer has been validated while validating read dev op. */
1547     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1548
1549     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
1550
1551     OvsAcquireCtrlLock();
1552
1553     /* remove an event entry from the event queue */
1554     status = OvsRemoveEventEntry(usrParamsCtx->ovsInstance, &eventEntry);
1555     if (status != STATUS_SUCCESS) {
1556         /* If there were not elements, read should return no data. */
1557         status = STATUS_SUCCESS;
1558         *replyLen = 0;
1559         goto cleanup;
1560     }
1561
1562     status = OvsPortFillInfo(usrParamsCtx, &eventEntry, &nlBuf);
1563     if (status == NDIS_STATUS_SUCCESS) {
1564         *replyLen = NlBufSize(&nlBuf);
1565     }
1566
1567 cleanup:
1568     OvsReleaseCtrlLock();
1569     return status;
1570 }
1571
1572 /*
1573  * --------------------------------------------------------------------------
1574  * Handler for reading missed pacckets from the driver event queue. This
1575  * handler is executed when user modes issues a socket receive on a socket
1576  * --------------------------------------------------------------------------
1577  */
1578 static NTSTATUS
1579 OvsReadPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1580                        UINT32 *replyLen)
1581 {
1582 #ifdef DBG
1583     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1584 #endif
1585     POVS_OPEN_INSTANCE instance =
1586         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1587     NTSTATUS status;
1588
1589     ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1590
1591     /* Should never read events with a dump socket */
1592     ASSERT(instance->dumpState.ovsMsg == NULL);
1593
1594     /* Must have an packet queue */
1595     ASSERT(instance->packetQueue != NULL);
1596
1597     /* Output buffer has been validated while validating read dev op. */
1598     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1599
1600     /* Read a packet from the instance queue */
1601     status = OvsReadDpIoctl(instance->fileObject, usrParamsCtx->outputBuffer,
1602                             usrParamsCtx->outputLength, replyLen);
1603     return status;
1604 }
1605
1606 /*
1607  * --------------------------------------------------------------------------
1608  *  Handler for the subscription for a packet queue
1609  * --------------------------------------------------------------------------
1610  */
1611 static NTSTATUS
1612 OvsSubscribePacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1613                             UINT32 *replyLen)
1614 {
1615     NDIS_STATUS status;
1616     BOOLEAN rc;
1617     UINT8 join;
1618     UINT32 pid;
1619     const NL_POLICY policy[] =  {
1620         [OVS_NL_ATTR_PACKET_PID] = {.type = NL_A_U32 },
1621         [OVS_NL_ATTR_PACKET_SUBSCRIBE] = {.type = NL_A_U8 }
1622         };
1623     PNL_ATTR attrs[ARRAY_SIZE(policy)];
1624
1625     UNREFERENCED_PARAMETER(replyLen);
1626
1627     POVS_OPEN_INSTANCE instance =
1628         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1629     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1630
1631     rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
1632          NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
1633     if (!rc) {
1634         status = STATUS_INVALID_PARAMETER;
1635         goto done;
1636     }
1637
1638     join = NlAttrGetU8(attrs[OVS_NL_ATTR_PACKET_PID]);
1639     pid = NlAttrGetU32(attrs[OVS_NL_ATTR_PACKET_PID]);
1640
1641     /* The socket subscribed with must be the same socket we perform receive*/
1642     ASSERT(pid == instance->pid);
1643
1644     status = OvsSubscribeDpIoctl(instance, pid, join);
1645
1646     /*
1647      * XXX Need to add this instance to a global data structure
1648      * which hold all packet based instances. The data structure (hash)
1649      * should be searched through the pid field of the instance for
1650      * placing the missed packet into the correct queue
1651      */
1652 done:
1653     return status;
1654 }
1655
1656 /*
1657  * --------------------------------------------------------------------------
1658  * Handler for queueing an IRP used for missed packet notification. The IRP is
1659  * completed when a packet received and mismatched. STATUS_PENDING is returned
1660  * on success. User mode keep a pending IRP at all times.
1661  * --------------------------------------------------------------------------
1662  */
1663 static NTSTATUS
1664 OvsPendPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1665                        UINT32 *replyLen)
1666 {
1667     UNREFERENCED_PARAMETER(replyLen);
1668
1669     POVS_OPEN_INSTANCE instance =
1670         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1671
1672     /*
1673      * XXX access to packet queue must be through acquiring a lock as user mode
1674      * could unsubscribe and the instnace will be freed.
1675      */
1676     return OvsWaitDpIoctl(usrParamsCtx->irp, instance->fileObject);
1677 }