datapath-windows: Enable extension after restart
[cascardo/ovs.git] / datapath-windows / ovsext / Datapath.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface
19  * alive while we transition over to the netlink based interface.
20  * OVS_USE_NL_INTERFACE = 0 => legacy interface to use with dpif-windows.c
21  * OVS_USE_NL_INTERFACE = 1 => netlink interface to use with ported dpif-linux.c
22  */
23
24 #include "precomp.h"
25 #include "Switch.h"
26 #include "User.h"
27 #include "Datapath.h"
28 #include "Jhash.h"
29 #include "Vport.h"
30 #include "Event.h"
31 #include "User.h"
32 #include "PacketIO.h"
33 #include "NetProto.h"
34 #include "Flow.h"
35 #include "User.h"
36 #include "Vxlan.h"
37
38 #ifdef OVS_DBG_MOD
39 #undef OVS_DBG_MOD
40 #endif
41 #define OVS_DBG_MOD OVS_DBG_DATAPATH
42 #include "Debug.h"
43
44 #define NETLINK_FAMILY_NAME_LEN 48
45
46
47 /*
48  * Netlink messages are grouped by family (aka type), and each family supports
49  * a set of commands, and can be passed both from kernel -> userspace or
50  * vice-versa. To call into the kernel, userspace uses a device operation which
51  * is outside of a netlink message.
52  *
53  * Each command results in the invocation of a handler function to implement the
54  * request functionality.
55  *
56  * Expectedly, only certain combinations of (device operation, netlink family,
57  * command) are valid.
58  *
59  * Here, we implement the basic infrastructure to perform validation on the
60  * incoming message, version checking, and also to invoke the corresponding
61  * handler to do the heavy-lifting.
62  */
63
64 /*
65  * Handler for a given netlink command. Not all the parameters are used by all
66  * the handlers.
67  */
68 typedef NTSTATUS(NetlinkCmdHandler)(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
69                                     UINT32 *replyLen);
70
71 typedef struct _NETLINK_CMD {
72     UINT16 cmd;
73     NetlinkCmdHandler *handler;
74     UINT32 supportedDevOp;      /* Supported device operations. */
75     BOOLEAN validateDpIndex;    /* Does command require a valid DP argument. */
76 } NETLINK_CMD, *PNETLINK_CMD;
77
78 /* A netlink family is a group of commands. */
79 typedef struct _NETLINK_FAMILY {
80     CHAR *name;
81     UINT32 id;
82     UINT8 version;
83     UINT8 pad;
84     UINT16 maxAttr;
85     NETLINK_CMD *cmds;          /* Array of netlink commands and handlers. */
86     UINT16 opsCount;
87 } NETLINK_FAMILY, *PNETLINK_FAMILY;
88
89 /* Handlers for the various netlink commands. */
90 static NetlinkCmdHandler OvsPendEventCmdHandler,
91                          OvsPendPacketCmdHandler,
92                          OvsSubscribeEventCmdHandler,
93                          OvsSubscribePacketCmdHandler,
94                          OvsReadEventCmdHandler,
95                          OvsReadPacketCmdHandler,
96                          OvsNewDpCmdHandler,
97                          OvsGetDpCmdHandler,
98                          OvsSetDpCmdHandler;
99
100 NetlinkCmdHandler        OvsGetNetdevCmdHandler,
101                          OvsGetVportCmdHandler,
102                          OvsSetVportCmdHandler,
103                          OvsNewVportCmdHandler,
104                          OvsDeleteVportCmdHandler;
105
106 static NTSTATUS HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
107                                        UINT32 *replyLen);
108 static NTSTATUS HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
109                                 UINT32 *replyLen);
110 static NTSTATUS HandleDpTransactionCommon(
111                     POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen);
112 static NTSTATUS OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
113                                     UINT32 *replyLen);
114
115 /*
116  * The various netlink families, along with the supported commands. Most of
117  * these families and commands are part of the openvswitch specification for a
118  * netlink datapath. In addition, each platform can implement a few families
119  * and commands as extensions.
120  */
121
122 /* Netlink control family: this is a Windows specific family. */
123 NETLINK_CMD nlControlFamilyCmdOps[] = {
124     { .cmd = OVS_CTRL_CMD_WIN_PEND_REQ,
125       .handler = OvsPendEventCmdHandler,
126       .supportedDevOp = OVS_WRITE_DEV_OP,
127       .validateDpIndex = TRUE,
128     },
129     { .cmd = OVS_CTRL_CMD_WIN_PEND_PACKET_REQ,
130       .handler = OvsPendPacketCmdHandler,
131       .supportedDevOp = OVS_WRITE_DEV_OP,
132       .validateDpIndex = TRUE,
133     },
134     { .cmd = OVS_CTRL_CMD_MC_SUBSCRIBE_REQ,
135       .handler = OvsSubscribeEventCmdHandler,
136       .supportedDevOp = OVS_WRITE_DEV_OP,
137       .validateDpIndex = TRUE,
138     },
139     { .cmd = OVS_CTRL_CMD_PACKET_SUBSCRIBE_REQ,
140       .handler = OvsSubscribePacketCmdHandler,
141       .supportedDevOp = OVS_WRITE_DEV_OP,
142       .validateDpIndex = TRUE,
143     },
144     { .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
145       .handler = OvsReadEventCmdHandler,
146       .supportedDevOp = OVS_READ_EVENT_DEV_OP,
147       .validateDpIndex = FALSE,
148     },
149     { .cmd = OVS_CTRL_CMD_READ_NOTIFY,
150       .handler = OvsReadPacketCmdHandler,
151       .supportedDevOp = OVS_READ_PACKET_DEV_OP,
152       .validateDpIndex = FALSE,
153     }
154 };
155
156 NETLINK_FAMILY nlControlFamilyOps = {
157     .name     = OVS_WIN_CONTROL_FAMILY,
158     .id       = OVS_WIN_NL_CTRL_FAMILY_ID,
159     .version  = OVS_WIN_CONTROL_VERSION,
160     .maxAttr  = OVS_WIN_CONTROL_ATTR_MAX,
161     .cmds     = nlControlFamilyCmdOps,
162     .opsCount = ARRAY_SIZE(nlControlFamilyCmdOps)
163 };
164
165 /* Netlink datapath family. */
166 NETLINK_CMD nlDatapathFamilyCmdOps[] = {
167     { .cmd             = OVS_DP_CMD_NEW,
168       .handler         = OvsNewDpCmdHandler,
169       .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
170       .validateDpIndex = FALSE
171     },
172     { .cmd             = OVS_DP_CMD_GET,
173       .handler         = OvsGetDpCmdHandler,
174       .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
175                          OVS_TRANSACTION_DEV_OP,
176       .validateDpIndex = FALSE
177     },
178     { .cmd             = OVS_DP_CMD_SET,
179       .handler         = OvsSetDpCmdHandler,
180       .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
181                          OVS_TRANSACTION_DEV_OP,
182       .validateDpIndex = TRUE
183     }
184 };
185
186 NETLINK_FAMILY nlDatapathFamilyOps = {
187     .name     = OVS_DATAPATH_FAMILY,
188     .id       = OVS_WIN_NL_DATAPATH_FAMILY_ID,
189     .version  = OVS_DATAPATH_VERSION,
190     .maxAttr  = OVS_DP_ATTR_MAX,
191     .cmds     = nlDatapathFamilyCmdOps,
192     .opsCount = ARRAY_SIZE(nlDatapathFamilyCmdOps)
193 };
194
195 /* Netlink packet family. */
196
197 NETLINK_CMD nlPacketFamilyCmdOps[] = {
198     { .cmd             = OVS_PACKET_CMD_EXECUTE,
199       .handler         = OvsNlExecuteCmdHandler,
200       .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
201       .validateDpIndex = TRUE
202     }
203 };
204
205 NETLINK_FAMILY nlPacketFamilyOps = {
206     .name     = OVS_PACKET_FAMILY,
207     .id       = OVS_WIN_NL_PACKET_FAMILY_ID,
208     .version  = OVS_PACKET_VERSION,
209     .maxAttr  = OVS_PACKET_ATTR_MAX,
210     .cmds     = nlPacketFamilyCmdOps,
211     .opsCount = ARRAY_SIZE(nlPacketFamilyCmdOps)
212 };
213
214 /* Netlink vport family. */
215 NETLINK_CMD nlVportFamilyCmdOps[] = {
216     { .cmd = OVS_VPORT_CMD_GET,
217       .handler = OvsGetVportCmdHandler,
218       .supportedDevOp = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
219                         OVS_TRANSACTION_DEV_OP,
220       .validateDpIndex = TRUE
221     },
222     { .cmd = OVS_VPORT_CMD_NEW,
223       .handler = OvsNewVportCmdHandler,
224       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
225       .validateDpIndex = TRUE
226     },
227     { .cmd = OVS_VPORT_CMD_SET,
228       .handler = OvsSetVportCmdHandler,
229       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
230       .validateDpIndex = TRUE
231     },
232     { .cmd = OVS_VPORT_CMD_DEL,
233       .handler = OvsDeleteVportCmdHandler,
234       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
235       .validateDpIndex = TRUE
236     },
237 };
238
239 NETLINK_FAMILY nlVportFamilyOps = {
240     .name     = OVS_VPORT_FAMILY,
241     .id       = OVS_WIN_NL_VPORT_FAMILY_ID,
242     .version  = OVS_VPORT_VERSION,
243     .maxAttr  = OVS_VPORT_ATTR_MAX,
244     .cmds     = nlVportFamilyCmdOps,
245     .opsCount = ARRAY_SIZE(nlVportFamilyCmdOps)
246 };
247
248 /* Netlink flow family. */
249
250 NETLINK_CMD nlFlowFamilyCmdOps[] = {
251     { .cmd              = OVS_FLOW_CMD_NEW,
252       .handler          = OvsFlowNlCmdHandler,
253       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
254       .validateDpIndex  = TRUE
255     },
256     { .cmd              = OVS_FLOW_CMD_SET,
257       .handler          = OvsFlowNlCmdHandler,
258       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
259       .validateDpIndex  = TRUE
260     },
261     { .cmd              = OVS_FLOW_CMD_DEL,
262       .handler          = OvsFlowNlCmdHandler,
263       .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
264       .validateDpIndex  = TRUE
265     },
266     { .cmd              = OVS_FLOW_CMD_GET,
267       .handler          = OvsFlowNlGetCmdHandler,
268       .supportedDevOp   = OVS_TRANSACTION_DEV_OP |
269                           OVS_WRITE_DEV_OP | OVS_READ_DEV_OP,
270       .validateDpIndex  = TRUE
271     },
272 };
273
274 NETLINK_FAMILY nlFLowFamilyOps = {
275     .name     = OVS_FLOW_FAMILY,
276     .id       = OVS_WIN_NL_FLOW_FAMILY_ID,
277     .version  = OVS_FLOW_VERSION,
278     .maxAttr  = OVS_FLOW_ATTR_MAX,
279     .cmds     = nlFlowFamilyCmdOps,
280     .opsCount = ARRAY_SIZE(nlFlowFamilyCmdOps)
281 };
282
283 /* Netlink netdev family. */
284 NETLINK_CMD nlNetdevFamilyCmdOps[] = {
285     { .cmd = OVS_WIN_NETDEV_CMD_GET,
286       .handler = OvsGetNetdevCmdHandler,
287       .supportedDevOp = OVS_TRANSACTION_DEV_OP,
288       .validateDpIndex = FALSE
289     },
290 };
291
292 NETLINK_FAMILY nlNetdevFamilyOps = {
293     .name     = OVS_WIN_NETDEV_FAMILY,
294     .id       = OVS_WIN_NL_NETDEV_FAMILY_ID,
295     .version  = OVS_WIN_NETDEV_VERSION,
296     .maxAttr  = OVS_WIN_NETDEV_ATTR_MAX,
297     .cmds     = nlNetdevFamilyCmdOps,
298     .opsCount = ARRAY_SIZE(nlNetdevFamilyCmdOps)
299 };
300
301 static NTSTATUS MapIrpOutputBuffer(PIRP irp,
302                                    UINT32 bufferLength,
303                                    UINT32 requiredLength,
304                                    PVOID *buffer);
305 static NTSTATUS ValidateNetlinkCmd(UINT32 devOp,
306                                    POVS_OPEN_INSTANCE instance,
307                                    POVS_MESSAGE ovsMsg,
308                                    NETLINK_FAMILY *nlFamilyOps);
309 static NTSTATUS InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
310                                         NETLINK_FAMILY *nlFamilyOps,
311                                         UINT32 *replyLen);
312
313 /* Handles to the device object for communication with userspace. */
314 NDIS_HANDLE gOvsDeviceHandle;
315 PDEVICE_OBJECT gOvsDeviceObject;
316
317 _Dispatch_type_(IRP_MJ_CREATE)
318 _Dispatch_type_(IRP_MJ_CLOSE)
319 DRIVER_DISPATCH OvsOpenCloseDevice;
320
321 _Dispatch_type_(IRP_MJ_CLEANUP)
322 DRIVER_DISPATCH OvsCleanupDevice;
323
324 _Dispatch_type_(IRP_MJ_DEVICE_CONTROL)
325 DRIVER_DISPATCH OvsDeviceControl;
326
327 #ifdef ALLOC_PRAGMA
328 #pragma alloc_text(INIT, OvsCreateDeviceObject)
329 #pragma alloc_text(PAGE, OvsOpenCloseDevice)
330 #pragma alloc_text(PAGE, OvsCleanupDevice)
331 #pragma alloc_text(PAGE, OvsDeviceControl)
332 #endif // ALLOC_PRAGMA
333
334 /*
335  * We might hit this limit easily since userspace opens a netlink descriptor for
336  * each thread, and at least one descriptor per vport. Revisit this later.
337  */
338 #define OVS_MAX_OPEN_INSTANCES 512
339 #define OVS_SYSTEM_DP_NAME     "ovs-system"
340
341 POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES];
342 UINT32 ovsNumberOfOpenInstances;
343 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
344
345 NDIS_SPIN_LOCK ovsCtrlLockObj;
346 PNDIS_SPIN_LOCK gOvsCtrlLock;
347
348 NTSTATUS
349 InitUserDumpState(POVS_OPEN_INSTANCE instance,
350                   POVS_MESSAGE ovsMsg)
351 {
352     /* Clear the dumpState from a previous dump sequence. */
353     ASSERT(instance->dumpState.ovsMsg == NULL);
354     ASSERT(ovsMsg);
355
356     instance->dumpState.ovsMsg =
357         (POVS_MESSAGE)OvsAllocateMemoryWithTag(sizeof(OVS_MESSAGE),
358                                                OVS_DATAPATH_POOL_TAG);
359     if (instance->dumpState.ovsMsg == NULL) {
360         return STATUS_NO_MEMORY;
361     }
362     RtlCopyMemory(instance->dumpState.ovsMsg, ovsMsg,
363                   sizeof *instance->dumpState.ovsMsg);
364     RtlZeroMemory(instance->dumpState.index,
365                   sizeof instance->dumpState.index);
366
367     return STATUS_SUCCESS;
368 }
369
370 VOID
371 FreeUserDumpState(POVS_OPEN_INSTANCE instance)
372 {
373     if (instance->dumpState.ovsMsg != NULL) {
374         OvsFreeMemoryWithTag(instance->dumpState.ovsMsg,
375                              OVS_DATAPATH_POOL_TAG);
376         RtlZeroMemory(&instance->dumpState, sizeof instance->dumpState);
377     }
378 }
379
380 VOID
381 OvsInit()
382 {
383     gOvsCtrlLock = &ovsCtrlLockObj;
384     NdisAllocateSpinLock(gOvsCtrlLock);
385     OvsInitEventQueue();
386 }
387
388 VOID
389 OvsCleanup()
390 {
391     OvsCleanupEventQueue();
392     if (gOvsCtrlLock) {
393         NdisFreeSpinLock(gOvsCtrlLock);
394         gOvsCtrlLock = NULL;
395     }
396 }
397
398 VOID
399 OvsAcquireCtrlLock()
400 {
401     NdisAcquireSpinLock(gOvsCtrlLock);
402 }
403
404 VOID
405 OvsReleaseCtrlLock()
406 {
407     NdisReleaseSpinLock(gOvsCtrlLock);
408 }
409
410
411 /*
412  * --------------------------------------------------------------------------
413  * Creates the communication device between user and kernel, and also
414  * initializes the data associated data structures.
415  * --------------------------------------------------------------------------
416  */
417 NDIS_STATUS
418 OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
419 {
420     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
421     UNICODE_STRING deviceName;
422     UNICODE_STRING symbolicDeviceName;
423     PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];
424     NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes;
425     OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle);
426
427     RtlZeroMemory(dispatchTable,
428                   (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH));
429     dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice;
430     dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice;
431     dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice;
432     dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl;
433
434     NdisInitUnicodeString(&deviceName, OVS_DEVICE_NAME_NT);
435     NdisInitUnicodeString(&symbolicDeviceName, OVS_DEVICE_NAME_DOS);
436
437     RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
438
439     OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header,
440                            NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES,
441                            NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1,
442                            sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
443
444     deviceAttributes.DeviceName = &deviceName;
445     deviceAttributes.SymbolicName = &symbolicDeviceName;
446     deviceAttributes.MajorFunctions = dispatchTable;
447     deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION);
448
449     status = NdisRegisterDeviceEx(ovsExtDriverHandle,
450                                   &deviceAttributes,
451                                   &gOvsDeviceObject,
452                                   &gOvsDeviceHandle);
453     if (status != NDIS_STATUS_SUCCESS) {
454         POVS_DEVICE_EXTENSION ovsExt =
455             (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject);
456         ASSERT(gOvsDeviceObject != NULL);
457         ASSERT(gOvsDeviceHandle != NULL);
458
459         if (ovsExt) {
460             ovsExt->numberOpenInstance = 0;
461         }
462     } else {
463         OvsRegisterSystemProvider((PVOID)gOvsDeviceObject);
464     }
465
466     OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
467     return status;
468 }
469
470
471 VOID
472 OvsDeleteDeviceObject()
473 {
474     if (gOvsDeviceHandle) {
475 #ifdef DBG
476         POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION)
477                     NdisGetDeviceReservedExtension(gOvsDeviceObject);
478         if (ovsExt) {
479             ASSERT(ovsExt->numberOpenInstance == 0);
480         }
481 #endif
482
483         ASSERT(gOvsDeviceObject);
484         NdisDeregisterDeviceEx(gOvsDeviceHandle);
485         gOvsDeviceHandle = NULL;
486         gOvsDeviceObject = NULL;
487
488         OvsUnregisterSystemProvider();
489     }
490 }
491
492 POVS_OPEN_INSTANCE
493 OvsGetOpenInstance(PFILE_OBJECT fileObject,
494                    UINT32 dpNo)
495 {
496     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
497     ASSERT(instance);
498     ASSERT(instance->fileObject == fileObject);
499     if (gOvsSwitchContext->dpNo != dpNo) {
500         return NULL;
501     }
502     return instance;
503 }
504
505
506 POVS_OPEN_INSTANCE
507 OvsFindOpenInstance(PFILE_OBJECT fileObject)
508 {
509     UINT32 i, j;
510     for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES &&
511                        j < ovsNumberOfOpenInstances; i++) {
512         if (ovsOpenInstanceArray[i]) {
513             if (ovsOpenInstanceArray[i]->fileObject == fileObject) {
514                 return ovsOpenInstanceArray[i];
515             }
516             j++;
517         }
518     }
519     return NULL;
520 }
521
522 NTSTATUS
523 OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
524                    PFILE_OBJECT fileObject)
525 {
526     POVS_OPEN_INSTANCE instance =
527         (POVS_OPEN_INSTANCE)OvsAllocateMemoryWithTag(sizeof(OVS_OPEN_INSTANCE),
528                                                      OVS_DATAPATH_POOL_TAG);
529     UINT32 i;
530
531     if (instance == NULL) {
532         return STATUS_NO_MEMORY;
533     }
534     OvsAcquireCtrlLock();
535     ASSERT(OvsFindOpenInstance(fileObject) == NULL);
536
537     if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
538         OvsReleaseCtrlLock();
539         OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
540         return STATUS_INSUFFICIENT_RESOURCES;
541     }
542     RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
543
544     for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) {
545         if (ovsOpenInstanceArray[i] == NULL) {
546             ovsOpenInstanceArray[i] = instance;
547             ovsNumberOfOpenInstances++;
548             instance->cookie = i;
549             break;
550         }
551     }
552     ASSERT(i < OVS_MAX_OPEN_INSTANCES);
553     instance->fileObject = fileObject;
554     ASSERT(fileObject->FsContext == NULL);
555     instance->pid = (UINT32)InterlockedIncrement((LONG volatile *)&ovsExt->pidCount);
556     if (instance->pid == 0) {
557         /* XXX: check for rollover. */
558     }
559     fileObject->FsContext = instance;
560     OvsReleaseCtrlLock();
561     return STATUS_SUCCESS;
562 }
563
564 static VOID
565 OvsCleanupOpenInstance(PFILE_OBJECT fileObject)
566 {
567     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
568     ASSERT(instance);
569     ASSERT(fileObject == instance->fileObject);
570     OvsCleanupEvent(instance);
571     OvsCleanupPacketQueue(instance);
572 }
573
574 VOID
575 OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
576 {
577     POVS_OPEN_INSTANCE instance;
578     ASSERT(fileObject->FsContext);
579     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
580     ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES);
581
582     OvsAcquireCtrlLock();
583     fileObject->FsContext = NULL;
584     ASSERT(ovsOpenInstanceArray[instance->cookie] == instance);
585     ovsOpenInstanceArray[instance->cookie] = NULL;
586     ovsNumberOfOpenInstances--;
587     OvsReleaseCtrlLock();
588     ASSERT(instance->eventQueue == NULL);
589     ASSERT (instance->packetQueue == NULL);
590     OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
591 }
592
593 NTSTATUS
594 OvsCompleteIrpRequest(PIRP irp,
595                       ULONG_PTR infoPtr,
596                       NTSTATUS status)
597 {
598     irp->IoStatus.Information = infoPtr;
599     irp->IoStatus.Status = status;
600     IoCompleteRequest(irp, IO_NO_INCREMENT);
601     return status;
602 }
603
604
605 NTSTATUS
606 OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject,
607                    PIRP irp)
608 {
609     PIO_STACK_LOCATION irpSp;
610     NTSTATUS status = STATUS_SUCCESS;
611     PFILE_OBJECT fileObject;
612     POVS_DEVICE_EXTENSION ovsExt =
613         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
614
615     ASSERT(deviceObject == gOvsDeviceObject);
616     ASSERT(ovsExt != NULL);
617
618     irpSp = IoGetCurrentIrpStackLocation(irp);
619     fileObject = irpSp->FileObject;
620     OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u",
621                   deviceObject, fileObject,
622                   ovsExt->numberOpenInstance);
623
624     switch (irpSp->MajorFunction) {
625     case IRP_MJ_CREATE:
626         status = OvsAddOpenInstance(ovsExt, fileObject);
627         if (STATUS_SUCCESS == status) {
628             InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance);
629         }
630         break;
631     case IRP_MJ_CLOSE:
632         ASSERT(ovsExt->numberOpenInstance > 0);
633         OvsRemoveOpenInstance(fileObject);
634         InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance);
635         break;
636     default:
637         ASSERT(0);
638     }
639     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
640 }
641
642 _Use_decl_annotations_
643 NTSTATUS
644 OvsCleanupDevice(PDEVICE_OBJECT deviceObject,
645                  PIRP irp)
646 {
647
648     PIO_STACK_LOCATION irpSp;
649     PFILE_OBJECT fileObject;
650
651     NTSTATUS status = STATUS_SUCCESS;
652 #ifdef DBG
653     POVS_DEVICE_EXTENSION ovsExt =
654         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
655     if (ovsExt) {
656         ASSERT(ovsExt->numberOpenInstance > 0);
657     }
658 #else
659     UNREFERENCED_PARAMETER(deviceObject);
660 #endif
661     ASSERT(deviceObject == gOvsDeviceObject);
662     irpSp = IoGetCurrentIrpStackLocation(irp);
663     fileObject = irpSp->FileObject;
664
665     ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP);
666
667     OvsCleanupOpenInstance(fileObject);
668
669     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
670 }
671
672
673 /*
674  * --------------------------------------------------------------------------
675  * IOCTL function handler for the device.
676  * --------------------------------------------------------------------------
677  */
678 NTSTATUS
679 OvsDeviceControl(PDEVICE_OBJECT deviceObject,
680                  PIRP irp)
681 {
682     PIO_STACK_LOCATION irpSp;
683     NTSTATUS status = STATUS_SUCCESS;
684     PFILE_OBJECT fileObject;
685     PVOID inputBuffer = NULL;
686     PVOID outputBuffer = NULL;
687     UINT32 inputBufferLen, outputBufferLen;
688     UINT32 code, replyLen = 0;
689     POVS_OPEN_INSTANCE instance;
690     UINT32 devOp;
691     OVS_MESSAGE ovsMsgReadOp;
692     POVS_MESSAGE ovsMsg;
693     NETLINK_FAMILY *nlFamilyOps;
694     OVS_USER_PARAMS_CONTEXT usrParamsCtx;
695
696 #ifdef DBG
697     POVS_DEVICE_EXTENSION ovsExt =
698         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
699     ASSERT(deviceObject == gOvsDeviceObject);
700     ASSERT(ovsExt);
701     ASSERT(ovsExt->numberOpenInstance > 0);
702 #else
703     UNREFERENCED_PARAMETER(deviceObject);
704 #endif
705
706     irpSp = IoGetCurrentIrpStackLocation(irp);
707
708     ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
709     ASSERT(irpSp->FileObject != NULL);
710
711     fileObject = irpSp->FileObject;
712     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
713     code = irpSp->Parameters.DeviceIoControl.IoControlCode;
714     inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength;
715     outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
716     inputBuffer = irp->AssociatedIrp.SystemBuffer;
717
718     /* Check if the extension is enabled. */
719     if (NULL == gOvsSwitchContext) {
720         status = STATUS_NOT_FOUND;
721         goto exit;
722     }
723
724     if (!OvsAcquireSwitchContext()) {
725         status = STATUS_NOT_FOUND;
726         goto exit;
727     }
728
729     /* Concurrent netlink operations are not supported. */
730     if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) {
731         status = STATUS_RESOURCE_IN_USE;
732         goto done;
733     }
734
735     /*
736      * Validate the input/output buffer arguments depending on the type of the
737      * operation.
738      */
739     switch (code) {
740     case OVS_IOCTL_GET_PID:
741         /* Both input buffer and output buffer use the same location. */
742         outputBuffer = irp->AssociatedIrp.SystemBuffer;
743         if (outputBufferLen != 0) {
744             InitUserParamsCtx(irp, instance, 0, NULL,
745                               inputBuffer, inputBufferLen,
746                               outputBuffer, outputBufferLen,
747                               &usrParamsCtx);
748
749             ASSERT(outputBuffer);
750         } else {
751             status = STATUS_NDIS_INVALID_LENGTH;
752             goto done;
753         }
754
755         status = OvsGetPidHandler(&usrParamsCtx, &replyLen);
756         goto done;
757
758     case OVS_IOCTL_TRANSACT:
759         /* Both input buffer and output buffer are mandatory. */
760         if (outputBufferLen != 0) {
761             status = MapIrpOutputBuffer(irp, outputBufferLen,
762                                         sizeof *ovsMsg, &outputBuffer);
763             if (status != STATUS_SUCCESS) {
764                 goto done;
765             }
766             ASSERT(outputBuffer);
767         } else {
768             status = STATUS_NDIS_INVALID_LENGTH;
769             goto done;
770         }
771
772         if (inputBufferLen < sizeof (*ovsMsg)) {
773             status = STATUS_NDIS_INVALID_LENGTH;
774             goto done;
775         }
776
777         ovsMsg = inputBuffer;
778         devOp = OVS_TRANSACTION_DEV_OP;
779         break;
780
781     case OVS_IOCTL_READ_EVENT:
782     case OVS_IOCTL_READ_PACKET:
783         /*
784          * Output buffer is mandatory. These IOCTLs are used to read events and
785          * packets respectively. It is convenient to have separate ioctls.
786          */
787         if (outputBufferLen != 0) {
788             status = MapIrpOutputBuffer(irp, outputBufferLen,
789                                         sizeof *ovsMsg, &outputBuffer);
790             if (status != STATUS_SUCCESS) {
791                 goto done;
792             }
793             ASSERT(outputBuffer);
794         } else {
795             status = STATUS_NDIS_INVALID_LENGTH;
796             goto done;
797         }
798         inputBuffer = NULL;
799         inputBufferLen = 0;
800
801         ovsMsg = &ovsMsgReadOp;
802         ovsMsg->nlMsg.nlmsgType = OVS_WIN_NL_CTRL_FAMILY_ID;
803         ovsMsg->nlMsg.nlmsgPid = instance->pid;
804         /* An "artificial" command so we can use NL family function table*/
805         ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
806                               OVS_CTRL_CMD_EVENT_NOTIFY :
807                               OVS_CTRL_CMD_READ_NOTIFY;
808         devOp = OVS_READ_DEV_OP;
809         break;
810
811     case OVS_IOCTL_READ:
812         /* Output buffer is mandatory. */
813         if (outputBufferLen != 0) {
814             status = MapIrpOutputBuffer(irp, outputBufferLen,
815                                         sizeof *ovsMsg, &outputBuffer);
816             if (status != STATUS_SUCCESS) {
817                 goto done;
818             }
819             ASSERT(outputBuffer);
820         } else {
821             status = STATUS_NDIS_INVALID_LENGTH;
822             goto done;
823         }
824
825         /*
826          * Operate in the mode that read ioctl is similar to ReadFile(). This
827          * might change as the userspace code gets implemented.
828          */
829         inputBuffer = NULL;
830         inputBufferLen = 0;
831
832         /*
833          * For implementing read (ioctl or otherwise), we need to store some
834          * state in the instance to indicate the command that started the dump
835          * operation. The state can setup 'ovsMsgReadOp' appropriately. Note
836          * that 'ovsMsgReadOp' is needed only in this function to call into the
837          * appropriate handler. The handler itself can access the state in the
838          * instance.
839          *
840          * In the absence of a dump start, return 0 bytes.
841          */
842         if (instance->dumpState.ovsMsg == NULL) {
843             replyLen = 0;
844             status = STATUS_SUCCESS;
845             goto done;
846         }
847         RtlCopyMemory(&ovsMsgReadOp, instance->dumpState.ovsMsg,
848                       sizeof (ovsMsgReadOp));
849
850         /* Create an NL message for consumption. */
851         ovsMsg = &ovsMsgReadOp;
852         devOp = OVS_READ_DEV_OP;
853
854         break;
855
856     case OVS_IOCTL_WRITE:
857         /* Input buffer is mandatory. */
858         if (inputBufferLen < sizeof (*ovsMsg)) {
859             status = STATUS_NDIS_INVALID_LENGTH;
860             goto done;
861         }
862
863         ovsMsg = inputBuffer;
864         devOp = OVS_WRITE_DEV_OP;
865         break;
866
867     default:
868         status = STATUS_INVALID_DEVICE_REQUEST;
869         goto done;
870     }
871
872     ASSERT(ovsMsg);
873     switch (ovsMsg->nlMsg.nlmsgType) {
874     case OVS_WIN_NL_CTRL_FAMILY_ID:
875         nlFamilyOps = &nlControlFamilyOps;
876         break;
877     case OVS_WIN_NL_DATAPATH_FAMILY_ID:
878         nlFamilyOps = &nlDatapathFamilyOps;
879         break;
880     case OVS_WIN_NL_FLOW_FAMILY_ID:
881          nlFamilyOps = &nlFLowFamilyOps;
882          break;
883     case OVS_WIN_NL_PACKET_FAMILY_ID:
884          nlFamilyOps = &nlPacketFamilyOps;
885          break;
886     case OVS_WIN_NL_VPORT_FAMILY_ID:
887         nlFamilyOps = &nlVportFamilyOps;
888         break;
889     case OVS_WIN_NL_NETDEV_FAMILY_ID:
890         nlFamilyOps = &nlNetdevFamilyOps;
891         break;
892     default:
893         status = STATUS_INVALID_PARAMETER;
894         goto done;
895     }
896
897     /*
898      * For read operation, the netlink command has already been validated
899      * previously.
900      */
901     if (devOp != OVS_READ_DEV_OP) {
902         status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps);
903         if (status != STATUS_SUCCESS) {
904             goto done;
905         }
906     }
907
908     InitUserParamsCtx(irp, instance, devOp, ovsMsg,
909                       inputBuffer, inputBufferLen,
910                       outputBuffer, outputBufferLen,
911                       &usrParamsCtx);
912
913     status = InvokeNetlinkCmdHandler(&usrParamsCtx, nlFamilyOps, &replyLen);
914
915 done:
916     OvsReleaseSwitchContext(gOvsSwitchContext);
917
918 exit:
919     KeMemoryBarrier();
920     instance->inUse = 0;
921
922     /* Should not complete a pending IRP unless proceesing is completed */
923     if (status == STATUS_PENDING) {
924         return status;
925     }
926     return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
927 }
928
929
930 /*
931  * --------------------------------------------------------------------------
932  * Function to validate a netlink command. Only certain combinations of
933  * (device operation, netlink family, command) are valid.
934  * --------------------------------------------------------------------------
935  */
936 static NTSTATUS
937 ValidateNetlinkCmd(UINT32 devOp,
938                    POVS_OPEN_INSTANCE instance,
939                    POVS_MESSAGE ovsMsg,
940                    NETLINK_FAMILY *nlFamilyOps)
941 {
942     NTSTATUS status = STATUS_INVALID_PARAMETER;
943     UINT16 i;
944
945     for (i = 0; i < nlFamilyOps->opsCount; i++) {
946         if (nlFamilyOps->cmds[i].cmd == ovsMsg->genlMsg.cmd) {
947             /* Validate if the command is valid for the device operation. */
948             if ((devOp & nlFamilyOps->cmds[i].supportedDevOp) == 0) {
949                 status = STATUS_INVALID_PARAMETER;
950                 goto done;
951             }
952
953             /* Validate the version. */
954             if (nlFamilyOps->version > ovsMsg->genlMsg.version) {
955                 status = STATUS_INVALID_PARAMETER;
956                 goto done;
957             }
958
959             /* Validate the DP for commands that require a DP. */
960             if (nlFamilyOps->cmds[i].validateDpIndex == TRUE) {
961                 if (ovsMsg->ovsHdr.dp_ifindex !=
962                                           (INT)gOvsSwitchContext->dpNo) {
963                     status = STATUS_INVALID_PARAMETER;
964                     goto done;
965                 }
966             }
967
968             /* Validate the PID. */
969             if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
970                 status = STATUS_INVALID_PARAMETER;
971                 goto done;
972             }
973
974             status = STATUS_SUCCESS;
975             break;
976         }
977     }
978
979 done:
980     return status;
981 }
982
983 /*
984  * --------------------------------------------------------------------------
985  * Function to invoke the netlink command handler.
986  * --------------------------------------------------------------------------
987  */
988 static NTSTATUS
989 InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
990                         NETLINK_FAMILY *nlFamilyOps,
991                         UINT32 *replyLen)
992 {
993     NTSTATUS status = STATUS_INVALID_PARAMETER;
994     UINT16 i;
995
996     for (i = 0; i < nlFamilyOps->opsCount; i++) {
997         if (nlFamilyOps->cmds[i].cmd == usrParamsCtx->ovsMsg->genlMsg.cmd) {
998             NetlinkCmdHandler *handler = nlFamilyOps->cmds[i].handler;
999             ASSERT(handler);
1000             if (handler) {
1001                 status = handler(usrParamsCtx, replyLen);
1002             }
1003             break;
1004         }
1005     }
1006
1007     return status;
1008 }
1009
1010 /*
1011  * --------------------------------------------------------------------------
1012  *  Handler for 'OVS_IOCTL_GET_PID'.
1013  *
1014  *  Each handle on the device is assigned a unique PID when the handle is
1015  *  created. This function passes the PID to userspace using METHOD_BUFFERED
1016  *  method.
1017  * --------------------------------------------------------------------------
1018  */
1019 static NTSTATUS
1020 OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1021                  UINT32 *replyLen)
1022 {
1023     NTSTATUS status = STATUS_SUCCESS;
1024     PUINT32 msgOut = (PUINT32)usrParamsCtx->outputBuffer;
1025
1026     if (usrParamsCtx->outputLength >= sizeof *msgOut) {
1027         POVS_OPEN_INSTANCE instance =
1028             (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1029
1030         RtlZeroMemory(msgOut, sizeof *msgOut);
1031         RtlCopyMemory(msgOut, &instance->pid, sizeof(*msgOut));
1032         *replyLen = sizeof *msgOut;
1033     } else {
1034         *replyLen = sizeof *msgOut;
1035         status = STATUS_NDIS_INVALID_LENGTH;
1036     }
1037
1038     return status;
1039 }
1040
1041 /*
1042  * --------------------------------------------------------------------------
1043  * Utility function to fill up information about the datapath in a reply to
1044  * userspace.
1045  * --------------------------------------------------------------------------
1046  */
1047 static NTSTATUS
1048 OvsDpFillInfo(POVS_SWITCH_CONTEXT ovsSwitchContext,
1049               POVS_MESSAGE msgIn,
1050               PNL_BUFFER nlBuf)
1051 {
1052     BOOLEAN writeOk;
1053     OVS_MESSAGE msgOutTmp;
1054     OVS_DATAPATH *datapath = &ovsSwitchContext->datapath;
1055     PNL_MSG_HDR nlMsg;
1056
1057     ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && NlBufRemLen(nlBuf) >= sizeof *msgIn);
1058
1059     msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_DATAPATH_FAMILY_ID;
1060     msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
1061     msgOutTmp.nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
1062     msgOutTmp.nlMsg.nlmsgPid = msgIn->nlMsg.nlmsgPid;
1063
1064     msgOutTmp.genlMsg.cmd = OVS_DP_CMD_GET;
1065     msgOutTmp.genlMsg.version = nlDatapathFamilyOps.version;
1066     msgOutTmp.genlMsg.reserved = 0;
1067
1068     msgOutTmp.ovsHdr.dp_ifindex = ovsSwitchContext->dpNo;
1069
1070     writeOk = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
1071     if (writeOk) {
1072         writeOk = NlMsgPutTailString(nlBuf, OVS_DP_ATTR_NAME,
1073                                      OVS_SYSTEM_DP_NAME);
1074     }
1075     if (writeOk) {
1076         OVS_DP_STATS dpStats;
1077
1078         dpStats.n_hit = datapath->hits;
1079         dpStats.n_missed = datapath->misses;
1080         dpStats.n_lost = datapath->lost;
1081         dpStats.n_flows = datapath->nFlows;
1082         writeOk = NlMsgPutTailUnspec(nlBuf, OVS_DP_ATTR_STATS,
1083                                      (PCHAR)&dpStats, sizeof dpStats);
1084     }
1085     nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
1086     nlMsg->nlmsgLen = NlBufSize(nlBuf);
1087
1088     return writeOk ? STATUS_SUCCESS : STATUS_INVALID_BUFFER_SIZE;
1089 }
1090
1091 /*
1092  * --------------------------------------------------------------------------
1093  * Handler for queueing an IRP used for event notification. The IRP is
1094  * completed when a port state changes. STATUS_PENDING is returned on
1095  * success. User mode keep a pending IRP at all times.
1096  * --------------------------------------------------------------------------
1097  */
1098 static NTSTATUS
1099 OvsPendEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1100                        UINT32 *replyLen)
1101 {
1102     NDIS_STATUS status;
1103
1104     UNREFERENCED_PARAMETER(replyLen);
1105
1106     POVS_OPEN_INSTANCE instance =
1107         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1108     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1109     OVS_EVENT_POLL poll;
1110
1111     poll.dpNo = msgIn->ovsHdr.dp_ifindex;
1112     status = OvsWaitEventIoctl(usrParamsCtx->irp, instance->fileObject,
1113                                &poll, sizeof poll);
1114     return status;
1115 }
1116
1117 /*
1118  * --------------------------------------------------------------------------
1119  *  Handler for the subscription for the event queue
1120  * --------------------------------------------------------------------------
1121  */
1122 static NTSTATUS
1123 OvsSubscribeEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1124                             UINT32 *replyLen)
1125 {
1126     NDIS_STATUS status;
1127     OVS_EVENT_SUBSCRIBE request;
1128     BOOLEAN rc;
1129     UINT8 join;
1130     PNL_ATTR attrs[2];
1131     const NL_POLICY policy[] =  {
1132         [OVS_NL_ATTR_MCAST_GRP] = {.type = NL_A_U32 },
1133         [OVS_NL_ATTR_MCAST_JOIN] = {.type = NL_A_U8 },
1134         };
1135
1136     UNREFERENCED_PARAMETER(replyLen);
1137
1138     POVS_OPEN_INSTANCE instance =
1139         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1140     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1141
1142     rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
1143          NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
1144     if (!rc) {
1145         status = STATUS_INVALID_PARAMETER;
1146         goto done;
1147     }
1148
1149     /* XXX Ignore the MC group for now */
1150     join = NlAttrGetU8(attrs[OVS_NL_ATTR_MCAST_JOIN]);
1151     request.dpNo = msgIn->ovsHdr.dp_ifindex;
1152     request.subscribe = join;
1153     request.mask = OVS_EVENT_MASK_ALL;
1154
1155     status = OvsSubscribeEventIoctl(instance->fileObject, &request,
1156                                     sizeof request);
1157 done:
1158     return status;
1159 }
1160
1161 /*
1162  * --------------------------------------------------------------------------
1163  *  Command Handler for 'OVS_DP_CMD_NEW'.
1164  * --------------------------------------------------------------------------
1165  */
1166 static NTSTATUS
1167 OvsNewDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1168                    UINT32 *replyLen)
1169 {
1170     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1171 }
1172
1173 /*
1174  * --------------------------------------------------------------------------
1175  *  Command Handler for 'OVS_DP_CMD_GET'.
1176  *
1177  *  The function handles both the dump based as well as the transaction based
1178  *  'OVS_DP_CMD_GET' command. In the dump command, it handles the initial
1179  *  call to setup dump state, as well as subsequent calls to continue dumping
1180  *  data.
1181  * --------------------------------------------------------------------------
1182  */
1183 static NTSTATUS
1184 OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1185                    UINT32 *replyLen)
1186 {
1187     if (usrParamsCtx->devOp == OVS_TRANSACTION_DEV_OP) {
1188         return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1189     } else {
1190         return HandleGetDpDump(usrParamsCtx, replyLen);
1191     }
1192 }
1193
1194 /*
1195  * --------------------------------------------------------------------------
1196  *  Function for handling the transaction based 'OVS_DP_CMD_GET' command.
1197  * --------------------------------------------------------------------------
1198  */
1199 static NTSTATUS
1200 HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1201                        UINT32 *replyLen)
1202 {
1203     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1204 }
1205
1206
1207 /*
1208  * --------------------------------------------------------------------------
1209  *  Function for handling the dump-based 'OVS_DP_CMD_GET' command.
1210  * --------------------------------------------------------------------------
1211  */
1212 static NTSTATUS
1213 HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1214                 UINT32 *replyLen)
1215 {
1216     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1217     POVS_OPEN_INSTANCE instance =
1218         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1219
1220     if (usrParamsCtx->devOp == OVS_WRITE_DEV_OP) {
1221         *replyLen = 0;
1222         OvsSetupDumpStart(usrParamsCtx);
1223     } else {
1224         NL_BUFFER nlBuf;
1225         NTSTATUS status;
1226         POVS_MESSAGE msgIn = instance->dumpState.ovsMsg;
1227
1228         ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1229
1230         if (instance->dumpState.ovsMsg == NULL) {
1231             ASSERT(FALSE);
1232             return STATUS_INVALID_DEVICE_STATE;
1233         }
1234
1235         /* Dump state must have been deleted after previous dump operation. */
1236         ASSERT(instance->dumpState.index[0] == 0);
1237
1238         /* Output buffer has been validated while validating read dev op. */
1239         ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1240
1241         NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
1242                   usrParamsCtx->outputLength);
1243
1244         status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
1245
1246         if (status != STATUS_SUCCESS) {
1247             *replyLen = 0;
1248             FreeUserDumpState(instance);
1249             return status;
1250         }
1251
1252         /* Increment the dump index. */
1253         instance->dumpState.index[0] = 1;
1254         *replyLen = msgOut->nlMsg.nlmsgLen;
1255
1256         /* Free up the dump state, since there's no more data to continue. */
1257         FreeUserDumpState(instance);
1258     }
1259
1260     return STATUS_SUCCESS;
1261 }
1262
1263
1264 /*
1265  * --------------------------------------------------------------------------
1266  *  Command Handler for 'OVS_DP_CMD_SET'.
1267  * --------------------------------------------------------------------------
1268  */
1269 static NTSTATUS
1270 OvsSetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1271                    UINT32 *replyLen)
1272 {
1273     return HandleDpTransactionCommon(usrParamsCtx, replyLen);
1274 }
1275
1276 /*
1277  * --------------------------------------------------------------------------
1278  *  Function for handling transaction based 'OVS_DP_CMD_NEW', 'OVS_DP_CMD_GET'
1279  *  and 'OVS_DP_CMD_SET' commands.
1280  *
1281  * 'OVS_DP_CMD_NEW' is implemented to keep userspace code happy. Creation of a
1282  * new datapath is not supported currently.
1283  * --------------------------------------------------------------------------
1284  */
1285 static NTSTATUS
1286 HandleDpTransactionCommon(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1287                           UINT32 *replyLen)
1288 {
1289     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1290     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1291     NTSTATUS status = STATUS_SUCCESS;
1292     NL_BUFFER nlBuf;
1293     NL_ERROR nlError = NL_ERROR_SUCCESS;
1294     static const NL_POLICY ovsDatapathSetPolicy[] = {
1295         [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .maxLen = IFNAMSIZ },
1296         [OVS_DP_ATTR_UPCALL_PID] = { .type = NL_A_U32, .optional = TRUE },
1297         [OVS_DP_ATTR_USER_FEATURES] = { .type = NL_A_U32, .optional = TRUE },
1298     };
1299     PNL_ATTR dpAttrs[ARRAY_SIZE(ovsDatapathSetPolicy)];
1300
1301     UNREFERENCED_PARAMETER(msgOut);
1302
1303     /* input buffer has been validated while validating write dev op. */
1304     ASSERT(msgIn != NULL && usrParamsCtx->inputLength >= sizeof *msgIn);
1305
1306     /* Parse any attributes in the request. */
1307     if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET ||
1308         usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
1309         if (!NlAttrParse((PNL_MSG_HDR)msgIn,
1310                         NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN,
1311                         NlMsgAttrsLen((PNL_MSG_HDR)msgIn),
1312                         ovsDatapathSetPolicy, dpAttrs, ARRAY_SIZE(dpAttrs))) {
1313             return STATUS_INVALID_PARAMETER;
1314         }
1315
1316         /*
1317         * XXX: Not clear at this stage if there's any role for the
1318         * OVS_DP_ATTR_UPCALL_PID and OVS_DP_ATTR_USER_FEATURES attributes passed
1319         * from userspace.
1320         */
1321
1322     } else {
1323         RtlZeroMemory(dpAttrs, sizeof dpAttrs);
1324     }
1325
1326     /* Output buffer has been validated while validating transact dev op. */
1327     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1328
1329     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
1330
1331     if (dpAttrs[OVS_DP_ATTR_NAME] != NULL) {
1332         if (!OvsCompareString(NlAttrGet(dpAttrs[OVS_DP_ATTR_NAME]),
1333                               OVS_SYSTEM_DP_NAME)) {
1334
1335             /* Creation of new datapaths is not supported. */
1336             if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET) {
1337                 nlError = NL_ERROR_NOTSUPP;
1338                 goto cleanup;
1339             }
1340
1341             nlError = NL_ERROR_NODEV;
1342             goto cleanup;
1343         }
1344     } else if ((UINT32)msgIn->ovsHdr.dp_ifindex != gOvsSwitchContext->dpNo) {
1345         nlError = NL_ERROR_NODEV;
1346         goto cleanup;
1347     }
1348
1349     if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
1350         nlError = NL_ERROR_EXIST;
1351         goto cleanup;
1352     }
1353
1354     status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
1355
1356     *replyLen = NlBufSize(&nlBuf);
1357
1358 cleanup:
1359     if (nlError != NL_ERROR_SUCCESS) {
1360         POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
1361             usrParamsCtx->outputBuffer;
1362
1363         NlBuildErrorMsg(msgIn, msgError, nlError);
1364         *replyLen = msgError->nlMsg.nlmsgLen;
1365     }
1366
1367     return STATUS_SUCCESS;
1368 }
1369
1370
1371 NTSTATUS
1372 OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx)
1373 {
1374     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1375     POVS_OPEN_INSTANCE instance =
1376         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1377
1378     /* input buffer has been validated while validating write dev op. */
1379     ASSERT(msgIn != NULL && usrParamsCtx->inputLength >= sizeof *msgIn);
1380
1381     /* A write operation that does not indicate dump start is invalid. */
1382     if ((msgIn->nlMsg.nlmsgFlags & NLM_F_DUMP) != NLM_F_DUMP) {
1383         return STATUS_INVALID_PARAMETER;
1384     }
1385     /* XXX: Handle other NLM_F_* flags in the future. */
1386
1387     /*
1388      * This operation should be setting up the dump state. If there's any
1389      * previous state, clear it up so as to set it up afresh.
1390      */
1391     FreeUserDumpState(instance);
1392
1393     return InitUserDumpState(instance, msgIn);
1394 }
1395
1396
1397 /*
1398  * --------------------------------------------------------------------------
1399  *  Utility function to map the output buffer in an IRP. The buffer is assumed
1400  *  to have been passed down using METHOD_OUT_DIRECT (Direct I/O).
1401  * --------------------------------------------------------------------------
1402  */
1403 static NTSTATUS
1404 MapIrpOutputBuffer(PIRP irp,
1405                    UINT32 bufferLength,
1406                    UINT32 requiredLength,
1407                    PVOID *buffer)
1408 {
1409     ASSERT(irp);
1410     ASSERT(buffer);
1411     ASSERT(bufferLength);
1412     ASSERT(requiredLength);
1413     if (!buffer || !irp || bufferLength == 0 || requiredLength == 0) {
1414         return STATUS_INVALID_PARAMETER;
1415     }
1416
1417     if (bufferLength < requiredLength) {
1418         return STATUS_NDIS_INVALID_LENGTH;
1419     }
1420     if (irp->MdlAddress == NULL) {
1421         return STATUS_INVALID_PARAMETER;
1422     }
1423     *buffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
1424                                            NormalPagePriority);
1425     if (*buffer == NULL) {
1426         return STATUS_INSUFFICIENT_RESOURCES;
1427     }
1428
1429     return STATUS_SUCCESS;
1430 }
1431
1432 /*
1433  * --------------------------------------------------------------------------
1434  * Utility function to fill up information about the state of a port in a reply
1435  * to* userspace.
1436  * --------------------------------------------------------------------------
1437  */
1438 static NTSTATUS
1439 OvsPortFillInfo(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1440                 POVS_EVENT_ENTRY eventEntry,
1441                 PNL_BUFFER nlBuf)
1442 {
1443     NTSTATUS status;
1444     BOOLEAN ok;
1445     OVS_MESSAGE msgOutTmp;
1446     PNL_MSG_HDR nlMsg;
1447     POVS_VPORT_ENTRY vport;
1448
1449     ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && nlBuf->bufRemLen >= sizeof msgOutTmp);
1450
1451     msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_VPORT_FAMILY_ID;
1452     msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
1453
1454     /* driver intiated messages should have zerp seq number*/
1455     msgOutTmp.nlMsg.nlmsgSeq = 0;
1456     msgOutTmp.nlMsg.nlmsgPid = usrParamsCtx->ovsInstance->pid;
1457
1458     msgOutTmp.genlMsg.version = nlVportFamilyOps.version;
1459     msgOutTmp.genlMsg.reserved = 0;
1460
1461     /* we don't have netdev yet, treat link up/down a adding/removing a port*/
1462     if (eventEntry->status & (OVS_EVENT_LINK_UP | OVS_EVENT_CONNECT)) {
1463         msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_NEW;
1464     } else if (eventEntry->status &
1465              (OVS_EVENT_LINK_DOWN | OVS_EVENT_DISCONNECT)) {
1466         msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_DEL;
1467     } else {
1468         ASSERT(FALSE);
1469         return STATUS_UNSUCCESSFUL;
1470     }
1471     msgOutTmp.ovsHdr.dp_ifindex = gOvsSwitchContext->dpNo;
1472
1473     ok = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
1474     if (!ok) {
1475         status = STATUS_INVALID_BUFFER_SIZE;
1476         goto cleanup;
1477     }
1478
1479     vport = OvsFindVportByPortNo(gOvsSwitchContext, eventEntry->portNo);
1480     if (!vport) {
1481         status = STATUS_DEVICE_DOES_NOT_EXIST;
1482         goto cleanup;
1483     }
1484
1485     ok = NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_PORT_NO, eventEntry->portNo) &&
1486          NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_TYPE, vport->ovsType) &&
1487          NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_UPCALL_PID,
1488                          vport->upcallPid) &&
1489          NlMsgPutTailString(nlBuf, OVS_VPORT_ATTR_NAME, vport->ovsName);
1490     if (!ok) {
1491         status = STATUS_INVALID_BUFFER_SIZE;
1492         goto cleanup;
1493     }
1494
1495     /* XXXX Should we add the port stats attributes?*/
1496     nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
1497     nlMsg->nlmsgLen = NlBufSize(nlBuf);
1498     status = STATUS_SUCCESS;
1499
1500 cleanup:
1501     return status;
1502 }
1503
1504
1505 /*
1506  * --------------------------------------------------------------------------
1507  * Handler for reading events from the driver event queue. This handler is
1508  * executed when user modes issues a socket receive on a socket assocaited
1509  * with the MC group for events.
1510  * XXX user mode should read multiple events in one system call
1511  * --------------------------------------------------------------------------
1512  */
1513 static NTSTATUS
1514 OvsReadEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1515                        UINT32 *replyLen)
1516 {
1517 #ifdef DBG
1518     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1519     POVS_OPEN_INSTANCE instance =
1520         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1521 #endif
1522     NL_BUFFER nlBuf;
1523     NTSTATUS status;
1524     OVS_EVENT_ENTRY eventEntry;
1525
1526     ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1527
1528     /* Should never read events with a dump socket */
1529     ASSERT(instance->dumpState.ovsMsg == NULL);
1530
1531     /* Must have an event queue */
1532     ASSERT(instance->eventQueue != NULL);
1533
1534     /* Output buffer has been validated while validating read dev op. */
1535     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1536
1537     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
1538
1539     /* remove an event entry from the event queue */
1540     status = OvsRemoveEventEntry(usrParamsCtx->ovsInstance, &eventEntry);
1541     if (status != STATUS_SUCCESS) {
1542         /* If there were not elements, read should return no data. */
1543         status = STATUS_SUCCESS;
1544         *replyLen = 0;
1545         goto cleanup;
1546     }
1547
1548     status = OvsPortFillInfo(usrParamsCtx, &eventEntry, &nlBuf);
1549     if (status == NDIS_STATUS_SUCCESS) {
1550         *replyLen = NlBufSize(&nlBuf);
1551     }
1552
1553 cleanup:
1554     return status;
1555 }
1556
1557 /*
1558  * --------------------------------------------------------------------------
1559  * Handler for reading missed pacckets from the driver event queue. This
1560  * handler is executed when user modes issues a socket receive on a socket
1561  * --------------------------------------------------------------------------
1562  */
1563 static NTSTATUS
1564 OvsReadPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1565                        UINT32 *replyLen)
1566 {
1567 #ifdef DBG
1568     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
1569 #endif
1570     POVS_OPEN_INSTANCE instance =
1571         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1572     NTSTATUS status;
1573
1574     ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
1575
1576     /* Should never read events with a dump socket */
1577     ASSERT(instance->dumpState.ovsMsg == NULL);
1578
1579     /* Must have an packet queue */
1580     ASSERT(instance->packetQueue != NULL);
1581
1582     /* Output buffer has been validated while validating read dev op. */
1583     ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
1584
1585     /* Read a packet from the instance queue */
1586     status = OvsReadDpIoctl(instance->fileObject, usrParamsCtx->outputBuffer,
1587                             usrParamsCtx->outputLength, replyLen);
1588     return status;
1589 }
1590
1591 /*
1592  * --------------------------------------------------------------------------
1593  *  Handler for the subscription for a packet queue
1594  * --------------------------------------------------------------------------
1595  */
1596 static NTSTATUS
1597 OvsSubscribePacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1598                             UINT32 *replyLen)
1599 {
1600     NDIS_STATUS status;
1601     BOOLEAN rc;
1602     UINT8 join;
1603     UINT32 pid;
1604     const NL_POLICY policy[] =  {
1605         [OVS_NL_ATTR_PACKET_PID] = {.type = NL_A_U32 },
1606         [OVS_NL_ATTR_PACKET_SUBSCRIBE] = {.type = NL_A_U8 }
1607         };
1608     PNL_ATTR attrs[ARRAY_SIZE(policy)];
1609
1610     UNREFERENCED_PARAMETER(replyLen);
1611
1612     POVS_OPEN_INSTANCE instance =
1613         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1614     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
1615
1616     rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
1617          NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
1618     if (!rc) {
1619         status = STATUS_INVALID_PARAMETER;
1620         goto done;
1621     }
1622
1623     join = NlAttrGetU8(attrs[OVS_NL_ATTR_PACKET_PID]);
1624     pid = NlAttrGetU32(attrs[OVS_NL_ATTR_PACKET_PID]);
1625
1626     /* The socket subscribed with must be the same socket we perform receive*/
1627     ASSERT(pid == instance->pid);
1628
1629     status = OvsSubscribeDpIoctl(instance, pid, join);
1630
1631     /*
1632      * XXX Need to add this instance to a global data structure
1633      * which hold all packet based instances. The data structure (hash)
1634      * should be searched through the pid field of the instance for
1635      * placing the missed packet into the correct queue
1636      */
1637 done:
1638     return status;
1639 }
1640
1641 /*
1642  * --------------------------------------------------------------------------
1643  * Handler for queueing an IRP used for missed packet notification. The IRP is
1644  * completed when a packet received and mismatched. STATUS_PENDING is returned
1645  * on success. User mode keep a pending IRP at all times.
1646  * --------------------------------------------------------------------------
1647  */
1648 static NTSTATUS
1649 OvsPendPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
1650                        UINT32 *replyLen)
1651 {
1652     UNREFERENCED_PARAMETER(replyLen);
1653
1654     POVS_OPEN_INSTANCE instance =
1655         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
1656
1657     /*
1658      * XXX access to packet queue must be through acquiring a lock as user mode
1659      * could unsubscribe and the instnace will be freed.
1660      */
1661     return OvsWaitDpIoctl(usrParamsCtx->irp, instance->fileObject);
1662 }