datapath-windows: add infrastructure for supporting netlink
[cascardo/ovs.git] / datapath-windows / ovsext / Datapath.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
/*
 * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface
 * alive while we transition over to the netlink based interface.
 * OVS_USE_NL_INTERFACE = 0 => legacy interface to use with dpif-windows.c
 * OVS_USE_NL_INTERFACE = 1 => netlink interface to use with ported dpif-linux.c
 */
23 #if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 1
24
25 #include "precomp.h"
26 #include "Datapath.h"
27 #include "OvsJhash.h"
28 #include "OvsSwitch.h"
29 #include "OvsVport.h"
30 #include "OvsEvent.h"
31 #include "OvsUser.h"
32 #include "OvsPacketIO.h"
33 #include "OvsNetProto.h"
34 #include "OvsFlow.h"
35 #include "OvsUser.h"
36
37 #ifdef OVS_DBG_MOD
38 #undef OVS_DBG_MOD
39 #endif
40 #define OVS_DBG_MOD OVS_DBG_DATAPATH
41 #include "OvsDebug.h"
42
43 #define NETLINK_FAMILY_NAME_LEN 48
44
45
46 /*
47  * Netlink messages are grouped by family (aka type), and each family supports
48  * a set of commands, and can be passed both from kernel -> userspace or
49  * vice-versa. To call into the kernel, userspace uses a device operation which
50  * is outside of a netlink message.
51  *
52  * Each command results in the invocation of a handler function to implement the
53  * request functionality.
54  *
55  * Expectedly, only certain combinations of (device operation, netlink family,
56  * command) are valid.
57  *
58  * Here, we implement the basic infrastructure to perform validation on the
59  * incoming message, version checking, and also to invoke the corresponding
60  * handler to do the heavy-lifting.
61  */
62
63 /*
64  * Handler for a given netlink command. Not all the parameters are used by all
65  * the handlers.
66  */
67 typedef NTSTATUS (*NetlinkCmdHandler)(PIRP irp, PFILE_OBJECT fileObject,
68                                       PVOID inputBuffer, UINT32 inputLength,
69                                       PVOID outputBuffer, UINT32 outputLength,
70                                       UINT32 *replyLen);
71
72 typedef struct _NETLINK_CMD {
73     UINT16 cmd;
74     NetlinkCmdHandler handler;
75     UINT32 supportedDevOp;      /* Supported device operations. */
76 } NETLINK_CMD, *PNETLINK_CMD;
77
78 /* A netlink family is a group of commands. */
79 typedef struct _NETLINK_FAMILY {
80     CHAR *name;
81     UINT32 id;
82     UINT16 version;
83     UINT16 maxAttr;
84     NETLINK_CMD *cmds;          /* Array of netlink commands and handlers. */
85     UINT16 opsCount;
86 } NETLINK_FAMILY, *PNETLINK_FAMILY;
87
/*
 * Device operations that a netlink command can be tagged with. Each operation
 * owns one bit so that a command reachable through several device operations
 * can list all of them in a single mask.
 */
#define OVS_READ_DEV_OP          0x1
#define OVS_WRITE_DEV_OP         0x2
#define OVS_TRANSACTION_DEV_OP   0x4
96
97 /* Handlers for the various netlink commands. */
98 static NTSTATUS OvsGetPidCmdHandler(PIRP irp, PFILE_OBJECT fileObject,
99                                     PVOID inputBuffer, UINT32 inputLength,
100                                     PVOID outputBuffer, UINT32 outputLength,
101                                     UINT32 *replyLen);
102
103 /*
104  * The various netlink families, along with the supported commands. Most of
105  * these families and commands are part of the openvswitch specification for a
106  * netlink datapath. In addition, each platform can implement a few families
107  * and commands as extensions.
108  */
109
110 /* Netlink control family: this is a Windows specific family. */
111 NETLINK_CMD nlControlFamilyCmdOps[] = {
112     { OVS_CTRL_CMD_WIN_GET_PID, OvsGetPidCmdHandler, OVS_TRANSACTION_DEV_OP, }
113 };
114
115 NETLINK_FAMILY nlControlFamilyOps = {
116     OVS_WIN_CONTROL_FAMILY,
117     OVS_WIN_NL_CTRL_FAMILY_ID,
118     OVS_WIN_CONTROL_VERSION,
119     OVS_WIN_CONTROL_ATTR_MAX,
120     nlControlFamilyCmdOps,
121     ARRAY_SIZE(nlControlFamilyCmdOps)
122 };
123
124
125
126 /* Netlink packet family. */
127 /* XXX: Add commands here. */
128 NETLINK_FAMILY nlPacketFamilyOps = {
129     OVS_PACKET_FAMILY,
130     OVS_WIN_NL_PACKET_FAMILY_ID,
131     OVS_PACKET_VERSION,
132     OVS_PACKET_ATTR_MAX,
133     NULL, /* XXX: placeholder. */
134     0
135 };
136
137 /* Netlink datapath family. */
138 /* XXX: Add commands here. */
139 NETLINK_FAMILY nlDatapathFamilyOps = {
140     OVS_DATAPATH_FAMILY,
141     OVS_WIN_NL_DATAPATH_FAMILY_ID,
142     OVS_DATAPATH_VERSION,
143     OVS_DP_ATTR_MAX,
144     NULL, /* XXX: placeholder. */
145     0
146 };
147
148 /* Netlink vport family. */
149 /* XXX: Add commands here. */
150 NETLINK_FAMILY nlVportFamilyOps = {
151     OVS_VPORT_FAMILY,
152     OVS_WIN_NL_VPORT_FAMILY_ID,
153     OVS_VPORT_VERSION,
154     OVS_VPORT_ATTR_MAX,
155     NULL, /* XXX: placeholder. */
156     0
157 };
158
159 /* Netlink flow family. */
160 /* XXX: Add commands here. */
161 NETLINK_FAMILY nlFLowFamilyOps = {
162     OVS_FLOW_FAMILY,
163     OVS_WIN_NL_FLOW_FAMILY_ID,
164     OVS_FLOW_VERSION,
165     OVS_FLOW_ATTR_MAX,
166     NULL, /* XXX: placeholder. */
167     0
168 };
169
170 static NTSTATUS
171 MapIrpOutputBuffer(PIRP irp,
172                    UINT32 bufferLength,
173                    UINT32 requiredLength,
174                    PVOID *buffer);
175 static NTSTATUS
176 ValidateNetlinkCmd(UINT32 devOp,
177                    POVS_MESSAGE ovsMsg,
178                    NETLINK_FAMILY *nlFamilyOps);
179 static NTSTATUS
180 InvokeNetlinkCmdHandler(PIRP irp,
181                         PFILE_OBJECT fileObject,
182                         UINT32 devOp,
183                         POVS_MESSAGE ovsMsg,
184                         NETLINK_FAMILY *nlFamily,
185                         PVOID inputBuffer,
186                         UINT32 inputLength,
187                         PVOID outputBuffer,
188                         UINT32 outputLength,
189                         UINT32 *replyLen);
190
191
192 /* Handles to the device object for communication with userspace. */
193 NDIS_HANDLE gOvsDeviceHandle;
194 PDEVICE_OBJECT gOvsDeviceObject;
195
196 _Dispatch_type_(IRP_MJ_CREATE)
197 _Dispatch_type_(IRP_MJ_CLOSE)
198 DRIVER_DISPATCH OvsOpenCloseDevice;
199
200 _Dispatch_type_(IRP_MJ_CLEANUP)
201 DRIVER_DISPATCH OvsCleanupDevice;
202
203 _Dispatch_type_(IRP_MJ_DEVICE_CONTROL)
204 DRIVER_DISPATCH OvsDeviceControl;
205
206 #ifdef ALLOC_PRAGMA
207 #pragma alloc_text(INIT, OvsCreateDeviceObject)
208 #pragma alloc_text(PAGE, OvsOpenCloseDevice)
209 #pragma alloc_text(PAGE, OvsCleanupDevice)
210 #pragma alloc_text(PAGE, OvsDeviceControl)
211 #endif // ALLOC_PRAGMA
212
213 /*
214  * We might hit this limit easily since userspace opens a netlink descriptor for
215  * each thread, and at least one descriptor per vport. Revisit this later.
216  */
217 #define OVS_MAX_OPEN_INSTANCES 512
218
219 POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES];
220 UINT32 ovsNumberOfOpenInstances;
221 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
222
223 NDIS_SPIN_LOCK ovsCtrlLockObj;
224 PNDIS_SPIN_LOCK gOvsCtrlLock;
225
226
227 VOID
228 OvsInit()
229 {
230     gOvsCtrlLock = &ovsCtrlLockObj;
231     NdisAllocateSpinLock(gOvsCtrlLock);
232     OvsInitEventQueue();
233     OvsUserInit();
234 }
235
236 VOID
237 OvsCleanup()
238 {
239     OvsCleanupEventQueue();
240     if (gOvsCtrlLock) {
241         NdisFreeSpinLock(gOvsCtrlLock);
242         gOvsCtrlLock = NULL;
243     }
244     OvsUserCleanup();
245 }
246
247 VOID
248 OvsAcquireCtrlLock()
249 {
250     NdisAcquireSpinLock(gOvsCtrlLock);
251 }
252
253 VOID
254 OvsReleaseCtrlLock()
255 {
256     NdisReleaseSpinLock(gOvsCtrlLock);
257 }
258
259
260 /*
261  * --------------------------------------------------------------------------
262  * Creates the communication device between user and kernel, and also
263  * initializes the data associated data structures.
264  * --------------------------------------------------------------------------
265  */
266 NDIS_STATUS
267 OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
268 {
269     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
270     UNICODE_STRING deviceName;
271     UNICODE_STRING symbolicDeviceName;
272     PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];
273     NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes;
274     OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle);
275
276     RtlZeroMemory(dispatchTable,
277                   (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH));
278     dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice;
279     dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice;
280     dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice;
281     dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl;
282
283     NdisInitUnicodeString(&deviceName, OVS_DEVICE_NAME_NT);
284     NdisInitUnicodeString(&symbolicDeviceName, OVS_DEVICE_NAME_DOS);
285
286     RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
287
288     OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header,
289                            NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES,
290                            NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1,
291                            sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES));
292
293     deviceAttributes.DeviceName = &deviceName;
294     deviceAttributes.SymbolicName = &symbolicDeviceName;
295     deviceAttributes.MajorFunctions = dispatchTable;
296     deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION);
297
298     status = NdisRegisterDeviceEx(ovsExtDriverHandle,
299                                   &deviceAttributes,
300                                   &gOvsDeviceObject,
301                                   &gOvsDeviceHandle);
302     if (status != NDIS_STATUS_SUCCESS) {
303         POVS_DEVICE_EXTENSION ovsExt =
304             (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject);
305         ASSERT(gOvsDeviceObject != NULL);
306         ASSERT(gOvsDeviceHandle != NULL);
307
308         if (ovsExt) {
309             ovsExt->numberOpenInstance = 0;
310         }
311     } else {
312         /* Initialize the associated data structures. */
313         OvsInit();
314     }
315     OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
316     return status;
317 }
318
319
320 VOID
321 OvsDeleteDeviceObject()
322 {
323     if (gOvsDeviceHandle) {
324 #ifdef DBG
325         POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION)
326                     NdisGetDeviceReservedExtension(gOvsDeviceObject);
327         if (ovsExt) {
328             ASSERT(ovsExt->numberOpenInstance == 0);
329         }
330 #endif
331
332         ASSERT(gOvsDeviceObject);
333         NdisDeregisterDeviceEx(gOvsDeviceHandle);
334         gOvsDeviceHandle = NULL;
335         gOvsDeviceObject = NULL;
336     }
337     OvsCleanup();
338 }
339
340 POVS_OPEN_INSTANCE
341 OvsGetOpenInstance(PFILE_OBJECT fileObject,
342                    UINT32 dpNo)
343 {
344     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
345     ASSERT(instance);
346     ASSERT(instance->fileObject == fileObject);
347     if (gOvsSwitchContext == NULL ||
348         gOvsSwitchContext->dpNo != dpNo) {
349         return NULL;
350     }
351     return instance;
352 }
353
354
355 POVS_OPEN_INSTANCE
356 OvsFindOpenInstance(PFILE_OBJECT fileObject)
357 {
358     UINT32 i, j;
359     for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES &&
360                        j < ovsNumberOfOpenInstances; i++) {
361         if (ovsOpenInstanceArray[i]) {
362             if (ovsOpenInstanceArray[i]->fileObject == fileObject) {
363                 return ovsOpenInstanceArray[i];
364             }
365             j++;
366         }
367     }
368     return NULL;
369 }
370
371 NTSTATUS
372 OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
373                    PFILE_OBJECT fileObject)
374 {
375     POVS_OPEN_INSTANCE instance =
376         (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE));
377     UINT32 i;
378
379     if (instance == NULL) {
380         return STATUS_NO_MEMORY;
381     }
382     OvsAcquireCtrlLock();
383     ASSERT(OvsFindOpenInstance(fileObject) == NULL);
384
385     if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
386         OvsReleaseCtrlLock();
387         OvsFreeMemory(instance);
388         return STATUS_INSUFFICIENT_RESOURCES;
389     }
390     RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
391
392     for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) {
393         if (ovsOpenInstanceArray[i] == NULL) {
394             ovsOpenInstanceArray[i] = instance;
395             instance->cookie = i;
396             break;
397         }
398     }
399     ASSERT(i < OVS_MAX_OPEN_INSTANCES);
400     instance->fileObject = fileObject;
401     ASSERT(fileObject->FsContext == NULL);
402     instance->pid = (UINT32)InterlockedIncrement((LONG volatile *)&ovsExt->pidCount);
403     if (instance->pid == 0) {
404         /* XXX: check for rollover. */
405     }
406     fileObject->FsContext = instance;
407     OvsReleaseCtrlLock();
408     return STATUS_SUCCESS;
409 }
410
411 static VOID
412 OvsCleanupOpenInstance(PFILE_OBJECT fileObject)
413 {
414     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
415     ASSERT(instance);
416     ASSERT(fileObject == instance->fileObject);
417     OvsCleanupEvent(instance);
418     OvsCleanupPacketQueue(instance);
419 }
420
421 VOID
422 OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
423 {
424     POVS_OPEN_INSTANCE instance;
425     ASSERT(fileObject->FsContext);
426     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
427     ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES);
428
429     OvsAcquireCtrlLock();
430     fileObject->FsContext = NULL;
431     ASSERT(ovsOpenInstanceArray[instance->cookie] == instance);
432     ovsOpenInstanceArray[instance->cookie] = NULL;
433     OvsReleaseCtrlLock();
434     ASSERT(instance->eventQueue == NULL);
435     ASSERT (instance->packetQueue == NULL);
436     OvsFreeMemory(instance);
437 }
438
439 NTSTATUS
440 OvsCompleteIrpRequest(PIRP irp,
441                       ULONG_PTR infoPtr,
442                       NTSTATUS status)
443 {
444     irp->IoStatus.Information = infoPtr;
445     irp->IoStatus.Status = status;
446     IoCompleteRequest(irp, IO_NO_INCREMENT);
447     return status;
448 }
449
450
451 NTSTATUS
452 OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject,
453                    PIRP irp)
454 {
455     PIO_STACK_LOCATION irpSp;
456     NTSTATUS status = STATUS_SUCCESS;
457     PFILE_OBJECT fileObject;
458     POVS_DEVICE_EXTENSION ovsExt =
459         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
460
461     ASSERT(deviceObject == gOvsDeviceObject);
462     ASSERT(ovsExt != NULL);
463
464     irpSp = IoGetCurrentIrpStackLocation(irp);
465     fileObject = irpSp->FileObject;
466     OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u",
467                   deviceObject, fileObject,
468                   ovsExt->numberOpenInstance);
469
470     switch (irpSp->MajorFunction) {
471     case IRP_MJ_CREATE:
472         status = OvsAddOpenInstance(ovsExt, fileObject);
473         if (STATUS_SUCCESS == status) {
474             InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance);
475         }
476         break;
477     case IRP_MJ_CLOSE:
478         ASSERT(ovsExt->numberOpenInstance > 0);
479         OvsRemoveOpenInstance(fileObject);
480         InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance);
481         break;
482     default:
483         ASSERT(0);
484     }
485     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
486 }
487
488 _Use_decl_annotations_
489 NTSTATUS
490 OvsCleanupDevice(PDEVICE_OBJECT deviceObject,
491                  PIRP irp)
492 {
493
494     PIO_STACK_LOCATION irpSp;
495     PFILE_OBJECT fileObject;
496
497     NTSTATUS status = STATUS_SUCCESS;
498 #ifdef DBG
499     POVS_DEVICE_EXTENSION ovsExt =
500         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
501     if (ovsExt) {
502         ASSERT(ovsExt->numberOpenInstance > 0);
503     }
504 #else
505     UNREFERENCED_PARAMETER(deviceObject);
506 #endif
507     ASSERT(deviceObject == gOvsDeviceObject);
508     irpSp = IoGetCurrentIrpStackLocation(irp);
509     fileObject = irpSp->FileObject;
510
511     ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP);
512
513     OvsCleanupOpenInstance(fileObject);
514
515     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
516 }
517
518
519 /*
520  * --------------------------------------------------------------------------
521  * IOCTL function handler for the device.
522  * --------------------------------------------------------------------------
523  */
524 NTSTATUS
525 OvsDeviceControl(PDEVICE_OBJECT deviceObject,
526                  PIRP irp)
527 {
528
529     PIO_STACK_LOCATION irpSp;
530     NTSTATUS status = STATUS_SUCCESS;
531     PFILE_OBJECT fileObject;
532     PVOID inputBuffer;
533     PVOID outputBuffer = NULL;
534     UINT32 inputBufferLen, outputBufferLen;
535     UINT32 code, replyLen = 0;
536     POVS_OPEN_INSTANCE instance;
537     UINT32 devOp;
538     OVS_MESSAGE ovsMsgReadOp;
539     POVS_MESSAGE ovsMsg;
540     NETLINK_FAMILY *nlFamilyOps;
541
542 #ifdef DBG
543     POVS_DEVICE_EXTENSION ovsExt =
544         (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject);
545     ASSERT(deviceObject == gOvsDeviceObject);
546     ASSERT(ovsExt);
547     ASSERT(ovsExt->numberOpenInstance > 0);
548 #else
549     UNREFERENCED_PARAMETER(deviceObject);
550 #endif
551
552     irpSp = IoGetCurrentIrpStackLocation(irp);
553
554     ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL);
555     ASSERT(irpSp->FileObject != NULL);
556
557     fileObject = irpSp->FileObject;
558     instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
559     code = irpSp->Parameters.DeviceIoControl.IoControlCode;
560     inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength;
561     outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
562     inputBuffer = irp->AssociatedIrp.SystemBuffer;
563
564     /* Concurrent netlink operations are not supported. */
565     if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) {
566         status = STATUS_RESOURCE_IN_USE;
567         goto done;
568     }
569
570     /*
571      * Validate the input/output buffer arguments depending on the type of the
572      * operation.
573      */
574     switch (code) {
575     case OVS_IOCTL_TRANSACT:
576         /* Input buffer is mandatory, output buffer is optional. */
577         if (outputBufferLen != 0) {
578             status = MapIrpOutputBuffer(irp, outputBufferLen,
579                                         sizeof *ovsMsg, &outputBuffer);
580             if (status != STATUS_SUCCESS) {
581                 goto done;
582             }
583             ASSERT(outputBuffer);
584         }
585
586         if (inputBufferLen < sizeof (*ovsMsg)) {
587             status = STATUS_NDIS_INVALID_LENGTH;
588             goto done;
589         }
590
591         ovsMsg = inputBuffer;
592         devOp = OVS_TRANSACTION_DEV_OP;
593         break;
594
595     case OVS_IOCTL_READ:
596         /* Output buffer is mandatory. */
597         if (outputBufferLen != 0) {
598             status = MapIrpOutputBuffer(irp, outputBufferLen,
599                                         sizeof *ovsMsg, &outputBuffer);
600             if (status != STATUS_SUCCESS) {
601                 goto done;
602             }
603             ASSERT(outputBuffer);
604         } else {
605             status = STATUS_NDIS_INVALID_LENGTH;
606             goto done;
607         }
608
609         /*
610          * Operate in the mode that read ioctl is similar to ReadFile(). This
611          * might change as the userspace code gets implemented.
612          */
613         inputBuffer = NULL;
614         inputBufferLen = 0;
615         /* Create an NL message for consumption. */
616         ovsMsg = &ovsMsgReadOp;
617         devOp = OVS_READ_DEV_OP;
618
619         /*
620          * For implementing read (ioctl or otherwise), we need to store some
621          * state in the instance to indicate the previous command. The state can
622          * setup 'ovsMsgReadOp' appropriately.
623          *
624          * XXX: Support for that will be added as the userspace code evolves.
625          */
626         status = STATUS_NOT_IMPLEMENTED;
627         goto done;
628
629         break;
630
631     case OVS_IOCTL_WRITE:
632         /* Input buffer is mandatory. */
633         if (inputBufferLen < sizeof (*ovsMsg)) {
634             status = STATUS_NDIS_INVALID_LENGTH;
635             goto done;
636         }
637
638         ovsMsg = inputBuffer;
639         devOp = OVS_WRITE_DEV_OP;
640         break;
641
642     default:
643         status = STATUS_INVALID_DEVICE_REQUEST;
644         goto done;
645     }
646
647     ASSERT(ovsMsg);
648     switch (ovsMsg->nlMsg.nlmsg_type) {
649     case OVS_WIN_NL_CTRL_FAMILY_ID:
650         nlFamilyOps = &nlControlFamilyOps;
651         break;
652     case OVS_WIN_NL_PACKET_FAMILY_ID:
653     case OVS_WIN_NL_DATAPATH_FAMILY_ID:
654     case OVS_WIN_NL_FLOW_FAMILY_ID:
655     case OVS_WIN_NL_VPORT_FAMILY_ID:
656         status = STATUS_NOT_IMPLEMENTED;
657         goto done;
658
659     default:
660         status = STATUS_INVALID_PARAMETER;
661         goto done;
662     }
663
664     /*
665      * For read operation, the netlink command has already been validated
666      * previously.
667      */
668     if (devOp != OVS_READ_DEV_OP) {
669         status = ValidateNetlinkCmd(devOp, ovsMsg, nlFamilyOps);
670         if (status != STATUS_SUCCESS) {
671             goto done;
672         }
673     }
674
675     status = InvokeNetlinkCmdHandler(irp, fileObject, devOp,
676                                      ovsMsg, nlFamilyOps,
677                                      inputBuffer, inputBufferLen,
678                                      outputBuffer, outputBufferLen,
679                                      &replyLen);
680
681 done:
682     KeMemoryBarrier();
683     instance->inUse = 0;
684     return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
685 }
686
687
688 /*
689  * --------------------------------------------------------------------------
690  * Function to validate a netlink command. Only certain combinations of
691  * (device operation, netlink family, command) are valid.
692  * --------------------------------------------------------------------------
693  */
694 static NTSTATUS
695 ValidateNetlinkCmd(UINT32 devOp,
696                    POVS_MESSAGE ovsMsg,
697                    NETLINK_FAMILY *nlFamilyOps)
698 {
699     NTSTATUS status = STATUS_INVALID_PARAMETER;
700     UINT16 i;
701
702     for (i = 0; i < nlFamilyOps->opsCount; i++) {
703         if (nlFamilyOps->cmds[i].cmd == ovsMsg->genlMsg.cmd) {
704             /* Validate if the command is valid for the device operation. */
705             if ((devOp & nlFamilyOps->cmds[i].supportedDevOp) == 0) {
706                 status = STATUS_INVALID_PARAMETER;
707                 goto done;
708             }
709
710             /* Validate the version. */
711             if (nlFamilyOps->version > ovsMsg->genlMsg.version) {
712                 status = STATUS_INVALID_PARAMETER;
713                 goto done;
714             }
715
716             /* Validate the DP for commands where the DP is actually set. */
717             if (ovsMsg->genlMsg.cmd != OVS_CTRL_CMD_WIN_GET_PID) {
718                 OvsAcquireCtrlLock();
719                 if (ovsMsg->ovsHdr.dp_ifindex == (INT)gOvsSwitchContext->dpNo) {
720                     status = STATUS_INVALID_PARAMETER;
721                     OvsReleaseCtrlLock();
722                     goto done;
723                 }
724                 OvsReleaseCtrlLock();
725             }
726
727             status = STATUS_SUCCESS;
728             break;
729         }
730     }
731
732 done:
733     return status;
734 }
735
736 /*
737  * --------------------------------------------------------------------------
738  * Function to invoke the netlink command handler.
739  * --------------------------------------------------------------------------
740  */
741 static NTSTATUS
742 InvokeNetlinkCmdHandler(PIRP irp,
743                         PFILE_OBJECT fileObject,
744                         UINT32 devOp,
745                         OVS_MESSAGE *ovsMsg,
746                         NETLINK_FAMILY *nlFamilyOps,
747                         PVOID inputBuffer,
748                         UINT32 inputLength,
749                         PVOID outputBuffer,
750                         UINT32 outputLength,
751                         UINT32 *replyLen)
752 {
753     NTSTATUS status = STATUS_INVALID_PARAMETER;
754     UINT16 i;
755
756     UNREFERENCED_PARAMETER(devOp);
757
758     for (i = 0; i < nlFamilyOps->opsCount; i++) {
759         if (nlFamilyOps->cmds[i].cmd == ovsMsg->genlMsg.cmd) {
760             status = nlFamilyOps->cmds[i].handler(irp, fileObject,
761                                                 inputBuffer, inputLength,
762                                                 outputBuffer, outputLength,
763                                                 replyLen);
764             break;
765         }
766     }
767
768     return status;
769 }
770
771
772 /*
773  * --------------------------------------------------------------------------
774  *  Each handle on the device is assigned a unique PID when the handle is
775  *  created. On platforms that support netlink natively, the PID is available
776  *  to userspace when the netlink socket is created. However, without native
777  *  netlink support on Windows, OVS datapath generates the PID and lets the
778  *  userspace query it.
779  *
780  *  This function implements the query.
781  * --------------------------------------------------------------------------
782  */
783 static NTSTATUS
784 OvsGetPidCmdHandler(PIRP irp,
785                     PFILE_OBJECT fileObject,
786                     PVOID inputBuffer,
787                     UINT32 inputLength,
788                     PVOID outputBuffer,
789                     UINT32 outputLength,
790                     UINT32 *replyLen)
791 {
792     UNREFERENCED_PARAMETER(irp);
793     UNREFERENCED_PARAMETER(fileObject);
794     UNREFERENCED_PARAMETER(inputBuffer);
795     UNREFERENCED_PARAMETER(inputLength);
796
797     POVS_MESSAGE msgIn = (POVS_MESSAGE)inputBuffer;
798     POVS_MESSAGE msgOut = (POVS_MESSAGE)outputBuffer;
799
800     if (outputLength >= sizeof *msgOut) {
801         POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
802
803         RtlZeroMemory(msgOut, sizeof *msgOut);
804         msgOut->nlMsg.nlmsg_seq = msgIn->nlMsg.nlmsg_seq;
805         msgOut->nlMsg.nlmsg_pid = instance->pid;
806         *replyLen = sizeof *msgOut;
807         /* XXX: We might need to return the DP index as well. */
808     } else {
809         return STATUS_NDIS_INVALID_LENGTH;
810     }
811
812     return NDIS_STATUS_SUCCESS;
813 }
814
815
816 /*
817  * --------------------------------------------------------------------------
818  *  Utility function to map the output buffer in an IRP. The buffer is assumed
819  *  to have been passed down using METHOD_OUT_DIRECT (Direct I/O).
820  * --------------------------------------------------------------------------
821  */
822 static NTSTATUS
823 MapIrpOutputBuffer(PIRP irp,
824                    UINT32 bufferLength,
825                    UINT32 requiredLength,
826                    PVOID *buffer)
827 {
828     ASSERT(irp);
829     ASSERT(buffer);
830     ASSERT(bufferLength);
831     ASSERT(requiredLength);
832     if (!buffer || !irp || bufferLength == 0 || requiredLength == 0) {
833         return STATUS_INVALID_PARAMETER;
834     }
835
836     if (bufferLength < requiredLength) {
837         return STATUS_NDIS_INVALID_LENGTH;
838     }
839     if (irp->MdlAddress == NULL) {
840         return STATUS_INVALID_PARAMETER;
841     }
842     *buffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress,
843                                            NormalPagePriority);
844     if (*buffer == NULL) {
845         return STATUS_INSUFFICIENT_RESOURCES;
846     }
847
848     return STATUS_SUCCESS;
849 }
850
851 #endif /* OVS_USE_NL_INTERFACE */