datapath-windows: Solved memory leak in OVS datapath
[cascardo/ovs.git] / datapath-windows / ovsext / Datapath.c
index c145d00..4af909c 100644 (file)
  * OVS_USE_NL_INTERFACE = 0 => legacy inteface to use with dpif-windows.c
  * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c
  */
-#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 1
 
 #include "precomp.h"
+#include "Switch.h"
+#include "User.h"
 #include "Datapath.h"
 #include "Jhash.h"
-#include "Switch.h"
 #include "Vport.h"
 #include "Event.h"
 #include "User.h"
@@ -33,6 +33,7 @@
 #include "NetProto.h"
 #include "Flow.h"
 #include "User.h"
+#include "Vxlan.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
  * Handler for a given netlink command. Not all the parameters are used by all
  * the handlers.
  */
-typedef NTSTATUS (*NetlinkCmdHandler)(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
-                                      UINT32 *replyLen);
+typedef NTSTATUS(NetlinkCmdHandler)(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                                    UINT32 *replyLen);
 
 typedef struct _NETLINK_CMD {
     UINT16 cmd;
-    NetlinkCmdHandler handler;
+    NetlinkCmdHandler *handler;
     UINT32 supportedDevOp;      /* Supported device operations. */
     BOOLEAN validateDpIndex;    /* Does command require a valid DP argument. */
 } NETLINK_CMD, *PNETLINK_CMD;
@@ -77,30 +78,41 @@ typedef struct _NETLINK_CMD {
 /* A netlink family is a group of commands. */
 typedef struct _NETLINK_FAMILY {
     CHAR *name;
-    UINT32 id;
+    UINT16 id;
     UINT8 version;
-    UINT8 pad;
+    UINT8 pad1;
     UINT16 maxAttr;
+    UINT16 pad2;
     NETLINK_CMD *cmds;          /* Array of netlink commands and handlers. */
     UINT16 opsCount;
 } NETLINK_FAMILY, *PNETLINK_FAMILY;
 
-/*
- * Device operations to tag netlink commands with. This is a bitmask since it is
- * possible that a particular command can be invoked via different device
- * operations.
- */
-#define OVS_READ_DEV_OP          (1 << 0)
-#define OVS_WRITE_DEV_OP         (1 << 1)
-#define OVS_TRANSACTION_DEV_OP   (1 << 2)
-
 /* Handlers for the various netlink commands. */
-static NTSTATUS OvsGetPidCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+static NetlinkCmdHandler OvsPendEventCmdHandler,
+                         OvsPendPacketCmdHandler,
+                         OvsSubscribeEventCmdHandler,
+                         OvsSubscribePacketCmdHandler,
+                         OvsReadEventCmdHandler,
+                         OvsReadPacketCmdHandler,
+                         OvsNewDpCmdHandler,
+                         OvsGetDpCmdHandler,
+                         OvsSetDpCmdHandler;
+
+NetlinkCmdHandler        OvsGetNetdevCmdHandler,
+                         OvsGetVportCmdHandler,
+                         OvsSetVportCmdHandler,
+                         OvsNewVportCmdHandler,
+                         OvsDeleteVportCmdHandler;
+
+static NTSTATUS HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                                       UINT32 *replyLen);
+static NTSTATUS HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                                UINT32 *replyLen);
+static NTSTATUS HandleDpTransactionCommon(
+                    POVS_USER_PARAMS_CONTEXT usrParamsCtx, UINT32 *replyLen);
+static NTSTATUS OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                                     UINT32 *replyLen);
 
-static NTSTATUS OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
-                                   UINT32 *replyLen);
-
 /*
  * The various netlink families, along with the supported commands. Most of
  * these families and commands are part of the openvswitch specification for a
@@ -110,10 +122,35 @@ static NTSTATUS OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
 
 /* Netlink control family: this is a Windows specific family. */
 NETLINK_CMD nlControlFamilyCmdOps[] = {
-    { .cmd             = OVS_CTRL_CMD_WIN_GET_PID,
-      .handler         = OvsGetPidCmdHandler,
-      .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
-      .validateDpIndex = FALSE
+    { .cmd = OVS_CTRL_CMD_WIN_PEND_REQ,
+      .handler = OvsPendEventCmdHandler,
+      .supportedDevOp = OVS_WRITE_DEV_OP,
+      .validateDpIndex = TRUE,
+    },
+    { .cmd = OVS_CTRL_CMD_WIN_PEND_PACKET_REQ,
+      .handler = OvsPendPacketCmdHandler,
+      .supportedDevOp = OVS_WRITE_DEV_OP,
+      .validateDpIndex = TRUE,
+    },
+    { .cmd = OVS_CTRL_CMD_MC_SUBSCRIBE_REQ,
+      .handler = OvsSubscribeEventCmdHandler,
+      .supportedDevOp = OVS_WRITE_DEV_OP,
+      .validateDpIndex = TRUE,
+    },
+    { .cmd = OVS_CTRL_CMD_PACKET_SUBSCRIBE_REQ,
+      .handler = OvsSubscribePacketCmdHandler,
+      .supportedDevOp = OVS_WRITE_DEV_OP,
+      .validateDpIndex = TRUE,
+    },
+    { .cmd = OVS_CTRL_CMD_EVENT_NOTIFY,
+      .handler = OvsReadEventCmdHandler,
+      .supportedDevOp = OVS_READ_DEV_OP,
+      .validateDpIndex = FALSE,
+    },
+    { .cmd = OVS_CTRL_CMD_READ_NOTIFY,
+      .handler = OvsReadPacketCmdHandler,
+      .supportedDevOp = OVS_READ_DEV_OP,
+      .validateDpIndex = FALSE,
     }
 };
 
@@ -128,10 +165,22 @@ NETLINK_FAMILY nlControlFamilyOps = {
 
 /* Netlink datapath family. */
 NETLINK_CMD nlDatapathFamilyCmdOps[] = {
+    { .cmd             = OVS_DP_CMD_NEW,
+      .handler         = OvsNewDpCmdHandler,
+      .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = FALSE
+    },
     { .cmd             = OVS_DP_CMD_GET,
       .handler         = OvsGetDpCmdHandler,
-      .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP,
+      .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
+                         OVS_TRANSACTION_DEV_OP,
       .validateDpIndex = FALSE
+    },
+    { .cmd             = OVS_DP_CMD_SET,
+      .handler         = OvsSetDpCmdHandler,
+      .supportedDevOp  = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
+                         OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
     }
 };
 
@@ -145,36 +194,109 @@ NETLINK_FAMILY nlDatapathFamilyOps = {
 };
 
 /* Netlink packet family. */
-/* XXX: Add commands here. */
+
+NETLINK_CMD nlPacketFamilyCmdOps[] = {
+    { .cmd             = OVS_PACKET_CMD_EXECUTE,
+      .handler         = OvsNlExecuteCmdHandler,
+      .supportedDevOp  = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
+    }
+};
+
 NETLINK_FAMILY nlPacketFamilyOps = {
     .name     = OVS_PACKET_FAMILY,
     .id       = OVS_WIN_NL_PACKET_FAMILY_ID,
     .version  = OVS_PACKET_VERSION,
     .maxAttr  = OVS_PACKET_ATTR_MAX,
-    .cmds     = NULL, /* XXX: placeholder. */
-    .opsCount = 0
+    .cmds     = nlPacketFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlPacketFamilyCmdOps)
 };
 
 /* Netlink vport family. */
-/* XXX: Add commands here. */
+NETLINK_CMD nlVportFamilyCmdOps[] = {
+    { .cmd = OVS_VPORT_CMD_GET,
+      .handler = OvsGetVportCmdHandler,
+      .supportedDevOp = OVS_WRITE_DEV_OP | OVS_READ_DEV_OP |
+                        OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
+    },
+    { .cmd = OVS_VPORT_CMD_NEW,
+      .handler = OvsNewVportCmdHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
+    },
+    { .cmd = OVS_VPORT_CMD_SET,
+      .handler = OvsSetVportCmdHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
+    },
+    { .cmd = OVS_VPORT_CMD_DEL,
+      .handler = OvsDeleteVportCmdHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = TRUE
+    },
+};
+
 NETLINK_FAMILY nlVportFamilyOps = {
     .name     = OVS_VPORT_FAMILY,
     .id       = OVS_WIN_NL_VPORT_FAMILY_ID,
     .version  = OVS_VPORT_VERSION,
     .maxAttr  = OVS_VPORT_ATTR_MAX,
-    .cmds     = NULL, /* XXX: placeholder. */
-    .opsCount = 0
+    .cmds     = nlVportFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlVportFamilyCmdOps)
 };
 
 /* Netlink flow family. */
-/* XXX: Add commands here. */
+
+NETLINK_CMD nlFlowFamilyCmdOps[] = {
+    { .cmd              = OVS_FLOW_CMD_NEW,
+      .handler          = OvsFlowNlCmdHandler,
+      .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex  = TRUE
+    },
+    { .cmd              = OVS_FLOW_CMD_SET,
+      .handler          = OvsFlowNlCmdHandler,
+      .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex  = TRUE
+    },
+    { .cmd              = OVS_FLOW_CMD_DEL,
+      .handler          = OvsFlowNlCmdHandler,
+      .supportedDevOp   = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex  = TRUE
+    },
+    { .cmd              = OVS_FLOW_CMD_GET,
+      .handler          = OvsFlowNlGetCmdHandler,
+      .supportedDevOp   = OVS_TRANSACTION_DEV_OP |
+                          OVS_WRITE_DEV_OP | OVS_READ_DEV_OP,
+      .validateDpIndex  = TRUE
+    },
+};
+
 NETLINK_FAMILY nlFLowFamilyOps = {
     .name     = OVS_FLOW_FAMILY,
     .id       = OVS_WIN_NL_FLOW_FAMILY_ID,
     .version  = OVS_FLOW_VERSION,
     .maxAttr  = OVS_FLOW_ATTR_MAX,
-    .cmds     = NULL, /* XXX: placeholder. */
-    .opsCount = 0
+    .cmds     = nlFlowFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlFlowFamilyCmdOps)
+};
+
+/* Netlink netdev family. */
+NETLINK_CMD nlNetdevFamilyCmdOps[] = {
+    { .cmd = OVS_WIN_NETDEV_CMD_GET,
+      .handler = OvsGetNetdevCmdHandler,
+      .supportedDevOp = OVS_TRANSACTION_DEV_OP,
+      .validateDpIndex = FALSE
+    },
+};
+
+NETLINK_FAMILY nlNetdevFamilyOps = {
+    .name     = OVS_WIN_NETDEV_FAMILY,
+    .id       = OVS_WIN_NL_NETDEV_FAMILY_ID,
+    .version  = OVS_WIN_NETDEV_VERSION,
+    .maxAttr  = OVS_WIN_NETDEV_ATTR_MAX,
+    .cmds     = nlNetdevFamilyCmdOps,
+    .opsCount = ARRAY_SIZE(nlNetdevFamilyCmdOps)
 };
 
 static NTSTATUS MapIrpOutputBuffer(PIRP irp,
@@ -188,8 +310,6 @@ static NTSTATUS ValidateNetlinkCmd(UINT32 devOp,
 static NTSTATUS InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                                         NETLINK_FAMILY *nlFamilyOps,
                                         UINT32 *replyLen);
-static NTSTATUS OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx);
-
 
 /* Handles to the device object for communication with userspace. */
 NDIS_HANDLE gOvsDeviceHandle;
@@ -226,6 +346,37 @@ extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 NDIS_SPIN_LOCK ovsCtrlLockObj;
 PNDIS_SPIN_LOCK gOvsCtrlLock;
 
+NTSTATUS
+InitUserDumpState(POVS_OPEN_INSTANCE instance,
+                  POVS_MESSAGE ovsMsg)
+{
+    /* Clear the dumpState from a previous dump sequence. */
+    ASSERT(instance->dumpState.ovsMsg == NULL);
+    ASSERT(ovsMsg);
+
+    instance->dumpState.ovsMsg =
+        (POVS_MESSAGE)OvsAllocateMemoryWithTag(sizeof(OVS_MESSAGE),
+                                               OVS_DATAPATH_POOL_TAG);
+    if (instance->dumpState.ovsMsg == NULL) {
+        return STATUS_NO_MEMORY;
+    }
+    RtlCopyMemory(instance->dumpState.ovsMsg, ovsMsg,
+                  sizeof *instance->dumpState.ovsMsg);
+    RtlZeroMemory(instance->dumpState.index,
+                  sizeof instance->dumpState.index);
+
+    return STATUS_SUCCESS;
+}
+
+VOID
+FreeUserDumpState(POVS_OPEN_INSTANCE instance)
+{
+    if (instance->dumpState.ovsMsg != NULL) {
+        OvsFreeMemoryWithTag(instance->dumpState.ovsMsg,
+                             OVS_DATAPATH_POOL_TAG);
+        RtlZeroMemory(&instance->dumpState, sizeof instance->dumpState);
+    }
+}
 
 VOID
 OvsInit()
@@ -233,7 +384,6 @@ OvsInit()
     gOvsCtrlLock = &ovsCtrlLockObj;
     NdisAllocateSpinLock(gOvsCtrlLock);
     OvsInitEventQueue();
-    OvsUserInit();
 }
 
 VOID
@@ -244,7 +394,6 @@ OvsCleanup()
         NdisFreeSpinLock(gOvsCtrlLock);
         gOvsCtrlLock = NULL;
     }
-    OvsUserCleanup();
 }
 
 VOID
@@ -312,9 +461,9 @@ OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle)
             ovsExt->numberOpenInstance = 0;
         }
     } else {
-        /* Initialize the associated data structures. */
-        OvsInit();
+        OvsRegisterSystemProvider((PVOID)gOvsDeviceObject);
     }
+
     OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject);
     return status;
 }
@@ -336,8 +485,9 @@ OvsDeleteDeviceObject()
         NdisDeregisterDeviceEx(gOvsDeviceHandle);
         gOvsDeviceHandle = NULL;
         gOvsDeviceObject = NULL;
+
+        OvsUnregisterSystemProvider();
     }
-    OvsCleanup();
 }
 
 POVS_OPEN_INSTANCE
@@ -347,8 +497,7 @@ OvsGetOpenInstance(PFILE_OBJECT fileObject,
     POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
     ASSERT(instance);
     ASSERT(instance->fileObject == fileObject);
-    if (gOvsSwitchContext == NULL ||
-        gOvsSwitchContext->dpNo != dpNo) {
+    if (gOvsSwitchContext->dpNo != dpNo) {
         return NULL;
     }
     return instance;
@@ -376,7 +525,8 @@ OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
                    PFILE_OBJECT fileObject)
 {
     POVS_OPEN_INSTANCE instance =
-        (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE));
+        (POVS_OPEN_INSTANCE)OvsAllocateMemoryWithTag(sizeof(OVS_OPEN_INSTANCE),
+                                                     OVS_DATAPATH_POOL_TAG);
     UINT32 i;
 
     if (instance == NULL) {
@@ -387,7 +537,7 @@ OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
 
     if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) {
         OvsReleaseCtrlLock();
-        OvsFreeMemory(instance);
+        OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
         return STATUS_INSUFFICIENT_RESOURCES;
     }
     RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE));
@@ -395,6 +545,7 @@ OvsAddOpenInstance(POVS_DEVICE_EXTENSION ovsExt,
     for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) {
         if (ovsOpenInstanceArray[i] == NULL) {
             ovsOpenInstanceArray[i] = instance;
+            ovsNumberOfOpenInstances++;
             instance->cookie = i;
             break;
         }
@@ -433,10 +584,12 @@ OvsRemoveOpenInstance(PFILE_OBJECT fileObject)
     fileObject->FsContext = NULL;
     ASSERT(ovsOpenInstanceArray[instance->cookie] == instance);
     ovsOpenInstanceArray[instance->cookie] = NULL;
+    ovsNumberOfOpenInstances--;
     OvsReleaseCtrlLock();
     ASSERT(instance->eventQueue == NULL);
     ASSERT (instance->packetQueue == NULL);
-    OvsFreeMemory(instance);
+    FreeUserDumpState(instance);
+    OvsFreeMemoryWithTag(instance, OVS_DATAPATH_POOL_TAG);
 }
 
 NTSTATUS
@@ -518,7 +671,6 @@ OvsCleanupDevice(PDEVICE_OBJECT deviceObject,
     return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status);
 }
 
-
 /*
  * --------------------------------------------------------------------------
  * IOCTL function handler for the device.
@@ -528,7 +680,6 @@ NTSTATUS
 OvsDeviceControl(PDEVICE_OBJECT deviceObject,
                  PIRP irp)
 {
-
     PIO_STACK_LOCATION irpSp;
     NTSTATUS status = STATUS_SUCCESS;
     PFILE_OBJECT fileObject;
@@ -565,10 +716,15 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
     outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength;
     inputBuffer = irp->AssociatedIrp.SystemBuffer;
 
-    /* Concurrent netlink operations are not supported. */
-    if (InterlockedCompareExchange((LONG volatile *)&instance->inUse, 1, 0)) {
-        status = STATUS_RESOURCE_IN_USE;
-        goto done;
+    /* Check if the extension is enabled. */
+    if (NULL == gOvsSwitchContext) {
+        status = STATUS_NOT_FOUND;
+        goto exit;
+    }
+
+    if (!OvsAcquireSwitchContext()) {
+        status = STATUS_NOT_FOUND;
+        goto exit;
     }
 
     /*
@@ -576,8 +732,26 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
      * operation.
      */
     switch (code) {
+    case OVS_IOCTL_GET_PID:
+        /* Both input buffer and output buffer use the same location. */
+        outputBuffer = irp->AssociatedIrp.SystemBuffer;
+        if (outputBufferLen != 0) {
+            InitUserParamsCtx(irp, instance, 0, NULL,
+                              inputBuffer, inputBufferLen,
+                              outputBuffer, outputBufferLen,
+                              &usrParamsCtx);
+
+            ASSERT(outputBuffer);
+        } else {
+            status = STATUS_NDIS_INVALID_LENGTH;
+            goto done;
+        }
+
+        status = OvsGetPidHandler(&usrParamsCtx, &replyLen);
+        goto done;
+
     case OVS_IOCTL_TRANSACT:
-        /* Input buffer is mandatory, output buffer is optional. */
+        /* Both input buffer and output buffer are mandatory. */
         if (outputBufferLen != 0) {
             status = MapIrpOutputBuffer(irp, outputBufferLen,
                                         sizeof *ovsMsg, &outputBuffer);
@@ -585,6 +759,9 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
                 goto done;
             }
             ASSERT(outputBuffer);
+        } else {
+            status = STATUS_NDIS_INVALID_LENGTH;
+            goto done;
         }
 
         if (inputBufferLen < sizeof (*ovsMsg)) {
@@ -596,6 +773,41 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
         devOp = OVS_TRANSACTION_DEV_OP;
         break;
 
+    case OVS_IOCTL_READ_EVENT:
+    case OVS_IOCTL_READ_PACKET:
+        /*
+         * Output buffer is mandatory. These IOCTLs are used to read events and
+         * packets respectively. It is convenient to have separate ioctls.
+         */
+        if (outputBufferLen != 0) {
+            status = MapIrpOutputBuffer(irp, outputBufferLen,
+                                        sizeof *ovsMsg, &outputBuffer);
+            if (status != STATUS_SUCCESS) {
+                goto done;
+            }
+            ASSERT(outputBuffer);
+        } else {
+            status = STATUS_NDIS_INVALID_LENGTH;
+            goto done;
+        }
+        inputBuffer = NULL;
+        inputBufferLen = 0;
+
+        ovsMsg = &ovsMsgReadOp;
+        RtlZeroMemory(ovsMsg, sizeof *ovsMsg);
+        ovsMsg->nlMsg.nlmsgLen = sizeof *ovsMsg;
+        ovsMsg->nlMsg.nlmsgType = nlControlFamilyOps.id;
+        ovsMsg->nlMsg.nlmsgPid = instance->pid;
+
+        /* An "artificial" command so we can use NL family function table*/
+        ovsMsg->genlMsg.cmd = (code == OVS_IOCTL_READ_EVENT) ?
+                              OVS_CTRL_CMD_EVENT_NOTIFY :
+                              OVS_CTRL_CMD_READ_NOTIFY;
+        ovsMsg->genlMsg.version = nlControlFamilyOps.version;
+
+        devOp = OVS_READ_DEV_OP;
+        break;
+
     case OVS_IOCTL_READ:
         /* Output buffer is mandatory. */
         if (outputBufferLen != 0) {
@@ -622,7 +834,7 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
          * state in the instance to indicate the command that started the dump
          * operation. The state can setup 'ovsMsgReadOp' appropriately. Note
          * that 'ovsMsgReadOp' is needed only in this function to call into the
-         * appropraite handler. The handler itself can access the state in the
+         * appropriate handler. The handler itself can access the state in the
          * instance.
          *
          * In the absence of a dump start, return 0 bytes.
@@ -665,20 +877,26 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
     case OVS_WIN_NL_DATAPATH_FAMILY_ID:
         nlFamilyOps = &nlDatapathFamilyOps;
         break;
-    case OVS_WIN_NL_PACKET_FAMILY_ID:
     case OVS_WIN_NL_FLOW_FAMILY_ID:
+         nlFamilyOps = &nlFLowFamilyOps;
+         break;
+    case OVS_WIN_NL_PACKET_FAMILY_ID:
+         nlFamilyOps = &nlPacketFamilyOps;
+         break;
     case OVS_WIN_NL_VPORT_FAMILY_ID:
-        status = STATUS_NOT_IMPLEMENTED;
-        goto done;
-
+        nlFamilyOps = &nlVportFamilyOps;
+        break;
+    case OVS_WIN_NL_NETDEV_FAMILY_ID:
+        nlFamilyOps = &nlNetdevFamilyOps;
+        break;
     default:
         status = STATUS_INVALID_PARAMETER;
         goto done;
     }
 
     /*
-     * For read operation, the netlink command has already been validated
-     * previously.
+     * For read operation, avoid duplicate validation since 'ovsMsg' is either
+     * "artificial" or was copied from a previously validated 'ovsMsg'.
      */
     if (devOp != OVS_READ_DEV_OP) {
         status = ValidateNetlinkCmd(devOp, instance, ovsMsg, nlFamilyOps);
@@ -695,8 +913,17 @@ OvsDeviceControl(PDEVICE_OBJECT deviceObject,
     status = InvokeNetlinkCmdHandler(&usrParamsCtx, nlFamilyOps, &replyLen);
 
 done:
-    KeMemoryBarrier();
-    instance->inUse = 0;
+    OvsReleaseSwitchContext(gOvsSwitchContext);
+
+exit:
+    /* Should not complete a pending IRP unless proceesing is completed. */
+    if (status == STATUS_PENDING) {
+        /* STATUS_PENDING is returned by the NL handler when the request is
+         * to be processed later, so we mark the IRP as pending and complete
+         * it in another thread when the request is processed. */
+        IoMarkIrpPending(irp);
+        return status;
+    }
     return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status);
 }
 
@@ -705,8 +932,6 @@ done:
  * --------------------------------------------------------------------------
  * Function to validate a netlink command. Only certain combinations of
  * (device operation, netlink family, command) are valid.
- *
- * XXX: Take policy into consideration.
  * --------------------------------------------------------------------------
  */
 static NTSTATUS
@@ -734,22 +959,17 @@ ValidateNetlinkCmd(UINT32 devOp,
 
             /* Validate the DP for commands that require a DP. */
             if (nlFamilyOps->cmds[i].validateDpIndex == TRUE) {
-                OvsAcquireCtrlLock();
                 if (ovsMsg->ovsHdr.dp_ifindex !=
-                    (INT)gOvsSwitchContext->dpNo) {
+                                          (INT)gOvsSwitchContext->dpNo) {
                     status = STATUS_INVALID_PARAMETER;
-                    OvsReleaseCtrlLock();
                     goto done;
                 }
-                OvsReleaseCtrlLock();
             }
 
             /* Validate the PID. */
-            if (ovsMsg->genlMsg.cmd != OVS_CTRL_CMD_WIN_GET_PID) {
-                if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
-                    status = STATUS_INVALID_PARAMETER;
-                    goto done;
-                }
+            if (ovsMsg->nlMsg.nlmsgPid != instance->pid) {
+                status = STATUS_INVALID_PARAMETER;
+                goto done;
             }
 
             status = STATUS_SUCCESS;
@@ -763,7 +983,9 @@ done:
 
 /*
  * --------------------------------------------------------------------------
- * Function to invoke the netlink command handler.
+ * Function to invoke the netlink command handler. The function also stores
+ * the return value of the handler function to construct a 'NL_ERROR' message,
+ * and in turn returns success to the caller.
  * --------------------------------------------------------------------------
  */
 static NTSTATUS
@@ -776,60 +998,261 @@ InvokeNetlinkCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
 
     for (i = 0; i < nlFamilyOps->opsCount; i++) {
         if (nlFamilyOps->cmds[i].cmd == usrParamsCtx->ovsMsg->genlMsg.cmd) {
-            status = nlFamilyOps->cmds[i].handler(usrParamsCtx, replyLen);
+            NetlinkCmdHandler *handler = nlFamilyOps->cmds[i].handler;
+            ASSERT(handler);
+            if (handler) {
+                status = handler(usrParamsCtx, replyLen);
+            }
             break;
         }
     }
 
+    /*
+     * Netlink socket semantics dictate that the return value of the netlink
+     * function should be an error ONLY under fatal conditions. If the message
+     * made it all the way to the handler function, it is not a fatal condition.
+     * Absorb the error returned by the handler function into a 'struct
+     * NL_ERROR' and populate the 'output buffer' to return to userspace.
+     *
+     * This behavior is obviously applicable only to netlink commands that
+     * specify an 'output buffer'. For other commands, we return the error as
+     * is.
+     *
+     * 'STATUS_PENDING' is a special return value and userspace is equipped to
+     * handle it.
+     */
+    if (status != STATUS_SUCCESS && status != STATUS_PENDING) {
+        if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP && *replyLen == 0) {
+            NL_ERROR nlError = NlMapStatusToNlErr(status);
+            POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+            POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
+                usrParamsCtx->outputBuffer;
+
+            ASSERT(msgError);
+            NlBuildErrorMsg(msgIn, msgError, nlError);
+            *replyLen = msgError->nlMsg.nlmsgLen;
+        }
+
+        if (*replyLen != 0) {
+            status = STATUS_SUCCESS;
+        }
+    }
+
+#ifdef DBG
+    if (usrParamsCtx->devOp != OVS_WRITE_DEV_OP) {
+        ASSERT(status == STATUS_PENDING || *replyLen != 0 || status == STATUS_SUCCESS);
+    }
+#endif
+
     return status;
 }
 
-
 /*
  * --------------------------------------------------------------------------
- *  Each handle on the device is assigned a unique PID when the handle is
- *  created. On platforms that support netlink natively, the PID is available
- *  to userspace when the netlink socket is created. However, without native
- *  netlink support on Windows, OVS datapath generates the PID and lets the
- *  userspace query it.
+ *  Handler for 'OVS_IOCTL_GET_PID'.
  *
- *  This function implements the query.
+ *  Each handle on the device is assigned a unique PID when the handle is
+ *  created. This function passes the PID to userspace using METHOD_BUFFERED
+ *  method.
  * --------------------------------------------------------------------------
  */
 static NTSTATUS
-OvsGetPidCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
-                    UINT32 *replyLen)
+OvsGetPidHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                 UINT32 *replyLen)
 {
-    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
-    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+    NTSTATUS status = STATUS_SUCCESS;
+    PUINT32 msgOut = (PUINT32)usrParamsCtx->outputBuffer;
 
     if (usrParamsCtx->outputLength >= sizeof *msgOut) {
         POVS_OPEN_INSTANCE instance =
             (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
 
         RtlZeroMemory(msgOut, sizeof *msgOut);
-        msgOut->nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
-        msgOut->nlMsg.nlmsgPid = instance->pid;
+        RtlCopyMemory(msgOut, &instance->pid, sizeof(*msgOut));
         *replyLen = sizeof *msgOut;
-        /* XXX: We might need to return the DP index as well. */
     } else {
-        return STATUS_NDIS_INVALID_LENGTH;
+        *replyLen = sizeof *msgOut;
+        status = STATUS_NDIS_INVALID_LENGTH;
     }
 
-    return STATUS_SUCCESS;
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to fill up information about the datapath in a reply to
+ * userspace.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsDpFillInfo(POVS_SWITCH_CONTEXT ovsSwitchContext,
+              POVS_MESSAGE msgIn,
+              PNL_BUFFER nlBuf)
+{
+    BOOLEAN writeOk;
+    OVS_MESSAGE msgOutTmp;
+    OVS_DATAPATH *datapath = &ovsSwitchContext->datapath;
+    PNL_MSG_HDR nlMsg;
+
+    ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && NlBufRemLen(nlBuf) >= sizeof *msgIn);
+
+    msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_DATAPATH_FAMILY_ID;
+    msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
+    msgOutTmp.nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
+    msgOutTmp.nlMsg.nlmsgPid = msgIn->nlMsg.nlmsgPid;
+
+    msgOutTmp.genlMsg.cmd = OVS_DP_CMD_GET;
+    msgOutTmp.genlMsg.version = nlDatapathFamilyOps.version;
+    msgOutTmp.genlMsg.reserved = 0;
+
+    msgOutTmp.ovsHdr.dp_ifindex = ovsSwitchContext->dpNo;
+
+    writeOk = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
+    if (writeOk) {
+        writeOk = NlMsgPutTailString(nlBuf, OVS_DP_ATTR_NAME,
+                                     OVS_SYSTEM_DP_NAME);
+    }
+    if (writeOk) {
+        OVS_DP_STATS dpStats;
+
+        dpStats.n_hit = datapath->hits;
+        dpStats.n_missed = datapath->misses;
+        dpStats.n_lost = datapath->lost;
+        dpStats.n_flows = datapath->nFlows;
+        writeOk = NlMsgPutTailUnspec(nlBuf, OVS_DP_ATTR_STATS,
+                                     (PCHAR)&dpStats, sizeof dpStats);
+    }
+    nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
+    nlMsg->nlmsgLen = NlBufSize(nlBuf);
+
+    return writeOk ? STATUS_SUCCESS : STATUS_INVALID_BUFFER_SIZE;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Handler for queueing an IRP used for event notification. The IRP is
+ * completed when a port state changes. STATUS_PENDING is returned on
+ * success. User mode keep a pending IRP at all times.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsPendEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+    NDIS_STATUS status;
+
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+    OVS_EVENT_POLL poll;
+
+    poll.dpNo = msgIn->ovsHdr.dp_ifindex;
+    status = OvsWaitEventIoctl(usrParamsCtx->irp, instance->fileObject,
+                               &poll, sizeof poll);
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  Handler for the subscription for the event queue
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsSubscribeEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                            UINT32 *replyLen)
+{
+    NDIS_STATUS status;
+    OVS_EVENT_SUBSCRIBE request;
+    BOOLEAN rc;
+    UINT8 join;
+    PNL_ATTR attrs[2];
+    const NL_POLICY policy[] =  {
+        [OVS_NL_ATTR_MCAST_GRP] = {.type = NL_A_U32 },
+        [OVS_NL_ATTR_MCAST_JOIN] = {.type = NL_A_U8 },
+        };
+
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+
+    rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
+         NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
+    if (!rc) {
+        status = STATUS_INVALID_PARAMETER;
+        goto done;
+    }
+
+    /* XXX Ignore the MC group for now */
+    join = NlAttrGetU8(attrs[OVS_NL_ATTR_MCAST_JOIN]);
+    request.dpNo = msgIn->ovsHdr.dp_ifindex;
+    request.subscribe = join;
+    request.mask = OVS_EVENT_MASK_ALL;
+
+    status = OvsSubscribeEventIoctl(instance->fileObject, &request,
+                                    sizeof request);
+done:
+    return status;
 }
 
 /*
  * --------------------------------------------------------------------------
- *  Handler for the get dp command. The function handles the initial call to
- *  setup the dump state, as well as subsequent calls to continue dumping data.
+ *  Command Handler for 'OVS_DP_CMD_NEW'.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsNewDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                   UINT32 *replyLen)
+{
+    return HandleDpTransactionCommon(usrParamsCtx, replyLen);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  Command Handler for 'OVS_DP_CMD_GET'.
+ *
+ *  The function handles both the dump based as well as the transaction based
+ *  'OVS_DP_CMD_GET' command. In the dump command, it handles the initial
+ *  call to setup dump state, as well as subsequent calls to continue dumping
+ *  data.
  * --------------------------------------------------------------------------
  */
 static NTSTATUS
 OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
                    UINT32 *replyLen)
 {
-    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+    if (usrParamsCtx->devOp == OVS_TRANSACTION_DEV_OP) {
+        return HandleDpTransactionCommon(usrParamsCtx, replyLen);
+    } else {
+        return HandleGetDpDump(usrParamsCtx, replyLen);
+    }
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  Function for handling the transaction based 'OVS_DP_CMD_GET' command.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+HandleGetDpTransaction(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+    return HandleDpTransactionCommon(usrParamsCtx, replyLen);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  Function for handling the dump-based 'OVS_DP_CMD_GET' command.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+HandleGetDpDump(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                UINT32 *replyLen)
+{
     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
     POVS_OPEN_INSTANCE instance =
         (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
@@ -838,6 +1261,10 @@ OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
         *replyLen = 0;
         OvsSetupDumpStart(usrParamsCtx);
     } else {
+        NL_BUFFER nlBuf;
+        NTSTATUS status;
+        POVS_MESSAGE msgIn = instance->dumpState.ovsMsg;
+
         ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
 
         if (instance->dumpState.ovsMsg == NULL) {
@@ -847,81 +1274,24 @@ OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
 
         /* Dump state must have been deleted after previous dump operation. */
         ASSERT(instance->dumpState.index[0] == 0);
+
         /* Output buffer has been validated while validating read dev op. */
         ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
 
-        /* XXX: Replace this with the netlink set API. */
-        msgIn = instance->dumpState.ovsMsg;
-        msgOut->nlMsg.nlmsgType = OVS_WIN_NL_DATAPATH_FAMILY_ID;
-        msgOut->nlMsg.nlmsgFlags = 0;  /* XXX: ? */
-        msgOut->nlMsg.nlmsgSeq = msgIn->nlMsg.nlmsgSeq;
-        msgOut->nlMsg.nlmsgPid = msgIn->nlMsg.nlmsgPid;
-        msgOut->nlMsg.nlmsgLen = sizeof *msgOut;
-
-        msgOut->genlMsg.cmd = OVS_DP_CMD_GET;
-        msgOut->genlMsg.version = nlDatapathFamilyOps.version;
-        msgOut->genlMsg.reserved = 0;
-
-        OvsAcquireCtrlLock();
-        if (gOvsSwitchContext) {
-            msgOut->ovsHdr.dp_ifindex = gOvsSwitchContext->dpNo;
-        } else {
-            /* Treat this as a dump done. */
-            OvsReleaseCtrlLock();
+        NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
+                  usrParamsCtx->outputLength);
+
+        status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
+
+        if (status != STATUS_SUCCESS) {
             *replyLen = 0;
             FreeUserDumpState(instance);
-            return STATUS_SUCCESS;
-        }
-
-        /* XXX: Replace this with the netlink set API. */
-        {
-            /*
-             * Assume that the output buffer is at least 512 bytes. Once the
-             * netlink set API is implemented, the netlink buffer manipulation
-             * API should provide for checking the remaining number of bytes as
-             * we write out the message.
-             */
-            if (usrParamsCtx->outputLength <= 512) {
-                OvsReleaseCtrlLock();
-                return STATUS_NDIS_INVALID_LENGTH;
-            }
-            UINT16 attrTotalSize = 0;
-            UINT16 attrSize = 0;
-            PNL_ATTR nlAttr = (PNL_ATTR) ((PCHAR)msgOut + sizeof (*msgOut));
-            struct ovs_dp_stats *dpStats;
-            OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath;
-
-            nlAttr->nlaType = OVS_DP_ATTR_NAME;
-            /* 'nlaLen' must not include the pad. */
-            nlAttr->nlaLen = sizeof (*nlAttr) + sizeof (OVS_SYSTEM_DP_NAME);
-            attrSize = sizeof (*nlAttr) +
-                       NLA_ALIGN(sizeof (OVS_SYSTEM_DP_NAME));
-            attrTotalSize += attrSize;
-
-            PNL_ATTR nlAttrNext = NlAttrGet(nlAttr);
-            RtlCopyMemory((PCHAR)nlAttrNext, OVS_SYSTEM_DP_NAME,
-                          sizeof (OVS_SYSTEM_DP_NAME));
-
-            nlAttr = (PNL_ATTR)((PCHAR)nlAttr + attrSize);
-            nlAttr->nlaType = OVS_DP_ATTR_STATS;
-            nlAttr->nlaLen = sizeof (*nlAttr) + sizeof (*dpStats);
-            attrSize = sizeof (*nlAttr) + NLA_ALIGN(sizeof (*dpStats));
-            attrTotalSize += attrSize;
-
-            dpStats = NlAttrGet(nlAttr);
-            dpStats->n_hit = datapath->hits;
-            dpStats->n_missed = datapath->misses;
-            dpStats->n_lost = datapath->lost;
-            dpStats->n_flows = datapath->nFlows;
-
-            msgOut->nlMsg.nlmsgLen += attrTotalSize;
+            return status;
         }
-        OvsReleaseCtrlLock();
 
         /* Increment the dump index. */
         instance->dumpState.index[0] = 1;
         *replyLen = msgOut->nlMsg.nlmsgLen;
-        ASSERT(*replyLen <= 512);
 
         /* Free up the dump state, since there's no more data to continue. */
         FreeUserDumpState(instance);
@@ -930,7 +1300,115 @@ OvsGetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
     return STATUS_SUCCESS;
 }
 
+
+/*
+ * --------------------------------------------------------------------------
+ *  Command Handler for 'OVS_DP_CMD_SET'.
+ * --------------------------------------------------------------------------
+ */
 static NTSTATUS
+OvsSetDpCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                   UINT32 *replyLen)
+{
+    return HandleDpTransactionCommon(usrParamsCtx, replyLen);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  Function for handling transaction based 'OVS_DP_CMD_NEW', 'OVS_DP_CMD_GET'
+ *  and 'OVS_DP_CMD_SET' commands.
+ *
+ * 'OVS_DP_CMD_NEW' is implemented to keep userspace code happy. Creation of a
+ * new datapath is not supported currently.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+HandleDpTransactionCommon(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                          UINT32 *replyLen)
+{
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+    NTSTATUS status = STATUS_SUCCESS;
+    NL_BUFFER nlBuf;
+    NL_ERROR nlError = NL_ERROR_SUCCESS;
+    static const NL_POLICY ovsDatapathSetPolicy[] = {
+        [OVS_DP_ATTR_NAME] = { .type = NL_A_STRING, .maxLen = IFNAMSIZ },
+        [OVS_DP_ATTR_UPCALL_PID] = { .type = NL_A_U32, .optional = TRUE },
+        [OVS_DP_ATTR_USER_FEATURES] = { .type = NL_A_U32, .optional = TRUE },
+    };
+    PNL_ATTR dpAttrs[ARRAY_SIZE(ovsDatapathSetPolicy)];
+
+    UNREFERENCED_PARAMETER(msgOut);
+
+    /* input buffer has been validated while validating write dev op. */
+    ASSERT(msgIn != NULL && usrParamsCtx->inputLength >= sizeof *msgIn);
+
+    /* Parse any attributes in the request. */
+    if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET ||
+        usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
+        if (!NlAttrParse((PNL_MSG_HDR)msgIn,
+                        NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN,
+                        NlMsgAttrsLen((PNL_MSG_HDR)msgIn),
+                        ovsDatapathSetPolicy, dpAttrs, ARRAY_SIZE(dpAttrs))) {
+            return STATUS_INVALID_PARAMETER;
+        }
+
+        /*
+        * XXX: Not clear at this stage if there's any role for the
+        * OVS_DP_ATTR_UPCALL_PID and OVS_DP_ATTR_USER_FEATURES attributes passed
+        * from userspace.
+        */
+
+    } else {
+        RtlZeroMemory(dpAttrs, sizeof dpAttrs);
+    }
+
+    /* Output buffer has been validated while validating transact dev op. */
+    ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
+
+    NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
+
+    if (dpAttrs[OVS_DP_ATTR_NAME] != NULL) {
+        if (!OvsCompareString(NlAttrGet(dpAttrs[OVS_DP_ATTR_NAME]),
+                              OVS_SYSTEM_DP_NAME)) {
+
+            /* Creation of new datapaths is not supported. */
+            if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_SET) {
+                nlError = NL_ERROR_NOTSUPP;
+                goto cleanup;
+            }
+
+            nlError = NL_ERROR_NODEV;
+            goto cleanup;
+        }
+    } else if ((UINT32)msgIn->ovsHdr.dp_ifindex != gOvsSwitchContext->dpNo) {
+        nlError = NL_ERROR_NODEV;
+        goto cleanup;
+    }
+
+    if (usrParamsCtx->ovsMsg->genlMsg.cmd == OVS_DP_CMD_NEW) {
+        nlError = NL_ERROR_EXIST;
+        goto cleanup;
+    }
+
+    status = OvsDpFillInfo(gOvsSwitchContext, msgIn, &nlBuf);
+
+    *replyLen = NlBufSize(&nlBuf);
+
+cleanup:
+    if (nlError != NL_ERROR_SUCCESS) {
+        POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
+            usrParamsCtx->outputBuffer;
+
+        NlBuildErrorMsg(msgIn, msgError, nlError);
+        *replyLen = msgError->nlMsg.nlmsgLen;
+    }
+
+    return STATUS_SUCCESS;
+}
+
+
+NTSTATUS
 OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx)
 {
     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
@@ -950,13 +1428,12 @@ OvsSetupDumpStart(POVS_USER_PARAMS_CONTEXT usrParamsCtx)
      * This operation should be setting up the dump state. If there's any
      * previous state, clear it up so as to set it up afresh.
      */
-    if (instance->dumpState.ovsMsg != NULL) {
-        FreeUserDumpState(instance);
-    }
+    FreeUserDumpState(instance);
 
     return InitUserDumpState(instance, msgIn);
 }
 
+
 /*
  * --------------------------------------------------------------------------
  *  Utility function to map the output buffer in an IRP. The buffer is assumed
@@ -992,4 +1469,234 @@ MapIrpOutputBuffer(PIRP irp,
     return STATUS_SUCCESS;
 }
 
-#endif /* OVS_USE_NL_INTERFACE */
+/*
+ * --------------------------------------------------------------------------
+ * Utility function to fill up information about the state of a port in a reply
+ * to* userspace.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsPortFillInfo(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                POVS_EVENT_ENTRY eventEntry,
+                PNL_BUFFER nlBuf)
+{
+    NTSTATUS status;
+    BOOLEAN ok;
+    OVS_MESSAGE msgOutTmp;
+    PNL_MSG_HDR nlMsg;
+    POVS_VPORT_ENTRY vport;
+
+    ASSERT(NlBufAt(nlBuf, 0, 0) != 0 && nlBuf->bufRemLen >= sizeof msgOutTmp);
+
+    msgOutTmp.nlMsg.nlmsgType = OVS_WIN_NL_VPORT_FAMILY_ID;
+    msgOutTmp.nlMsg.nlmsgFlags = 0;  /* XXX: ? */
+
+    /* driver intiated messages should have zerp seq number*/
+    msgOutTmp.nlMsg.nlmsgSeq = 0;
+    msgOutTmp.nlMsg.nlmsgPid = usrParamsCtx->ovsInstance->pid;
+
+    msgOutTmp.genlMsg.version = nlVportFamilyOps.version;
+    msgOutTmp.genlMsg.reserved = 0;
+
+    /* we don't have netdev yet, treat link up/down a adding/removing a port*/
+    if (eventEntry->status & (OVS_EVENT_LINK_UP | OVS_EVENT_CONNECT)) {
+        msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_NEW;
+    } else if (eventEntry->status &
+             (OVS_EVENT_LINK_DOWN | OVS_EVENT_DISCONNECT)) {
+        msgOutTmp.genlMsg.cmd = OVS_VPORT_CMD_DEL;
+    } else {
+        ASSERT(FALSE);
+        return STATUS_UNSUCCESSFUL;
+    }
+    msgOutTmp.ovsHdr.dp_ifindex = gOvsSwitchContext->dpNo;
+
+    ok = NlMsgPutHead(nlBuf, (PCHAR)&msgOutTmp, sizeof msgOutTmp);
+    if (!ok) {
+        status = STATUS_INVALID_BUFFER_SIZE;
+        goto cleanup;
+    }
+
+    vport = OvsFindVportByPortNo(gOvsSwitchContext, eventEntry->portNo);
+    if (!vport) {
+        status = STATUS_DEVICE_DOES_NOT_EXIST;
+        goto cleanup;
+    }
+
+    ok = NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_PORT_NO, eventEntry->portNo) &&
+         NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_TYPE, vport->ovsType) &&
+         NlMsgPutTailU32(nlBuf, OVS_VPORT_ATTR_UPCALL_PID,
+                         vport->upcallPid) &&
+         NlMsgPutTailString(nlBuf, OVS_VPORT_ATTR_NAME, vport->ovsName);
+    if (!ok) {
+        status = STATUS_INVALID_BUFFER_SIZE;
+        goto cleanup;
+    }
+
+    /* XXXX Should we add the port stats attributes?*/
+    nlMsg = (PNL_MSG_HDR)NlBufAt(nlBuf, 0, 0);
+    nlMsg->nlmsgLen = NlBufSize(nlBuf);
+    status = STATUS_SUCCESS;
+
+cleanup:
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Handler for reading events from the driver event queue. This handler is
+ * executed when user modes issues a socket receive on a socket assocaited
+ * with the MC group for events.
+ * XXX user mode should read multiple events in one system call
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsReadEventCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+#ifdef DBG
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+#endif
+    NL_BUFFER nlBuf;
+    NTSTATUS status;
+    OVS_EVENT_ENTRY eventEntry;
+
+    ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
+
+    /* Should never read events with a dump socket */
+    ASSERT(instance->dumpState.ovsMsg == NULL);
+
+    /* Must have an event queue */
+    ASSERT(instance->eventQueue != NULL);
+
+    /* Output buffer has been validated while validating read dev op. */
+    ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
+
+    NlBufInit(&nlBuf, usrParamsCtx->outputBuffer, usrParamsCtx->outputLength);
+
+    /* remove an event entry from the event queue */
+    status = OvsRemoveEventEntry(usrParamsCtx->ovsInstance, &eventEntry);
+    if (status != STATUS_SUCCESS) {
+        /* If there were not elements, read should return no data. */
+        status = STATUS_SUCCESS;
+        *replyLen = 0;
+        goto cleanup;
+    }
+
+    status = OvsPortFillInfo(usrParamsCtx, &eventEntry, &nlBuf);
+    if (status == NDIS_STATUS_SUCCESS) {
+        *replyLen = NlBufSize(&nlBuf);
+    }
+
+cleanup:
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Handler for reading missed pacckets from the driver event queue. This
+ * handler is executed when user modes issues a socket receive on a socket
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsReadPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+#ifdef DBG
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+#endif
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    NTSTATUS status;
+
+    ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
+
+    /* Should never read events with a dump socket */
+    ASSERT(instance->dumpState.ovsMsg == NULL);
+
+    /* Must have an packet queue */
+    ASSERT(instance->packetQueue != NULL);
+
+    /* Output buffer has been validated while validating read dev op. */
+    ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
+
+    /* Read a packet from the instance queue */
+    status = OvsReadDpIoctl(instance->fileObject, usrParamsCtx->outputBuffer,
+                            usrParamsCtx->outputLength, replyLen);
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  Handler for the subscription for a packet queue
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsSubscribePacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                            UINT32 *replyLen)
+{
+    NDIS_STATUS status;
+    BOOLEAN rc;
+    UINT8 join;
+    UINT32 pid;
+    const NL_POLICY policy[] =  {
+        [OVS_NL_ATTR_PACKET_PID] = {.type = NL_A_U32 },
+        [OVS_NL_ATTR_PACKET_SUBSCRIBE] = {.type = NL_A_U8 }
+        };
+    PNL_ATTR attrs[ARRAY_SIZE(policy)];
+
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+
+    rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
+         NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, attrs, ARRAY_SIZE(attrs));
+    if (!rc) {
+        status = STATUS_INVALID_PARAMETER;
+        goto done;
+    }
+
+    join = NlAttrGetU8(attrs[OVS_NL_ATTR_PACKET_PID]);
+    pid = NlAttrGetU32(attrs[OVS_NL_ATTR_PACKET_PID]);
+
+    /* The socket subscribed with must be the same socket we perform receive*/
+    ASSERT(pid == instance->pid);
+
+    status = OvsSubscribeDpIoctl(instance, pid, join);
+
+    /*
+     * XXX Need to add this instance to a global data structure
+     * which hold all packet based instances. The data structure (hash)
+     * should be searched through the pid field of the instance for
+     * placing the missed packet into the correct queue
+     */
+done:
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Handler for queueing an IRP used for missed packet notification. The IRP is
+ * completed when a packet received and mismatched. STATUS_PENDING is returned
+ * on success. User mode keep a pending IRP at all times.
+ * --------------------------------------------------------------------------
+ */
+static NTSTATUS
+OvsPendPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+
+    /*
+     * XXX access to packet queue must be through acquiring a lock as user mode
+     * could unsubscribe and the instnace will be freed.
+     */
+    return OvsWaitDpIoctl(usrParamsCtx->irp, instance->fileObject);
+}