datapath-windows: Refactor sofware offloads and mss
[cascardo/ovs.git] / datapath-windows / ovsext / User.c
index 612a4bd..e97f2b2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 VMware, Inc.
+ * Copyright (c) 2014, 2016 VMware, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
 #include "precomp.h"
 
 #include "Datapath.h"
-#include "Switch.h"
-#include "Vport.h"
+#include "Debug.h"
 #include "Event.h"
-#include "User.h"
-#include "PacketIO.h"
-#include "Checksum.h"
-#include "NetProto.h"
 #include "Flow.h"
+#include "Jhash.h"
+#include "NetProto.h"
+#include "Offload.h"
+#include "PacketIO.h"
+#include "Switch.h"
 #include "TunnelIntf.h"
+#include "User.h"
+#include "Vport.h"
 
 #ifdef OVS_DBG_MOD
 #undef OVS_DBG_MOD
 #endif
 #define OVS_DBG_MOD OVS_DBG_USER
-#include "Debug.h"
-
-OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES];
 
 POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance);
 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
 OVS_USER_STATS ovsUserStats;
 
+static VOID _MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
+                                   OvsPacketExecute  *execute);
+extern NL_POLICY nlFlowKeyPolicy[];
+extern UINT32 nlFlowKeyPolicyLen;
 
-NTSTATUS
-OvsUserInit()
+static __inline VOID
+OvsAcquirePidHashLock()
 {
-    UINT32 i;
-    POVS_USER_PACKET_QUEUE queue;
-    for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
-        queue = &ovsPacketQueues[i];
-        RtlZeroMemory(queue, sizeof (*queue));
-        InitializeListHead(&queue->packetList);
-        NdisAllocateSpinLock(&queue->queueLock);
-    }
-    return STATUS_SUCCESS;
+    NdisAcquireSpinLock(&(gOvsSwitchContext->pidHashLock));
 }
 
-VOID
-OvsUserCleanup()
+static __inline VOID
+OvsReleasePidHashLock()
 {
-    UINT32 i;
-    POVS_USER_PACKET_QUEUE queue;
-    for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) {
-        queue = &ovsPacketQueues[i];
-        ASSERT(IsListEmpty(&queue->packetList));
-        ASSERT(queue->instance == NULL);
-        ASSERT(queue->pendingIrp == NULL);
-        NdisFreeSpinLock(&queue->queueLock);
-    }
+    NdisReleaseSpinLock(&(gOvsSwitchContext->pidHashLock));
 }
 
+
 static VOID
 OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
                     POVS_OPEN_INSTANCE instance)
@@ -98,11 +86,10 @@ OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue,
     LIST_FORALL_SAFE(&tmp, link, next) {
         RemoveEntryList(link);
         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
-        OvsFreeMemory(elem);
+        OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
     }
 }
 
-
 VOID
 OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
 {
@@ -112,13 +99,17 @@ OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
     LIST_ENTRY tmp;
     PIRP irp = NULL;
 
+    ASSERT(instance);
     InitializeListHead(&tmp);
     queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue;
     if (queue) {
         PDRIVER_CANCEL cancelRoutine;
         NdisAcquireSpinLock(&queue->queueLock);
+        ASSERT(queue->instance == instance);
+        /* XXX Should not happen */
         if (queue->instance != instance) {
             NdisReleaseSpinLock(&queue->queueLock);
+            NdisFreeSpinLock(&queue->queueLock);
             return;
         }
 
@@ -127,7 +118,6 @@ OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
             queue->numPackets = 0;
         }
         queue->instance = NULL;
-        queue->queueId = OVS_MAX_NUM_PACKET_QUEUES;
         instance->packetQueue = NULL;
         irp = queue->pendingIrp;
         queue->pendingIrp = NULL;
@@ -138,55 +128,67 @@ OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance)
             }
         }
         NdisReleaseSpinLock(&queue->queueLock);
+        NdisFreeSpinLock(&queue->queueLock);
     }
     LIST_FORALL_SAFE(&tmp, link, next) {
         RemoveEntryList(link);
         elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
-        OvsFreeMemory(elem);
+        OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
     }
     if (irp) {
         OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
     }
+    if (queue) {
+        OvsFreeMemoryWithTag(queue, OVS_USER_POOL_TAG);
+    }
+
+    /* Verify if gOvsSwitchContext exists. */
+    if (gOvsSwitchContext) {
+        /* Remove the instance from pidHashArray */
+        OvsAcquirePidHashLock();
+        OvsDelPidInstance(gOvsSwitchContext, instance->pid);
+        OvsReleasePidHashLock();
+    }
 }
 
 NTSTATUS
-OvsSubscribeDpIoctl(PFILE_OBJECT fileObject,
-                    PVOID inputBuffer,
-                    UINT32 inputLength)
+OvsSubscribeDpIoctl(PVOID instanceP,
+                    UINT32 pid,
+                    UINT8 join)
 {
-    POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
-    UINT32 queueId;
     POVS_USER_PACKET_QUEUE queue;
-    if (inputLength < sizeof (UINT32)) {
-        return STATUS_INVALID_PARAMETER;
-    }
-    queueId = *(UINT32 *)inputBuffer;
-    if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
-        /*
-         * unsubscribe
-         */
+    POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)instanceP;
+
+    if (instance->packetQueue && !join) {
+        /* unsubscribe */
         OvsCleanupPacketQueue(instance);
-    } else if (instance->packetQueue == NULL &&
-               queueId < OVS_MAX_NUM_PACKET_QUEUES) {
-        queue = &ovsPacketQueues[queueId];
-        NdisAcquireSpinLock(&queue->queueLock);
-        if (ovsPacketQueues[queueId].instance) {
-             if (ovsPacketQueues[queueId].instance != instance) {
-                 NdisReleaseSpinLock(&queue->queueLock);
-                 return STATUS_INSUFFICIENT_RESOURCES;
-             } else {
-                 NdisReleaseSpinLock(&queue->queueLock);
-                 return STATUS_SUCCESS;
-             }
+    } else if (instance->packetQueue == NULL && join) {
+        queue = (POVS_USER_PACKET_QUEUE) OvsAllocateMemoryWithTag(
+            sizeof *queue, OVS_USER_POOL_TAG);
+        if (queue == NULL) {
+            return STATUS_NO_MEMORY;
         }
-        queue->queueId = queueId;
+        InitializeListHead(&(instance->pidLink));
+        instance->packetQueue = queue;
+        RtlZeroMemory(queue, sizeof (*queue));
+        NdisAllocateSpinLock(&queue->queueLock);
+        NdisAcquireSpinLock(&queue->queueLock);
+        InitializeListHead(&queue->packetList);
+        queue->pid = pid;
         queue->instance = instance;
         instance->packetQueue = queue;
-        ASSERT(IsListEmpty(&queue->packetList));
         NdisReleaseSpinLock(&queue->queueLock);
+
+        OvsAcquirePidHashLock();
+        /* Insert the instance to pidHashArray */
+        OvsAddPidInstance(gOvsSwitchContext, pid, instance);
+        OvsReleasePidHashLock();
+
     } else {
+        /* user mode should call only once for subscribe */
         return STATUS_INVALID_PARAMETER;
     }
+
     return STATUS_SUCCESS;
 }
 
@@ -223,13 +225,12 @@ OvsReadDpIoctl(PFILE_OBJECT fileObject,
         if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) &&
             len == elem->packet.totalLen) {
             UINT16 sum, *ptr;
-            UINT16 size = (UINT16)(elem->packet.userDataLen +
-                                   elem->hdrInfo.l4Offset +
-                                   (UINT16)sizeof (OVS_PACKET_INFO));
-            RtlCopyMemory(outputBuffer, &elem->packet, size);
-            ASSERT(len - size >=  elem->hdrInfo.l4PayLoad);
+            UINT16 size = (UINT16)(elem->packet.payload - elem->packet.data +
+                                  elem->hdrInfo.l4Offset);
+            RtlCopyMemory(outputBuffer, &elem->packet.data, size);
+            ASSERT(len - size >= elem->hdrInfo.l4PayLoad);
             sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size,
-                                           (UINT8 *)&elem->packet + size,
+                                           (UINT8 *)&elem->packet.data + size,
                                            elem->hdrInfo.l4PayLoad, 0);
             ptr =(UINT16 *)((UINT8 *)outputBuffer + size +
                             (elem->hdrInfo.tcpCsumNeeded ?
@@ -237,11 +238,11 @@ OvsReadDpIoctl(PFILE_OBJECT fileObject,
             *ptr = sum;
             ovsUserStats.l4Csum++;
         } else {
-            RtlCopyMemory(outputBuffer, &elem->packet, len);
+            RtlCopyMemory(outputBuffer, &elem->packet.data, len);
         }
 
         *replyLen = len;
-        OvsFreeMemory(elem);
+        OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
     }
     return STATUS_SUCCESS;
 }
@@ -257,57 +258,137 @@ OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext,
 }
 
 /*
- * --------------------------------------------------------------------------
- * This function allocates all the stuff necessary for creating an NBL from the
- * input buffer of specified length, namely, a nonpaged data buffer of size
- * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
- * context yet. It also copies data from the specified buffer to the NBL.
- * --------------------------------------------------------------------------
+ *----------------------------------------------------------------------------
+ *  OvsNlExecuteCmdHandler --
+ *    Handler for OVS_PACKET_CMD_EXECUTE command.
+ *----------------------------------------------------------------------------
  */
-PNET_BUFFER_LIST
-OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext,
-                            PVOID userBuffer,
-                            ULONG length)
+NTSTATUS
+OvsNlExecuteCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
 {
-    UINT8 *data = NULL;
-    PNET_BUFFER_LIST nbl = NULL;
-    PNET_BUFFER nb;
-    PMDL mdl;
+    NTSTATUS status = STATUS_SUCCESS;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+    PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
+    PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
+    POVS_HDR ovsHdr = &(msgIn->ovsHdr);
+
+    PNL_ATTR nlAttrs[__OVS_PACKET_ATTR_MAX];
+    PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
+
+    UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
+    UINT32 keyAttrOffset = 0;
+    OvsPacketExecute execute;
+    NL_ERROR nlError = NL_ERROR_SUCCESS;
+    NL_BUFFER nlBuf;
+
+    static const NL_POLICY nlPktExecPolicy[] = {
+        [OVS_PACKET_ATTR_PACKET] = {.type = NL_A_UNSPEC, .optional = FALSE},
+        [OVS_PACKET_ATTR_KEY] = {.type = NL_A_UNSPEC, .optional = FALSE},
+        [OVS_PACKET_ATTR_ACTIONS] = {.type = NL_A_UNSPEC, .optional = FALSE},
+        [OVS_PACKET_ATTR_USERDATA] = {.type = NL_A_UNSPEC, .optional = TRUE},
+        [OVS_PACKET_ATTR_EGRESS_TUN_KEY] = {.type = NL_A_UNSPEC,
+                                            .optional = TRUE}
+    };
+
+    RtlZeroMemory(&execute, sizeof(OvsPacketExecute));
+
+    /* Get all the top level Flow attributes */
+    if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
+                     nlPktExecPolicy, ARRAY_SIZE(nlPktExecPolicy),
+                     nlAttrs, ARRAY_SIZE(nlAttrs)))
+                     != TRUE) {
+        OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
+                       nlMsgHdr);
+        status = STATUS_UNSUCCESSFUL;
+        goto done;
+    }
 
-    if (length > OVS_DEFAULT_DATA_SIZE) {
-        nbl = OvsAllocateVariableSizeNBL(switchContext, length,
-                                         OVS_DEFAULT_HEADROOM_SIZE);
+    keyAttrOffset = (UINT32)((PCHAR)nlAttrs[OVS_PACKET_ATTR_KEY] -
+                    (PCHAR)nlMsgHdr);
 
-    } else {
-        nbl = OvsAllocateFixSizeNBL(switchContext, length,
-                                    OVS_DEFAULT_HEADROOM_SIZE);
-    }
-    if (nbl == NULL) {
-        return NULL;
+    /* Get flow keys attributes */
+    if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset,
+                           NlAttrLen(nlAttrs[OVS_PACKET_ATTR_KEY]),
+                           nlFlowKeyPolicy, nlFlowKeyPolicyLen,
+                           keyAttrs, ARRAY_SIZE(keyAttrs))) != TRUE) {
+        OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p", nlMsgHdr);
+        status = STATUS_UNSUCCESSFUL;
+        goto done;
     }
 
-    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
-    mdl = NET_BUFFER_CURRENT_MDL(nb);
-    data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
-                    NET_BUFFER_CURRENT_MDL_OFFSET(nb);
-    if (!data) {
-        OvsCompleteNBL(switchContext, nbl, TRUE);
-        return NULL;
+    execute.dpNo = ovsHdr->dp_ifindex;
+
+    _MapNlAttrToOvsPktExec(nlAttrs, keyAttrs, &execute);
+
+    status = OvsExecuteDpIoctl(&execute);
+
+    /* Default reply that we want to send */
+    if (status == STATUS_SUCCESS) {
+        BOOLEAN ok;
+
+        NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
+                  usrParamsCtx->outputLength);
+
+        /* Prepare nl Msg headers */
+        ok = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
+                 nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
+                 genlMsgHdr->cmd, OVS_PACKET_VERSION,
+                 ovsHdr->dp_ifindex);
+
+        if (ok) {
+            *replyLen = msgOut->nlMsg.nlmsgLen;
+        } else {
+            status = STATUS_INVALID_BUFFER_SIZE;
+        }
+    } else {
+        /* Map NTSTATUS to NL_ERROR */
+        nlError = NlMapStatusToNlErr(status);
+
+        /* As of now there are no transactional errors in the implementation.
+         * Once we have them then we need to map status to correct
+         * nlError value, so that below mentioned code gets hit. */
+        if ((nlError != NL_ERROR_SUCCESS) &&
+            (usrParamsCtx->outputBuffer)) {
+
+            POVS_MESSAGE_ERROR msgError = (POVS_MESSAGE_ERROR)
+                                           usrParamsCtx->outputBuffer;
+            NlBuildErrorMsg(msgIn, msgError, nlError);
+            *replyLen = msgError->nlMsg.nlmsgLen;
+            status = STATUS_SUCCESS;
+            goto done;
+        }
     }
 
-    NdisMoveMemory(data, userBuffer, length);
+done:
+    return status;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *  _MapNlAttrToOvsPktExec --
+ *    Maps input Netlink attributes to OvsPacketExecute.
+ *----------------------------------------------------------------------------
+ */
+static VOID
+_MapNlAttrToOvsPktExec(PNL_ATTR *nlAttrs, PNL_ATTR *keyAttrs,
+                       OvsPacketExecute *execute)
+{
+    execute->packetBuf = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_PACKET]);
+    execute->packetLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_PACKET]);
+
+    execute->actions = NlAttrGet(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
+    execute->actionsLen = NlAttrGetSize(nlAttrs[OVS_PACKET_ATTR_ACTIONS]);
 
-    return nbl;
+    execute->inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
 }
 
 NTSTATUS
-OvsExecuteDpIoctl(PVOID inputBuffer,
-                  UINT32 inputLength,
-                  UINT32 outputLength)
+OvsExecuteDpIoctl(OvsPacketExecute *execute)
 {
     NTSTATUS                    status = STATUS_SUCCESS;
     NTSTATUS                    ndisStatus;
-    OvsPacketExecute            *execute;
     LOCK_STATE_EX               lockState;
     PNET_BUFFER_LIST pNbl;
     PNL_ATTR actions;
@@ -316,39 +397,24 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
     OVS_PACKET_HDR_INFO layers;
     POVS_VPORT_ENTRY vport;
 
-    if (inputLength < sizeof(*execute) || outputLength != 0) {
-        return STATUS_INFO_LENGTH_MISMATCH;
-    }
-
-    NdisAcquireSpinLock(gOvsCtrlLock);
-    if (gOvsSwitchContext == NULL) {
-        status = STATUS_INVALID_PARAMETER;
-        goto unlock;
-    }
-
-    execute = (struct OvsPacketExecute *) inputBuffer;
-
     if (execute->packetLen == 0) {
         status = STATUS_INVALID_PARAMETER;
-        goto unlock;
+        goto exit;
     }
 
-    if (inputLength != sizeof (*execute) +
-                       execute->actionsLen + execute->packetLen) {
-        status = STATUS_INFO_LENGTH_MISMATCH;
-        goto unlock;
-    }
-    actions = (PNL_ATTR)((PCHAR)&execute->actions + execute->packetLen);
+    actions = execute->actions;
+
+    ASSERT(actions);
 
     /*
      * Allocate the NBL, copy the data from the userspace buffer. Allocate
      * also, the forwarding context for the packet.
      */
-    pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, &execute->packetBuf,
-                                       execute->packetLen);
+    pNbl = OvsAllocateNBLFromBuffer(gOvsSwitchContext, execute->packetBuf,
+                                    execute->packetLen);
     if (pNbl == NULL) {
         status = STATUS_NO_MEMORY;
-        goto unlock;
+        goto exit;
     }
 
     fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl);
@@ -363,14 +429,12 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
     // XXX: Figure out if any of the other members of fwdDetail need to be set.
 
     ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers,
-                              NULL);
+                                NULL);
     if (ndisStatus == NDIS_STATUS_SUCCESS) {
-        ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL);
-        NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState,
-                              NDIS_RWL_AT_DISPATCH_LEVEL);
+        NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, 0);
         ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl,
                                        vport ? vport->portNo :
-                                               OVS_DEFAULT_PORT_NO,
+                                               OVS_DPPORT_NUMBER_INVALID,
                                        NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP,
                                        &key, NULL, &layers, actions,
                                        execute->actionsLen);
@@ -378,14 +442,17 @@ OvsExecuteDpIoctl(PVOID inputBuffer,
         NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState);
     }
     if (ndisStatus != NDIS_STATUS_SUCCESS) {
-        status = STATUS_UNSUCCESSFUL;
+        if (ndisStatus == NDIS_STATUS_NOT_SUPPORTED) {
+            status = STATUS_NOT_SUPPORTED;
+        } else {
+            status = STATUS_UNSUCCESSFUL;
+        }
     }
 
     if (pNbl) {
         OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE);
     }
-unlock:
-    NdisReleaseSpinLock(gOvsCtrlLock);
+exit:
     return status;
 }
 
@@ -505,34 +572,357 @@ OvsGetNextPacket(POVS_OPEN_INSTANCE instance)
     return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
 }
 
-
+/*
+ * ---------------------------------------------------------------------------
+ * Given a pid, returns the corresponding USER_PACKET_QUEUE.
+ * ---------------------------------------------------------------------------
+ */
 POVS_USER_PACKET_QUEUE
-OvsGetQueue(UINT32 queueId)
+OvsGetQueue(UINT32 pid)
 {
-    POVS_USER_PACKET_QUEUE queue;
-    if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) {
-        return NULL;
+    POVS_OPEN_INSTANCE instance;
+    POVS_USER_PACKET_QUEUE ret = NULL;
+
+    instance = OvsGetPidInstance(gOvsSwitchContext, pid);
+
+    if (instance) {
+        ret = instance->packetQueue;
     }
-    queue = &ovsPacketQueues[queueId];
-    return queue->instance != NULL ? queue : NULL;
+
+    return ret;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Given a pid, returns the corresponding instance.
+ * pidHashLock must be acquired before calling this API.
+ * ---------------------------------------------------------------------------
+ */
+POVS_OPEN_INSTANCE
+OvsGetPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
+{
+    POVS_OPEN_INSTANCE instance;
+    PLIST_ENTRY head, link;
+    UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
+                                OVS_HASH_BASIS);
+    head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
+    LIST_FORALL(head, link) {
+        instance = CONTAINING_RECORD(link, OVS_OPEN_INSTANCE, pidLink);
+        if (instance->pid == pid) {
+            return instance;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Given a pid and an instance. This API adds instance to pidHashArray.
+ * pidHashLock must be acquired before calling this API.
+ * ---------------------------------------------------------------------------
+ */
+VOID
+OvsAddPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid,
+                  POVS_OPEN_INSTANCE instance)
+{
+    PLIST_ENTRY head;
+    UINT32 hash = OvsJhashBytes((const VOID *)&pid, sizeof(pid),
+                                OVS_HASH_BASIS);
+    head = &(switchContext->pidHashArray[hash & OVS_PID_MASK]);
+    InsertHeadList(head, &(instance->pidLink));
+}
+
+/*
+ * ---------------------------------------------------------------------------
+ * Given a pid and an instance. This API removes instance from pidHashArray.
+ * pidHashLock must be acquired before calling this API.
+ * ---------------------------------------------------------------------------
+ */
+VOID
+OvsDelPidInstance(POVS_SWITCH_CONTEXT switchContext, UINT32 pid)
+{
+    POVS_OPEN_INSTANCE instance = OvsGetPidInstance(switchContext, pid);
+
+    if (instance) {
+        RemoveEntryList(&(instance->pidLink));
+    }
+}
+
+VOID
+OvsQueuePackets(PLIST_ENTRY packetList,
+                UINT32 numElems)
+{
+    POVS_USER_PACKET_QUEUE upcallQueue = NULL;
+    POVS_PACKET_QUEUE_ELEM elem;
+    PLIST_ENTRY  link;
+    UINT32 num = 0;
+    LIST_ENTRY dropPackets;
+
+    OVS_LOG_LOUD("Enter: numELems: %u", numElems);
+
+    InitializeListHead(&dropPackets);
+
+    while (!IsListEmpty(packetList)) {
+        link = RemoveHeadList(packetList);
+        elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
+
+        ASSERT(elem);
+
+        OvsAcquirePidHashLock();
+
+        upcallQueue = OvsGetQueue(elem->upcallPid);
+        if (!upcallQueue) {
+            /* No upcall queue found, drop this packet. */
+            InsertTailList(&dropPackets, &elem->link);
+        } else {
+            NdisAcquireSpinLock(&upcallQueue->queueLock);
+
+            if (upcallQueue->instance == NULL) {
+                InsertTailList(&dropPackets, &elem->link);
+            } else {
+                InsertTailList(&upcallQueue->packetList, &elem->link);
+                upcallQueue->numPackets++;
+                if (upcallQueue->pendingIrp) {
+                    PIRP irp = upcallQueue->pendingIrp;
+                    PDRIVER_CANCEL cancelRoutine;
+                    upcallQueue->pendingIrp = NULL;
+                    cancelRoutine = IoSetCancelRoutine(irp, NULL);
+                    if (cancelRoutine != NULL) {
+                        OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+                    }
+                }
+            }
+            NdisReleaseSpinLock(&upcallQueue->queueLock);
+        }
+        OvsReleasePidHashLock();
+    }
+
+    while (!IsListEmpty(&dropPackets)) {
+        link = RemoveHeadList(&dropPackets);
+        elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
+        OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
+        num++;
+    }
+
+    OVS_LOG_LOUD("Exit: drop %u packets", num);
 }
 
 /*
  *----------------------------------------------------------------------------
- * OvsCreateQueuePacket --
+ * OvsCreateAndAddPackets --
+ *
+ *  Create a packet and forwarded to user space.
+ *
+ *  This function would fragment packet if needed, and queue
+ *  each segment to user space.
+ *----------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsCreateAndAddPackets(PVOID userData,
+                       UINT32 userDataLen,
+                       UINT32 cmd,
+                       POVS_VPORT_ENTRY vport,
+                       OvsFlowKey *key,
+                       PNET_BUFFER_LIST nbl,
+                       BOOLEAN isRecv,
+                       POVS_PACKET_HDR_INFO hdrInfo,
+                       POVS_SWITCH_CONTEXT switchContext,
+                       LIST_ENTRY *list,
+                       UINT32 *num)
+{
+    POVS_PACKET_QUEUE_ELEM elem;
+    PNET_BUFFER_LIST newNbl = NULL;
+    PNET_BUFFER nb;
+
+    if (hdrInfo->isTcp) {
+        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
+        UINT32 packetLength;
+
+        tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
+        nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+        packetLength = NET_BUFFER_DATA_LENGTH(nb);
+
+        OVS_LOG_TRACE("MSS %u packet len %u",
+                tsoInfo.LsoV1Transmit.MSS, packetLength);
+        if (tsoInfo.LsoV1Transmit.MSS) {
+            OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
+            newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
+                    tsoInfo.LsoV1Transmit.MSS , 0);
+            if (newNbl == NULL) {
+                return NDIS_STATUS_FAILURE;
+            }
+            nbl = newNbl;
+        }
+    }
+
+    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
+    while (nb) {
+        elem = OvsCreateQueueNlPacket(userData, userDataLen,
+                                    cmd, vport, key, nbl, nb,
+                                    isRecv, hdrInfo);
+        if (elem) {
+            InsertTailList(list, &elem->link);
+            (*num)++;
+        }
+        nb = NET_BUFFER_NEXT_NB(nb);
+    }
+    if (newNbl) {
+        OvsCompleteNBL(switchContext, newNbl, TRUE);
+    }
+    return NDIS_STATUS_SUCCESS;
+}
+
+static __inline UINT32
+OvsGetUpcallMsgSize(PVOID userData,
+                    UINT32 userDataLen,
+                    OvsIPv4TunnelKey *tunnelKey,
+                    UINT32 payload)
+{
+    UINT32 size = NLMSG_ALIGN(sizeof(struct ovs_header)) +
+                  NlAttrSize(payload) +
+                  NlAttrSize(OvsFlowKeyAttrSize());
+
+    /* OVS_PACKET_ATTR_USERDATA */
+    if (userData) {
+        size += NlAttrTotalSize(userDataLen);
+    }
+    /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
+    /* Is it included in the flow key attr XXX */
+    if (tunnelKey) {
+        size += NlAttrTotalSize(OvsTunKeyAttrSize());
+    }
+    return size;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * This function completes the IP Header csum. record the L4 payload offset and
+ * if there is a need to calculate the TCP or UDP csum. The actual csum will be
+ * caluculated simopultaneossly with the copy of the payload to the destination
+ * buffer when the packet is read.
+ *----------------------------------------------------------------------------
+ */
+static VOID
+OvsCompletePacketHeader(UINT8 *packet,
+                        BOOLEAN isRecv,
+                        NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo,
+                        POVS_PACKET_HDR_INFO hdrInfoIn,
+                        POVS_PACKET_HDR_INFO hdrInfoOut)
+{
+    if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
+        (!isRecv && csumInfo.Transmit.IsIPv4 &&
+        csumInfo.Transmit.IpHeaderChecksum)) {
+        PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoOut->l3Offset);
+        ASSERT(hdrInfoIn->isIPv4);
+        ASSERT(ipHdr->Version == 4);
+        ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
+            ipHdr->HeaderLength << 2,
+            (UINT16)~ipHdr->HeaderChecksum);
+        ovsUserStats.ipCsum++;
+    }
+    ASSERT(hdrInfoIn->tcpCsumNeeded == 0 && hdrInfoOut->udpCsumNeeded == 0);
+    /*
+     * calculate TCP/UDP pseudo checksum
+     */
+    if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
+        /*
+         * Only this case, we need to reclaculate pseudo checksum
+         * all other cases, it is assumed the pseudo checksum is
+         * filled already.
+         *
+         */
+        PTCP_HDR tcpHdr = (PTCP_HDR)(packet + hdrInfoIn->l4Offset);
+        if (hdrInfoIn->isIPv4) {
+            PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
+            hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
+                                    (ipHdr->HeaderLength << 2));
+            tcpHdr->th_sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
+                                         (UINT32 *)&ipHdr->DestinationAddress,
+                                         IPPROTO_TCP, hdrInfoOut->l4PayLoad);
+        } else {
+            PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet + hdrInfoIn->l3Offset);
+            hdrInfoOut->l4PayLoad =
+                (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
+                hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
+                hdrInfoIn->l4Offset);
+            ASSERT(hdrInfoIn->isIPv6);
+            tcpHdr->th_sum =
+                IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
+                (UINT32 *)&ipv6Hdr->DestinationAddress,
+                IPPROTO_TCP, hdrInfoOut->l4PayLoad);
+        }
+        hdrInfoOut->tcpCsumNeeded = 1;
+        ovsUserStats.recalTcpCsum++;
+    } else if (!isRecv) {
+        if (csumInfo.Transmit.TcpChecksum) {
+            hdrInfoOut->tcpCsumNeeded = 1;
+        } else if (csumInfo.Transmit.UdpChecksum) {
+            hdrInfoOut->udpCsumNeeded = 1;
+        }
+        if (hdrInfoOut->tcpCsumNeeded || hdrInfoOut->udpCsumNeeded) {
+#ifdef DBG
+            UINT16 sum, *ptr;
+            UINT8 proto =
+                hdrInfoOut->tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
+#endif
+            if (hdrInfoIn->isIPv4) {
+                PIPV4_HEADER ipHdr = (PIPV4_HEADER)(packet + hdrInfoIn->l3Offset);
+                hdrInfoOut->l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
+                    (ipHdr->HeaderLength << 2));
+#ifdef DBG
+                sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
+                    (UINT32 *)&ipHdr->DestinationAddress,
+                    proto, hdrInfoOut->l4PayLoad);
+#endif
+            } else {
+                PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(packet +
+                    hdrInfoIn->l3Offset);
+                hdrInfoOut->l4PayLoad =
+                    (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
+                    hdrInfoIn->l3Offset + sizeof(IPV6_HEADER)-
+                    hdrInfoIn->l4Offset);
+                ASSERT(hdrInfoIn->isIPv6);
+#ifdef DBG
+                sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
+                    (UINT32 *)&ipv6Hdr->DestinationAddress,
+                    proto, hdrInfoOut->l4PayLoad);
+#endif
+            }
+#ifdef DBG
+            ptr = (UINT16 *)(packet + hdrInfoIn->l4Offset +
+                (hdrInfoOut->tcpCsumNeeded ?
+            TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
+            ASSERT(*ptr == sum);
+#endif
+        }
+    }
+}
+
+static NTSTATUS
+OvsGetPid(POVS_VPORT_ENTRY vport, PNET_BUFFER nb, UINT32 *pid)
+{
+    UNREFERENCED_PARAMETER(nb);
+
+    ASSERT(vport);
+
+    /* XXX select a pid from an array of pids using a flow based hash */
+    *pid = vport->upcallPid;
+    return STATUS_SUCCESS;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ * OvsCreateQueueNlPacket --
  *
  *  Create a packet which will be forwarded to user space.
  *
  * InputParameter:
- *   queueId Identify the queue the packet to be inserted
- *      This will be used when multiple queues is supported
- *      in userspace
  *   userData: when cmd is user action, this field contain
  *      user action data.
  *   userDataLen: as name indicated
  *   cmd: either miss or user action
  *   inPort: datapath port id from which the packet is received.
- *   tunnelKey: tunnelKey for tunneled packet
+ *   key: flow Key with a tunnel key if available
  *   nbl:  the NET_BUFFER_LIST which contain the packet
  *   nb: the packet
  *   isRecv: This is used to decide how to interprete the csum info
@@ -544,195 +934,150 @@ OvsGetQueue(UINT32 queueId)
  *----------------------------------------------------------------------------
  */
 POVS_PACKET_QUEUE_ELEM
-OvsCreateQueuePacket(UINT32 queueId,
-                     PVOID userData,
-                     UINT32 userDataLen,
-                     UINT32 cmd,
-                     UINT32 inPort,
-                     OvsIPv4TunnelKey *tunnelKey,
-                     PNET_BUFFER_LIST nbl,
-                     PNET_BUFFER nb,
-                     BOOLEAN isRecv,
-                     POVS_PACKET_HDR_INFO hdrInfo)
+OvsCreateQueueNlPacket(PVOID userData,
+                       UINT32 userDataLen,
+                       UINT32 cmd,
+                       POVS_VPORT_ENTRY vport,
+                       OvsFlowKey *key,
+                       PNET_BUFFER_LIST nbl,
+                       PNET_BUFFER nb,
+                       BOOLEAN isRecv,
+                       POVS_PACKET_HDR_INFO hdrInfo)
 {
 #define VLAN_TAG_SIZE 4
-    UINT32 allocLen, dataLen, extraLen = 0;
+    UINT32 allocLen, dataLen, extraLen;
     POVS_PACKET_QUEUE_ELEM elem;
-    PMDL mdl;
     UINT8 *src, *dst;
     NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
     NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo;
+    OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
+    UINT32 pid;
+    UINT32 nlMsgSize;
+    NL_BUFFER nlBuf;
+    PNL_MSG_HDR nlMsg;
+
+    if (vport == NULL){
+        /* No vport is not fatal. */
+        return NULL;
+    }
+
+    OvsGetPid(vport, nb, &pid);
+
+    if (!pid) {
+        /*
+         * There is no userspace queue created yet, so there is no point for
+         * creating a new packet to be queued.
+         */
+        return NULL;
+    }
 
     csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo);
 
     if (isRecv && (csumInfo.Receive.TcpChecksumFailed ||
-                   (csumInfo.Receive.UdpChecksumFailed &&
-                    !hdrInfo->udpCsumZero) ||
-                   csumInfo.Receive.IpChecksumFailed)) {
+                  (csumInfo.Receive.UdpChecksumFailed && !hdrInfo->udpCsumZero) ||
+                  csumInfo.Receive.IpChecksumFailed)) {
         OVS_LOG_INFO("Packet dropped due to checksum failure.");
         ovsUserStats.dropDuetoChecksum++;
         return NULL;
     }
 
     vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo);
-    if (vlanInfo.TagHeader.VlanId) {
-        /*
-         * We may also need to check priority XXX
-         */
-        extraLen = VLAN_TAG_SIZE;
-    }
+    extraLen = vlanInfo.TagHeader.VlanId ? VLAN_TAG_SIZE : 0;
 
     dataLen = NET_BUFFER_DATA_LENGTH(nb);
-    allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + userDataLen + dataLen +
-           extraLen;
 
-    elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen);
+    if (NlAttrSize(dataLen) > MAXUINT16) {
+        return NULL;
+    }
+
+    nlMsgSize = OvsGetUpcallMsgSize(userData, userDataLen, tunnelKey,
+                                    dataLen + extraLen);
+
+    allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + nlMsgSize;
+    elem = (POVS_PACKET_QUEUE_ELEM)OvsAllocateMemoryWithTag(allocLen,
+                                                            OVS_USER_POOL_TAG);
     if (elem == NULL) {
         ovsUserStats.dropDuetoResource++;
         return NULL;
     }
     elem->hdrInfo.value = hdrInfo->value;
-    elem->packet.totalLen = sizeof (OVS_PACKET_INFO) + userDataLen + dataLen +
-       extraLen;
-    elem->packet.queue = queueId;
+    elem->upcallPid = pid;
+    elem->packet.totalLen = nlMsgSize;
+    /* XXX remove queueid */
+    elem->packet.queue = 0;
+    /* XXX  no need as the length is already in the NL attrib */
     elem->packet.userDataLen = userDataLen;
-    elem->packet.inPort = inPort;
+    elem->packet.inPort = vport->portNo;
     elem->packet.cmd = cmd;
     if (cmd == (UINT32)OVS_PACKET_CMD_MISS) {
         ovsUserStats.miss++;
-    } else {
+    } else if (cmd == (UINT32)OVS_PACKET_CMD_ACTION) {
         ovsUserStats.action++;
+    } else {
+        ASSERT(FALSE);
+        goto fail;
     }
+    /* XXX Should we have both packetLen and TotalLen*/
     elem->packet.packetLen = dataLen + extraLen;
-    if (tunnelKey) {
-        RtlCopyMemory(&elem->packet.tunnelKey, tunnelKey,
-                      sizeof (*tunnelKey));
-    } else {
-        RtlZeroMemory(&elem->packet.tunnelKey,
-                      sizeof (elem->packet.tunnelKey));
+
+    NlBufInit(&nlBuf, (PCHAR)elem->packet.data, nlMsgSize);
+
+    /*
+     * Initialize the OVS header
+     * Since we are pre allocating memory for the NL buffer
+     * the attribute settings should not fail
+     */
+    if (!NlFillOvsMsg(&nlBuf, OVS_WIN_NL_PACKET_FAMILY_ID, 0,
+                      0, pid, (UINT8)cmd, OVS_PACKET_VERSION,
+                      gOvsSwitchContext->dpNo)) {
+        goto fail;
+    }
+
+    if (MapFlowKeyToNlKey(&nlBuf, key, OVS_PACKET_ATTR_KEY,
+                          OVS_KEY_ATTR_TUNNEL) != STATUS_SUCCESS) {
+        goto fail;
+    }
+
+    /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
+    if (userData){
+        if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
+                                userData, (UINT16)userDataLen)) {
+            goto fail;
+        }
     }
 
-    dst = elem->packet.data;
-    if (userDataLen) {
-        RtlCopyMemory(dst, userData, userDataLen);
-        dst = dst + userDataLen;
+    /*
+     * Make space for the payload to be copied and set the attribute
+     * XXX Uninit set initilizes the buffer with xero, we don't actually need
+     * that the payload to be initailized
+     */
+    dst = (UINT8 *)NlMsgPutTailUnspecUninit(&nlBuf, OVS_PACKET_ATTR_PACKET,
+                                            (UINT16)(dataLen + extraLen));
+    if (!dst) {
+        goto fail;
     }
+
+    /* Store the payload for csum calculation when packet is read */
+    elem->packet.payload = dst;
     dst += extraLen;
 
-    mdl = NET_BUFFER_CURRENT_MDL(nb);
     src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0);
     if (src == NULL) {
-        OvsFreeMemory(elem);
         ovsUserStats.dropDuetoResource++;
-        return NULL;
-    } else if (src != dst) {
+        goto fail;
+    }    else if (src != dst) {
         /* Copy the data from the NDIS buffer to dst. */
         RtlCopyMemory(dst, src, dataLen);
     }
 
-    dst =  elem->packet.data + userDataLen + extraLen;
-    /*
-     * Fix IP hdr if necessary
-     */
-    if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) ||
-        (!isRecv && csumInfo.Transmit.IsIPv4 &&
-         csumInfo.Transmit.IpHeaderChecksum)) {
-        PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
-        ASSERT(elem->hdrInfo.isIPv4);
-        ASSERT(ipHdr->Version == 4);
-        ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr,
-                                           ipHdr->HeaderLength << 2,
-                                           (UINT16)~ipHdr->HeaderChecksum);
-        ovsUserStats.ipCsum++;
-    }
-    ASSERT(elem->hdrInfo.tcpCsumNeeded == 0 &&
-           elem->hdrInfo.udpCsumNeeded == 0);
-    /*
-     * Fow now, we will not do verification
-     * There is no correctness issue here.
-     * XXX
-     */
-    /*
-     * calculate TCP/UDP pseudo checksum
-     */
-    if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) {
-        /*
-         * Only this case, we need to reclaculate pseudo checksum
-         * all other cases, it is assumed the pseudo checksum is
-         * filled already.
-         *
-         */
-        PTCP_HDR tcpHdr = (PTCP_HDR)(dst + hdrInfo->l4Offset);
-        if (hdrInfo->isIPv4) {
-            PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
-            elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
-                                               (ipHdr->HeaderLength << 2));
-            tcpHdr->th_sum =
-                 IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
-                                  (UINT32 *)&ipHdr->DestinationAddress,
-                                  IPPROTO_TCP, elem->hdrInfo.l4PayLoad);
-        } else {
-            PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + hdrInfo->l3Offset);
-            elem->hdrInfo.l4PayLoad =
-                  (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
-                           hdrInfo->l3Offset + sizeof(IPV6_HEADER) -
-                           hdrInfo->l4Offset);
-            ASSERT(hdrInfo->isIPv6);
-            tcpHdr->th_sum =
-                IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
-                                   (UINT32 *)&ipv6Hdr->DestinationAddress,
-                                   IPPROTO_TCP, elem->hdrInfo.l4PayLoad);
-        }
-        elem->hdrInfo.tcpCsumNeeded = 1;
-        ovsUserStats.recalTcpCsum++;
-    } else if (!isRecv) {
-        if (csumInfo.Transmit.TcpChecksum) {
-            elem->hdrInfo.tcpCsumNeeded = 1;
-        } else if (csumInfo.Transmit.UdpChecksum) {
-            elem->hdrInfo.udpCsumNeeded = 1;
-        }
-        if (elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) {
-#ifdef DBG
-            UINT16 sum, *ptr;
-            UINT8 proto =
-               elem->hdrInfo.tcpCsumNeeded ? IPPROTO_TCP : IPPROTO_UDP;
-#endif
-            if (hdrInfo->isIPv4) {
-                PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset);
-                elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) -
-                                                   (ipHdr->HeaderLength << 2));
-#ifdef DBG
-                sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress,
-                                       (UINT32 *)&ipHdr->DestinationAddress,
-                                       proto, elem->hdrInfo.l4PayLoad);
-#endif
-            } else {
-                PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst +
-                                                      hdrInfo->l3Offset);
-                elem->hdrInfo.l4PayLoad =
-                       (UINT16)(ntohs(ipv6Hdr->PayloadLength) +
-                                hdrInfo->l3Offset + sizeof(IPV6_HEADER) -
-                                hdrInfo->l4Offset);
-                ASSERT(hdrInfo->isIPv6);
-#ifdef DBG
-                sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress,
-                                         (UINT32 *)&ipv6Hdr->DestinationAddress,
-                                         proto, elem->hdrInfo.l4PayLoad);
-#endif
-            }
-#ifdef DBG
-            ptr = (UINT16 *)(dst + hdrInfo->l4Offset +
-                             (elem->hdrInfo.tcpCsumNeeded ?
-                              TCP_CSUM_OFFSET : UDP_CSUM_OFFSET));
-            ASSERT(*ptr == sum);
-#endif
-        }
-    }
+    /* Set csum if was offloaded */
+    OvsCompletePacketHeader(dst, isRecv, csumInfo, hdrInfo, &elem->hdrInfo);
+
     /*
      * Finally insert VLAN tag
      */
     if (extraLen) {
-        dst = elem->packet.data + userDataLen;
+        dst = elem->packet.payload;
         src = dst + extraLen;
         ((UINT32 *)dst)[0] = ((UINT32 *)src)[0];
         ((UINT32 *)dst)[1] = ((UINT32 *)src)[1];
@@ -740,128 +1085,127 @@ OvsCreateQueuePacket(UINT32 queueId,
         dst += 12;
         ((UINT16 *)dst)[0] = htons(0x8100);
         ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId |
-                                   (vlanInfo.TagHeader.UserPriority << 13));
+            (vlanInfo.TagHeader.UserPriority << 13));
         elem->hdrInfo.l3Offset += VLAN_TAG_SIZE;
         elem->hdrInfo.l4Offset += VLAN_TAG_SIZE;
         ovsUserStats.vlanInsert++;
     }
 
+    nlMsg = (PNL_MSG_HDR)NlBufAt(&nlBuf, 0, 0);
+    nlMsg->nlmsgLen = NlBufSize(&nlBuf);
+    /* 'totalLen' should be size of valid data. */
+    elem->packet.totalLen = nlMsg->nlmsgLen;
+
     return elem;
+fail:
+    OvsFreeMemoryWithTag(elem, OVS_USER_POOL_TAG);
+    return NULL;
 }
 
-
-VOID
-OvsQueuePackets(UINT32 queueId,
-                PLIST_ENTRY packetList,
-                UINT32 numElems)
+/*
+ * --------------------------------------------------------------------------
+ *  Handler for the subscription for a packet queue
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsSubscribePacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                             UINT32 *replyLen)
 {
-    POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId);
-    POVS_PACKET_QUEUE_ELEM elem;
-    PIRP irp = NULL;
-    PLIST_ENTRY  link;
-    UINT32 num = 0;
-
-    OVS_LOG_LOUD("Enter: queueId %u, numELems: %u",
-                  queueId, numElems);
-    if (queue == NULL) {
-        goto cleanup;
+    NDIS_STATUS status;
+    BOOLEAN rc;
+    UINT8 join;
+    UINT32 pid;
+    const NL_POLICY policy[] =  {
+        [OVS_NL_ATTR_PACKET_PID] = {.type = NL_A_U32 },
+        [OVS_NL_ATTR_PACKET_SUBSCRIBE] = {.type = NL_A_U8 }
+        };
+    PNL_ATTR attrs[ARRAY_SIZE(policy)];
+
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
+
+    rc = NlAttrParse(&msgIn->nlMsg, sizeof (*msgIn),
+         NlMsgAttrsLen((PNL_MSG_HDR)msgIn), policy, ARRAY_SIZE(policy),
+                       attrs, ARRAY_SIZE(attrs));
+    if (!rc) {
+        status = STATUS_INVALID_PARAMETER;
+        goto done;
     }
 
-    NdisAcquireSpinLock(&queue->queueLock);
-    if (queue->instance == NULL) {
-        NdisReleaseSpinLock(&queue->queueLock);
-        goto cleanup;
-    } else {
-        OvsAppendList(&queue->packetList, packetList);
-        queue->numPackets += numElems;
-    }
-    if (queue->pendingIrp) {
-        PDRIVER_CANCEL cancelRoutine;
-        irp = queue->pendingIrp;
-        queue->pendingIrp = NULL;
-        cancelRoutine = IoSetCancelRoutine(irp, NULL);
-        if (cancelRoutine == NULL) {
-            irp = NULL;
-        }
-    }
-    NdisReleaseSpinLock(&queue->queueLock);
-    if (irp) {
-        OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
-    }
+    join = NlAttrGetU8(attrs[OVS_NL_ATTR_PACKET_SUBSCRIBE]);
+    pid = NlAttrGetU32(attrs[OVS_NL_ATTR_PACKET_PID]);
 
-cleanup:
-    while (!IsListEmpty(packetList)) {
-        link = RemoveHeadList(packetList);
-        elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link);
-        OvsFreeMemory(elem);
-        num++;
-    }
-    OVS_LOG_LOUD("Exit: drop %u packets", num);
+    /* The socket subscribed with must be the same socket we perform receive*/
+    ASSERT(pid == instance->pid);
+
+    status = OvsSubscribeDpIoctl(instance, pid, join);
+
+    /*
+     * XXX Need to add this instance to a global data structure
+     * which hold all packet based instances. The data structure (hash)
+     * should be searched through the pid field of the instance for
+     * placing the missed packet into the correct queue
+     */
+done:
+    return status;
 }
 
+/*
+ * --------------------------------------------------------------------------
+ * Handler for queueing an IRP used for missed packet notification. The IRP is
+ * completed when a packet received and mismatched. STATUS_PENDING is returned
+ * on success. User mode keep a pending IRP at all times.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsPendPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
+{
+    UNREFERENCED_PARAMETER(replyLen);
+
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+
+    /*
+     * XXX access to packet queue must be through acquiring a lock as user mode
+     * could unsubscribe and the instnace will be freed.
+     */
+    return OvsWaitDpIoctl(usrParamsCtx->irp, instance->fileObject);
+}
 
 /*
- *----------------------------------------------------------------------------
- * OvsCreateAndAddPackets --
- *
- *  Create a packet and forwarded to user space.
- *
- *  This function would fragment packet if needed, and queue
- *  each segment to user space.
- *----------------------------------------------------------------------------
+ * --------------------------------------------------------------------------
+ * Handler for reading missed pacckets from the driver event queue. This
+ * handler is executed when user modes issues a socket receive on a socket
+ * --------------------------------------------------------------------------
  */
 NTSTATUS
-OvsCreateAndAddPackets(UINT32 queueId,
-                       PVOID userData,
-                       UINT32 userDataLen,
-                       UINT32 cmd,
-                       UINT32 inPort,
-                       OvsIPv4TunnelKey *tunnelKey,
-                       PNET_BUFFER_LIST nbl,
-                       BOOLEAN isRecv,
-                       POVS_PACKET_HDR_INFO hdrInfo,
-                       POVS_SWITCH_CONTEXT switchContext,
-                       LIST_ENTRY *list,
-                       UINT32 *num)
+OvsReadPacketCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
+                       UINT32 *replyLen)
 {
-    POVS_PACKET_QUEUE_ELEM elem;
-    PNET_BUFFER_LIST newNbl = NULL;
-    PNET_BUFFER nb;
+#ifdef DBG
+    POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
+#endif
+    POVS_OPEN_INSTANCE instance =
+        (POVS_OPEN_INSTANCE)usrParamsCtx->ovsInstance;
+    NTSTATUS status;
 
-    if (hdrInfo->isTcp) {
-        NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
-        UINT32 packetLength;
+    ASSERT(usrParamsCtx->devOp == OVS_READ_DEV_OP);
 
-        tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo);
-        nb = NET_BUFFER_LIST_FIRST_NB(nbl);
-        packetLength = NET_BUFFER_DATA_LENGTH(nb);
+    /* Should never read events with a dump socket */
+    ASSERT(instance->dumpState.ovsMsg == NULL);
 
-        OVS_LOG_TRACE("MSS %u packet len %u",
-                tsoInfo.LsoV1Transmit.MSS, packetLength);
-        if (tsoInfo.LsoV1Transmit.MSS) {
-            OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset);
-            newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo,
-                    tsoInfo.LsoV1Transmit.MSS , 0);
-            if (newNbl == NULL) {
-                return NDIS_STATUS_FAILURE;
-            }
-            nbl = newNbl;
-        }
-    }
+    /* Must have an packet queue */
+    ASSERT(instance->packetQueue != NULL);
 
-    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
-    while (nb) {
-        elem = OvsCreateQueuePacket(queueId, userData, userDataLen,
-                                    cmd, inPort, tunnelKey, nbl, nb,
-                                    isRecv, hdrInfo);
-        if (elem) {
-            InsertTailList(list, &elem->link);
-            (*num)++;
-        }
-        nb = NET_BUFFER_NEXT_NB(nb);
-    }
-    if (newNbl) {
-        OvsCompleteNBL(switchContext, newNbl, TRUE);
-    }
-    return NDIS_STATUS_SUCCESS;
+    /* Output buffer has been validated while validating read dev op. */
+    ASSERT(msgOut != NULL && usrParamsCtx->outputLength >= sizeof *msgOut);
+
+    /* Read a packet from the instance queue */
+    status = OvsReadDpIoctl(instance->fileObject, usrParamsCtx->outputBuffer,
+                            usrParamsCtx->outputLength, replyLen);
+    return status;
 }