From: Samuel Ghinet Date: Fri, 29 Aug 2014 04:06:48 +0000 (+0000) Subject: datapath-windows: Rename files. X-Git-Tag: v2.4.0~1559 X-Git-Url: http://git.cascardo.eti.br/?p=cascardo%2Fovs.git;a=commitdiff_plain;h=fa1324c92810c6b1e33b7e87caaaf2e6c4041040 datapath-windows: Rename files. This patch includes the file renaming and accommodations needed for the file renaming to build the forwarding extension for Hyper-V. This patch is also a follow-up for the thread: http://openvswitch.org/pipermail/dev/2014-August/044005.html Signed-off-by: Samuel Ghinet Co-authored-by: Alin Gabriel Serdean Signed-off-by: Ben Pfaff --- diff --git a/build-aux/extract-odp-netlink-windows-dp-h b/build-aux/extract-odp-netlink-windows-dp-h index 041d103b1..f2d9f0748 100755 --- a/build-aux/extract-odp-netlink-windows-dp-h +++ b/build-aux/extract-odp-netlink-windows-dp-h @@ -14,7 +14,7 @@ s/_LINUX_OPENVSWITCH_H/__OVS_DP_INTERFACE_H_/ # and use the appropriate userspace header. -s,,"OvsTypes.h", +s,,"Types.h", # Add ETH_ADDR_LEN macro to avoid including userspace packet.h s,#include ,\n#ifndef ETH_ADDR_LEN \ diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index ace9dc55b..eb59274f3 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -13,48 +13,48 @@ EXTRA_DIST += \ datapath-windows/ovsext.sln \ datapath-windows/ovsext/Datapath.c \ datapath-windows/ovsext/Datapath.h \ - datapath-windows/ovsext/OvsActions.c \ - datapath-windows/ovsext/OvsAtomic.h \ - datapath-windows/ovsext/OvsBufferMgmt.c \ - datapath-windows/ovsext/OvsBufferMgmt.h \ - datapath-windows/ovsext/OvsChecksum.c \ - datapath-windows/ovsext/OvsChecksum.h \ - datapath-windows/ovsext/OvsDebug.c \ - datapath-windows/ovsext/OvsDebug.h \ - datapath-windows/ovsext/OvsDriver.c \ - datapath-windows/ovsext/OvsEth.h \ - datapath-windows/ovsext/OvsEvent.c \ - datapath-windows/ovsext/OvsEvent.h \ - datapath-windows/ovsext/OvsFlow.c \ - datapath-windows/ovsext/OvsFlow.h \ - 
datapath-windows/ovsext/OvsIoctl.c \ - datapath-windows/ovsext/OvsIoctl.h \ - datapath-windows/ovsext/OvsIpHelper.c \ - datapath-windows/ovsext/OvsIpHelper.h \ - datapath-windows/ovsext/OvsJhash.c \ - datapath-windows/ovsext/OvsJhash.h \ - datapath-windows/ovsext/OvsNetProto.h \ - datapath-windows/ovsext/OvsOid.c \ - datapath-windows/ovsext/OvsOid.h \ - datapath-windows/ovsext/OvsPacketIO.c \ - datapath-windows/ovsext/OvsPacketIO.h \ - datapath-windows/ovsext/OvsPacketParser.c \ - datapath-windows/ovsext/OvsPacketParser.h \ - datapath-windows/ovsext/OvsSwitch.c \ - datapath-windows/ovsext/OvsSwitch.h \ - datapath-windows/ovsext/OvsTunnel.c \ - datapath-windows/ovsext/OvsTunnel.h \ - datapath-windows/ovsext/OvsTunnelFilter.c \ - datapath-windows/ovsext/OvsTunnelIntf.h \ - datapath-windows/ovsext/OvsTypes.h \ - datapath-windows/ovsext/OvsUser.c \ - datapath-windows/ovsext/OvsUser.h \ - datapath-windows/ovsext/OvsUtil.c \ - datapath-windows/ovsext/OvsUtil.h \ - datapath-windows/ovsext/OvsVport.c \ - datapath-windows/ovsext/OvsVport.h \ - datapath-windows/ovsext/OvsVxlan.c \ - datapath-windows/ovsext/OvsVxlan.h \ + datapath-windows/ovsext/Actions.c \ + datapath-windows/ovsext/Atomic.h \ + datapath-windows/ovsext/BufferMgmt.c \ + datapath-windows/ovsext/BufferMgmt.h \ + datapath-windows/ovsext/Checksum.c \ + datapath-windows/ovsext/Checksum.h \ + datapath-windows/ovsext/Debug.c \ + datapath-windows/ovsext/Debug.h \ + datapath-windows/ovsext/Driver.c \ + datapath-windows/ovsext/Ethernet.h \ + datapath-windows/ovsext/Event.c \ + datapath-windows/ovsext/Event.h \ + datapath-windows/ovsext/Flow.c \ + datapath-windows/ovsext/Flow.h \ + datapath-windows/ovsext/Ioctl.c \ + datapath-windows/ovsext/Ioctl.h \ + datapath-windows/ovsext/IpHelper.c \ + datapath-windows/ovsext/IpHelper.h \ + datapath-windows/ovsext/Jhash.c \ + datapath-windows/ovsext/Jhash.h \ + datapath-windows/ovsext/NetProto.h \ + datapath-windows/ovsext/Oid.c \ + datapath-windows/ovsext/Oid.h \ + 
datapath-windows/ovsext/PacketIO.c \ + datapath-windows/ovsext/PacketIO.h \ + datapath-windows/ovsext/PacketParser.c \ + datapath-windows/ovsext/PacketParser.h \ + datapath-windows/ovsext/Switch.c \ + datapath-windows/ovsext/Switch.h \ + datapath-windows/ovsext/Tunnel.c \ + datapath-windows/ovsext/Tunnel.h \ + datapath-windows/ovsext/TunnelFilter.c \ + datapath-windows/ovsext/TunnelIntf.h \ + datapath-windows/ovsext/Types.h \ + datapath-windows/ovsext/User.c \ + datapath-windows/ovsext/User.h \ + datapath-windows/ovsext/Util.c \ + datapath-windows/ovsext/Util.h \ + datapath-windows/ovsext/Vport.c \ + datapath-windows/ovsext/Vport.h \ + datapath-windows/ovsext/Vxlan.c \ + datapath-windows/ovsext/Vxlan.h \ datapath-windows/ovsext/ovsext.inf \ datapath-windows/ovsext/ovsext.rc \ datapath-windows/ovsext/ovsext.vcxproj \ diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c new file mode 100644 index 000000000..35ebfdf8c --- /dev/null +++ b/datapath-windows/ovsext/Actions.c @@ -0,0 +1,1548 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "NetProto.h" +#include "Flow.h" +#include "Vxlan.h" +#include "Checksum.h" +#include "PacketIO.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_ACTION +#include "Debug.h" + +typedef struct _OVS_ACTION_STATS { + UINT64 rxVxlan; + UINT64 txVxlan; + UINT64 flowMiss; + UINT64 flowUserspace; + UINT64 txTcp; + UINT32 failedFlowMiss; + UINT32 noVport; + UINT32 failedFlowExtract; + UINT32 noResource; + UINT32 noCopiedNbl; + UINT32 failedEncap; + UINT32 failedDecap; + UINT32 cannotGrowDest; + UINT32 zeroActionLen; + UINT32 failedChecksum; +} OVS_ACTION_STATS, *POVS_ACTION_STATS; + +OVS_ACTION_STATS ovsActionStats; + +/* + * There is a lot of data that needs to be maintained while executing the pipeline + * as dictated by the actions of a flow, across different functions at different + * levels. Such data is put together in a 'context' structure. Care should be + * exercised while adding new members to the structure - only add ones that get + * used across multiple stages in the pipeline/get used in multiple functions. + */ +#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2 +typedef struct OvsForwardingContext { + POVS_SWITCH_CONTEXT switchContext; + /* The NBL currently used in the pipeline. */ + PNET_BUFFER_LIST curNbl; + /* NDIS forwarding detail for 'curNbl'. */ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + /* Array of destination ports for 'curNbl'. */ + PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts; + /* send flags while sending 'curNbl' into NDIS. */ + ULONG sendFlags; + /* Total number of output ports, used + unused, in 'curNbl'. */ + UINT32 destPortsSizeIn; + /* Total number of used output ports in 'curNbl'. */ + UINT32 destPortsSizeOut; + /* + * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to + * be freed/completed. 
+ */ + OvsCompletionList *completionList; + /* + * vport number of 'curNbl' when it is passed from the PIF bridge to the INT + * bridge. ie. during tunneling on the Rx side. + */ + UINT32 srcVportNo; + + /* + * Tunnel key: + * - specified in actions during tunneling Tx + * - extracted from an NBL during tunneling Rx + */ + OvsIPv4TunnelKey tunKey; + + /* + * Tunneling - Tx: + * To store the output port, when it is a tunneled port. We don't foresee + * multiple tunneled ports as outport for any given NBL. + */ + POVS_VPORT_ENTRY tunnelTxNic; + + /* + * Tunneling - Rx: + * Points to the Internal port on the PIF Bridge, if the packet needs to be + * de-tunneled. + */ + POVS_VPORT_ENTRY tunnelRxNic; + + /* header information */ + OVS_PACKET_HDR_INFO layers; +} OvsForwardingContext; + + +/* + * -------------------------------------------------------------------------- + * OvsInitForwardingCtx -- + * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline + * is being executed. + * + * Result: + * NDIS_STATUS_SUCCESS on success + * Other NDIS_STATUS upon failure. Upon failure, it is safe to call + * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized + * enough for OvsCompleteNBLForwardingCtx() to do its work. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + UINT32 srcVportNo, + ULONG sendFlags, + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail, + OvsCompletionList *completionList, + OVS_PACKET_HDR_INFO *layers, + BOOLEAN resetTunnelInfo) +{ + ASSERT(ovsFwdCtx); + ASSERT(switchContext); + ASSERT(curNbl); + ASSERT(fwdDetail); + + /* + * Set values for curNbl and switchContext so upon failures, we have enough + * information to do cleanup. 
+ */ + ovsFwdCtx->curNbl = curNbl; + ovsFwdCtx->switchContext = switchContext; + ovsFwdCtx->completionList = completionList; + ovsFwdCtx->fwdDetail = fwdDetail; + + if (fwdDetail->NumAvailableDestinations > 0) { + /* + * XXX: even though MSDN says GetNetBufferListDestinations() returns + * NDIS_STATUS, the header files say otherwise. + */ + switchContext->NdisSwitchHandlers.GetNetBufferListDestinations( + switchContext->NdisSwitchContext, curNbl, + &ovsFwdCtx->destinationPorts); + + ASSERT(ovsFwdCtx->destinationPorts); + /* Ensure that none of the elements are consumed yet. */ + ASSERT(ovsFwdCtx->destinationPorts->NumElements == + fwdDetail->NumAvailableDestinations); + } else { + ovsFwdCtx->destinationPorts = NULL; + } + ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations; + ovsFwdCtx->destPortsSizeOut = 0; + ovsFwdCtx->srcVportNo = srcVportNo; + ovsFwdCtx->sendFlags = sendFlags; + if (layers) { + ovsFwdCtx->layers = *layers; + } else { + RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers); + } + if (resetTunnelInfo) { + ovsFwdCtx->tunnelTxNic = NULL; + ovsFwdCtx->tunnelRxNic = NULL; + RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey); + } + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsDetectTunnelRxPkt -- + * Utility function for an RX packet to detect its tunnel type. + * + * Result: + * True - if the tunnel type was detected. + * False - if not a tunnel packet or tunnel type not supported. + * -------------------------------------------------------------------------- + */ +static __inline BOOLEAN +OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, + const OvsFlowKey *flowKey) +{ + POVS_VPORT_ENTRY tunnelVport = NULL; + + /* XXX: we should also check for the length of the UDP payload to pick + * packets only if they are at least VXLAN header size. 
+ */ + if (!flowKey->ipKey.nwFrag && + flowKey->ipKey.nwProto == IPPROTO_UDP && + flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) { + tunnelVport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN); + ovsActionStats.rxVxlan++; + } + + // We might get tunnel packets even before the tunnel gets initialized. + if (tunnelVport) { + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + ovsFwdCtx->tunnelRxNic = tunnelVport; + return TRUE; + } + + return FALSE; +} + +/* + * -------------------------------------------------------------------------- + * OvsDetectTunnelPkt -- + * Utility function to detect if a packet is to be subjected to + * tunneling (Tx) or de-tunneling (Rx). Various factors such as source + * port, destination port, packet contents, and previously setup tunnel + * context are used. + * + * Result: + * True - If the packet is to be subjected to tunneling. + * In case of invalid tunnel context, the tunneling functionality is + * a no-op and is completed within this function itself by consuming + * all of the tunneling context. + * False - If not a tunnel packet or tunnel type not supported. Caller should + * process the packet as a non-tunnel packet. + * -------------------------------------------------------------------------- + */ +static __inline BOOLEAN +OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, + const POVS_VPORT_ENTRY dstVport, + const OvsFlowKey *flowKey) +{ + if (OvsIsInternalVportType(dstVport->ovsType)) { + /* + * Rx: + * The source of NBL during tunneling Rx could be the external + * port or if it is being executed from userspace, the source port is + * default port. 
+ */ + BOOLEAN validSrcPort = (ovsFwdCtx->fwdDetail->SourcePortId == + ovsFwdCtx->switchContext->externalPortId) || + (ovsFwdCtx->fwdDetail->SourcePortId == + NDIS_SWITCH_DEFAULT_PORT_ID); + + if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) { + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + ASSERT(ovsFwdCtx->tunnelRxNic != NULL); + return TRUE; + } + } else if (OvsIsTunnelVportType(dstVport->ovsType)) { + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + + /* + * Tx: + * The destination port is a tunnel port. Encapsulation must be + * performed only on packets that originate from a VIF port or from + * userspace (default port) + * + * If the packet will not be encapsulated, consume the tunnel context + * by clearing it. + */ + if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO && + !OvsIsVifVportNo(ovsFwdCtx->srcVportNo)) { + ovsFwdCtx->tunKey.dst = 0; + } + + /* Tunnel the packet only if tunnel context is set. */ + if (ovsFwdCtx->tunKey.dst != 0) { + ovsActionStats.txVxlan++; + ovsFwdCtx->tunnelTxNic = dstVport; + } + + return TRUE; + } + + return FALSE; +} + + +/* + * -------------------------------------------------------------------------- + * OvsAddPorts -- + * Add the specified destination vport into the forwarding context. If the + * vport is a VIF/external port, it is added directly to the NBL. If it is + * a tunneling port, it is NOT added to the NBL. + * + * Result: + * NDIS_STATUS_SUCCESS on success + * Other NDIS_STATUS upon failure. 
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsAddPorts(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *flowKey, + NDIS_SWITCH_PORT_ID dstPortId, + BOOLEAN preserveVLAN, + BOOLEAN preservePriority) +{ + POVS_VPORT_ENTRY vport; + PNDIS_SWITCH_PORT_DESTINATION fwdPort; + NDIS_STATUS status; + POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; + + /* + * We hold the dispatch lock that protects the list of vports, so vports + * validated here can be added as destinations safely before we call into + * NDIS. + * + * Some of the vports can be tunnelled ports as well in which case + * they should be added to a separate list of tunnelled destination ports + * instead of the VIF ports. The context for the tunnel is settable + * in OvsForwardingContext. + */ + vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId); + if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { + /* + * There may be some latency between a port disappearing, and userspace + * updating the recalculated flows. In the meantime, handle invalid + * ports gracefully. 
+ */ + ovsActionStats.noVport++; + return NDIS_STATUS_SUCCESS; + } + ASSERT(vport->nicState == NdisSwitchNicStateConnected); + vport->stats.txPackets++; + vport->stats.txBytes += + NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl)); + + if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) { + return NDIS_STATUS_SUCCESS; + } + + if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) { + if (ovsFwdCtx->destPortsSizeIn == 0) { + ASSERT(ovsFwdCtx->destinationPorts == NULL); + ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0); + status = + switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations( + switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, + OVS_DEST_PORTS_ARRAY_MIN_SIZE, + &ovsFwdCtx->destinationPorts); + if (status != NDIS_STATUS_SUCCESS) { + ovsActionStats.cannotGrowDest++; + return status; + } + ovsFwdCtx->destPortsSizeIn = + ovsFwdCtx->fwdDetail->NumAvailableDestinations; + ASSERT(ovsFwdCtx->destinationPorts); + } else { + ASSERT(ovsFwdCtx->destinationPorts != NULL); + /* + * NumElements: + * A ULONG value that specifies the total number of + * NDIS_SWITCH_PORT_DESTINATION elements in the + * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure. + * + * NumDestinations: + * A ULONG value that specifies the number of + * NDIS_SWITCH_PORT_DESTINATION elements in the + * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that + * specify port destinations. + * + * NumAvailableDestinations: + * A value that specifies the number of unused extensible switch + * destination ports elements within an NET_BUFFER_LIST structure. + */ + ASSERT(ovsFwdCtx->destinationPorts->NumElements == + ovsFwdCtx->destPortsSizeIn); + ASSERT(ovsFwdCtx->destinationPorts->NumDestinations == + ovsFwdCtx->destPortsSizeOut - + ovsFwdCtx->fwdDetail->NumAvailableDestinations); + ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0); + /* + * Before we grow the array of destination ports, the current set + * of ports needs to be committed. 
Only the ports added since the + * last commit need to be part of the new update. + */ + status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( + switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, + ovsFwdCtx->fwdDetail->NumAvailableDestinations, + ovsFwdCtx->destinationPorts); + if (status != NDIS_STATUS_SUCCESS) { + ovsActionStats.cannotGrowDest++; + return status; + } + ASSERT(ovsFwdCtx->destinationPorts->NumElements == + ovsFwdCtx->destPortsSizeIn); + ASSERT(ovsFwdCtx->destinationPorts->NumDestinations == + ovsFwdCtx->destPortsSizeOut); + ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0); + + status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations( + switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, + ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts); + if (status != NDIS_STATUS_SUCCESS) { + ovsActionStats.cannotGrowDest++; + return status; + } + ASSERT(ovsFwdCtx->destinationPorts != NULL); + ovsFwdCtx->destPortsSizeIn <<= 1; + } + } + + ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn); + fwdPort = + NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts, + ovsFwdCtx->destPortsSizeOut); + + fwdPort->PortId = vport->portId; + fwdPort->NicIndex = vport->nicIndex; + fwdPort->IsExcluded = 0; + fwdPort->PreserveVLAN = preserveVLAN; + fwdPort->PreservePriority = preservePriority; + ovsFwdCtx->destPortsSizeOut += 1; + + return NDIS_STATUS_SUCCESS; +} + + +/* + * -------------------------------------------------------------------------- + * OvsClearTunTxCtx -- + * Utility function to clear tx tunneling context. 
+ * -------------------------------------------------------------------------- + */ +static __inline VOID +OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx) +{ + ovsFwdCtx->tunnelTxNic = NULL; + ovsFwdCtx->tunKey.dst = 0; +} + + +/* + * -------------------------------------------------------------------------- + * OvsClearTunRxCtx -- + * Utility function to clear rx tunneling context. + * -------------------------------------------------------------------------- + */ +static __inline VOID +OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx) +{ + ovsFwdCtx->tunnelRxNic = NULL; + ovsFwdCtx->tunKey.dst = 0; +} + + +/* + * -------------------------------------------------------------------------- + * OvsCompleteNBLForwardingCtx -- + * This utility function is responsible for freeing/completing an NBL - either + * by adding it to a completion list or by freeing it. + * + * Side effects: + * It also resets the necessary fields in 'ovsFwdCtx'. + * -------------------------------------------------------------------------- + */ +static __inline VOID +OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx, + PCWSTR dropReason) +{ + NDIS_STRING filterReason; + + RtlInitUnicodeString(&filterReason, dropReason); + if (ovsFwdCtx->completionList) { + OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE, + ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1, + &filterReason); + ovsFwdCtx->curNbl = NULL; + } else { + /* If there is no completionList, we assume this is ovs created NBL */ + ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext, + ovsFwdCtx->curNbl, TRUE); + ASSERT(ovsFwdCtx->curNbl == NULL); + } + /* XXX: these can be made debug only to save cycles. Ideally the pipeline + * using these fields should reset the values at the end of the pipeline. 
*/ + ovsFwdCtx->destPortsSizeOut = 0; + ovsFwdCtx->tunnelTxNic = NULL; + ovsFwdCtx->tunnelRxNic = NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsDoFlowLookupOutput -- + * Function to be used for the second stage of a tunneling workflow, ie.: + * - On the encapsulated packet on Tx path, to do a flow extract, flow + * lookup and executing the actions. + * - On the decapsulated packet on Rx path, to do a flow extract, flow + * lookup and executing the actions. + * + * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is + * until the new buffer management framework is adopted. + * + * Side effects: + * The NBL in 'ovsFwdCtx' is consumed. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) +{ + OvsFlowKey key; + OvsFlow *flow; + UINT64 hash; + NDIS_STATUS status; + POVS_VPORT_ENTRY vport = + OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); + if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { + ASSERT(FALSE); // XXX: let's catch this for now + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to internal/tunnel port removal"); + ovsActionStats.noVport++; + return NDIS_STATUS_SUCCESS; + } + ASSERT(vport->nicState == NdisSwitchNicStateConnected); + + /* Assert that in the Rx direction, key is always setup. */ + ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); + status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, + &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ? 
+ &ovsFwdCtx->tunKey : NULL); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Flow extract failed"); + ovsActionStats.failedFlowExtract++; + return status; + } + + flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); + ovsFwdCtx->switchContext->datapath.hits++; + status = OvsActionsExecute(ovsFwdCtx->switchContext, + ovsFwdCtx->completionList, ovsFwdCtx->curNbl, + ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags, + &key, &hash, &ovsFwdCtx->layers, + flow->actions, flow->actionsLen); + ovsFwdCtx->curNbl = NULL; + } else { + LIST_ENTRY missedPackets; + UINT32 num = 0; + ovsFwdCtx->switchContext->datapath.misses++; + InitializeListHead(&missedPackets); + status = OvsCreateAndAddPackets( + OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS, + ovsFwdCtx->srcVportNo, + key.tunKey.dst != 0 ? + (OvsIPv4TunnelKey *)&key.tunKey : NULL, + ovsFwdCtx->curNbl, + ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers, + ovsFwdCtx->switchContext, &missedPackets, &num); + if (num) { + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num); + } + if (status == NDIS_STATUS_SUCCESS) { + /* Complete the packet since it was copied to user buffer. */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped since packet was copied to userspace"); + ovsActionStats.flowMiss++; + status = NDIS_STATUS_SUCCESS; + } else { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to failure to queue to userspace"); + status = NDIS_STATUS_FAILURE; + ovsActionStats.failedFlowMiss++; + } + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsTunnelPortTx -- + * The start function for Tx tunneling - encapsulates the packet, and + * outputs the packet on the PIF bridge. + * + * Side effects: + * The NBL in 'ovsFwdCtx' is consumed. 
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + PNET_BUFFER_LIST newNbl = NULL; + + /* + * Set up the source port to be the internal port so as to facilitate the + * second OvsLookupFlow. + */ + if (ovsFwdCtx->switchContext->internalVport == NULL) { + OvsClearTunTxCtx(ovsFwdCtx); + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped since internal port is absent"); + return NDIS_STATUS_FAILURE; + } + ovsFwdCtx->srcVportNo = + ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo; + + ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId; + ovsFwdCtx->fwdDetail->SourceNicIndex = + ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex; + + /* Do the encap. Encap function does not consume the NBL. */ + switch(ovsFwdCtx->tunnelTxNic->ovsType) { + case OVSWIN_VPORT_TYPE_VXLAN: + status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, + ovsFwdCtx->switchContext, + (VOID *)ovsFwdCtx->completionList, + &ovsFwdCtx->layers, &newNbl); + break; + default: + ASSERT(! "Tx: Unhandled tunnel type"); + } + + /* Reset the tunnel context so that it doesn't get used after this point. */ + OvsClearTunTxCtx(ovsFwdCtx); + + if (status == NDIS_STATUS_SUCCESS) { + ASSERT(newNbl); + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Complete after cloning NBL for encapsulation"); + ovsFwdCtx->curNbl = newNbl; + status = OvsDoFlowLookupOutput(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + } else { + /* + * XXX: Temporary freeing of the packet until we register a + * callback to IP helper. 
+ */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to encap failure"); + ovsActionStats.failedEncap++; + status = NDIS_STATUS_SUCCESS; + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsTunnelPortRx -- + * Decapsulate the incoming NBL based on the tunnel type and goes through + * the flow lookup for the inner packet. + * + * Note: IP checksum is validated here, but L4 checksum validation needs + * to be done by the corresponding tunnel types. + * + * Side effects: + * The NBL in 'ovsFwdCtx' is consumed. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNET_BUFFER_LIST newNbl = NULL; + POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic; + + if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers) + != NDIS_STATUS_SUCCESS) { + ovsActionStats.failedChecksum++; + OVS_LOG_INFO("Packet dropped due to IP checksum failure."); + goto dropNbl; + } + + switch(tunnelRxVport->ovsType) { + case OVSWIN_VPORT_TYPE_VXLAN: + /* + * OvsDoDecapVxlan should return a new NBL if it was copied, and + * this new NBL should be setup as the ovsFwdCtx->curNbl. + */ + status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + &ovsFwdCtx->tunKey, &newNbl); + break; + default: + OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", + tunnelRxVport->ovsType); + ASSERT(! "Rx: Unhandled tunnel type"); + status = NDIS_STATUS_NOT_SUPPORTED; + } + + if (status != NDIS_STATUS_SUCCESS) { + ovsActionStats.failedDecap++; + goto dropNbl; + } + + /* + * tunnelRxNic and other fields will be cleared, re-init the context + * before usage. 
+ */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-dropped due to new decap packet"); + + /* Decapsulated packet is in a new NBL */ + ovsFwdCtx->tunnelRxNic = tunnelRxVport; + OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, tunnelRxVport->portNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + + /* + * Set the NBL's SourcePortId and SourceNicIndex to default values to + * keep NDIS happy when we forward the packet. + */ + ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + ovsFwdCtx->fwdDetail->SourceNicIndex = 0; + + status = OvsDoFlowLookupOutput(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + OvsClearTunRxCtx(ovsFwdCtx); + + return status; + +dropNbl: + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-dropped due to decap failure"); + OvsClearTunRxCtx(ovsFwdCtx); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * OvsOutputForwardingCtx -- + * This function outputs an NBL to NDIS or to a tunneling pipeline based on + * the ports added so far into 'ovsFwdCtx'. + * + * Side effects: + * This function consumes the NBL - either by forwarding it successfully to + * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it. + * + * Also makes sure that the list of destination ports - tunnel or otherwise is + * drained. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx) +{ + NDIS_STATUS status = STATUS_SUCCESS; + POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; + + /* + * Handle the case where some of the destination ports are tunneled + * ports - the non-tunneled ports get an unmodified copy of the NBL, and the + * tunneling pipeline starts when we output the packet to tunneled port. 
+ */ + if (ovsFwdCtx->destPortsSizeOut > 0) { + PNET_BUFFER_LIST newNbl = NULL; + PNET_BUFFER nb; + UINT32 portsToUpdate = + ovsFwdCtx->fwdDetail->NumAvailableDestinations - + (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut); + + ASSERT(ovsFwdCtx->destinationPorts != NULL); + + /* + * Create a copy of the packet in order to do encap on it later. Also, + * don't copy the offload context since the encap'd packet has a + * different set of headers. This will change when we implement offloads + * before doing encapsulation. + */ + if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) { + nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /*copy NBL info*/); + if (newNbl == NULL) { + status = NDIS_STATUS_RESOURCES; + ovsActionStats.noCopiedNbl++; + goto dropit; + } + } + + /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */ + ASSERT(portsToUpdate > 0); + status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( + switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, + portsToUpdate, ovsFwdCtx->destinationPorts); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); + ovsActionStats.cannotGrowDest++; + goto dropit; + } + + OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + ovsFwdCtx->sendFlags); + /* End this pipeline by resetting the corresponding context. 
*/ + ovsFwdCtx->destPortsSizeOut = 0; + ovsFwdCtx->curNbl = NULL; + if (newNbl) { + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Dropped due to resouces"); + goto dropit; + } + } + } + + if (ovsFwdCtx->tunnelTxNic != NULL) { + status = OvsTunnelPortTx(ovsFwdCtx); + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + ASSERT(ovsFwdCtx->tunKey.dst == 0); + } else if (ovsFwdCtx->tunnelRxNic != NULL) { + status = OvsTunnelPortRx(ovsFwdCtx); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + ASSERT(ovsFwdCtx->tunKey.dst == 0); + } + ASSERT(ovsFwdCtx->curNbl == NULL); + + return status; + +dropit: + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX"); + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * OvsLookupFlowOutput -- + * Utility function for external callers to do flow extract, lookup, + * actions execute on a given NBL. + * + * Note: If this is being used from a callback function, make sure that the + * arguments specified are still valid in the asynchronous context. + * + * Side effects: + * This function consumes the NBL. + * -------------------------------------------------------------------------- + */ +VOID +OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext, + VOID *compList, + PNET_BUFFER_LIST curNbl) +{ + NDIS_STATUS status; + OvsForwardingContext ovsFwdCtx; + POVS_VPORT_ENTRY internalVport = + (POVS_VPORT_ENTRY)switchContext->internalVport; + + /* XXX: make sure comp list was not a stack variable previously. */ + OvsCompletionList *completionList = (OvsCompletionList *)compList; + + /* + * XXX: can internal port disappear while we are busy doing ARP resolution? 
+ * It could, but will we get this callback from IP helper in that case. Need + * to check. + */ + ASSERT(switchContext->internalVport); + status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, + internalVport->portNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl), + completionList, NULL, TRUE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(&ovsFwdCtx, + L"OVS-Dropped due to resources"); + return; + } + + ASSERT(FALSE); + /* + * XXX: We need to acquire the dispatch lock and the datapath lock. + */ + + OvsDoFlowLookupOutput(&ovsFwdCtx); +} + + +/* + * -------------------------------------------------------------------------- + * OvsOutputBeforeSetAction -- + * Function to be called to complete one set of actions on an NBL, before + * we start the next one. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx) +{ + PNET_BUFFER_LIST newNbl; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNET_BUFFER nb; + + /* + * Create a copy and work on the copy after this point. The original NBL is + * forwarded. One reason to not use the copy for forwarding is that + * ports have already been added to the original NBL, and it might be + * inefficient/impossible to remove/re-add them to the copy. There's no + * notion of removing the ports, the ports need to be marked as + * "isExcluded". There's seems no real advantage to retaining the original + * and sending out the copy instead. + * + * XXX: We are copying the offload context here. This is to handle actions + * such as: + * outport, pop_vlan(), outport, push_vlan(), outport + * + * copy size needs to include inner ether + IP + TCP, need to revisit + * if we support IP options. + * XXX Head room needs to include the additional encap. + * XXX copySize check is not considering multiple NBs. 
+ */ + nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /*copy NBL info*/); + + ASSERT(ovsFwdCtx->destPortsSizeOut > 0 || + ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL); + + /* Send the original packet out */ + status = OvsOutputForwardingCtx(ovsFwdCtx); + ASSERT(ovsFwdCtx->curNbl == NULL); + ASSERT(ovsFwdCtx->destPortsSizeOut == 0); + ASSERT(ovsFwdCtx->tunnelRxNic == NULL); + ASSERT(ovsFwdCtx->tunnelTxNic == NULL); + + /* If we didn't make a copy, can't continue. */ + if (newNbl == NULL) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + + /* Finish the remaining actions with the new NBL */ + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); + } else { + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + ovsFwdCtx->completionList, + &ovsFwdCtx->layers, FALSE); + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * OvsPopVlanInPktBuf -- + * Function to pop a VLAN tag when the tag is in the packet buffer. + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) +{ + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48); + UINT32 packetLen, mdlLen; + PNET_BUFFER_LIST newNbl; + NDIS_STATUS status; + + /* + * Declare a dummy vlanTag structure since we need to compute the size + * of shiftLength. The NDIS one is a unionized structure. 
+ */ + NDIS_PACKET_8021Q_INFO vlanTag = {0}; + ULONG shiftLength = sizeof (vlanTag.TagHeader); + PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)]; + + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + 0, 0, TRUE /* copy NBL info */); + if (!newNbl) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + + /* Complete the original NBL and create a copy to modify. */ + OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy"); + + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + NULL, &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Dropped due to resouces"); + return NDIS_STATUS_RESOURCES; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + ASSERT(curNb->Next == NULL); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */ + if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) { + ASSERT(FALSE); + return NDIS_STATUS_FAILURE; + } + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + RtlCopyMemory(tempBuffer, bufferStart, dataLength); + RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength); + NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL); + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsTunnelAttrToIPv4TunnelKey -- + * Convert tunnel attribute to OvsIPv4TunnelKey. 
+ * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, + OvsIPv4TunnelKey *tunKey) +{ + PNL_ATTR a; + INT rem; + + tunKey->attr[0] = 0; + tunKey->attr[1] = 0; + tunKey->attr[2] = 0; + ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL); + + NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr), + NlAttrGetSize(attr)) { + switch (NlAttrType(a)) { + case OVS_TUNNEL_KEY_ATTR_ID: + tunKey->tunnelId = NlAttrGetBe64(a); + tunKey->flags |= OVS_TNL_F_KEY; + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: + tunKey->src = NlAttrGetBe32(a); + break; + case OVS_TUNNEL_KEY_ATTR_IPV4_DST: + tunKey->dst = NlAttrGetBe32(a); + break; + case OVS_TUNNEL_KEY_ATTR_TOS: + tunKey->tos = NlAttrGetU8(a); + break; + case OVS_TUNNEL_KEY_ATTR_TTL: + tunKey->ttl = NlAttrGetU8(a); + break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: + tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT; + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: + tunKey->flags |= OVS_TNL_F_CSUM; + break; + default: + ASSERT(0); + } + } + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsUpdateEthHeader -- + * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the + * specified key. 
+ *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ethernet *ethAttr) +{ + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + UINT32 packetLen, mdlLen; + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + ASSERT(mdlLen > 0); + /* Bail out if the L2 header is not in a contiguous buffer. */ + if (MIN(packetLen, mdlLen) < sizeof *ethHdr) { + ASSERT(FALSE); + return NDIS_STATUS_FAILURE; + } + ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)); + + RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, + sizeof ethHdr->Destination); + RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source); + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsUpdateIPv4Header -- + * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the + * specified key. + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + const struct ovs_key_ipv4 *ipAttr) +{ + PNET_BUFFER curNb; + PMDL curMdl; + ULONG curMdlOffset; + PUINT8 bufferStart; + UINT32 mdlLen, hdrSize, packetLen; + OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; + NDIS_STATUS status; + IPHdr *ipHdr; + TCPHdr *tcpHdr = NULL; + UDPHdr *udpHdr = NULL; + + ASSERT(layers->value != 0); + + /* + * Peek into the MDL to get a handle to the IP header and if required + * the TCP/UDP header as well. 
We check if the required headers are in one + * contiguous MDL, and if not, we copy them over to one MDL. + */ + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + packetLen = NET_BUFFER_DATA_LENGTH(curNb); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!bufferStart) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + mdlLen -= curMdlOffset; + ASSERT((INT)mdlLen >= 0); + + if (layers->isTcp || layers->isUdp) { + hdrSize = layers->l4Offset + + layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr); + } else { + hdrSize = layers->l3Offset + sizeof (*ipHdr); + } + + /* Count of number of bytes of valid data there are in the first MDL. */ + mdlLen = MIN(packetLen, mdlLen); + if (mdlLen < hdrSize) { + PNET_BUFFER_LIST newNbl; + newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, + hdrSize, 0, TRUE /*copy NBL info*/); + if (!newNbl) { + ovsActionStats.noCopiedNbl++; + return NDIS_STATUS_RESOURCES; + } + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"Complete after partial copy."); + + status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, + newNbl, ovsFwdCtx->srcVportNo, 0, + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), + NULL, &ovsFwdCtx->layers, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + OvsCompleteNBLForwardingCtx(ovsFwdCtx, + L"OVS-Dropped due to resources"); + return NDIS_STATUS_RESOURCES; + } + + curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); + ASSERT(curNb->Next == NULL); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); + if (!curMdl) { + ovsActionStats.noResource++; + return NDIS_STATUS_RESOURCES; + } + curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + mdlLen -= curMdlOffset; + ASSERT(mdlLen >= hdrSize); + } + + ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset); + + if 
(layers->isTcp) { + tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); + } else if (layers->isUdp) { + udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); + } + + /* + * Adjust the IP header inline as dictated by the action, nad also update + * the IP and the TCP checksum for the data modified. + * + * In the future, this could be optimized to make one call to + * ChecksumUpdate32(). Ignoring this for now, since for the most common + * case, we only update the TTL. + */ + if (ipHdr->saddr != ipAttr->ipv4_src) { + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr, + ipAttr->ipv4_src); + } + ipHdr->saddr = ipAttr->ipv4_src; + } + if (ipHdr->daddr != ipAttr->ipv4_dst) { + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr, + ipAttr->ipv4_dst); + } + ipHdr->daddr = ipAttr->ipv4_dst; + } + if (ipHdr->protocol != ipAttr->ipv4_proto) { + UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00; + UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00; + if (tcpHdr) { + tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto); + } else if (udpHdr && udpHdr->check) { + udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto); + } + + if (ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto); + } + ipHdr->protocol = ipAttr->ipv4_proto; + } + if (ipHdr->ttl != ipAttr->ipv4_ttl) { + UINT16 oldTtl = (ipHdr->ttl) & 0xff; + UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff; + if 
(ipHdr->check != 0) { + ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl); + } + ipHdr->ttl = ipAttr->ipv4_ttl; + } + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsExecuteSetAction -- + * Executes a set() action, but storing the actions into 'ovsFwdCtx' + * -------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, + UINT64 *hash, + const PNL_ATTR a) +{ + enum ovs_key_attr type = NlAttrType(a); + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + switch (type) { + case OVS_KEY_ATTR_ETHERNET: + status = OvsUpdateEthHeader(ovsFwdCtx, + NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet))); + break; + + case OVS_KEY_ATTR_IPV4: + status = OvsUpdateIPv4Header(ovsFwdCtx, + NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4))); + break; + + case OVS_KEY_ATTR_TUNNEL: + { + OvsIPv4TunnelKey tunKey; + + status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); + ASSERT(status == NDIS_STATUS_SUCCESS); + tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); + RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); + + break; + } + case OVS_KEY_ATTR_SKB_MARK: + /* XXX: Not relevant to Hyper-V. Return OK */ + break; + case OVS_KEY_ATTR_UNSPEC: + case OVS_KEY_ATTR_ENCAP: + case OVS_KEY_ATTR_ETHERTYPE: + case OVS_KEY_ATTR_IN_PORT: + case OVS_KEY_ATTR_VLAN: + case OVS_KEY_ATTR_ICMP: + case OVS_KEY_ATTR_ICMPV6: + case OVS_KEY_ATTR_ARP: + case OVS_KEY_ATTR_ND: + case __OVS_KEY_ATTR_MAX: + default: + OVS_LOG_INFO("Unhandled attribute %#x", type); + ASSERT(FALSE); + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsActionsExecute -- + * Interpret and execute the specified 'actions' on the specifed packet + * 'curNbl'. 
The expectation is that if the packet needs to be dropped + * (completed) for some reason, it is added to 'completionList' so that the + * caller can complete the packet. If 'completionList' is NULL, the NBL is + * assumed to be generated by OVS and freed up. Otherwise, the function + * consumes the NBL by generating a NDIS send indication for the packet. + * + * There are one or more of "clone" NBLs that may get generated while + * executing the actions. Upon any failures, the "cloned" NBLs are freed up, + * and the caller does not have to worry about them. + * + * Success or failure is returned based on whether the specified actions + * were executed successfully on the packet or not. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, + OvsCompletionList *completionList, + PNET_BUFFER_LIST curNbl, + UINT32 portNo, + ULONG sendFlags, + OvsFlowKey *key, + UINT64 *hash, + OVS_PACKET_HDR_INFO *layers, + const PNL_ATTR actions, + INT actionsLen) +{ + PNL_ATTR a; + INT rem; + UINT32 dstPortID; + OvsForwardingContext ovsFwdCtx; + PCWSTR dropReason = L""; + NDIS_STATUS status; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = + NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + + /* XXX: ASSERT that the flow table lock is held. 
*/ + status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo, + sendFlags, fwdDetail, completionList, + layers, TRUE); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-initing destination port list failed"; + goto dropit; + } + + if (actionsLen == 0) { + dropReason = L"OVS-Dropped due to Flow action"; + ovsActionStats.zeroActionLen++; + goto dropit; + } + + NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) { + switch(NlAttrType(a)) { + case OVS_ACTION_ATTR_OUTPUT: + dstPortID = NlAttrGetU32(a); + status = OvsAddPorts(&ovsFwdCtx, key, dstPortID, + TRUE, TRUE); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination port failed"; + goto dropit; + } + break; + + case OVS_ACTION_ATTR_PUSH_VLAN: + { + struct ovs_action_push_vlan *vlan; + PVOID vlanTagValue; + PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag; + + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo); + if (vlanTagValue != NULL) { + /* + * XXX: We don't support double VLAN tag offload. In such cases, + * we need to insert the existing one into the packet buffer, + * and add the new one as offload. This will take care of + * guest tag-in-tag case as well as OVS rules that specify + * tag-in-tag. 
+ */ + } else { + vlanTagValue = 0; + vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; + vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a); + vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff; + vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13; + + NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) = vlanTagValue; + } + break; + } + + case OVS_ACTION_ATTR_POP_VLAN: + { + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) != 0) { + NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, + Ieee8021QNetBufferListInfo) = 0; + } else { + /* + * The VLAN tag is inserted into the packet buffer. Pop the tag + * by packet buffer modification. + */ + status = OvsPopVlanInPktBuf(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-pop vlan action failed"; + goto dropit; + } + } + break; + } + + case OVS_ACTION_ATTR_USERSPACE: + { + PNL_ATTR userdataAttr; + PNL_ATTR queueAttr; + POVS_PACKET_QUEUE_ELEM elem; + UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE; + //XXX confusing that portNo is actually portId for external port. 
+ BOOLEAN isRecv = (portNo == switchContext->externalPortId) + || OvsIsTunnelVportNo(portNo); + + queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID); + userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA); + + elem = OvsCreateQueuePacket(queueId, (PVOID)userdataAttr, + userdataAttr->nlaLen, + OVS_PACKET_CMD_ACTION, + portNo, (OvsIPv4TunnelKey *)&key->tunKey, + ovsFwdCtx.curNbl, + NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl), + isRecv, + layers); + if (elem) { + LIST_ENTRY missedPackets; + InitializeListHead(&missedPackets); + InsertTailList(&missedPackets, &elem->link); + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); + dropReason = L"OVS-Completed since packet was copied to " + L"userspace"; + } else { + dropReason = L"OVS-Dropped due to failure to queue to " + L"userspace"; + goto dropit; + } + break; + } + case OVS_ACTION_ATTR_SET: + { + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputBeforeSetAction(&ovsFwdCtx); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-adding destination failed"; + goto dropit; + } + } + + status = OvsExecuteSetAction(&ovsFwdCtx, key, hash, + (const PNL_ATTR)NlAttrGet + ((const PNL_ATTR)a)); + if (status != NDIS_STATUS_SUCCESS) { + dropReason = L"OVS-set action failed"; + goto dropit; + } + break; + } + case OVS_ACTION_ATTR_SAMPLE: + break; + case OVS_ACTION_ATTR_UNSPEC: + case __OVS_ACTION_ATTR_MAX: + default: + break; + } + } + + if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL + || ovsFwdCtx.tunnelRxNic != NULL) { + status = OvsOutputForwardingCtx(&ovsFwdCtx); + ASSERT(ovsFwdCtx.curNbl == NULL); + } + + ASSERT(ovsFwdCtx.destPortsSizeOut == 0); + ASSERT(ovsFwdCtx.tunnelRxNic == NULL); + ASSERT(ovsFwdCtx.tunnelTxNic == NULL); + +dropit: + /* + * If curNbl != NULL, it implies the NBL has not been not freed up so far. 
+ */ + if (ovsFwdCtx.curNbl) { + OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason); + } + + return status; +} diff --git a/datapath-windows/ovsext/Atomic.h b/datapath-windows/ovsext/Atomic.h new file mode 100644 index 000000000..1e45d9f89 --- /dev/null +++ b/datapath-windows/ovsext/Atomic.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __ATOMIC_H_ +#define __ATOMIC_H_ 1 + +static __inline UINT64 +atomic_add64(UINT64 *ptr, UINT32 val) +{ + return InterlockedAdd64((LONGLONG volatile *) ptr, (LONGLONG) val); +} + +static __inline UINT64 +atomic_inc64(UINT64 *ptr) +{ + return InterlockedIncrement64((LONGLONG volatile *) ptr); +} + +#endif /* __ATOMIC_H_ */ diff --git a/datapath-windows/ovsext/BufferMgmt.c b/datapath-windows/ovsext/BufferMgmt.c new file mode 100644 index 000000000..e0377c13a --- /dev/null +++ b/datapath-windows/ovsext/BufferMgmt.c @@ -0,0 +1,1535 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * **************************************************************************** + * + * Simple Buffer Management framework for OVS + * + * It introduces four NDIS buffer pools + * **Fix size net buffer list pool--this is used for small buffer + * One allocation will include NBL + NB + MDL + Data + CONTEXT. + * + * **Variable size net buffer list pool--this is used for variable size + * buffer. The allocation of net buffer list will include NBL + NB + + * CONTEXT, a separate allocation of MDL + data buffer is required. + * + * **NBL only net buffer list pool-- this is used for partial copy + * (or clone). In this case we can not allocate net buffer list and + * net buffer at the same time. + * + * **Net buffer pool-- this is required when net buffer need to be + * allocated separately. + * + * A Buffer context is defined to track the buffer specific information + * so that during NBL completion, proper action can be taken. Please see + * code for details. + * + * Here is the usage of the management API + * All external NBL should be initialized its NBL context by calling + * OvsInitExternalNBLContext() + * + * After the external NBL context is initialized, it can call the following + * API to allocate, copy or partial copy NBL. + * + * OvsAllocateFixSizeNBL() + * OvsAllocateVariableSizeNBL() + * + * OvsPartialCopyNBL() + * OvsPartialCopyToMultipleNBLs() + * + * OvsFullCopyNBL() + * OvsFullCopyToMultipleNBLs() + * + * See code comments for detail description of the functions. + * + * All NBLs is completed through + * OvsCompleteNBL() + * If this API return non NULL value, then the returned NBL should be + * returned to upper layer by calling + * NdisFSendNetBufferListsComplete() if the buffer is from upper + * layer. In case of WFP, it can call the corresponding completion routine + * to return the NBL to the framework. + * + * NOTE: + * 1. 
Copy or partial copy will not copy destination port array + * 2. Copy or partial copy will copy src port id and index + * 3. New Allocated NBL will have src port set to default port id + * 4. If original packet has direction flag set, the copied or partial + * copied NBL will still be in same direction. + * 5. When you advance or retreate the buffer, you may need to update + * relevant meta data to keep it consistent. + * + * **************************************************************************** + */ + +#include "precomp.h" +#include "Switch.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_BUFMGMT +#include "Debug.h" +#include "NetProto.h" +#include "Flow.h" +#include "Checksum.h" +#include "PacketParser.h" + +/* + * -------------------------------------------------------------------------- + * OvsInitBufferPool -- + * + * Allocate NBL and NB pool + * + * XXX: more optimization may be done for buffer management include local cache + * of NBL, NB, data, context, MDL. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsInitBufferPool(PVOID ovsContext) +{ + POVS_NBL_POOL ovsPool; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + NET_BUFFER_LIST_POOL_PARAMETERS nblParam; + NET_BUFFER_POOL_PARAMETERS nbParam; + + C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8); + + OVS_LOG_TRACE("Enter: context: %p", context); + + ovsPool = &context->ovsPool; + RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL)); + ovsPool->ndisHandle = context->NdisFilterHandle; + ovsPool->ndisContext = context->NdisSwitchContext; + /* + * fix size NBL pool includes + * NBL + NB + MDL + DATA + Context + * This is mainly used for Packet execute or slow path when copy is + * required and size is less than OVS_DEFAULT_DATA_SIZE. We expect + * Most of packet from user space will use this Pool. (This is + * true for all bfd and cfm packet. 
+ */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG; + nblParam.fAllocateNetBuffer = TRUE; + nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE; + + ovsPool->fixSizePool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->fixSizePool == NULL) { + goto pool_cleanup; + } + + /* + * Zero Size NBL Pool includes + * NBL + NB + Context + * This is mainly for packet with large data Size, in this case MDL and + * Data will be allocate separately. + */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG; + nblParam.fAllocateNetBuffer = TRUE; + nblParam.DataSize = 0; + + ovsPool->zeroSizePool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->zeroSizePool == NULL) { + goto pool_cleanup; + } + + /* + * NBL only pool just includes + * NBL (+ context) + * This is mainly used for clone and partial copy + */ + RtlZeroMemory(&nblParam, sizeof (nblParam)); + OVS_INIT_OBJECT_HEADER(&nblParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); + + nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; + nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG; + nblParam.fAllocateNetBuffer = FALSE; + nblParam.DataSize = 0; + + ovsPool->nblOnlyPool = + NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); + if (ovsPool->nblOnlyPool == NULL) { + goto 
pool_cleanup; + } + + /* nb Pool + * NB only pool, used for copy + */ + + OVS_INIT_OBJECT_HEADER(&nbParam.Header, + NDIS_OBJECT_TYPE_DEFAULT, + NET_BUFFER_POOL_PARAMETERS_REVISION_1, + NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1); + nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG; + nbParam.DataSize = 0; + ovsPool->nbPool = + NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam); + if (ovsPool->nbPool == NULL) { + goto pool_cleanup; + } + OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p" + "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool, + ovsPool->nblOnlyPool, ovsPool->nbPool); + return NDIS_STATUS_SUCCESS; + +pool_cleanup: + OvsCleanupBufferPool(context); + OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool"); + return NDIS_STATUS_RESOURCES; +} + + +/* + * -------------------------------------------------------------------------- + * OvsCleanupBufferPool -- + * Free Buffer pool for NBL and NB. + * -------------------------------------------------------------------------- + */ +VOID +OvsCleanupBufferPool(PVOID ovsContext) +{ + POVS_NBL_POOL ovsPool; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + ovsPool = &context->ovsPool; + OVS_LOG_TRACE("Enter: context: %p", context); +#ifdef DBG + ASSERT(ovsPool->fixNBLCount == 0); + ASSERT(ovsPool->zeroNBLCount == 0); + ASSERT(ovsPool->nblOnlyCount == 0); + ASSERT(ovsPool->nbCount == 0); + ASSERT(ovsPool->sysNBLCount == 0); + ASSERT(ovsPool->fragNBLCount == 0); +#endif + + if (ovsPool->fixSizePool) { + NdisFreeNetBufferListPool(ovsPool->fixSizePool); + ovsPool->fixSizePool = NULL; + } + if (ovsPool->zeroSizePool) { + NdisFreeNetBufferListPool(ovsPool->zeroSizePool); + ovsPool->zeroSizePool = NULL; + } + if (ovsPool->nblOnlyPool) { + NdisFreeNetBufferListPool(ovsPool->nblOnlyPool); + ovsPool->nblOnlyPool = NULL; + } + if (ovsPool->nbPool) { + NdisFreeNetBufferPool(ovsPool->nbPool); + ovsPool->nbPool = NULL; + } + OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool"); 
+} + + +static VOID +OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, + UINT16 flags, + UINT32 origDataLength, + UINT32 srcPortNo) +{ + ctx->magic = OVS_CTX_MAGIC; + ctx->refCount = 1; + ctx->flags = flags; + ctx->srcPortNo = srcPortNo; + ctx->origDataLength = origDataLength; +} + + +static VOID +OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl) +{ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + if (info == NULL) { + return; + } + OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d " + "isDataSafe: %s, safeDataSize: %d", + nbl, info->NumAvailableDestinations, info->SourcePortId, + info->SourceNicIndex, + info->IsPacketDataSafe ? "TRUE" : "FALSE", + info->IsPacketDataSafe ? 0 : info->SafePacketDataSize); + +} + +static VOID +OvsDumpNBLContext(PNET_BUFFER_LIST nbl) +{ + PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context; + if (ctx == NULL) { + OVS_LOG_INFO("No Net Buffer List context"); + return; + } + while (ctx) { + OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d", + nbl, ctx, ctx->Size, ctx->Offset); + ctx = ctx->Next; + } +} + + +static VOID +OvsDumpMDLChain(PMDL mdl) +{ + PMDL tmp; + tmp = mdl; + while (tmp) { + OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p" + " ByteCount: %d, ByteOffset: %d", + tmp, tmp->Size, tmp->MappedSystemVa, + tmp->StartVa, tmp->ByteCount, tmp->ByteOffset); + tmp = tmp->Next; + } +} + + +static VOID +OvsDumpNetBuffer(PNET_BUFFER nb) +{ + OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p " + "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d", + nb, + NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle, + NET_BUFFER_FIRST_MDL(nb), + NET_BUFFER_CURRENT_MDL(nb), + NET_BUFFER_CURRENT_MDL_OFFSET(nb), + NET_BUFFER_DATA_LENGTH(nb), + NET_BUFFER_DATA_OFFSET(nb)); + OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb)); +} + + +static VOID +OvsDumpNetBufferList(PNET_BUFFER_LIST nbl) +{ + PNET_BUFFER nb; + OVS_LOG_INFO("NBL: %p, parent: %p, 
SrcHandle: %p, ChildCount:%d " + "poolHandle: %p", + nbl, nbl->ParentNetBufferList, + nbl->SourceHandle, nbl->ChildRefCount, + nbl->NdisPoolHandle); + OvsDumpNBLContext(nbl); + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + OvsDumpNetBuffer(nb); + nb = NET_BUFFER_NEXT_NB(nb); + } +} + +/* + * -------------------------------------------------------------------------- + * OvsAllocateFixSizeNBL -- + * + * Allocate fix size NBL which include + * NBL + NB + MBL + Data + Context + * Please note: + * * Forwarding Context is allocated, but forwarding detail information + * is not initailized. + * * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128 + * byte). + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateFixSizeNBL(PVOID ovsContext, + UINT32 size, + UINT32 headRoom) +{ + PNET_BUFFER_LIST nbl = NULL; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_BUFFER_CONTEXT ctx; + POVS_NBL_POOL ovsPool = &context->ovsPool; + NDIS_STATUS status; + UINT32 line; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + + if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) { + line = __LINE__; + goto allocate_done; + } + + nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); + + if (nbl == NULL) { + line = __LINE__; + goto allocate_done; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. 
+ AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + NdisFreeNetBufferList(nbl); + nbl = NULL; + line = __LINE__; + goto allocate_done; + } + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + ASSERT(info); + info->IsPacketDataSafe = TRUE; + info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + + status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), + size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount); + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); +#endif + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + ASSERT(ctx); + + OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size, + OVS_DEFAULT_PORT_NO); + line = __LINE__; +allocate_done: + OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line); + return nbl; +} + + +static PMDL +OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle, + UINT32 dataSize) +{ + PMDL mdl; + PVOID data; + + data = OvsAllocateMemory(dataSize); + if (data == NULL) { + return NULL; + } + + mdl = NdisAllocateMdl(ndisHandle, data, dataSize); + if (mdl == NULL) { + OvsFreeMemory(data); + } + + return mdl; +} + + +static VOID +OvsFreeMDLAndData(PMDL mdl) +{ + PVOID data; + + data = MmGetMdlVirtualAddress(mdl); + NdisFreeMdl(mdl); + OvsFreeMemory(data); +} + + +/* + * -------------------------------------------------------------------------- + * OvsAllocateVariableSizeNBL -- + * + * Allocate variable size NBL, the NBL looks like + * NBL + NB + Context + * MDL + Data + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateVariableSizeNBL(PVOID ovsContext, + UINT32 size, + UINT32 headRoom) +{ + PNET_BUFFER_LIST nbl = NULL; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_NBL_POOL ovsPool = &context->ovsPool; + POVS_BUFFER_CONTEXT ctx; + 
UINT32 realSize; + PMDL mdl; + NDIS_STATUS status; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; + if (size == 0) { + return NULL; + } + realSize = MEM_ALIGN_SIZE(size + headRoom); + + mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize); + if (mdl == NULL) { + return NULL; + } + + nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL, + mdl, realSize, 0); + if (nbl == NULL) { + OvsFreeMDLAndData(mdl); + return NULL; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + /* + * do we need to remove mdl from nbl XXX + */ + OvsFreeMDLAndData(mdl); + NdisFreeNetBufferList(nbl); + return NULL; + } + + info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + ASSERT(info); + info->IsPacketDataSafe = TRUE; + info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), + size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount); + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); +#endif + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | + OVS_BUFFER_FROM_ZERO_SIZE_POOL, + size, OVS_DEFAULT_PORT_NO); + + OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl); + return nbl; +} + + +/* + * -------------------------------------------------------------------------- + * OvsInitExternalNBLContext -- + * + * For NBL not allocated by OVS, it will allocate and initialize + * the NBL context. 
+ * -------------------------------------------------------------------------- + */ +POVS_BUFFER_CONTEXT +OvsInitExternalNBLContext(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv) +{ + NDIS_HANDLE poolHandle; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_BUFFER_CONTEXT ctx; + PNET_BUFFER nb; + NDIS_STATUS status; + UINT16 flags; + + poolHandle = NdisGetPoolFromNetBufferList(nbl); + + if (poolHandle == context->ovsPool.ndisHandle) { + return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + } + status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT), + OVS_DEFAULT_NBL_CONTEXT_FILL, + OVS_OTHER_POOL_TAG); + if (status != NDIS_STATUS_SUCCESS) { + return NULL; + } +#ifdef DBG + OvsDumpNBLContext(nbl); + InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount); +#endif + flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER; + flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + /* + * we use first nb to decide whether we need advance or retreat during + * complete. + */ + OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO); + return ctx; +} + +/* + * -------------------------------------------------------------------------- + * OvsAllocateNBLContext + * + * Create NBL buffer context and forwarding context. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl) +{ + POVS_NBL_POOL ovsPool = &context->ovsPool; + NDIS_STATUS status; + + status = NdisAllocateNetBufferListContext(nbl, + sizeof (OVS_BUFFER_CONTEXT), + OVS_DEFAULT_NBL_CONTEXT_FILL, + OVS_OTHER_POOL_TAG); + if (status != NDIS_STATUS_SUCCESS) { + return NDIS_STATUS_FAILURE; + } + + nbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + + if (status != NDIS_STATUS_SUCCESS) { + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + return NDIS_STATUS_FAILURE; + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsFreeNBLContext + * + * Free the NBL buffer context and forwarding context. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsFreeNBLContext(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl) +{ + POVS_NBL_POOL ovsPool = &context->ovsPool; + + context->NdisSwitchHandlers. 
+ FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsCopyNBLInfo + * + * Copy NBL info from src to dst + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl, + POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize, + BOOLEAN copyNblInfo) +{ + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl); + dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl); + if (srcInfo) { +#ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO + status = context->NdisSwitchHandlers. + CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0); + + if (status != NDIS_STATUS_SUCCESS) { + return status; + } +#else + dstInfo->SourcePortId = srcInfo->SourcePortId; + dstInfo->SourceNicIndex = srcInfo->SourceNicIndex; + if (copyNblInfo) { + if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) { + NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl); + } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) { + NdisCopySendNetBufferListInfo(dstNbl, srcNbl); + } + } +#endif + dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe; + if (!srcInfo->IsPacketDataSafe && copySize > + srcInfo->SafePacketDataSize) { + srcInfo->SafePacketDataSize = copySize; + } + } else { + /* + * Assume all data are safe + */ + dstInfo->IsPacketDataSafe = TRUE; + dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + } + return status; +} + +/* + * -------------------------------------------------------------------------- + * OvsPartialCopyNBL -- + * + * Partial copy NBL, if there is multiple NB in NBL, each one will be + * copied. We also reserve headroom for the new NBL. 
+ * + * Please note, + * NBL should have OVS_BUFFER_CONTEXT setup before calling + * this function. + * The NBL should already have ref to itself so that during copy + * it will not be freed. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsPartialCopyNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + PNET_BUFFER_LIST newNbl; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + NDIS_STATUS status; + PNET_BUFFER srcNb, dstNb; + ULONG byteCopied; + POVS_NBL_POOL ovsPool = &context->ovsPool; + POVS_BUFFER_CONTEXT srcCtx, dstCtx; + UINT16 flags; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + if (copySize) { + NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL); + } + newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool, + NULL, 0); + if (copySize) { + status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0, + NULL, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + } + + if (newNbl == NULL) { + return NULL; + } + + /* + * Allocate private memory for copy + */ + if (copySize + headRoom) { + status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom, + 0, NULL, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto retreat_error; + } + + if (headRoom) { + NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL); + } + if (copySize) { + srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); + dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl); + + while (srcNb) { + status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize, + srcNb, 0, + &byteCopied); + if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) { + goto nbl_context_error; + } + srcNb = NET_BUFFER_NEXT_NB(srcNb); + dstNb = 
NET_BUFFER_NEXT_NB(dstNb); + } + } + } + + status = OvsAllocateNBLContext(context, newNbl); + if (status != NDIS_STATUS_SUCCESS) { + goto nbl_context_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo); + if (status != NDIS_STATUS_SUCCESS) { + goto copy_list_info_error; + } + +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + + newNbl->ParentNetBufferList = nbl; + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx != NULL); + + flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); + + flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; + + srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); + OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize, + OVS_DEFAULT_PORT_NO); + + InterlockedIncrement((LONG volatile *)&srcCtx->refCount); +#ifdef DBG + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl); + return newNbl; + +copy_list_info_error: + OvsFreeNBLContext(context, newNbl); +nbl_context_error: + if (copySize) { + NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL); + } +retreat_error: + NdisFreeCloneNetBufferList(newNbl, 0); + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsPartialCopyToMultipleNBLs -- + * + * This is similar to OvsPartialCopyNBL() except that each NB will + * have its own NBL. 
+ * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsPartialCopyToMultipleNBLs(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL; + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + if (NET_BUFFER_NEXT_NB(nb) == NULL) { + return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo); + } + + firstNb = nb; + prevNb = nb; + + while (nb) { + /* Temporarily make 'nb' the only NB of 'nbl' so the partial copy + * produces a clone that carries exactly this one NB. */ + nextNb = NET_BUFFER_NEXT_NB(nb); + NET_BUFFER_NEXT_NB(nb) = NULL; + + NET_BUFFER_LIST_FIRST_NB(nbl) = nb; + + newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom, + copyNblInfo); + if (newNbl == NULL) { + goto cleanup; + } + if (prevNbl == NULL) { + firstNbl = newNbl; + } else { + /* Append the new clone (not the source NBL) to the result chain + * and re-link the source NB chain that was split above. */ + NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl; + NET_BUFFER_NEXT_NB(prevNb) = nb; + } + prevNbl = newNbl; + prevNb = nb; + nb = nextNb; + } + NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; + return firstNbl; + +cleanup: + /* Restore the source NB chain before releasing the clones made so far. */ + NET_BUFFER_NEXT_NB(prevNb) = nb; + NET_BUFFER_NEXT_NB(nb) = nextNb; + NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; + + newNbl = firstNbl; + while (newNbl) { + /* Remember the successor, then detach the NBL being completed; + * the successor is NULL for the last element and must not be + * dereferenced. */ + firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl); + NET_BUFFER_LIST_NEXT_NBL(newNbl) = NULL; + OvsCompleteNBL(context, newNbl, TRUE); + newNbl = firstNbl; + } + return NULL; +} + + +static PNET_BUFFER_LIST +OvsCopySinglePacketNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + PNET_BUFFER nb, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + UINT32 size; + ULONG copiedSize; + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER newNb; + NDIS_STATUS status; + POVS_BUFFER_CONTEXT srcCtx, dstCtx; + + size = NET_BUFFER_DATA_LENGTH(nb); + if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) { + newNbl = OvsAllocateFixSizeNBL(context, size, headRoom); + } else { + newNbl = 
OvsAllocateVariableSizeNBL(context, size, headRoom); + } + if (newNbl == NULL) { + return NULL; + } + newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, + &copiedSize); + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (status == NDIS_STATUS_SUCCESS) { + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo); + } + + if (status != NDIS_STATUS_SUCCESS || copiedSize != size) { + OvsCompleteNBL(context, newNbl, TRUE); + return NULL; + } + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx && srcCtx); + ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC); + + dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | + OVS_BUFFER_SEND_BUFFER); +#ifdef DBG + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl); + return newNbl; +} + +/* + * -------------------------------------------------------------------------- + * OvsFullCopyNBL -- + * + * Copy the NBL to a new NBL including data. + * + * Notes: + * The NBL can have multiple NBs, but the final result is one NBL. 
+ * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsFullCopyNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + POVS_NBL_POOL ovsPool = &context->ovsPool; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL; + POVS_BUFFER_CONTEXT dstCtx, srcCtx; + PMDL mdl; + NDIS_STATUS status; + UINT32 size, totalSize; + ULONG copiedSize; + UINT16 flags; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + + if (NET_BUFFER_NEXT_NB(nb) == NULL) { + return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); + } + + newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool, + (UINT16)sizeof (OVS_BUFFER_CONTEXT), + (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); + if (newNbl == NULL) { + return NULL; + } + + while (nb) { + size = NET_BUFFER_DATA_LENGTH(nb); + totalSize = MEM_ALIGN_SIZE(size + headRoom); + mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize); + + if (mdl == NULL) { + goto nblcopy_error; + } + newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0); + if (newNb == NULL) { + OvsFreeMDLAndData(mdl); + goto nblcopy_error; + } + if (firstNb == NULL) { + firstNb = newNb; + } else { + NET_BUFFER_NEXT_NB(prevNb) = newNb; + } + prevNb = newNb; +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->nbCount); +#endif + status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, + &copiedSize); + if (status != NDIS_STATUS_SUCCESS || size != copiedSize) { 
+ goto nblcopy_error; + } + + nb = NET_BUFFER_NEXT_NB(nb); + } + + NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb; + + newNbl->SourceHandle = ovsPool->ndisHandle; + status = context->NdisSwitchHandlers. + AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl); + + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl); + dstInfo->IsPacketDataSafe = TRUE; + + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + + flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); + + flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | + OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; + + OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb), + OVS_DEFAULT_PORT_NO); + +#ifdef DBG + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + OVS_LOG_LOUD("newNbl: %p", newNbl); + return newNbl; + +nblcopy_error: + while (firstNb) { +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); +#endif + prevNb = firstNb; + firstNb = NET_BUFFER_NEXT_NB(prevNb); + mdl = NET_BUFFER_FIRST_MDL(prevNb); + NET_BUFFER_FIRST_MDL(prevNb) = NULL; + NdisFreeNetBuffer(prevNb); + OvsFreeMDLAndData(mdl); + } + NdisFreeNetBufferList(newNbl); + OVS_LOG_ERROR("OvsFullCopyNBL failed"); + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * GetSegmentHeaderInfo + * + * Extract header size and sequence number for the segment. 
+ * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, + const POVS_PACKET_HDR_INFO hdrInfo, + UINT32 *hdrSize, UINT32 *seqNumber) +{ + TCPHdr tcpStorage; + const TCPHdr *tcp; + + /* Parse the orginal Eth/IP/TCP header */ + tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); + if (tcp == NULL) { + return NDIS_STATUS_FAILURE; + } + *seqNumber = ntohl(tcp->seq); + *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp); + + return NDIS_STATUS_SUCCESS; +} + + +/* + * -------------------------------------------------------------------------- + * FixSegmentHeader + * + * Fix IP length, IP checksum, TCP sequence number and TCP checksum + * in the segment. + * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber) +{ + EthHdr *dstEth; + IPHdr *dstIP; + TCPHdr *dstTCP; + PMDL mdl; + PUINT8 bufferStart; + + mdl = NET_BUFFER_FIRST_MDL(nb); + + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority); + if (!bufferStart) { + return NDIS_STATUS_RESOURCES; + } + dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb)); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)); + dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth); + dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4); + ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) + >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); + + /* Fix IP length and checksum */ + ASSERT(dstIP->protocol == IPPROTO_TCP); + dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); + dstIP->check = 0; + dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0); + + /* Fix TCP checksum */ + dstTCP->seq = htonl(seqNumber); + dstTCP->check = + IPPseudoChecksum((UINT32 
*)&dstIP->saddr, + (UINT32 *)&dstIP->daddr, + IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP)); + dstTCP->check = CalculateChecksumNB(nb, + (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4), + sizeof *dstEth + dstIP->ihl * 4); + return STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsTcpSegmentyNBL -- + * + * Segment TCP payload, and prepend each segment with ether/IP/TCP header. + * Leave headRoom for additional encap. + * + * Please note, + * NBL should have OVS_BUFFER_CONTEXT setup before calling + * this function. + * The NBL should already have ref to itself so that during copy + * it will not be freed. + * Currently this API assert there is only one NB in an NBL, it needs + * to be fixed if we receive multiple NBs in an NBL. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsTcpSegmentNBL(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 mss, + UINT32 headRoom) +{ + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; +#ifdef DBG + POVS_NBL_POOL ovsPool = &context->ovsPool; +#endif + POVS_BUFFER_CONTEXT dstCtx, srcCtx; + UINT32 size, hdrSize, seqNumber; + PNET_BUFFER_LIST newNbl; + PNET_BUFFER nb, newNb; + NDIS_STATUS status; + UINT16 segmentSize; + ULONG copiedSize; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); + + /* Figure out the segment header size */ + status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); + if (status != NDIS_STATUS_SUCCESS) { + OVS_LOG_INFO("Cannot parse NBL header"); + return NULL; + } + + size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize; + + /* XXX add to 
ovsPool counters? */ + newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, + NULL, hdrSize, mss, hdrSize + headRoom , 0, 0); + if (newNbl == NULL) { + return NULL; + } + + /* Now deal with TCP payload */ + for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL; + newNb = NET_BUFFER_NEXT_NB(newNb)) { + segmentSize = (size > mss ? mss : size) & 0xffff; + if (headRoom) { + NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL); + } + + /* Now copy the eth/IP/TCP header and fix up */ + status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0, + &copiedSize); + if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) { + goto nblcopy_error; + } + + status = FixSegmentHeader(newNb, segmentSize, seqNumber); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + + /* Move on to the next segment */ + size -= segmentSize; + seqNumber += segmentSize; + } + + status = OvsAllocateNBLContext(context, newNbl); + if (status != NDIS_STATUS_SUCCESS) { + goto nblcopy_error; + } + + status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE); + if (status != NDIS_STATUS_SUCCESS) { + goto nbl_context_error; + } + + newNbl->ParentNetBufferList = nbl; + + /* Remember it's a fragment NBL so we can free it properly */ + dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); + ASSERT(dstCtx != NULL); + dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT | + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER; + dstCtx->refCount = 1; + dstCtx->magic = OVS_CTX_MAGIC; + dstCtx->dataOffsetDelta = hdrSize + headRoom; + + InterlockedIncrement((LONG volatile *)&srcCtx->refCount); +#ifdef DBG + InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount); + + OvsDumpNetBufferList(nbl); + OvsDumpForwardingDetails(nbl); + + OvsDumpNetBufferList(newNbl); + OvsDumpForwardingDetails(newNbl); +#endif + OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl); + return newNbl; + +nbl_context_error: + 
OvsFreeNBLContext(context, newNbl); +nblcopy_error: +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); +#endif + NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0); + return NULL; +} + + +/* + * -------------------------------------------------------------------------- + * OvsFullCopyToMultipleNBLs -- + * + * Copy NBL to multiple NBLs, each NB will have its own NBL + * + * Returns the head of the chain of new NBLs (linked through their Next + * pointers), or NULL if the source context is invalid or any copy fails; + * on failure every NBL created so far is completed before returning. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsFullCopyToMultipleNBLs(PVOID ovsContext, + PNET_BUFFER_LIST nbl, + UINT32 headRoom, + BOOLEAN copyNblInfo) +{ + + POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; + PNET_BUFFER_LIST firstNbl, currNbl, newNbl; + PNET_BUFFER nb; + POVS_BUFFER_CONTEXT srcCtx; + + srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { + OVS_LOG_INFO("src nbl must have ctx initialized"); + ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); + + if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) { + return newNbl; + } else { + firstNbl = newNbl; + currNbl = newNbl; + } + + /* The first NB has already been copied above; continue with the second + * one so the first packet is not duplicated in the result chain. */ + nb = NET_BUFFER_NEXT_NB(nb); + while (nb) { + newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, + copyNblInfo); + if (newNbl == NULL) { + goto copymultiple_error; + } + NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl; + currNbl = newNbl; + nb = NET_BUFFER_NEXT_NB(nb); + } + return firstNbl; + +copymultiple_error: + /* Detach and complete each NBL built so far, one at a time. */ + while (firstNbl) { + currNbl = firstNbl; + firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl); + NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL; + OvsCompleteNBL(context, currNbl, TRUE); + } + return NULL; + +} + + +/* + * -------------------------------------------------------------------------- + * OvsCompleteNBL -- + * + * This function tries to free the NBL allocated by OVS buffer + * management module. 
If it trigger the completion of the parent + * NBL, it will recursively call itself. If it trigger the completion + * of external NBL, it will be returned to the caller. The caller + * is responsible to call API to return to upper layer. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsCompleteNBL(POVS_SWITCH_CONTEXT context, + PNET_BUFFER_LIST nbl, + BOOLEAN updateRef) +{ + POVS_BUFFER_CONTEXT ctx; + UINT16 flags; + PNET_BUFFER_LIST parent; + NDIS_STATUS status; + NDIS_HANDLE poolHandle; + LONG value; + POVS_NBL_POOL ovsPool = &context->ovsPool; + PNET_BUFFER nb; + + + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + + OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d", + nbl, ctx, ctx->refCount, updateRef); + + if (updateRef) { + value = InterlockedDecrement((LONG volatile *)&ctx->refCount); + if (value != 0) { + return NULL; + } + } else { + /* + * This is a special case, the refCount must be zero + */ + ASSERT(ctx->refCount == 0); + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + + flags = ctx->flags; + if (!(flags & OVS_BUFFER_FRAGMENT) && + NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) { + UINT32 diff; + if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) { + diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb); + status = NdisRetreatNetBufferListDataStart(nbl, diff, 0, + NULL, NULL); + ASSERT(status == NDIS_STATUS_SUCCESS); + } else { + diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength; + NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL); + } + } + + if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) { + NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); + } + + if (flags & OVS_BUFFER_NEED_COMPLETE) { + /* + * return to caller for completion + */ +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount); +#endif + return nbl; + } + + if (flags & 
OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) { + context->NdisSwitchHandlers. + FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); + } + + if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) { + PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + PMDL mdl = NET_BUFFER_FIRST_MDL(nb); + NET_BUFFER_FIRST_MDL(nb) = NULL; + ASSERT(mdl->Next == NULL); + OvsFreeMDLAndData(mdl); + nb = NET_BUFFER_NEXT_NB(nb); + } + } + + if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) { + PNET_BUFFER nb, nextNb; + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + nextNb = NET_BUFFER_NEXT_NB(nb); + NdisFreeNetBuffer(nb); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); +#endif + nb = nextNb; + } + NET_BUFFER_LIST_FIRST_NB(nbl) = NULL; + } + + parent = nbl->ParentNetBufferList; + + poolHandle = NdisGetPoolFromNetBufferList(nbl); + if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) { + ASSERT(poolHandle == ovsPool->fixSizePool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount); +#endif + NdisFreeNetBufferList(nbl); + } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) { + ASSERT(poolHandle == ovsPool->zeroSizePool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount); +#endif + NdisFreeNetBufferList(nbl); + } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) { + ASSERT(poolHandle == ovsPool->nblOnlyPool); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount); +#endif + NdisFreeCloneNetBufferList(nbl, 0); + } else if (flags & OVS_BUFFER_FRAGMENT) { + OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent); +#ifdef DBG + InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); +#endif + NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0); + } + + if (parent != NULL) { + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + value = InterlockedDecrement((LONG volatile *)&ctx->refCount); + if (value == 0) { + 
return OvsCompleteNBL(context, parent, FALSE); + } + } + return NULL; +} + +/* + * -------------------------------------------------------------------------- + * OvsSetCtxSourcePortNo -- + * Setter function which stores the source port of an NBL in the NBL + * Context Info. Returns NDIS_STATUS_INVALID_PARAMETER if nbl has no context data, NDIS_STATUS_SUCCESS otherwise. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, + UINT32 portNo) +{ + POVS_BUFFER_CONTEXT ctx; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); /* checked builds: catch a missing or foreign context */ + if (ctx == NULL) { + return NDIS_STATUS_INVALID_PARAMETER; + } + + ctx->srcPortNo = portNo; + return NDIS_STATUS_SUCCESS; +} + +/* + * -------------------------------------------------------------------------- + * OvsGetCtxSourcePortNo -- + * Get source port of an NBL from its Context Info. Returns NDIS_STATUS_INVALID_PARAMETER if nbl has no context data or portNo is NULL; on success *portNo receives the stored port. + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, + UINT32 *portNo) +{ + POVS_BUFFER_CONTEXT ctx; + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); /* checked builds: catch a missing or foreign context */ + if (ctx == NULL || portNo == NULL) { + return NDIS_STATUS_INVALID_PARAMETER; + } + *portNo = ctx->srcPortNo; + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/BufferMgmt.h b/datapath-windows/ovsext/BufferMgmt.h new file mode 100644 index 000000000..915d7f555 --- /dev/null +++ b/datapath-windows/ovsext/BufferMgmt.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __BUFFER_MGMT_H_ +#define __BUFFER_MGMT_H_ 1 + +#define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT +#define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) +#define OVS_CTX_MAGIC 0xabcd + +#define OVS_DEFAULT_NBL_CONTEXT_SIZE MEM_ALIGN_SIZE(64) +#define OVS_DEFAULT_NBL_CONTEXT_FILL \ + (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT)) + +#define OVS_DEFAULT_DATA_SIZE 256 +#define OVS_DEFAULT_HEADROOM_SIZE 128 +#define OVS_FIX_NBL_DATA_SIZE (OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) + +/* Default we copy 18 bytes, to make sure ethernet header and vlan is in + * continuous buffer */ +#define OVS_DEFAULT_COPY_SIZE 18 + +enum { + OVS_BUFFER_NEED_COMPLETE = BIT16(0), + OVS_BUFFER_PRIVATE_MDL = BIT16(1), + OVS_BUFFER_PRIVATE_DATA = BIT16(2), + OVS_BUFFER_PRIVATE_NET_BUFFER = BIT16(3), + OVS_BUFFER_PRIVATE_FORWARD_CONTEXT = BIT16(4), + OVS_BUFFER_PRIVATE_CONTEXT = BIT16(5), + OVS_BUFFER_FROM_FIX_SIZE_POOL = BIT16(6), + OVS_BUFFER_FROM_ZERO_SIZE_POOL = BIT16(7), + OVS_BUFFER_FROM_NBL_ONLY_POOL = BIT16(8), + OVS_BUFFER_RECV_BUFFER = BIT16(9), + OVS_BUFFER_SEND_BUFFER = BIT16(10), + OVS_BUFFER_FRAGMENT = BIT16(11), +}; + +typedef union _OVS_BUFFER_CONTEXT { + struct { + UINT16 magic; + UINT16 flags; + UINT32 srcPortNo; + UINT32 refCount; + union { + UINT32 origDataLength; + UINT32 dataOffsetDelta; + }; + }; + + UINT64 value[MEM_ALIGN_SIZE(16) >> 3]; +} OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT; + + +typedef struct _OVS_NBL_POOL { + NDIS_SWITCH_CONTEXT ndisContext; + NDIS_HANDLE ndisHandle; + NDIS_HANDLE 
fixSizePool; // data size of 256 + NDIS_HANDLE zeroSizePool; // no data, NBL + NB + Context + NDIS_HANDLE nblOnlyPool; // NBL + context for clone + NDIS_HANDLE nbPool; // NB for clone +#ifdef DBG + LONG fixNBLCount; + LONG zeroNBLCount; + LONG nblOnlyCount; + LONG nbCount; + LONG sysNBLCount; + LONG fragNBLCount; +#endif +} OVS_NBL_POOL, *POVS_NBL_POOL; + + +NDIS_STATUS OvsInitBufferPool(PVOID context); +VOID OvsCleanupBufferPool(PVOID context); + +PNET_BUFFER_LIST OvsAllocateFixSizeNBL(PVOID context, + UINT32 size, + UINT32 headRoom); +PNET_BUFFER_LIST OvsAllocateVariableSizeNBL(PVOID context, + UINT32 size, + UINT32 headRoom); + +POVS_BUFFER_CONTEXT OvsInitExternalNBLContext(PVOID context, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv); + +PNET_BUFFER_LIST OvsPartialCopyNBL(PVOID context, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsPartialCopyToMultipleNBLs(PVOID context, + PNET_BUFFER_LIST nbl, + UINT32 copySize, + UINT32 headRoom, + BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsFullCopyNBL(PVOID context, PNET_BUFFER_LIST nbl, + UINT32 headRoom, BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context, + PNET_BUFFER_LIST nbl, + POVS_PACKET_HDR_INFO hdrInfo, + UINT32 MSS, + UINT32 headRoom); +PNET_BUFFER_LIST OvsFullCopyToMultipleNBLs(PVOID context, + PNET_BUFFER_LIST nbl, UINT32 headRoom, BOOLEAN copyNblInfo); +PNET_BUFFER_LIST OvsCompleteNBL(PVOID context, PNET_BUFFER_LIST nbl, + BOOLEAN updateRef); +NDIS_STATUS OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 portNo); + +NDIS_STATUS OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 *portNo); + +#endif /* __BUFFER_MGMT_H_ */ diff --git a/datapath-windows/ovsext/Checksum.c b/datapath-windows/ovsext/Checksum.c new file mode 100644 index 000000000..510a094e4 --- /dev/null +++ b/datapath-windows/ovsext/Checksum.c @@ -0,0 +1,578 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "Checksum.h" +#include "Flow.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_CHECKSUM +#include "Debug.h" +#include "PacketParser.h" + +#ifndef htons +#define htons(_x) (((UINT16)(_x) >> 8) + (((UINT16)(_x) << 8) & 0xff00)) +#endif + +#ifndef swap64 +#define swap64(_x) ((((UINT64)(_x) >> 8) & 0x00ff00ff00ff00ff) + \ + (((UINT64)(_x) << 8) & 0xff00ff00ff00ff00)) +#endif +/* fold64(_x): fold the 64-bit accumulator _x down to 16 bits in place, adding the carries back in. _x must be a modifiable lvalue and is evaluated several times. Wrapped in do/while(0) so it acts as one statement. */ +#define fold64(_x) do { \ + (_x) = ((_x) >> 32) + ((_x) & 0xffffffff); \ + (_x) = (UINT32)(((_x) >> 32) + (_x)); \ + (_x) = ((_x) >> 16) + ((_x) & 0xffff); \ + (_x) = (UINT16)(((_x) >> 16) + (_x)); } while (0) +/* fold32(_x): same as fold64 for a 32-bit accumulator. */ +#define fold32(_x) do { \ + (_x) = ((_x) >> 16) + ((_x) & 0xffff); \ + (_x) = (UINT16)(((_x) >> 16) + (_x)); } while (0) + + +/* + *---------------------------------------------------------------------------- + * CalculateOnesComplement -- + * + * Given the start address and buffer length, calculate the 1's complement + * This routine can be used when multiple buffers are used for a packets. + * + * PLEASE NOTE, even though the last parameter is UINT64, but the assumption + * is it will not overflowed after adding the extra data. 
+ * ------------------------------------------------ + * + * Result: + * As name indicate, the final data is not 1's complemnent + *---------------------------------------------------------------------------- + */ +UINT64 +CalculateOnesComplement(UINT8 *start, + UINT16 totalLength, + UINT64 initial, + BOOLEAN isEvenStart) +{ + UINT64 sum = 0, val; + UINT64 *src = (UINT64 *)start; /* the buffer is read 8 bytes at a time through this pointer */ + union { + UINT32 val; + UINT8 b8[4]; + } tmp; /* collects the 1-3 trailing bytes so they can be added as one UINT32 */ + + while (totalLength > 7) { + val = *src; + sum += (val >> 32) + (val & 0xffffffff); /* add the two 32-bit halves of the word */ + src++; + totalLength -= 8; + } + if (totalLength > 3) { /* one more 32-bit word */ + sum += *(UINT32 *)src; + src = (UINT64 *)((UINT8 *)src + 4); + totalLength -= 4; + } + start = (UINT8 *)src; + tmp.val = 0; /* unused bytes stay zero */ + switch (totalLength) { /* 0 remaining bytes: nothing to add */ + case 3: + tmp.b8[2] = start[2]; /* fall through */ + case 2: + tmp.b8[1] = start[1]; /* fall through */ + case 1: + tmp.b8[0] = start[0]; + sum += tmp.val; + } + sum = (isEvenStart ? sum : swap64(sum)) + initial; /* swap64 exchanges adjacent bytes when isEvenStart is FALSE */ + return sum; +} + +/* + *---------------------------------------------------------------------------- + * CalculateChecksum -- + * + * Given the start point, and length, calculate the checksum + * as 1's complement of 1's comlement. + * + * This assume the checksum field is initailized properly. + * + * Input Parameter: + * ptr: point to the data to be checksumed + * totalLength: total length of the data + * initial: inital value to remit the checksum. Please note this + * value should be network byte order value. + * + * The last parameter may be useful where you don't want to set + * checksum field to zero, in that case you can pass ~checksum, + * this is equivalent of set checksum field to zero. + * + * Result: + * The result can be assigned to checksum field directly. 
+ *---------------------------------------------------------------------------- + */ +UINT16 +CalculateChecksum(UINT8 *ptr, + UINT16 totalLength, + UINT16 initial) +{ + UINT64 sum = CalculateOnesComplement(ptr, totalLength, initial, TRUE); + fold64(sum); /* reduce the 64-bit sum to 16 bits */ + return (UINT16)~sum; /* complement of the folded sum */ +} + +/* + *---------------------------------------------------------------------------- + * CopyAndCalculateOnesComplement -- + * + * Given the start address and buffer length, calculate the 1's complement + * sum and, at the same time, copy the data from src to dst. + * + * This routine can be used when multiple buffers are used for a packet. + * + * PLEASE NOTE: even though the last parameter is UINT64, the assumption + * is that it will not overflow after adding the extra data. + * ------------------------------------------------ + * + * Result: + * As the name indicates, the returned sum is not yet 1's complemented. + *---------------------------------------------------------------------------- + */ +UINT64 +CopyAndCalculateOnesComplement(UINT8 *dst, + UINT8 *src, + UINT16 length, + UINT64 initial, + BOOLEAN isEvenStart) +{ + UINT64 sum =0, val; + UINT64 *src64, *dst64; + union { + UINT32 val; + UINT8 b8[4]; + } tmp; /* collects the 1-3 trailing bytes so they can be added as one UINT32 */ + + src64 = (UINT64 *)src; + dst64 = (UINT64 *)dst; + + while (length > 7) { /* copy and sum 8 bytes per iteration */ + val = *src64; + *dst64 = val; + sum += (val >> 32) + (val & 0xffffffff); + src64++; + dst64++; + length -= 8; + } + + if (length > 3) { /* one more 32-bit word */ + val = *(UINT32 *)src64; + *(UINT32 *)dst64 = (UINT32)val; + sum += (UINT32)val; + dst64 = (UINT64 *)((UINT8 *)dst64 + 4); + src64 = (UINT64 *)((UINT8 *)src64 + 4); + length -= 4; + } + src = (UINT8 *)src64; + dst = (UINT8 *)dst64; + tmp.val = 0; /* unused bytes stay zero */ + switch (length) { /* 0 remaining bytes: nothing to copy or add */ + case 3: + dst[2] = src[2]; + tmp.b8[2] = src[2]; /* fall through */ + case 2: + dst[1] = src[1]; + tmp.b8[1] = src[1]; /* fall through */ + case 1: + dst[0] = src[0]; + tmp.b8[0] = src[0]; + sum += tmp.val; + } + sum = (isEvenStart ? 
sum : swap64(sum)) + initial; + return sum; +} + +/* + *---------------------------------------------------------------------------- + * CopyAndCalculateChecksum -- + * + * This is similar to CalculateChecksum, except that it also copies the + * data from src to dst while summing it. + *---------------------------------------------------------------------------- + */ +UINT16 +CopyAndCalculateChecksum(UINT8 *dst, + UINT8 *src, + UINT16 length, + UINT16 initial) +{ + + UINT64 sum = CopyAndCalculateOnesComplement(dst, src, length, initial, + TRUE); + fold64(sum); /* reduce the 64-bit sum to 16 bits */ + return (UINT16)~sum; /* complement of the folded sum */ +} + + +/* + *---------------------------------------------------------------------------- + * IPChecksum -- + * + * Given an IP header, calculate the IP checksum. + * We assume the IP checksum field is initialized properly. + * + * Input Parameter: + * ipHdr: IP header start point + * length: IP header length in bytes (may include IP options); asserted to be a multiple of 4 + * initial: same as CalculateChecksum + * + * Result: + * The result is already 1's complemented, so it can be assigned + * to the checksum field directly. + *---------------------------------------------------------------------------- + */ +UINT16 +IPChecksum(UINT8 *ipHdr, + UINT16 length, + UINT16 initial) +{ + UINT32 sum = initial; + UINT16 *ptr = (UINT16 *)ipHdr; /* header is summed as 16-bit words */ + ASSERT((length & 0x3) == 0); + while (length > 1) { + sum += ptr[0]; + ptr++; + length -= 2; + } + fold32(sum); /* add the carries back into the low 16 bits */ + return (UINT16)~sum; +} + +/* + *---------------------------------------------------------------------------- + * IPPseudoChecksum -- + * + * Give src and dst IP address, protocol value and total + * upper layer length(not include IP header, but include + * upller layer protocol header, for example it include + * TCP header for TCP checksum), calculate the pseudo + * checksum, please note this checksum is just 1's complement + * addition. 
+ * + * Input Parameter: + * src: please note it is in network byte order + * dst: same as src + * protocol: protocol value in IP header + * totalLength: total length of upper layer data including + * header. + * + * Result: + * + * This value should be put in TCP checksum field before + * calculating TCP checksum using CalculateChecksum with + * initial value of 0. + *---------------------------------------------------------------------------- + */ +UINT16 +IPPseudoChecksum(UINT32 *src, + UINT32 *dst, + UINT8 protocol, + UINT16 totalLength) +{ + UINT32 sum = (UINT32)htons(totalLength) + htons(protocol); + sum += (*src >> 16) + (*src & 0xffff); + sum += (*dst >> 16) + (*dst & 0xffff); + fold32(sum); + return (UINT16)sum; +} + +/* + *---------------------------------------------------------------------------- + * IPv6PseudoChecksum -- + * + * Given IPv6 src and dst address, upper layer protocol and total + * upper layer protocol data length including upper layer header + * part, calculate the pseudo checksum for upper layer protocol + * checksum. + * + * please note this checksum is just 1's complement addition. + * + * Input Parameter: + * src: src IPv6 address in network byte order + * dst: dst IPv6 address. + * protocol: upper layer protocol + * totalLength: total length of upper layer data. Please note this is + * in host byte order. + * + * Result: + * + * Place in upper layer checksum field before calculate upper layer + * checksum. 
+ *---------------------------------------------------------------------------- + */ +UINT16 +IPv6PseudoChecksum(UINT32 *src, + UINT32 *dst, + UINT8 protocol, + UINT16 totalLength) +{ + UINT64 sum = (UINT32)htons(totalLength) + htons(protocol); /* length and protocol are byte-swapped before being added */ + sum += (UINT64)src[0] + src[1] + src[2] + src[3]; /* each address is added as four 32-bit words */ + sum += (UINT64)dst[0] + dst[1] + dst[2] + dst[3]; + fold64(sum); /* reduce to 16 bits; the result is not complemented */ + return (UINT16)sum; +} + +/* + *---------------------------------------------------------------------------- + * ChecksumUpdate32 -- + * + * Given the old checksum value (as it is in the checksum field), + * the previous value of the relevant field in network byte order and + * the new value of the relevant field in network byte order, + * calculate the new checksum as ~(~oldSum + ~prev + newValue). + * Please check the relevant RFC for reference (presumably RFC 1624 -- confirm). + * + * Input Parameter: + * oldSum: old checksum value in checksum field + * prev: previous value of the relevant 32 bit field in network + * byte order. + * newValue: new value of the relevant 32 bit field in network + * byte order. + * + * Result: + * new checksum value to be placed in the checksum field. + *---------------------------------------------------------------------------- + */ +UINT16 +ChecksumUpdate32(UINT16 oldSum, + UINT32 prev, + UINT32 newValue) +{ + UINT32 sum = ~prev; + sum = (sum >> 16) + (sum & 0xffff); /* add the two 16-bit halves of ~prev */ + sum += (newValue >> 16) + (newValue & 0xffff); /* add the two 16-bit halves of newValue */ + sum += (UINT16)~oldSum; + fold32(sum); /* add the carries back into the low 16 bits */ + return (UINT16)~sum; +} + + +/* + *---------------------------------------------------------------------------- + * ChecksumUpdate16 -- + * + * Given old checksum value (as it is in checksum field), + * prev value of the relevant field in network byte order + * new value of the relevant field in the network byte order + * calculate the new checksum. + * Please check relevant RFC for reference. + * + * Input Pramater: + * oldSum: old checksum value in checksum field + * prev: previous value of relevant 32 bit feld in network + * byte order. + * new: new value of the relevant 32 bit field in network + * byte order. 
+ * + * Result: + * new checksum value to be placed in the checksum field. + *---------------------------------------------------------------------------- + */ +UINT16 +ChecksumUpdate16(UINT16 oldSum, + UINT16 prev, + UINT16 newValue) +{ + UINT32 sum = (UINT16)~oldSum; + sum += (UINT32)((UINT16)~prev) + newValue; + fold32(sum); + return (UINT16)~sum; +} + +/* + *---------------------------------------------------------------------------- + * CalculateChecksumNB -- + * + * Calculates checksum over a length of bytes contained in an NB. + * + * nb : NB which contains the packet bytes. + * csumDataLen : Length of bytes to be checksummed. + * offset : offset to the first bytes of the data stream to be + * checksumed. + * + * Result: + * return 0, if there is a failure. + *---------------------------------------------------------------------------- + */ +UINT16 +CalculateChecksumNB(const PNET_BUFFER nb, + UINT16 csumDataLen, + UINT32 offset) +{ + ULONG mdlLen; + UINT16 csLen; + PUCHAR src; + UINT64 csum = 0; + PMDL currentMdl; + ULONG firstMdlLen; + /* Running count of bytes in remainder of the MDLs including current. */ + ULONG packetLen; + + if ((nb == NULL) || (csumDataLen == 0) + || (offset >= NET_BUFFER_DATA_LENGTH(nb)) + || (offset + csumDataLen > NET_BUFFER_DATA_LENGTH(nb))) { + OVS_LOG_ERROR("Invalid parameters - csum length %u, offset %u," + "pkt%s len %u", csumDataLen, offset, nb? "":"(null)", + nb? 
NET_BUFFER_DATA_LENGTH(nb) : 0); + return 0; + } + + currentMdl = NET_BUFFER_CURRENT_MDL(nb); + packetLen = NET_BUFFER_DATA_LENGTH(nb); + firstMdlLen = + MmGetMdlByteCount(currentMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb); + + firstMdlLen = MIN(firstMdlLen, packetLen); + if (offset < firstMdlLen) { + src = (PUCHAR) MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + src += (NET_BUFFER_CURRENT_MDL_OFFSET(nb) + offset); + mdlLen = firstMdlLen - offset; + packetLen -= firstMdlLen; + ASSERT((INT)packetLen >= 0); + } else { + offset -= firstMdlLen; + packetLen -= firstMdlLen; + ASSERT((INT)packetLen >= 0); + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + + while (offset >= mdlLen) { + offset -= mdlLen; + packetLen -= mdlLen; + ASSERT((INT)packetLen >= 0); + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + } + + src = (PUCHAR)MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + + src += offset; + mdlLen -= offset; + } + + while (csumDataLen && (currentMdl != NULL)) { + ASSERT(mdlLen < 65536); + csLen = MIN((UINT16) mdlLen, csumDataLen); + //XXX Not handling odd bytes yet. + ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen); + + csum = CalculateOnesComplement(src, csLen, csum, TRUE); + fold64(csum); + + csumDataLen -= csLen; + currentMdl = NDIS_MDL_LINKAGE(currentMdl); + if (csumDataLen && currentMdl) { + src = MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); + if (!src) { + return 0; + } + + mdlLen = MmGetMdlByteCount(currentMdl); + mdlLen = MIN(mdlLen, packetLen); + /* packetLen does not include the current MDL from here on. 
*/ + packetLen -= mdlLen; + ASSERT((INT)packetLen >= 0); + } + } + + ASSERT(csumDataLen == 0); + ASSERT((csum & ~0xffff) == 0); + return (UINT16) ~csum; +} + +/* + * -------------------------------------------------------------------------- + * OvsValidateIPChecksum -- returns NDIS_STATUS_FAILURE when the NBL checksum info reports an IP checksum failure, or when the checksum recomputed here differs from the one in the header; NDIS_STATUS_SUCCESS otherwise (including non-IPv4 packets and the case where OvsGetIp returns NULL). + * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, + POVS_PACKET_HDR_INFO hdrInfo) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + uint16_t checksum, hdrChecksum; + struct IPHdr ip_storage; + const IPHdr *ipHdr; + + if (!hdrInfo->isIPv4) { + return NDIS_STATUS_SUCCESS; + } + + /* First check if NIC has indicated checksum failure. */ + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpIpChecksumNetBufferListInfo); + if (csumInfo.Receive.IpChecksumFailed) { + return NDIS_STATUS_FAILURE; + } + + /* Next, check if the NIC did not validate the RX checksum. */ + if (!csumInfo.Receive.IpChecksumSucceeded) { + ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); + if (ipHdr) { + ip_storage = *ipHdr; /* work on a local copy so the check field can be zeroed */ + hdrChecksum = ipHdr->check; + ip_storage.check = 0; + /* NOTE(review): the sum below covers ihl * 4 bytes starting at ip_storage, which is a single struct IPHdr; if ihl * 4 can exceed sizeof ip_storage (IP options) this reads past the copy -- confirm and bound the length. */ + checksum = IPChecksum((uint8 *)&ip_storage, ipHdr->ihl * 4, 0); + if (checksum != hdrChecksum) { + return NDIS_STATUS_FAILURE; + } + } + } + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsValidateUDPChecksum + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + if (udpCsumZero) { + /* Zero is valid checksum. 
*/ + csumInfo.Receive.UdpChecksumFailed = 0; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; + } + + /* First check if NIC has indicated UDP checksum failure. */ + if (csumInfo.Receive.UdpChecksumFailed) { + return NDIS_STATUS_INVALID_PACKET; + } + + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/Checksum.h b/datapath-windows/ovsext/Checksum.h new file mode 100644 index 000000000..2378a324a --- /dev/null +++ b/datapath-windows/ovsext/Checksum.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __CHECKSUM_H_ +#define __CHECKSUM_H_ 1 + +typedef union _OVS_PACKET_HDR_INFO *POVS_PACKET_HDR_INFO; + +UINT16 CalculateChecksum(UINT8 *ptr, UINT16 length, UINT16 initial); +UINT16 CopyAndCalculateChecksum(UINT8 *dst, UINT8 *src, UINT16 length, + UINT16 initial); +UINT16 IPChecksum(UINT8 *ipHdr, UINT16 length, UINT16 initial); +UINT16 IPPseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, + UINT16 totalLength); +UINT16 IPv6PseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, + UINT16 totalLength); +UINT16 ChecksumUpdate32(UINT16 oldSum, UINT32 prev, UINT32 newValue); +UINT16 ChecksumUpdate16(UINT16 oldSum, UINT16 prev, UINT16 newValue); +UINT16 CalculateChecksumNB(const PNET_BUFFER nb, UINT16 csumDataLen, + UINT32 offset); +NDIS_STATUS OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, + POVS_PACKET_HDR_INFO hdrInfo); +NDIS_STATUS OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, + BOOLEAN udpCsumZero); + +#endif /* __CHECKSUM_H_ */ diff --git a/datapath-windows/ovsext/Datapath.c b/datapath-windows/ovsext/Datapath.c index 40654f50e..5522580f6 100644 --- a/datapath-windows/ovsext/Datapath.c +++ b/datapath-windows/ovsext/Datapath.c @@ -24,21 +24,21 @@ #include "precomp.h" #include "Datapath.h" -#include "OvsJhash.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsPacketIO.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" -#include "OvsUser.h" +#include "Jhash.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "PacketIO.h" +#include "NetProto.h" +#include "Flow.h" +#include "User.h" #ifdef OVS_DBG_MOD #undef OVS_DBG_MOD #endif #define OVS_DBG_MOD OVS_DBG_DATAPATH -#include "OvsDebug.h" +#include "Debug.h" #define NETLINK_FAMILY_NAME_LEN 48 diff --git a/datapath-windows/ovsext/Datapath.h b/datapath-windows/ovsext/Datapath.h index 6d8a6db4f..bfbbd71f4 100644 --- a/datapath-windows/ovsext/Datapath.h +++ b/datapath-windows/ovsext/Datapath.h @@ -21,12 
+21,12 @@ * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c */ #if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 0 -#include "OvsIoctl.h" +#include "Ioctl.h" #else -#ifndef __OVS_DATAPATH_H_ -#define __OVS_DATAPATH_H_ 1 +#ifndef __DATAPATH_H_ +#define __DATAPATH_H_ 1 typedef struct _OVS_DEVICE_EXTENSION { INT numberOpenInstance; @@ -77,6 +77,6 @@ typedef struct _OVS_MESSAGE { /* Variable length nl_attrs follow. */ } OVS_MESSAGE, *POVS_MESSAGE; -#endif /* __OVS_DATAPATH_H_ */ +#endif /* __DATAPATH_H_ */ #endif /* OVS_USE_NL_INTERFACE */ diff --git a/datapath-windows/ovsext/Debug.c b/datapath-windows/ovsext/Debug.c new file mode 100644 index 000000000..a96d38d40 --- /dev/null +++ b/datapath-windows/ovsext/Debug.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +#include "Debug.h" +#ifdef DBG +#define OVS_DBG_DEFAULT OVS_DBG_INFO +#else +#define OVS_DBG_DEFAULT OVS_DBG_ERROR +#endif + +UINT32 ovsLogFlags = 0xffffffff; +UINT32 ovsLogLevel = OVS_DBG_DEFAULT; + +#define OVS_LOG_BUFFER_SIZE 384 + +/* + * -------------------------------------------------------------------------- + * OvsLog -- + * Utility function to log to the Windows debug console. + * -------------------------------------------------------------------------- + */ +VOID +OvsLog(UINT32 level, + UINT32 flag, + CHAR *funcName, + UINT32 line, + CHAR *format, + ...) 
+{ + va_list args; + CHAR buf[OVS_LOG_BUFFER_SIZE]; + + if (level > ovsLogLevel || (ovsLogFlags & flag) == 0) { + return; + } + + buf[0] = 0; + va_start(args, format); + RtlStringCbVPrintfA(buf, sizeof (buf), format, args); + va_end(args); + + DbgPrintEx(DPFLTR_IHVNETWORK_ID, level, "%s:%lu %s\n", funcName, line, buf); +} diff --git a/datapath-windows/ovsext/Debug.h b/datapath-windows/ovsext/Debug.h new file mode 100644 index 000000000..cc9787a80 --- /dev/null +++ b/datapath-windows/ovsext/Debug.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __DEBUG_H_ +#define __DEBUG_H_ 1 + +#define OVS_DBG_INIT BIT32(0) +#define OVS_DBG_SWITCH BIT32(1) +#define OVS_DBG_VPORT BIT32(2) +#define OVS_DBG_FLOW BIT32(3) +#define OVS_DBG_QOS BIT32(4) +#define OVS_DBG_USER BIT32(5) +#define OVS_DBG_EXECUTE BIT32(6) +#define OVS_DBG_EVENT BIT32(7) +#define OVS_DBG_DISPATCH BIT32(8) +#define OVS_DBG_OID BIT32(9) +#define OVS_DBG_STATUS BIT32(10) +#define OVS_DBG_CHECKSUM BIT32(11) +#define OVS_DBG_VXLAN BIT32(12) +#define OVS_DBG_GRE BIT32(13) +#define OVS_DBG_GRE64 BIT32(14) +#define OVS_DBG_ACTION BIT32(15) +#define OVS_DBG_DATAPATH BIT32(16) +#define OVS_DBG_PROPERTY BIT32(17) +#define OVS_DBG_IPHELPER BIT32(18) +#define OVS_DBG_BUFMGMT BIT32(19) +#define OVS_DBG_OTHERS BIT32(21) +#define OVS_DBG_NETLINK BIT32(22) + +#define OVS_DBG_RESERVED BIT32(31) +//Please add above OVS_DBG_RESERVED. 
+ +#define OVS_DBG_ERROR DPFLTR_ERROR_LEVEL +#define OVS_DBG_WARN DPFLTR_WARNING_LEVEL +#define OVS_DBG_TRACE DPFLTR_TRACE_LEVEL +#define OVS_DBG_INFO DPFLTR_INFO_LEVEL +#define OVS_DBG_LOUD (DPFLTR_INFO_LEVEL + 1) + + + +VOID OvsLog(UINT32 level, UINT32 flag, CHAR *funcName, + UINT32 line, CHAR *format, ...); + + +#define OVS_LOG_LOUD(_format, ...) \ + OvsLog(OVS_DBG_LOUD, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_INFO(_format, ...) \ + OvsLog(OVS_DBG_INFO, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_TRACE(_format, ...) \ + OvsLog(OVS_DBG_TRACE, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_ERROR(_format, ...) \ + OvsLog(OVS_DBG_ERROR, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#define OVS_LOG_WARN(_format, ...) \ + OvsLog(OVS_DBG_WARN, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) + +#if DBG +#define OVS_VERIFY_IRQL(_x) \ + if (KeGetCurrentIrql() != (KIRQL)_x) { \ + OVS_LOG_WARN("expected IRQL %u, actual IRQL: %u", \ + _x, KeGetCurrentIrql()); \ + } + +#define OVS_VERIFY_IRQL_LE(_x) \ + if (KeGetCurrentIrql() > (KIRQL)_x) { \ + OVS_LOG_WARN("expected IRQL <= %u, actual IRQL: %u", \ + _x, KeGetCurrentIrql()); \ + } + +#else +#define OVS_VERIFY_IRQL(_x) +#define OVS_VERIFY_IRQL_LE(_x) +#endif + +#endif /* __DEBUG_H_ */ diff --git a/datapath-windows/ovsext/Driver.c b/datapath-windows/ovsext/Driver.c new file mode 100644 index 000000000..79d2edf4e --- /dev/null +++ b/datapath-windows/ovsext/Driver.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "Switch.h" +#include "Datapath.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DRIVER +#include "Debug.h" + +/* Global handles. XXX: Some of them need not be global. */ +/* + * Maps to DriverObject and FilterDriverContext parameters in the NDIS filter + * driver functions. + * DriverObject is specified by NDIS. + * FilterDriverContext is specified by the filter driver. + */ +NDIS_HANDLE gOvsExtDriverObject; + +/* + * Maps to NdisFilterHandle parameter in the NDIS filter driver functions. + * NdisFilterHandle is returned by NDISFRegisterFilterDriver. + */ +NDIS_HANDLE gOvsExtDriverHandle; + +/* + * Maps to FilterModuleContext parameter in the NDIS filter driver functions. + * FilterModuleContext is a allocated by the driver in the FilterAttach + * function. + */ +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +static PWCHAR ovsExtFriendlyName = L"Open vSwitch Extension"; +static PWCHAR ovsExtServiceName = L"OVSExt"; +NDIS_STRING ovsExtGuidUC; +NDIS_STRING ovsExtFriendlyNameUC; + +static PWCHAR ovsExtGuidStr = L"{583CC151-73EC-4A6A-8B47-578297AD7623}"; +static const GUID ovsExtGuid = { + 0x583cc151, + 0x73ec, + 0x4a6a, + {0x8b, 0x47, 0x57, 0x82, 0x97, 0xad, 0x76, 0x23} +}; + +/* Declarations of callback functions for the filter driver. 
*/ +DRIVER_UNLOAD OvsExtUnload; +FILTER_NET_PNP_EVENT OvsExtNetPnPEvent; +FILTER_STATUS OvsExtStatus; + +FILTER_ATTACH OvsExtAttach; +FILTER_DETACH OvsExtDetach; +FILTER_RESTART OvsExtRestart; +FILTER_PAUSE OvsExtPause; + +FILTER_SEND_NET_BUFFER_LISTS OvsExtSendNBL; +FILTER_SEND_NET_BUFFER_LISTS_COMPLETE OvsExtSendNBLComplete; +FILTER_CANCEL_SEND_NET_BUFFER_LISTS OvsExtCancelSendNBL; +FILTER_RECEIVE_NET_BUFFER_LISTS OvsExtReceiveNBL; +FILTER_RETURN_NET_BUFFER_LISTS OvsExtReturnNBL; + +FILTER_OID_REQUEST OvsExtOidRequest; +FILTER_OID_REQUEST_COMPLETE OvsExtOidRequestComplete; +FILTER_CANCEL_OID_REQUEST OvsExtCancelOidRequest; + + +/* + * -------------------------------------------------------------------------- + * Init/Load function for the OVSEXT filter Driver. + * -------------------------------------------------------------------------- + */ +NTSTATUS +DriverEntry(PDRIVER_OBJECT driverObject, + PUNICODE_STRING registryPath) +{ + NDIS_STATUS status; + NDIS_FILTER_DRIVER_CHARACTERISTICS driverChars; + + UNREFERENCED_PARAMETER(registryPath); + + gOvsExtDriverObject = driverObject; + + RtlZeroMemory(&driverChars, sizeof driverChars); + driverChars.Header.Type = NDIS_OBJECT_TYPE_FILTER_DRIVER_CHARACTERISTICS; + driverChars.Header.Size = sizeof driverChars; + driverChars.Header.Revision = NDIS_FILTER_CHARACTERISTICS_REVISION_2; + driverChars.MajorNdisVersion = NDIS_FILTER_MAJOR_VERSION; + driverChars.MinorNdisVersion = NDIS_FILTER_MINOR_VERSION; + driverChars.MajorDriverVersion = 1; + driverChars.MinorDriverVersion = 0; + driverChars.Flags = 0; + + RtlInitUnicodeString(&driverChars.ServiceName, ovsExtServiceName); + RtlInitUnicodeString(&ovsExtFriendlyNameUC, ovsExtFriendlyName); + RtlInitUnicodeString(&ovsExtGuidUC, ovsExtGuidStr); + + driverChars.FriendlyName = ovsExtFriendlyNameUC; + driverChars.UniqueName = ovsExtGuidUC; + + driverChars.AttachHandler = OvsExtAttach; + driverChars.DetachHandler = OvsExtDetach; + driverChars.RestartHandler = OvsExtRestart; + 
driverChars.PauseHandler = OvsExtPause; + + driverChars.SendNetBufferListsHandler = OvsExtSendNBL; + driverChars.SendNetBufferListsCompleteHandler = OvsExtSendNBLComplete; + driverChars.CancelSendNetBufferListsHandler = OvsExtCancelSendNBL; + driverChars.ReceiveNetBufferListsHandler = NULL; + driverChars.ReturnNetBufferListsHandler = NULL; + + driverChars.OidRequestHandler = OvsExtOidRequest; + driverChars.OidRequestCompleteHandler = OvsExtOidRequestComplete; + driverChars.CancelOidRequestHandler = OvsExtCancelOidRequest; + + driverChars.DevicePnPEventNotifyHandler = NULL; + driverChars.NetPnPEventHandler = OvsExtNetPnPEvent; + driverChars.StatusHandler = NULL; + + driverObject->DriverUnload = OvsExtUnload; + + status = NdisFRegisterFilterDriver(driverObject, + (NDIS_HANDLE) gOvsExtDriverObject, + &driverChars, &gOvsExtDriverHandle); + if (status != NDIS_STATUS_SUCCESS) { + return status; + } + + /* Create the communication channel for usersapce. */ + status = OvsCreateDeviceObject(gOvsExtDriverHandle); + if (status != NDIS_STATUS_SUCCESS) { + NdisFDeregisterFilterDriver(gOvsExtDriverHandle); + gOvsExtDriverHandle = NULL; + } + + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Un-init/Unload function for the OVS intermediate Driver. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtUnload(struct _DRIVER_OBJECT *driverObject) +{ + UNREFERENCED_PARAMETER(driverObject); + + OvsDeleteDeviceObject(); + NdisFDeregisterFilterDriver(gOvsExtDriverHandle); +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterStatus function. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsExtStatus(NDIS_HANDLE filterModuleContext, + PNDIS_STATUS_INDICATION statusIndication) +{ + UNREFERENCED_PARAMETER(statusIndication); + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + + NdisFIndicateStatus(switchObject->NdisFilterHandle, statusIndication); + return; +} diff --git a/datapath-windows/ovsext/Ethernet.h b/datapath-windows/ovsext/Ethernet.h new file mode 100644 index 000000000..22aa27c9d --- /dev/null +++ b/datapath-windows/ovsext/Ethernet.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __ETHERNET_H_ +#define __ETHERNET_H_ 1 + +#define ETH_LADRF_LEN 2 +#define ETH_ADDR_LENGTH 6 + +typedef UINT8 Eth_Address[ETH_ADDR_LENGTH]; + +#define ETH_ADDR_FMT_STR "%02x:%02x:%02x:%02x:%02x:%02x" +#define ETH_ADDR_FMT_ARGS(a) ((UINT8 *)a)[0], ((UINT8 *)a)[1], ((UINT8 *)a)[2], \ + ((UINT8 *)a)[3], ((UINT8 *)a)[4], ((UINT8 *)a)[5] + +#define ETH_MAX_EXACT_MULTICAST_ADDRS 32 + +typedef enum Eth_RxMode { + ETH_FILTER_UNICAST = 0x0001, /* pass unicast (directed) frames */ + ETH_FILTER_MULTICAST = 0x0002, /* pass some multicast frames */ + ETH_FILTER_ALLMULTI = 0x0004, /* pass *all* multicast frames */ + ETH_FILTER_BROADCAST = 0x0008, /* pass broadcast frames */ + ETH_FILTER_PROMISC = 0x0010, /* pass all frames (ie no filter) */ + ETH_FILTER_USE_LADRF = 0x0020, /* use the LADRF for multicast filtering */ + ETH_FILTER_SINK = 0x10000 /* pass not-matched unicast frames */ +} Eth_RxMode; + +/* filter flags printf helpers */ +#define ETH_FILTER_FLAG_FMT_STR "%s%s%s%s%s%s%s" +#define ETH_FILTER_FLAG_FMT_ARGS(f) (f) & ETH_FILTER_UNICAST ? " UNICAST" : "", \ + (f) & ETH_FILTER_MULTICAST ? " MULTICAST" : "", \ + (f) & ETH_FILTER_ALLMULTI ? " ALLMULTI" : "", \ + (f) & ETH_FILTER_BROADCAST ? " BROADCAST" : "", \ + (f) & ETH_FILTER_PROMISC ? " PROMISC" : "", \ + (f) & ETH_FILTER_USE_LADRF ? " USE_LADRF" : "", \ + (f) & ETH_FILTER_SINK ? 
" SINK" : "" + +/* Ethernet header type */ +typedef enum { + ETH_HEADER_TYPE_DIX, + ETH_HEADER_TYPE_802_1PQ, + ETH_HEADER_TYPE_802_3, + ETH_HEADER_TYPE_802_1PQ_802_3, +} Eth_HdrType; + +/* DIX type fields we care about */ +typedef enum { + ETH_TYPE_IPV4 = 0x0800, + ETH_TYPE_IPV6 = 0x86DD, + ETH_TYPE_ARP = 0x0806, + ETH_TYPE_RARP = 0x8035, + ETH_TYPE_LLDP = 0x88CC, + ETH_TYPE_CDP = 0x2000, + ETH_TYPE_802_1PQ = 0x8100, // not really a DIX type, but used as such + ETH_TYPE_LLC = 0xFFFF, // 0xFFFF is IANA reserved, used to mark LLC +} Eth_DixType; + +typedef enum { + ETH_TYPE_IPV4_NBO = 0x0008, + ETH_TYPE_IPV6_NBO = 0xDD86, + ETH_TYPE_ARP_NBO = 0x0608, + ETH_TYPE_RARP_NBO = 0x3580, + ETH_TYPE_LLDP_NBO = 0xCC88, + ETH_TYPE_CDP_NBO = 0x0020, + ETH_TYPE_AKIMBI_NBO = 0xDE88, + ETH_TYPE_802_1PQ_NBO = 0x0081, // not really a DIX type, but used as such +} Eth_DixTypeNBO; + +/* low two bits of the LLC control byte */ +typedef enum { + ETH_LLC_CONTROL_IFRAME = 0x0, // both 0x0 and 0x2, only low bit of 0 needed + ETH_LLC_CONTROL_SFRAME = 0x1, + ETH_LLC_CONTROL_UFRAME = 0x3, +} Eth_LLCControlBits; + +#define ETH_LLC_CONTROL_UFRAME_MASK (0x3) + +typedef struct Eth_DIX { + UINT16 typeNBO; // indicates the higher level protocol +} Eth_DIX; + +/* + * LLC header come in two varieties: 8 bit control and 16 bit control. + * when the lower two bits of the first byte's control are '11', this + * indicated the 8 bit control field. 
+ */ +typedef struct Eth_LLC8 { + UINT8 dsap; + UINT8 ssap; + UINT8 control; +} Eth_LLC8; + +typedef struct Eth_LLC16 { + UINT8 dsap; + UINT8 ssap; + UINT16 control; +} Eth_LLC16; + +typedef struct Eth_SNAP { + UINT8 snapOrg[3]; + Eth_DIX snapType; +} Eth_SNAP; + +typedef struct Eth_802_3 { + UINT16 lenNBO; // length of the frame + Eth_LLC8 llc; // LLC header + Eth_SNAP snap; // SNAP header +} Eth_802_3; + +// 802.1p QOS/priority tags +enum { + ETH_802_1_P_BEST_EFFORT = 0, + ETH_802_1_P_BACKGROUND = 1, + ETH_802_1_P_EXCELLENT_EFFORT = 2, + ETH_802_1_P_CRITICAL_APPS = 3, + ETH_802_1_P_VIDEO = 4, + ETH_802_1_P_VOICE = 5, + ETH_802_1_P_INTERNETWORK_CONROL = 6, + ETH_802_1_P_NETWORK_CONTROL = 7 +}; + +typedef struct Eth_802_1pq_Tag { + UINT16 typeNBO; // always ETH_TYPE_802_1PQ + UINT16 vidHi:4, // 802.1q vlan ID high nibble + canonical:1, // bit order? (should always be 0) + priority:3, // 802.1p priority tag + vidLo:8; // 802.1q vlan ID low byte +} Eth_802_1pq_Tag; + +typedef struct Eth_802_1pq { + Eth_802_1pq_Tag tag; // VLAN/QOS tag + union { + Eth_DIX dix; // DIX header follows + Eth_802_3 e802_3; // or 802.3 header follows + }; +} Eth_802_1pq; + +typedef struct Eth_Header { + Eth_Address dst; // all types of ethernet frame have dst first + Eth_Address src; // and the src next (at least all the ones we'll see) + union { + Eth_DIX dix; // followed by a DIX header... + Eth_802_3 e802_3; // ...or an 802.3 header + Eth_802_1pq e802_1pq; // ...or an 802.1[pq] tag and a header + }; +} Eth_Header; + +#define ETH_BROADCAST_ADDRESS { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } + +static Eth_Address netEthBroadcastAddr = ETH_BROADCAST_ADDRESS; + +/* + * simple predicate for 1536 boundary. 
+ * the parameter is a network ordered UINT16, which is compared to 0x06, + * testing for "length" values greater than or equal to 0x0600 (1536) + */ + +#define ETH_TYPENOT8023(x) (((x) & 0xff) >= 0x06) + +/* + * header length macros + * + * first two are typical: ETH_HEADER_LEN_DIX, ETH_HEADER_LEN_802_1PQ + * last two are suspicious, due to 802.3 incompleteness + */ + +#define ETH_HEADER_LEN_DIX (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_DIX)) +#define ETH_HEADER_LEN_802_1PQ (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(Eth_DIX)) +#define ETH_HEADER_LEN_802_2_LLC (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC8)) +#define ETH_HEADER_LEN_802_2_LLC16 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC16)) +#define ETH_HEADER_LEN_802_3 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_3)) +#define ETH_HEADER_LEN_802_1PQ_LLC (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC8)) +#define ETH_HEADER_LEN_802_1PQ_LLC16 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(UINT16) + \ + sizeof(Eth_LLC16)) +#define ETH_HEADER_LEN_802_1PQ_802_3 (sizeof(Eth_Address) + \ + sizeof(Eth_Address) + \ + sizeof(Eth_802_1pq_Tag) + \ + sizeof(Eth_802_3)) + +#define ETH_MIN_HEADER_LEN (ETH_HEADER_LEN_DIX) +#define ETH_MAX_HEADER_LEN (ETH_HEADER_LEN_802_1PQ_802_3) + +#define ETH_MIN_FRAME_LEN 60 +#define ETH_MAX_STD_MTU 1500 +#define ETH_MAX_STD_FRAMELEN (ETH_MAX_STD_MTU + ETH_MAX_HEADER_LEN) +#define ETH_MAX_JUMBO_MTU 9000 +#define ETH_MAX_JUMBO_FRAMELEN (ETH_MAX_JUMBO_MTU + ETH_MAX_HEADER_LEN) + +#define ETH_DEFAULT_MTU 1500 + +#define ETH_FCS_LEN 4 +#define ETH_VLAN_LEN sizeof(Eth_802_1pq_Tag) + + +/* + *---------------------------------------------------------------------------- + * Do the two ethernet addresses match? 
+ *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsAddrMatch(const Eth_Address addr1, const Eth_Address addr2) +{ + return !memcmp(addr1, addr2, ETH_ADDR_LENGTH); +} + + +/* + *---------------------------------------------------------------------------- + * Is the address the broadcast address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsBroadcastAddr(const Eth_Address addr) +{ + return Eth_IsAddrMatch(addr, netEthBroadcastAddr); +} + + +/* + *---------------------------------------------------------------------------- + * Is the address a unicast address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsUnicastAddr(const Eth_Address addr) +{ + // broadcast and multicast frames always have the low bit set in byte 0 + return !(((CHAR *)addr)[0] & 0x1); +} + +/* + *---------------------------------------------------------------------------- + * Is the address the all-zeros address? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsNullAddr(const Eth_Address addr) +{ + return ((addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]) == 0); +} + +/* + *---------------------------------------------------------------------------- + * + * Eth_HeaderType -- + * return an Eth_HdrType depending on the eth header + * contents. will not work in all cases, especially since it + * requres ETH_HEADER_LEN_802_1PQ bytes to determine the type + * + * HeaderType isn't sufficient to determine the length of + * the eth header. 
for 802.3 header, its not clear without + * examination, whether a SNAP is included + * + * returned type: + * + * ETH_HEADER_TYPE_DIX: typical 14 byte eth header + * ETH_HEADER_TYPE_802_1PQ: DIX+vlan tagging + * ETH_HEADER_TYPE_802_3: 802.3 eth header + * ETH_HEADER_TYPE_802_1PQ_802_3: 802.3 + vlan tag + * + * the test for DIX was moved from a 1500 boundary to a 1536 + * boundary, since the vmxnet2 MTU was updated to 1514. when + * W2K8 attempted to send LLC frames, these were interpreted + * as DIX frames instead of the correct 802.3 type + * + * these links may help if they're valid: + * + * http://standards.ieee.org/regauth/ethertype/type-tut.html + * http://standards.ieee.org/regauth/ethertype/type-pub.html + * + * Results: + * Eth_HdrType value + * + *---------------------------------------------------------------------------- + */ +static __inline Eth_HdrType +Eth_HeaderType(const Eth_Header *eh) +{ + /* + * we use 1536 (IEEE 802.3-std mentions 1536, but iana indicates + * type of 0-0x5dc are 802.3) instead of some #def symbol to prevent + * inadvertant reuse of the same macro for buffer size decls. + */ + if (ETH_TYPENOT8023(eh->dix.typeNBO)) { + if (eh->dix.typeNBO != ETH_TYPE_802_1PQ_NBO) { + /* typical case */ + return ETH_HEADER_TYPE_DIX; + } + + /* some type of 802.1pq tagged frame */ + if (ETH_TYPENOT8023(eh->e802_1pq.dix.typeNBO)) { + /* vlan tagging with dix style type */ + return ETH_HEADER_TYPE_802_1PQ; + } + + /* vlan tagging with 802.3 header */ + return ETH_HEADER_TYPE_802_1PQ_802_3; + } + + /* assume 802.3 */ + return ETH_HEADER_TYPE_802_3; +} + + +/* + *---------------------------------------------------------------------------- + * + * Eth_EncapsulatedPktType -- + * Get the encapsulated (layer 3) frame type. + * for LLC frames without SNAP, we don't have + * an encapsulated type, and return ETH_TYPE_LLC. + * + * IANA reserves 0xFFFF, which we reuse to indicate + * ETH_TYPE_LLC. + * + * Results: + * NBO frame type. 
+ * + *---------------------------------------------------------------------------- + */ +static __inline UINT16 +Eth_EncapsulatedPktType(const Eth_Header *eh) +{ + Eth_HdrType type = Eth_HeaderType(eh); + + switch (type) { + case ETH_HEADER_TYPE_DIX: return eh->dix.typeNBO; + case ETH_HEADER_TYPE_802_1PQ: return eh->e802_1pq.dix.typeNBO; + case ETH_HEADER_TYPE_802_3: + /* + * Documentation describes SNAP headers as having ONLY + * 0x03 as the control fields, not just the lower two bits + * This prevents the use of Eth_IsLLCControlUFormat. + */ + if ((eh->e802_3.llc.dsap == 0xaa) && (eh->e802_3.llc.ssap == 0xaa) && + (eh->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { + return eh->e802_3.snap.snapType.typeNBO; + } else { + // LLC, no snap header, then no type + return ETH_TYPE_LLC; + } + + case ETH_HEADER_TYPE_802_1PQ_802_3: + if ((eh->e802_1pq.e802_3.llc.dsap == 0xaa) && + (eh->e802_1pq.e802_3.llc.ssap == 0xaa) && + (eh->e802_1pq.e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { + return eh->e802_1pq.e802_3.snap.snapType.typeNBO; + } else { + // tagged LLC, no snap header, then no type + return ETH_TYPE_LLC; + } + } + + ASSERT(FALSE); + return 0; +} + +/* + *---------------------------------------------------------------------------- + * Is the frame of the requested protocol type or is it an 802.1[pq] + * encapsulation of such a frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsDixType(const Eth_Header *eh, const Eth_DixTypeNBO type) +{ + return Eth_EncapsulatedPktType(eh) == type; +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an IPV4 frame? 
+ *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsIPV4(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_IPV4_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an IPV6 frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsIPV6(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_IPV6_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Is the frame an ARP frame? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsARP(const Eth_Header *eh) +{ + return Eth_IsDixType(eh, ETH_TYPE_ARP_NBO); +} + + +/* + *---------------------------------------------------------------------------- + * Does the frame contain an 802.1[pq] tag? + *---------------------------------------------------------------------------- + */ +static __inline BOOLEAN +Eth_IsFrameTagged(const Eth_Header *eh) +{ + return (eh->dix.typeNBO == ETH_TYPE_802_1PQ_NBO); +} +#endif /* __ETHERNET_H_ */ diff --git a/datapath-windows/ovsext/Event.c b/datapath-windows/ovsext/Event.c new file mode 100644 index 000000000..fec3485f8 --- /dev/null +++ b/datapath-windows/ovsext/Event.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +#include "Datapath.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_EVENT +#include "Debug.h" + +LIST_ENTRY ovsEventQueue; +UINT32 ovsNumEventQueue; +UINT32 ovsNumPollAll; + +extern PNDIS_SPIN_LOCK gOvsCtrlLock; + +NTSTATUS +OvsInitEventQueue() +{ + InitializeListHead(&ovsEventQueue); + return STATUS_SUCCESS; +} + +VOID +OvsCleanupEventQueue() +{ + ASSERT(IsListEmpty(&ovsEventQueue)); + ASSERT(ovsNumEventQueue == 0); +} + +static __inline VOID +OvsAcquireEventQueueLock() +{ + NdisAcquireSpinLock(gOvsCtrlLock); +} + +static __inline VOID +OvsReleaseEventQueueLock() +{ + NdisReleaseSpinLock(gOvsCtrlLock); +} + +/* + * -------------------------------------------------------------------------- + * Cleanup the event queue of the OpenInstance. + * -------------------------------------------------------------------------- + */ +VOID +OvsCleanupEvent(POVS_OPEN_INSTANCE instance) +{ + POVS_EVENT_QUEUE queue; + PIRP irp = NULL; + queue = (POVS_EVENT_QUEUE)instance->eventQueue; + if (queue) { + POVS_EVENT_QUEUE_ELEM elem; + PLIST_ENTRY link, next; + + OvsAcquireEventQueueLock(); + RemoveEntryList(&queue->queueLink); + ovsNumEventQueue--; + if (queue->pendingIrp) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + queue->pendingIrp = NULL; + if (cancelRoutine == NULL) { + irp = NULL; + } + } + instance->eventQueue = NULL; + OvsReleaseEventQueueLock(); + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } + + LIST_FORALL_SAFE(&queue->elemList, link, next) { + elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + OvsFreeMemory(queue); + } +} + +/* + * -------------------------------------------------------------------------- + * When event is 
generated, we need to post the event to all + * the event queues. If there is pending Irp waiting for event + * complete the Irp to wakeup the user thread. + * + * Side effects: User thread may be woken up. + * -------------------------------------------------------------------------- + */ +VOID +OvsPostEvent(UINT32 portNo, + UINT32 status) +{ + POVS_EVENT_QUEUE_ELEM elem; + POVS_EVENT_QUEUE queue; + PLIST_ENTRY link; + BOOLEAN triggerPollAll = FALSE; + LIST_ENTRY list; + PLIST_ENTRY entry; + PIRP irp; + + InitializeListHead(&list); + + OVS_LOG_TRACE("Enter: portNo: %#x, status: %#x", portNo, status); + + OvsAcquireEventQueueLock(); + + LIST_FORALL(&ovsEventQueue, link) { + queue = CONTAINING_RECORD(link, OVS_EVENT_QUEUE, queueLink); + if ((status & queue->mask) == 0 || + queue->pollAll) { + continue; + } + if (queue->numElems > (OVS_MAX_VPORT_ARRAY_SIZE >> 1) || + portNo == OVS_DEFAULT_PORT_NO) { + queue->pollAll = TRUE; + } else { + elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemory(sizeof(*elem)); + if (elem == NULL) { + queue->pollAll = TRUE; + } else { + elem->portNo = portNo; + elem->status = (status & queue->mask); + InsertTailList(&queue->elemList, &elem->link); + queue->numElems++; + OVS_LOG_INFO("Queue: %p, numElems: %d", + queue, queue->numElems); + } + } + if (queue->pollAll) { + PLIST_ENTRY curr, next; + triggerPollAll = TRUE; + ovsNumPollAll++; + LIST_FORALL_SAFE(&queue->elemList, curr, next) { + RemoveEntryList(curr); + elem = CONTAINING_RECORD(curr, OVS_EVENT_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + queue->numElems = 0; + } + if (queue->pendingIrp != NULL) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine) { + InsertTailList(&list, &irp->Tail.Overlay.ListEntry); + } + } + } + OvsReleaseEventQueueLock(); + while (!IsListEmpty(&list)) { + entry = RemoveHeadList(&list); + irp = CONTAINING_RECORD(entry, IRP, Tail.Overlay.ListEntry); + 
OVS_LOG_INFO("Wakeup thread with IRP: %p", irp);
+        OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+    }
+    OVS_LOG_TRACE("Exit: triggered pollAll: %s",
+                  (triggerPollAll ? "TRUE" : "FALSE"));
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Subscribe for, or unsubscribe from, event notification.
+ *
+ * 'inputBuffer' is interpreted as an OVS_EVENT_SUBSCRIBE request. When
+ * request->subscribe is set, an event queue is created for the open instance
+ * that matches 'fileObject' and request->dpNo. Otherwise the existing queue
+ * (if any) is unlinked, its pending wait IRP is completed with
+ * STATUS_SUCCESS, and the queue and its elements are freed.
+ *
+ * Results:
+ *     STATUS_SUCCESS for valid request and enough resource. Subscribing
+ *         again with the same mask, or unsubscribing when nothing is
+ *         subscribed, is a successful no-op.
+ *     STATUS_NO_MEMORY for queue allocation failure
+ *     STATUS_INVALID_PARAMETER for invalid request: input buffer too short,
+ *         no bit of OVS_EVENT_MASK_ALL set in the mask, no matching open
+ *         instance, or an attempt to change the mask of an existing
+ *         subscription.
+ *
+ * Side effects:
+ *     Event queue is created for the current open instance.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsSubscribeEventIoctl(PFILE_OBJECT fileObject,
+                       PVOID inputBuffer,
+                       UINT32 inputLength)
+{
+    POVS_EVENT_SUBSCRIBE request = (POVS_EVENT_SUBSCRIBE)inputBuffer;
+    NTSTATUS status = STATUS_SUCCESS;
+    POVS_OPEN_INSTANCE instance;
+    POVS_EVENT_QUEUE queue = NULL;
+
+    OVS_LOG_TRACE("Enter: fileObject: %p, inputLength: %d", fileObject,
+                  inputLength);
+
+    /* The length check must come first: 'request' is only dereferenced
+     * once the buffer is known to hold a whole OVS_EVENT_SUBSCRIBE. */
+    if (inputLength < sizeof (OVS_EVENT_SUBSCRIBE) ||
+        (request->mask & OVS_EVENT_MASK_ALL) == 0) {
+        OVS_LOG_TRACE("Exit: subscribe failed with invalid request.");
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    OvsAcquireEventQueueLock();
+
+    instance = OvsGetOpenInstance(fileObject, request->dpNo);
+
+    if (instance == NULL) {
+        status = STATUS_INVALID_PARAMETER;
+        OVS_LOG_WARN("can not find open instance");
+        goto done_event_subscribe;
+    }
+
+    /*
+     * XXX for now, we don't allow change mask.
+     */
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (request->subscribe && queue) {
+        /* Already subscribed. 'status' is still STATUS_SUCCESS from its
+         * initializer unless the caller tries to change the mask; do not
+         * overwrite the error before leaving. */
+        if (queue->mask != request->mask) {
+            status = STATUS_INVALID_PARAMETER;
+            OVS_LOG_WARN("Can not chnage mask when the queue is subscribed");
+        }
+        goto done_event_subscribe;
+    } else if (!request->subscribe && queue == NULL) {
+        /* Nothing to unsubscribe from. */
+        status = STATUS_SUCCESS;
+        goto done_event_subscribe;
+    }
+
+    if (request->subscribe) {
+        queue = (POVS_EVENT_QUEUE)OvsAllocateMemory(sizeof (OVS_EVENT_QUEUE));
+        if (queue == NULL) {
+            status = STATUS_NO_MEMORY;
+            OVS_LOG_WARN("Fail to allocate event queue");
+            goto done_event_subscribe;
+        }
+        InitializeListHead(&queue->elemList);
+        queue->mask = request->mask;
+        queue->pendingIrp = NULL;
+        queue->numElems = 0;
+        queue->pollAll = TRUE; /* always poll all in the beginning */
+        InsertHeadList(&ovsEventQueue, &queue->queueLink);
+        ovsNumEventQueue++;
+        instance->eventQueue = queue;
+        queue->instance = instance;
+    } else {
+        /* Unlink the queue while the lock is held; it is torn down below. */
+        queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+        RemoveEntryList(&queue->queueLink);
+        ovsNumEventQueue--;
+        instance->eventQueue = NULL;
+    }
+done_event_subscribe:
+    if (!request->subscribe && queue) {
+        POVS_EVENT_QUEUE_ELEM elem;
+        PLIST_ENTRY link, next;
+        PIRP irp = NULL;
+        if (queue->pendingIrp) {
+            PDRIVER_CANCEL cancelRoutine;
+            irp = queue->pendingIrp;
+            queue->pendingIrp = NULL;
+            cancelRoutine = IoSetCancelRoutine(irp, NULL);
+            if (cancelRoutine == NULL) {
+                /* The cancel routine was already cleared by someone else,
+                 * so this path must not complete the IRP. */
+                irp = NULL;
+            }
+        }
+        OvsReleaseEventQueueLock();
+        if (irp) {
+            /* Complete the saved IRP: queue->pendingIrp was reset to NULL
+             * above and must not be used here. */
+            OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
+        }
+        LIST_FORALL_SAFE(&queue->elemList, link, next) {
+            elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
+            OvsFreeMemory(elem);
+        }
+        OvsFreeMemory(queue);
+    } else {
+        OvsReleaseEventQueueLock();
+    }
+    OVS_LOG_TRACE("Exit: subscribe event with status: %#x.", status);
+    return status;
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Poll event queued in the
event queue. always synchronous.
+ *
+ * 'inputBuffer' is interpreted as an OVS_EVENT_POLL request and
+ * 'outputBuffer' is filled in as an OVS_EVENT_STATUS followed by as many
+ * OVS_EVENT_ENTRY records as fit in 'outputLength'. When the queue is in
+ * poll-all state a single entry carrying OVS_DEFAULT_PORT_NO and
+ * OVS_DEFAULT_EVENT_STATUS is returned and the state is cleared.
+ * '*replyLen' receives the number of bytes written, the minimum required
+ * size when the output buffer is too small, or 0 when the request cannot be
+ * matched to a subscribed open instance.
+ *
+ * Results:
+ *     STATUS_SUCCESS for valid request
+ *     STATUS_BUFFER_TOO_SMALL if outputBuffer is too small.
+ *     STATUS_INVALID_PARAMETER for invalid request, including an open
+ *         instance that has not subscribed for events.
+ *
+ * Side effects:
+ *     Event will be removed from event queue.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsPollEventIoctl(PFILE_OBJECT fileObject,
+                  PVOID inputBuffer,
+                  UINT32 inputLength,
+                  PVOID outputBuffer,
+                  UINT32 outputLength,
+                  UINT32 *replyLen)
+{
+    POVS_EVENT_POLL poll;
+    POVS_EVENT_STATUS eventStatus;
+    POVS_EVENT_ENTRY entry;
+    POVS_EVENT_QUEUE queue;
+    POVS_EVENT_QUEUE_ELEM elem;
+    POVS_OPEN_INSTANCE instance;
+    UINT32 numEntry, i;
+
+    OVS_LOG_TRACE("Enter: inputLength:%d, outputLength: %d",
+                  inputLength, outputLength);
+
+    ASSERT(replyLen);
+    if (inputLength < sizeof (OVS_EVENT_POLL)) {
+        OVS_LOG_TRACE("Exit: input buffer too small");
+        return STATUS_INVALID_PARAMETER;
+    }
+    /* The reply must be able to hold the status header plus one entry. */
+    *replyLen = sizeof (OVS_EVENT_STATUS) + sizeof (OVS_EVENT_ENTRY);
+    if (outputLength < *replyLen) {
+        OVS_LOG_TRACE("Exit: output buffer too small");
+        return STATUS_BUFFER_TOO_SMALL;
+    }
+    poll = (POVS_EVENT_POLL)inputBuffer;
+
+    OvsAcquireEventQueueLock();
+    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
+    if (instance == NULL) {
+        OvsReleaseEventQueueLock();
+        *replyLen = 0;
+        OVS_LOG_TRACE("Exit: can not find Open instance");
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    eventStatus = (POVS_EVENT_STATUS)outputBuffer;
+    numEntry =
+        (outputLength - sizeof (OVS_EVENT_STATUS)) / sizeof (OVS_EVENT_ENTRY);
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (queue == NULL) {
+        /* The instance never subscribed (or already unsubscribed), so
+         * there is no queue to poll; do not dereference NULL. */
+        OvsReleaseEventQueueLock();
+        *replyLen = 0;
+        OVS_LOG_TRACE("Exit: event queue does not exist");
+        return STATUS_INVALID_PARAMETER;
+    }
+    if (queue->pollAll) {
+        eventStatus->numberEntries = 1;
+        numEntry = 1;
+        entry = &eventStatus->eventEntries[0];
+        entry->portNo = OVS_DEFAULT_PORT_NO;
+        entry->status = OVS_DEFAULT_EVENT_STATUS;
+        queue->pollAll = FALSE;
+        goto event_poll_done;
+    }
+    /* Hand out no more entries than are queued or than fit in the buffer. */
+    numEntry = MIN(numEntry, queue->numElems);
+    eventStatus->numberEntries = numEntry;
+
+    for (i = 0; i < numEntry; i++) {
+        /* Recover the element from its list link explicitly instead of
+         * relying on 'link' being the first member of the structure. */
+        elem = CONTAINING_RECORD(RemoveHeadList(&queue->elemList),
+                                 OVS_EVENT_QUEUE_ELEM, link);
+        entry = &eventStatus->eventEntries[i];
+        entry->portNo = elem->portNo;
+        entry->status = elem->status;
+        OvsFreeMemory(elem);
+        queue->numElems--;
+    }
+event_poll_done:
+    OvsReleaseEventQueueLock();
+    *replyLen = sizeof (OVS_EVENT_STATUS) +
+                numEntry * sizeof (OVS_EVENT_ENTRY);
+    OVS_LOG_TRACE("Exit: numEventPolled: %d", numEntry);
+    return STATUS_SUCCESS;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * Cancel wait IRP for event
+ *
+ * Please note, when this routine is called, it is always guaranteed that
+ * IRP is valid.
+ *
+ * The cancel spin lock is released on entry using irp->CancelIrql. If the
+ * IRP is still recorded as the pending IRP of the event queue attached to
+ * its file object, that reference is cleared under the event queue lock
+ * before the IRP is completed with STATUS_CANCELLED.
+ *
+ * Side effects: Pending IRP is completed.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsCancelIrp(PDEVICE_OBJECT deviceObject,
+             PIRP irp)
+{
+    PIO_STACK_LOCATION irpSp;
+    PFILE_OBJECT fileObject;
+    POVS_EVENT_QUEUE queue;
+    POVS_OPEN_INSTANCE instance;
+
+    UNREFERENCED_PARAMETER(deviceObject);
+
+    IoReleaseCancelSpinLock(irp->CancelIrql);
+
+    irpSp = IoGetCurrentIrpStackLocation(irp);
+    fileObject = irpSp->FileObject;
+
+    if (fileObject == NULL) {
+        goto done;
+    }
+    OvsAcquireEventQueueLock();
+    instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
+    if (instance == NULL || instance->eventQueue == NULL) {
+        OvsReleaseEventQueueLock();
+        goto done;
+    }
+    queue = instance->eventQueue;
+    /* Only forget the IRP if the queue still points at this one. */
+    if (queue->pendingIrp == irp) {
+        queue->pendingIrp = NULL;
+    }
+    OvsReleaseEventQueueLock();
+done:
+    OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ * Wait for event.
+ *
+ * Results:
+ *     STATUS_SUCCESS for valid request
+ *     STATUS_DEVICE_BUSY if already in waiting state.
+ *     STATUS_INVALID_PARAMETER for invalid request
+ *     STATUS_PENDING  wait for event
+ *
+ * Side effects:
+ *     May return pending to IO manager.
+ * --------------------------------------------------------------------------
+ */
+NTSTATUS
+OvsWaitEventIoctl(PIRP irp,
+                  PFILE_OBJECT fileObject,
+                  PVOID inputBuffer,
+                  UINT32 inputLength)
+{
+    /*
+     * 'inputBuffer' is interpreted as an OVS_EVENT_POLL request. The IRP is
+     * parked on the event queue of the matching open instance only when the
+     * queue has nothing to report (no queued elements and no poll-all
+     * state); otherwise STATUS_SUCCESS is returned right away.
+     */
+    NTSTATUS status;
+    POVS_EVENT_POLL poll;
+    POVS_EVENT_QUEUE queue;
+    POVS_OPEN_INSTANCE instance;
+    BOOLEAN cancelled = FALSE;
+    OVS_LOG_TRACE("Enter: inputLength: %u", inputLength);
+
+    if (inputLength < sizeof (OVS_EVENT_POLL)) {
+        OVS_LOG_TRACE("Exit: Invalid input buffer length.");
+        return STATUS_INVALID_PARAMETER;
+    }
+    poll = (POVS_EVENT_POLL)inputBuffer;
+
+    OvsAcquireEventQueueLock();
+
+    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
+    if (instance == NULL) {
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Can not find open instance, dpNo: %d", poll->dpNo);
+        return STATUS_INVALID_PARAMETER;
+    }
+
+    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
+    if (queue == NULL) {
+        /* Waiting without a prior subscription: there is no queue to park
+         * the IRP on, so reject the request instead of dereferencing NULL. */
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Event queue does not exist");
+        return STATUS_INVALID_PARAMETER;
+    }
+    if (queue->pendingIrp) {
+        OvsReleaseEventQueueLock();
+        OVS_LOG_TRACE("Exit: Event queue already in pending state");
+        return STATUS_DEVICE_BUSY;
+    }
+
+    status = (queue->numElems != 0 || queue->pollAll) ?
+                        STATUS_SUCCESS : STATUS_PENDING;
+    if (status == STATUS_PENDING) {
+        PDRIVER_CANCEL cancelRoutine;
+        IoMarkIrpPending(irp);
+        IoSetCancelRoutine(irp, OvsCancelIrp);
+        if (irp->Cancel) {
+            /* The IRP was cancelled before it could be queued. If the
+             * cancel routine can still be taken back, this function
+             * completes the IRP itself once the lock is dropped. */
+            cancelRoutine = IoSetCancelRoutine(irp, NULL);
+            if (cancelRoutine) {
+                cancelled = TRUE;
+            }
+        } else {
+            queue->pendingIrp = irp;
+        }
+    }
+    OvsReleaseEventQueueLock();
+    if (cancelled) {
+        OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
+        OVS_LOG_INFO("Event IRP cancelled: %p", irp);
+    }
+    OVS_LOG_TRACE("Exit: return status: %#x", status);
+    return status;
+}
diff --git a/datapath-windows/ovsext/Event.h b/datapath-windows/ovsext/Event.h
new file mode 100644
index 000000000..f4801b981
--- /dev/null
+++ b/datapath-windows/ovsext/Event.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __EVENT_H_ +#define __EVENT_H_ 1 + +typedef struct _OVS_EVENT_QUEUE_ELEM { + LIST_ENTRY link; + UINT32 portNo; + UINT32 status; +} OVS_EVENT_QUEUE_ELEM, *POVS_EVENT_QUEUE_ELEM; + +typedef struct _OVS_EVENT_QUEUE { + LIST_ENTRY queueLink; + LIST_ENTRY elemList; + UINT32 mask; + UINT16 numElems; + BOOLEAN pollAll; + PIRP pendingIrp; + PVOID instance; +} OVS_EVENT_QUEUE, *POVS_EVENT_QUEUE; + +NTSTATUS OvsInitEventQueue(VOID); +VOID OvsCleanupEventQueue(VOID); + +struct _OVS_OPEN_INSTANCE; + +VOID OvsCleanupEvent(struct _OVS_OPEN_INSTANCE *instance); +VOID OvsPostEvent(UINT32 portNo, UINT32 status); +NTSTATUS OvsSubscribeEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, + UINT32 inputLength); +NTSTATUS OvsPollEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, + UINT32 inputLength, PVOID outputBuffer, + UINT32 outputLength, UINT32 *replyLen); +NTSTATUS OvsWaitEventIoctl(PIRP irp, PFILE_OBJECT fileObject, + PVOID inputBuffer, UINT32 inputLength); +#endif /* __EVENT_H_ */ diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c new file mode 100644 index 000000000..dae1dca85 --- /dev/null +++ b/datapath-windows/ovsext/Flow.c @@ -0,0 +1,978 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#include "NetProto.h" +#include "Util.h" +#include "Jhash.h" +#include "Flow.h" +#include "PacketParser.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_FLOW +#include "Debug.h" + +#pragma warning( push ) +#pragma warning( disable:4127 ) + +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; +extern UINT64 ovsTimeIncrementPerTick; + +static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags, + UINT32 getActionsLen, OvsFlowInfo *info); +static NTSTATUS HandleFlowPut(OvsFlowPut *put, + OVS_DATAPATH *datapath, + struct OvsFlowStats *stats); +static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put, + UINT64 hash); +static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow); +static VOID DeleteAllFlows(OVS_DATAPATH *datapath); +static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow); +static VOID FreeFlow(OvsFlow *flow); +static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB); + +#define OVS_FLOW_TABLE_SIZE 2048 +#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1) +#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK) + +/* + *---------------------------------------------------------------------------- + * OvsDeleteFlowTable -- + * Results: + * NDIS_STATUS_SUCCESS always. 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDeleteFlowTable(OVS_DATAPATH *datapath) +{ + if (datapath == NULL || datapath->flowTable == NULL) { + return NDIS_STATUS_SUCCESS; + } + + DeleteAllFlows(datapath); + OvsFreeMemory(datapath->flowTable); + datapath->flowTable = NULL; + NdisFreeRWLock(datapath->lock); + + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsAllocateFlowTable -- + * Results: + * NDIS_STATUS_SUCCESS on success. + * NDIS_STATUS_RESOURCES if memory couldn't be allocated + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsAllocateFlowTable(OVS_DATAPATH *datapath, + POVS_SWITCH_CONTEXT switchContext) +{ + PLIST_ENTRY bucket; + int i; + + datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE * + sizeof (LIST_ENTRY)); + if (!datapath->flowTable) { + return NDIS_STATUS_RESOURCES; + } + for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { + bucket = &(datapath->flowTable[i]); + InitializeListHead(bucket); + } + datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle); + + return NDIS_STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * GetStartAddrNBL -- + * Get the virtual address of the frame. + * + * Results: + * Virtual address of the frame. + *---------------------------------------------------------------------------- + */ +static __inline VOID * +GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) +{ + PMDL curMdl; + PUINT8 curBuffer; + PEthHdr curHeader; + + ASSERT(_pNB); + + // Ethernet Header is a guaranteed safe access. 
+ curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; + curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!curBuffer) { + return NULL; + } + + curHeader = (PEthHdr) + (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); + + return (VOID *) curHeader; +} + +VOID +OvsFlowUsed(OvsFlow *flow, + const NET_BUFFER_LIST *packet, + const POVS_PACKET_HDR_INFO layers) +{ + LARGE_INTEGER tickCount; + + KeQueryTickCount(&tickCount); + flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; + flow->packetCount++; + flow->byteCount += OvsPacketLenNBL(packet); + flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); +} + + +VOID +DeleteAllFlows(OVS_DATAPATH *datapath) +{ + INT i; + PLIST_ENTRY bucket; + + for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { + PLIST_ENTRY next; + bucket = &(datapath->flowTable[i]); + while (!IsListEmpty(bucket)) { + OvsFlow *flow; + next = bucket->Flink; + flow = CONTAINING_RECORD(next, OvsFlow, ListEntry); + RemoveFlow(datapath, &flow); + } + } +} + +/* + *---------------------------------------------------------------------------- + * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and + * 'ofp_in_port'. + * + * Initializes 'packet' header pointers as follows: + * + * - packet->l2 to the start of the Ethernet header. + * + * - packet->l3 to just past the Ethernet header, or just past the + * vlan_header if one is present, to the first byte of the payload of the + * Ethernet frame. + * + * - packet->l4 to just past the IPv4 header, if one is present and has a + * correct length, and otherwise NULL. + * + * - packet->l7 to just past the TCP or UDP or ICMP header, if one is + * present and has a correct length, and otherwise NULL. + * + * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data cannot be accessed + * (e.g. if Pkt_CopyBytesOut() returns an error). 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsExtractFlow(const NET_BUFFER_LIST *packet, + UINT32 inPort, + OvsFlowKey *flow, + POVS_PACKET_HDR_INFO layers, + OvsIPv4TunnelKey *tunKey) +{ + struct Eth_Header *eth; + UINT8 offset = 0; + PVOID vlanTagValue; + + layers->value = 0; + + if (tunKey) { + ASSERT(tunKey->dst != 0); + RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey); + flow->l2.offset = 0; + } else { + flow->tunKey.dst = 0; + flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE; + } + + flow->l2.inPort = inPort; + + if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; + return NDIS_STATUS_SUCCESS; + } + + /* Link layer. */ + eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); + memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); + memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); + + /* + * vlan_tci. + */ + vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); + if (vlanTagValue) { + PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = + (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; + flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI | + (vlanTag->TagHeader.UserPriority << 13)); + } else { + if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { + Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)ð->dix.typeNBO; + flow->l2.vlanTci = ((UINT16)tag->priority << 13) | + OVSWIN_VLAN_CFI | + ((UINT16)tag->vidHi << 8) | tag->vidLo; + offset = sizeof (Eth_802_1pq_Tag); + } else { + flow->l2.vlanTci = 0; + } + /* + * XXX + * Please note after this point, src mac and dst mac should + * not be accessed through eth + */ + eth = (Eth_Header *)((UINT8 *)eth + offset); + } + + /* + * dl_type. + * + * XXX assume that at least the first + * 12 bytes of received packets are mapped. This code has the stronger + * assumption that at least the first 22 bytes of 'packet' is mapped (if my + * arithmetic is right). 
+ */ + if (ETH_TYPENOT8023(eth->dix.typeNBO)) { + flow->l2.dlType = eth->dix.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && + eth->e802_3.llc.dsap == 0xaa && + eth->e802_3.llc.ssap == 0xaa && + eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && + eth->e802_3.snap.snapOrg[0] == 0x00 && + eth->e802_3.snap.snapOrg[1] == 0x00 && + eth->e802_3.snap.snapOrg[2] == 0x00) { + flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO; + layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; + } else { + flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE); + layers->l3Offset = ETH_HEADER_LEN_DIX + offset; + } + + flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset; + /* Network layer. */ + if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { + struct IPHdr ip_storage; + const struct IPHdr *nh; + IpKey *ipKey = &flow->ipKey; + + flow->l2.keyLen += OVS_IP_KEY_SIZE; + layers->isIPv4 = 1; + nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); + if (nh) { + layers->l4Offset = layers->l3Offset + nh->ihl * 4; + + ipKey->nwSrc = nh->saddr; + ipKey->nwDst = nh->daddr; + ipKey->nwProto = nh->protocol; + + ipKey->nwTos = nh->tos; + if (nh->frag_off & htons(IP_MF | IP_OFFSET)) { + ipKey->nwFrag = OVSWIN_NW_FRAG_ANY; + if (nh->frag_off & htons(IP_OFFSET)) { + ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER; + } + } else { + ipKey->nwFrag = 0; + } + + ipKey->nwTtl = nh->ttl; + ipKey->l4.tpSrc = 0; + ipKey->l4.tpDst = 0; + + if (!(nh->frag_off & htons(IP_OFFSET))) { + if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, &ipKey->l4, layers); + } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, &ipKey->l4, layers); + } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { + ICMPHdr icmpStorage; + const ICMPHdr *icmp; + + icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); + if (icmp) { + ipKey->l4.tpSrc = htons(icmp->type); + ipKey->l4.tpDst = htons(icmp->code); + layers->l7Offset = 
layers->l4Offset + sizeof *icmp; + } + } + } + } else { + ((UINT64 *)ipKey)[0] = 0; + ((UINT64 *)ipKey)[1] = 0; + } + } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { + NDIS_STATUS status; + flow->l2.keyLen += OVS_IPV6_KEY_SIZE; + status = OvsParseIPv6(packet, flow, layers); + if (status != NDIS_STATUS_SUCCESS) { + memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); + return status; + } + layers->isIPv6 = 1; + flow->ipv6Key.l4.tpSrc = 0; + flow->ipv6Key.l4.tpDst = 0; + flow->ipv6Key.pad = 0; + + if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { + OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); + } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { + OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); + } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { + OvsParseIcmpV6(packet, flow, layers); + flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); + } + } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { + EtherArp arpStorage; + const EtherArp *arp; + ArpKey *arpKey = &flow->arpKey; + ((UINT64 *)arpKey)[0] = 0; + ((UINT64 *)arpKey)[1] = 0; + ((UINT64 *)arpKey)[2] = 0; + flow->l2.keyLen += OVS_ARP_KEY_SIZE; + arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); + if (arp && arp->ea_hdr.ar_hrd == htons(1) && + arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && + arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && + arp->ea_hdr.ar_pln == 4) { + /* We only match on the lower 8 bits of the opcode. 
*/ + if (ntohs(arp->ea_hdr.ar_op) <= 0xff) { + arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op); + } + if (arpKey->nwProto == ARPOP_REQUEST + || arpKey->nwProto == ARPOP_REPLY) { + memcpy(&arpKey->nwSrc, arp->arp_spa, 4); + memcpy(&arpKey->nwDst, arp->arp_tpa, 4); + memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); + memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); + } + } + } + + return NDIS_STATUS_SUCCESS; +} + +__inline BOOLEAN +FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size) +{ + UINT32 i; + ASSERT((size & 0x7) == 0); + ASSERT(((UINT64)src & 0x7) == 0); + ASSERT(((UINT64)dst & 0x7) == 0); + for (i = 0; i < (size >> 3); i++) { + if (src[i] != dst[i]) { + return FALSE; + } + } + return TRUE; +} + + +/* + * ---------------------------------------------------------------------------- + * AddFlow -- + * Add a flow to flow table. + * + * Results: + * NDIS_STATUS_SUCCESS if no same flow in the flow table. + * ---------------------------------------------------------------------------- + */ +NTSTATUS +AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow) +{ + PLIST_ENTRY head; + + if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) { + return STATUS_INVALID_HANDLE; + } + + head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]); + /* + * We need fence here to make sure flow's nextPtr is updated before + * head->nextPtr is updated. 
+ */ + KeMemoryBarrier(); + + //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql); + InsertTailList(head, &flow->ListEntry); + //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql); + + datapath->nFlows++; + + return STATUS_SUCCESS; +} + + +/* ---------------------------------------------------------------------------- + * RemoveFlow -- + * Remove a flow from flow table, and added to wait list + * ---------------------------------------------------------------------------- + */ +VOID +RemoveFlow(OVS_DATAPATH *datapath, + OvsFlow **flow) +{ + OvsFlow *f = *flow; + *flow = NULL; + UNREFERENCED_PARAMETER(datapath); + + ASSERT(datapath->nFlows); + datapath->nFlows--; + // Remove the flow from queue + RemoveEntryList(&f->ListEntry); + FreeFlow(f); +} + + +/* + * ---------------------------------------------------------------------------- + * OvsLookupFlow -- + * + * Find flow from flow table based on flow key. + * Caller should either hold portset handle or should + * have a flowRef in datapath or Acquired datapath. + * + * Results: + * Flow pointer if lookup successful. + * NULL if not exists. 
+ * ---------------------------------------------------------------------------- + */ +OvsFlow * +OvsLookupFlow(OVS_DATAPATH *datapath, + const OvsFlowKey *key, + UINT64 *hash, + BOOLEAN hashValid) +{ + PLIST_ENTRY link, head; + UINT16 offset = key->l2.offset; + UINT16 size = key->l2.keyLen; + UINT8 *start; + + ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); + ASSERT(!key->tunKey.dst || offset == 0); + + start = (UINT8 *)key + offset; + + if (!hashValid) { + *hash = OvsJhashBytes(start, size, 0); + } + + head = &datapath->flowTable[HASH_BUCKET(*hash)]; + link = head->Flink; + while (link != head) { + OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry); + + if (flow->hash == *hash && + flow->key.l2.val == key->l2.val && + FlowEqual((UINT64 *)((uint8 *)&flow->key + offset), + (UINT64 *)start, size)) { + return flow; + } + link = link->Flink; + } + return NULL; +} + + +/* + * ---------------------------------------------------------------------------- + * OvsHashFlow -- + * Calculate the hash for the given flow key. + * ---------------------------------------------------------------------------- + */ +UINT64 +OvsHashFlow(const OvsFlowKey *key) +{ + UINT16 offset = key->l2.offset; + UINT16 size = key->l2.keyLen; + UINT8 *start; + + ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); + ASSERT(!key->tunKey.dst || offset == 0); + start = (UINT8 *)key + offset; + return OvsJhashBytes(start, size, 0); +} + + +/* + * ---------------------------------------------------------------------------- + * FreeFlow -- + * Free a flow and its actions. 
+ * ---------------------------------------------------------------------------- + */ +VOID +FreeFlow(OvsFlow *flow) +{ + ASSERT(flow); + OvsFreeMemory(flow); +} + +NTSTATUS +OvsDoDumpFlows(OvsFlowDumpInput *dumpInput, + OvsFlowDumpOutput *dumpOutput, + UINT32 *replyLen) +{ + UINT32 dpNo; + OVS_DATAPATH *datapath = NULL; + OvsFlow *flow; + PLIST_ENTRY node, head; + UINT32 column = 0; + UINT32 rowIndex, columnIndex; + LOCK_STATE_EX dpLockState; + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN findNextNonEmpty = FALSE; + + dpNo = dumpInput->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + rowIndex = dumpInput->position[0]; + if (rowIndex >= OVS_FLOW_TABLE_SIZE) { + dumpOutput->n = 0; + *replyLen = sizeof(*dumpOutput); + goto unlock; + } + + columnIndex = dumpInput->position[1]; + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); + + head = &datapath->flowTable[rowIndex]; + node = head->Flink; + + while (column < columnIndex) { + if (node == head) { + break; + } + node = node->Flink; + column++; + } + + if (node == head) { + findNextNonEmpty = TRUE; + columnIndex = 0; + } + + if (findNextNonEmpty) { + while (head == node) { + if (++rowIndex >= OVS_FLOW_TABLE_SIZE) { + dumpOutput->n = 0; + goto dp_unlock; + } + head = &datapath->flowTable[rowIndex]; + node = head->Flink; + } + } + + ASSERT(node != head); + ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE); + + flow = CONTAINING_RECORD(node, OvsFlow, ListEntry); + status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen, + &dumpOutput->flow); + + if (status == STATUS_BUFFER_TOO_SMALL) { + dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen; + *replyLen = sizeof(*dumpOutput); + } else { + dumpOutput->n = 1; //one flow reported. 
+ *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen; + } + + dumpOutput->position[0] = rowIndex; + dumpOutput->position[1] = ++columnIndex; + +dp_unlock: + OvsReleaseDatapath(datapath, &dpLockState); + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +NTSTATUS +OvsDumpFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer; + OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer; + + if (inputBuffer == NULL || outputBuffer == NULL) { + return STATUS_INVALID_PARAMETER; + } + + if ((inputLength != sizeof(OvsFlowDumpInput)) + || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen); +} + +static NTSTATUS +ReportFlowInfo(OvsFlow *flow, + UINT32 getFlags, + UINT32 getActionsLen, + OvsFlowInfo *info) +{ + NTSTATUS status = STATUS_SUCCESS; + + if (getFlags & FLOW_GET_KEY) { + // always copy the tunnel key part + RtlCopyMemory(&info->key, &flow->key, + flow->key.l2.keyLen + flow->key.l2.offset); + } + + if (getFlags & FLOW_GET_STATS) { + OvsFlowStats *stats = &info->stats; + stats->packetCount = flow->packetCount; + stats->byteCount = flow->byteCount; + stats->used = (UINT32)flow->used; + stats->tcpFlags = flow->tcpFlags; + } + + if (getFlags & FLOW_GET_ACTIONS) { + if (flow->actionsLen == 0) { + info->actionsLen = 0; + } else if (flow->actionsLen > getActionsLen) { + info->actionsLen = 0; + status = STATUS_BUFFER_TOO_SMALL; + } else { + RtlCopyMemory(info->actions, flow->actions, flow->actionsLen); + info->actionsLen = flow->actionsLen; + } + } + + return status; +} + +NTSTATUS +OvsPutFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + struct OvsFlowStats 
stats; + ULONG actionsLen; + OvsFlowPut *put; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + if ((outputLength != sizeof(stats)) || (outputBuffer == NULL)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + put = (OvsFlowPut *)inputBuffer; + if (put->actionsLen > 0) { + actionsLen = put->actionsLen; + } else { + actionsLen = 0; + } + if (inputLength != actionsLen + sizeof(*put)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = put->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + RtlZeroMemory(&stats, sizeof(stats)); + OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); + status = HandleFlowPut(put, datapath, &stats); + OvsReleaseDatapath(datapath, &dpLockState); + + if (status == STATUS_SUCCESS) { + // Copy stats to User mode app + NdisMoveMemory(outputBuffer, (PVOID)&stats, sizeof(stats)); + *replyLen = sizeof stats; + } + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + + +/* Handles flow add, modify as well as delete */ +static NTSTATUS +HandleFlowPut(OvsFlowPut *put, + OVS_DATAPATH *datapath, + struct OvsFlowStats *stats) +{ + BOOLEAN mayCreate, mayModify, mayDelete; + OvsFlow *KernelFlow; + UINT64 hash; + NTSTATUS status = STATUS_SUCCESS; + + mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0; + mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0; + mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0; + + if ((mayCreate || mayModify) == mayDelete) { + return STATUS_INVALID_PARAMETER; + } + + KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE); + if (!KernelFlow) { + if (!mayCreate) { + return STATUS_INVALID_PARAMETER; + } + + status = OvsPrepareFlow(&KernelFlow, put, hash); + if (status != STATUS_SUCCESS) { + 
FreeFlow(KernelFlow); + return STATUS_UNSUCCESSFUL; + } + + status = AddFlow(datapath, KernelFlow); + if (status != STATUS_SUCCESS) { + FreeFlow(KernelFlow); + return STATUS_UNSUCCESSFUL; + } + + /* Validate the flow addition */ + { + UINT64 newHash; + OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash, + FALSE); + ASSERT(flow); + ASSERT(newHash == hash); + if (!flow || newHash != hash) { + return STATUS_UNSUCCESSFUL; + } + } + } else { + stats->packetCount = KernelFlow->packetCount; + stats->byteCount = KernelFlow->byteCount; + stats->tcpFlags = KernelFlow->tcpFlags; + stats->used = (UINT32)KernelFlow->used; + + if (mayModify) { + OvsFlow *newFlow; + status = OvsPrepareFlow(&newFlow, put, hash); + if (status != STATUS_SUCCESS) { + return STATUS_UNSUCCESSFUL; + } + + KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE); + if (KernelFlow) { + if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { + newFlow->packetCount = KernelFlow->packetCount; + newFlow->byteCount = KernelFlow->byteCount; + newFlow->tcpFlags = KernelFlow->tcpFlags; + } + RemoveFlow(datapath, &KernelFlow); + } else { + if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { + newFlow->packetCount = stats->packetCount; + newFlow->byteCount = stats->byteCount; + newFlow->tcpFlags = stats->tcpFlags; + } + } + status = AddFlow(datapath, newFlow); + ASSERT(status == STATUS_SUCCESS); + + /* Validate the flow addition */ + { + UINT64 newHash; + OvsFlow *testflow = OvsLookupFlow(datapath, &put->key, + &newHash, FALSE); + ASSERT(testflow); + ASSERT(newHash == hash); + if (!testflow || newHash != hash) { + FreeFlow(newFlow); + return STATUS_UNSUCCESSFUL; + } + } + } else { + if (mayDelete) { + if (KernelFlow) { + RemoveFlow(datapath, &KernelFlow); + } + } else { + return STATUS_UNSUCCESSFUL; + } + } + } + return STATUS_SUCCESS; +} + +static NTSTATUS +OvsPrepareFlow(OvsFlow **flow, + const OvsFlowPut *put, + UINT64 hash) +{ + OvsFlow *localFlow = *flow; + NTSTATUS status = STATUS_SUCCESS; + + do { + *flow 
= localFlow = + OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen); + if (localFlow == NULL) { + status = STATUS_NO_MEMORY; + break; + } + + localFlow->key = put->key; + localFlow->actionsLen = put->actionsLen; + if (put->actionsLen) { + NdisMoveMemory((PUCHAR)localFlow->actions, put->actions, + put->actionsLen); + } + localFlow->userActionsLen = 0; // 0 indicate no conversion is made + localFlow->used = 0; + localFlow->packetCount = 0; + localFlow->byteCount = 0; + localFlow->tcpFlags = 0; + localFlow->hash = hash; + } while(FALSE); + + return status; +} + +NTSTATUS +OvsGetFlowIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + OvsFlow *flow; + UINT32 getFlags, getActionsLen; + OvsFlowGetInput *getInput; + OvsFlowGetOutput *getOutput; + UINT64 hash; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if (inputLength != sizeof(OvsFlowGetInput) + || inputBuffer == NULL) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + getInput = (OvsFlowGetInput *) inputBuffer; + getFlags = getInput->getFlags; + getActionsLen = getInput->actionsLen; + if (getInput->getFlags & FLOW_GET_KEY) { + return STATUS_INVALID_PARAMETER; + } + + if (outputBuffer == NULL + || outputLength != (sizeof *getOutput + + getInput->actionsLen)) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = getInput->dpNo; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); + flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE); + if (!flow) { + status = STATUS_INVALID_PARAMETER; + goto dp_unlock; + } + + // XXX: can be optimized to return only how much is written out + *replyLen = outputLength; + getOutput = (OvsFlowGetOutput *)outputBuffer; + 
ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info); + +dp_unlock: + OvsReleaseDatapath(datapath, &dpLockState); +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +NTSTATUS +OvsFlushFlowIoctl(PVOID inputBuffer, + UINT32 inputLength) +{ + NTSTATUS status = STATUS_SUCCESS; + OVS_DATAPATH *datapath = NULL; + UINT32 dpNo; + LOCK_STATE_EX dpLockState; + + if (inputLength != sizeof(UINT32) || inputBuffer == NULL) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + dpNo = *(UINT32 *)inputBuffer; + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + datapath = &gOvsSwitchContext->datapath; + ASSERT(datapath); + OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); + DeleteAllFlows(datapath); + OvsReleaseDatapath(datapath, &dpLockState); + +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + +#pragma warning( pop ) diff --git a/datapath-windows/ovsext/Flow.h b/datapath-windows/ovsext/Flow.h new file mode 100644 index 000000000..3964c5464 --- /dev/null +++ b/datapath-windows/ovsext/Flow.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FLOW_H_ +#define __FLOW_H_ 1 + +#include "precomp.h" +#include "Switch.h" +#include "User.h" +#include "NetProto.h" + +typedef struct _OvsFlow { + LIST_ENTRY ListEntry; // In Datapath's flowTable. 
+ OvsFlowKey key; + UINT64 hash; + UINT32 actionsLen; + UINT8 tcpFlags; + UINT64 used; + UINT64 packetCount; + UINT64 byteCount; + UINT32 userActionsLen; // used for flow query + UINT32 actionBufferLen; // used for flow reuse + NL_ATTR actions[1]; +} OvsFlow; + + +typedef struct _OvsLayers { + UINT32 l3Ofs; // IPv4, IPv6, ARP, or other L3 header. + UINT32 l4Ofs; // TCP, UDP, ICMP, ICMPv6, or other L4 header. + UINT32 l7Ofs; // L4 protocol's payload. +} OvsLayers; + +extern UINT64 ovsUserTimestampDelta; +extern UINT64 ovsTimeIncrementPerTick; + +NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath); +NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, + POVS_SWITCH_CONTEXT switchContext); + +NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, + OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, + OvsIPv4TunnelKey *tunKey); +OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, + UINT64 *hash, BOOLEAN hashValid); +UINT64 OvsHashFlow(const OvsFlowKey *key); +VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, + const POVS_PACKET_HDR_INFO layers); + +NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsPutFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsGetFlowIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsFlushFlowIoctl(PVOID inputBuffer, UINT32 inputLength); + +/* Flags for tunneling */ +#define OVS_TNL_F_DONT_FRAGMENT (1 << 0) +#define OVS_TNL_F_CSUM (1 << 1) +#define OVS_TNL_F_KEY (1 << 2) + +#endif /* __FLOW_H_ */ diff --git a/datapath-windows/ovsext/Ioctl.c b/datapath-windows/ovsext/Ioctl.c new file mode 100644 index 000000000..71e393357 --- /dev/null +++ b/datapath-windows/ovsext/Ioctl.c @@ -0,0 +1,768 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface + * alive while we transition over to the netlink based interface. + * OVS_USE_NL_INTERFACE = 0 => legacy inteface to use with dpif-windows.c + * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c + */ +#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 0 + +#include "precomp.h" +#include "Ioctl.h" +#include "Jhash.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "PacketIO.h" +#include "NetProto.h" +#include "Flow.h" +#include "User.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DATAPATH +#include "Debug.h" + +/* Handles to the device object for communication with userspace. */ +NDIS_HANDLE gOvsDeviceHandle; +PDEVICE_OBJECT gOvsDeviceObject; + +/* + * There seems to be a skew between the kernel's version of current time and + * the userspace's version of current time. The skew was seen to + * monotonically increase as well. + * + * In order to deal with the situation, we pass down the userspace's version + * of the timestamp to the kernel, and let the kernel calculate the delta. 
+ */ +UINT64 ovsUserTimestampDelta; +UINT64 ovsTimeIncrementPerTick; + +_Dispatch_type_(IRP_MJ_CREATE) +_Dispatch_type_(IRP_MJ_CLOSE) +DRIVER_DISPATCH OvsOpenCloseDevice; + +_Dispatch_type_(IRP_MJ_CLEANUP) +DRIVER_DISPATCH OvsCleanupDevice; + +_Dispatch_type_(IRP_MJ_DEVICE_CONTROL) +DRIVER_DISPATCH OvsDeviceControl; + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(INIT, OvsCreateDeviceObject) +#pragma alloc_text(PAGE, OvsOpenCloseDevice) +#pragma alloc_text(PAGE, OvsCleanupDevice) +#pragma alloc_text(PAGE, OvsDeviceControl) +#endif // ALLOC_PRAGMA + + +#define OVS_MAX_OPEN_INSTANCES 128 + +POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES]; +UINT32 ovsNumberOfOpenInstances; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +NDIS_SPIN_LOCK ovsCtrlLockObj; +NDIS_SPIN_LOCK ovsFlowLockObj; +PNDIS_SPIN_LOCK gOvsCtrlLock; +PNDIS_SPIN_LOCK ovsFlowLock; + +VOID +OvsInitIoctl() +{ + gOvsCtrlLock = &ovsCtrlLockObj; + ovsFlowLock = &ovsFlowLockObj; + NdisAllocateSpinLock(ovsFlowLock); + NdisAllocateSpinLock(gOvsCtrlLock); +} + +VOID +OvsCleanupIoctl() +{ + if (ovsFlowLock) { + NdisFreeSpinLock(ovsFlowLock); + NdisFreeSpinLock(gOvsCtrlLock); + gOvsCtrlLock = NULL; + gOvsCtrlLock = NULL; + } +} + +VOID +OvsInit() +{ + OvsInitIoctl(); + OvsInitEventQueue(); + OvsUserInit(); +} + +VOID +OvsCleanup() +{ + OvsCleanupEventQueue(); + OvsCleanupIoctl(); + OvsUserCleanup(); +} + +VOID +OvsAcquireCtrlLock() +{ + NdisAcquireSpinLock(gOvsCtrlLock); +} +VOID +OvsReleaseCtrlLock() +{ + NdisReleaseSpinLock(gOvsCtrlLock); +} + + +/* + * -------------------------------------------------------------------------- + * Creates the communication device between user and kernel, and also + * initializes the data associated data structures. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + UNICODE_STRING deviceName; + UNICODE_STRING symbolicDeviceName; + PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1]; + NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes; + OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle); + + RtlZeroMemory(dispatchTable, + (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH)); + dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice; + dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice; + dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice; + dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl; + + NdisInitUnicodeString(&deviceName, OVS_NT_DEVICE_NAME); + NdisInitUnicodeString(&symbolicDeviceName, OVS_DOS_DEVICE_NAME); + + RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); + + OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header, + NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES, + NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1, + sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); + + deviceAttributes.DeviceName = &deviceName; + deviceAttributes.SymbolicName = &symbolicDeviceName; + deviceAttributes.MajorFunctions = dispatchTable; + deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION); + + status = NdisRegisterDeviceEx(ovsExtDriverHandle, + &deviceAttributes, + &gOvsDeviceObject, + &gOvsDeviceHandle); + if (status != NDIS_STATUS_SUCCESS) { + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject); + ASSERT(gOvsDeviceObject != NULL); + ASSERT(gOvsDeviceHandle != NULL); + + if (ovsExt) { + ovsExt->numberOpenInstance = 0; + } + } else { + /* Initialize the associated data structures. 
*/ + OvsInit(); + } + OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject); + return status; +} + + +VOID +OvsDeleteDeviceObject() +{ + if (gOvsDeviceHandle) { +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION) + NdisGetDeviceReservedExtension(gOvsDeviceObject); + if (ovsExt) { + ASSERT(ovsExt->numberOpenInstance == 0); + } +#endif + + ASSERT(gOvsDeviceObject); + NdisDeregisterDeviceEx(gOvsDeviceHandle); + gOvsDeviceHandle = NULL; + gOvsDeviceObject = NULL; + } + OvsCleanup(); +} + +POVS_OPEN_INSTANCE +OvsGetOpenInstance(PFILE_OBJECT fileObject, + UINT32 dpNo) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance); + ASSERT(instance->fileObject == fileObject); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + return NULL; + } + return instance; +} + + +POVS_OPEN_INSTANCE +OvsFindOpenInstance(PFILE_OBJECT fileObject) +{ + UINT32 i, j; + for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES && + j < ovsNumberOfOpenInstances; i++) { + if (ovsOpenInstanceArray[i]) { + if (ovsOpenInstanceArray[i]->fileObject == fileObject) { + return ovsOpenInstanceArray[i]; + } + j++; + } + } + return NULL; +} + +NTSTATUS +OvsAddOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = + (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE)); + UINT32 i; + + if (instance == NULL) { + return STATUS_NO_MEMORY; + } + OvsAcquireCtrlLock(); + ASSERT(OvsFindOpenInstance(fileObject) == NULL); + + if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) { + OvsReleaseCtrlLock(); + OvsFreeMemory(instance); + return STATUS_INSUFFICIENT_RESOURCES; + } + RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE)); + + for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) { + if (ovsOpenInstanceArray[i] == NULL) { + ovsOpenInstanceArray[i] = instance; + instance->cookie = i; + break; + } + } + ASSERT(i < OVS_MAX_OPEN_INSTANCES); + instance->fileObject = fileObject; + ASSERT(fileObject->FsContext == NULL); + 
fileObject->FsContext = instance; + /* Account for the slot just taken; the limit check and OvsFindOpenInstance rely on this count. */ + ovsNumberOfOpenInstances++; + OvsReleaseCtrlLock(); + return STATUS_SUCCESS; +} + +/* + * Releases the event and packet queues of the open instance stored in + * fileObject->FsContext. + */ +static VOID +OvsCleanupOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance); + ASSERT(fileObject == instance->fileObject); + OvsCleanupEvent(instance); + OvsCleanupPacketQueue(instance); +} + +/* + * Detaches the open instance from fileObject, clears its slot in + * ovsOpenInstanceArray under the control lock, and frees it. + */ +VOID +OvsRemoveOpenInstance(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance; + ASSERT(fileObject->FsContext); + instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES); + + OvsAcquireCtrlLock(); + fileObject->FsContext = NULL; + ASSERT(ovsOpenInstanceArray[instance->cookie] == instance); + ovsOpenInstanceArray[instance->cookie] = NULL; + /* Keep the instance count in step with the array contents. */ + ASSERT(ovsNumberOfOpenInstances > 0); + ovsNumberOfOpenInstances--; + OvsReleaseCtrlLock(); + ASSERT(instance->eventQueue == NULL); + ASSERT (instance->packetQueue == NULL); + OvsFreeMemory(instance); +} + +/* + * Stores infoPtr and status in the IRP's IoStatus, completes the IRP and + * returns status. + */ +NTSTATUS +OvsCompleteIrpRequest(PIRP irp, + ULONG_PTR infoPtr, + NTSTATUS status) +{ + irp->IoStatus.Information = infoPtr; + irp->IoStatus.Status = status; + IoCompleteRequest(irp, IO_NO_INCREMENT); + return status; +} + + +NTSTATUS +OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + PIO_STACK_LOCATION irpSp; + NTSTATUS status = STATUS_SUCCESS; + PFILE_OBJECT fileObject; + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + + ASSERT(deviceObject == gOvsDeviceObject); + ASSERT(ovsExt != NULL); + + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u", + deviceObject, fileObject, + ovsExt->numberOpenInstance); + + switch (irpSp->MajorFunction) { + case IRP_MJ_CREATE: + status = OvsAddOpenInstance(fileObject); + if (STATUS_SUCCESS == status) { + InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance); + } + break; + case IRP_MJ_CLOSE: + ASSERT(ovsExt->numberOpenInstance > 0); + OvsRemoveOpenInstance(fileObject); +
InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance); + break; + default: + ASSERT(0); + } + return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); +} + +_Use_decl_annotations_ +NTSTATUS +OvsCleanupDevice(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + + PIO_STACK_LOCATION irpSp; + PFILE_OBJECT fileObject; + + NTSTATUS status = STATUS_SUCCESS; +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + if (ovsExt) { + ASSERT(ovsExt->numberOpenInstance > 0); + } +#else + UNREFERENCED_PARAMETER(deviceObject); +#endif + ASSERT(deviceObject == gOvsDeviceObject); + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + + ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP); + + OvsCleanupOpenInstance(fileObject); + + return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); +} + +/* + *---------------------------------------------------------------------------- + * OvsGetVersionIoctl -- + * + * On entry None + * On exit Driver version + * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsGetVersionIoctl(PVOID outputBuffer, + uint32 outputLength, + uint32 *replyLen) +{ + POVS_VERSION driverOut = (POVS_VERSION)outputBuffer; + + if (outputLength < sizeof (*driverOut)) { + return STATUS_BUFFER_TOO_SMALL; + } + *replyLen = sizeof (*driverOut); + driverOut->mjrDrvVer = OVS_DRIVER_MAJOR_VER; + driverOut->mnrDrvVer = OVS_DRIVER_MINOR_VER; + + return STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * OvsDpDumpIoctl -- + * Get All Datapath. For now, we only support one datapath. 
+ * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsDpDumpIoctl(PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + *replyLen = sizeof (UINT32); + if (outputLength < sizeof (UINT32)) { + return STATUS_BUFFER_TOO_SMALL; + } + OvsAcquireCtrlLock(); + if (gOvsSwitchContext) { + *(UINT32 *)outputBuffer = gOvsSwitchContext->dpNo; + } else { + *replyLen = 0; + } + OvsReleaseCtrlLock(); + + return STATUS_SUCCESS; +} + + +/* + *---------------------------------------------------------------------------- + * OvsDpGetIoctl -- + * Given dpNo, get all datapath info as defined in OVS_DP_INFO. + * + * Result: + * STATUS_SUCCESS + * STATUS_BUFFER_TOO_SMALL + * STATUS_INVALID_PARAMETER + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsDpGetIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 dpNo; + POVS_DP_INFO info; + OVS_DATAPATH *datapath; + + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + + if (outputLength < sizeof (OVS_DP_INFO)) { + *replyLen = sizeof (OVS_DP_INFO); + return STATUS_BUFFER_TOO_SMALL; + } + + dpNo = *(UINT32 *)inputBuffer; + OvsAcquireCtrlLock(); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + OvsReleaseCtrlLock(); + return STATUS_INVALID_PARAMETER; + } + *replyLen = sizeof (OVS_DP_INFO); + RtlZeroMemory(outputBuffer, sizeof (OVS_DP_INFO)); + info = (POVS_DP_INFO)outputBuffer; + RtlCopyMemory(info->name, "ovs-system", sizeof ("ovs-system")); + datapath = &gOvsSwitchContext->datapath; + info->nMissed = datapath->misses; + info->nHit = datapath->hits; + info->nLost = datapath->lost; + info->nFlows = datapath->nFlows; + OvsReleaseCtrlLock(); + return STATUS_SUCCESS; +} + +NTSTATUS +OvsDeviceControl(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + + PIO_STACK_LOCATION 
irpSp; + NTSTATUS status = STATUS_SUCCESS; + PFILE_OBJECT fileObject; + PVOID inputBuffer; + PVOID outputBuffer; + UINT32 inputBufferLen, outputBufferLen, mdlBufferLen; + UINT32 code, replyLen = 0; +#ifdef DBG + POVS_DEVICE_EXTENSION ovsExt = + (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); + ASSERT(deviceObject == gOvsDeviceObject); + ASSERT(ovsExt); + ASSERT(ovsExt->numberOpenInstance > 0); +#else + UNREFERENCED_PARAMETER(deviceObject); +#endif + + irpSp = IoGetCurrentIrpStackLocation(irp); + + + ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); + ASSERT(irpSp->FileObject != NULL); + + fileObject = irpSp->FileObject; + code = irpSp->Parameters.DeviceIoControl.IoControlCode; + inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength; + outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength; + /* + * In case of an IRP with METHOD_IN_DIRECT or METHOD_OUT_DIRECT, the size + * of the MDL is stored in Parameters.DeviceIoControl.OutputBufferLength. 
+ */ + mdlBufferLen = outputBufferLen; + outputBuffer = inputBuffer = irp->AssociatedIrp.SystemBuffer; + + switch(code) { + case OVS_IOCTL_VERSION_GET: + status = OvsGetVersionIoctl(outputBuffer, outputBufferLen, + &replyLen); + break; + case OVS_IOCTL_DP_DUMP: + status = OvsDpDumpIoctl(outputBuffer, outputBufferLen, &replyLen); + break; + case OVS_IOCTL_DP_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + status = OvsDpGetIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, &replyLen); + } + break; + case OVS_IOCTL_DP_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_VPORT_DUMP: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsDumpVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_VPORT_ADD: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsAddVportIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = 
STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_VPORT_DEL: + status = OvsDelVportIoctl(inputBuffer, inputBufferLen, + &replyLen); + break; + case OVS_IOCTL_VPORT_EXT_INFO: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetExtInfoIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + OVS_LOG_INFO("ExtInfo: fail to get outputBuffer address"); + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_DUMP: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsDumpFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_GET: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsGetFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_FLOW_PUT: + // XXX: This is not really working - mapping the input buffer + // XXX: inputBufferLen = mdlBufferLen; + // inputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + // NormalPagePriority); + status = OvsPutFlowIoctl(inputBuffer, inputBufferLen, + outputBuffer, outputBufferLen, + &replyLen); + break; + case OVS_IOCTL_FLOW_FLUSH: + status = OvsFlushFlowIoctl(inputBuffer, inputBufferLen); + break; + case OVS_IOCTL_QOS_QUEUE_DUMP: + case OVS_IOCTL_QOS_QUEUE_GET: + case OVS_IOCTL_QOS_QUEUE_SET: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_DATAPATH_SUBSCRIBE: + 
status = OvsSubscribeDpIoctl(fileObject, inputBuffer, + inputBufferLen); + break; + case OVS_IOCTL_DATAPATH_READ: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer) { + status = OvsReadDpIoctl(fileObject, outputBuffer, + outputBufferLen, &replyLen); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + break; + case OVS_IOCTL_DATAPATH_OPERATE: + status = STATUS_NOT_IMPLEMENTED; + break; + case OVS_IOCTL_DATAPATH_EXECUTE: + // XXX: need to make the input direct + status = OvsExecuteDpIoctl(inputBuffer, inputBufferLen, + outputBufferLen); + break; + case OVS_IOCTL_DATAPATH_PURGE: + status = OvsPurgeDpIoctl(fileObject); + break; + case OVS_IOCTL_DATAPATH_WAIT: + status = OvsWaitDpIoctl(irp, fileObject); + break; + case OVS_IOCTL_EVENT_SUBSCRIBE: + status = OvsSubscribeEventIoctl(fileObject, inputBuffer, + inputBufferLen); + break; + case OVS_IOCTL_EVENT_POLL: + if (irp->MdlAddress == NULL) { + status = STATUS_INVALID_PARAMETER; + break; + } + outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, + NormalPagePriority); + if (outputBuffer == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + status = OvsPollEventIoctl(fileObject, inputBuffer, + inputBufferLen, outputBuffer, + outputBufferLen, &replyLen); + } + break; + case OVS_IOCTL_EVENT_WAIT: + status = OvsWaitEventIoctl(irp, fileObject, + inputBuffer, inputBufferLen); + break; + case OVS_IOCTL_DP_TIMESTAMP_SET: + if (inputBufferLen != sizeof (ovsUserTimestampDelta)) { + status = STATUS_INFO_LENGTH_MISMATCH; + } else { + int64 currentUserTS = *(int64 *)inputBuffer; + LARGE_INTEGER tickCount; + + /* So many ticks since system booted. 
*/ + KeQueryTickCount(&tickCount); + ovsUserTimestampDelta = currentUserTS - + (tickCount.QuadPart * ovsTimeIncrementPerTick); + status = STATUS_SUCCESS; + } + break; + default: + status = STATUS_INVALID_DEVICE_REQUEST; + break; + } + + if (status == STATUS_PENDING) { + return status; + } else { + /* + * When the system-address-space mapping that is returned by + * MmGetSystemAddressForMdlSafe is no longer needed, it must be + * released. + * http://msdn.microsoft.com/en-us/library/windows/hardware/ff554559(v=vs.85).aspx + * + * We might have to release the MDL here. + */ + return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status); + } +} + +#endif /* OVS_USE_NL_INTERFACE */ diff --git a/datapath-windows/ovsext/Ioctl.h b/datapath-windows/ovsext/Ioctl.h new file mode 100644 index 000000000..c4f0bd617 --- /dev/null +++ b/datapath-windows/ovsext/Ioctl.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface + * alive while we transition over to the netlink based interface. 
+ * OVS_USE_NL_INTERFACE = 0 => legacy inteface to use with dpif-windows.c + * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c + */ +#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 0 + +#ifndef __IOCTL_H_ +#define __IOCTL_H_ 1 + +typedef struct _OVS_DEVICE_EXTENSION { + INT numberOpenInstance; +} OVS_DEVICE_EXTENSION, *POVS_DEVICE_EXTENSION; + + +typedef struct _OVS_OPEN_INSTANCE { + UINT32 cookie; + PFILE_OBJECT fileObject; + PVOID eventQueue; + PVOID packetQueue; +} OVS_OPEN_INSTANCE, *POVS_OPEN_INSTANCE; + +NDIS_STATUS OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle); +VOID OvsDeleteDeviceObject(); + +POVS_OPEN_INSTANCE OvsGetOpenInstance(PFILE_OBJECT fileObject, + UINT32 dpNo); + +NTSTATUS OvsCompleteIrpRequest(PIRP irp, ULONG_PTR infoPtr, NTSTATUS status); + +#endif /* __IOCTL_H_ */ + +#endif /* OVS_USE_NL_INTERFACE */ diff --git a/datapath-windows/ovsext/IpHelper.c b/datapath-windows/ovsext/IpHelper.c new file mode 100644 index 000000000..fd663dc3a --- /dev/null +++ b/datapath-windows/ovsext/IpHelper.c @@ -0,0 +1,1689 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "IpHelper.h" +#include "Switch.h" +#include "Jhash.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_IPHELPER +#include "Debug.h" + +/* + * For now, we assume only one internal adapter + */ + +KSTART_ROUTINE OvsStartIpHelper; + + +/* + * Only when the internal IP is configured and virtual + * internal port is connected, the IP helper request can be + * queued. + */ +static BOOLEAN ovsInternalIPConfigured; +static UINT32 ovsInternalPortNo; +static GUID ovsInternalNetCfgId; +static MIB_IF_ROW2 ovsInternalRow; +static MIB_IPINTERFACE_ROW ovsInternalIPRow; + +/* we only keep one internal IP for reference, it will not be used for + * determining SRC IP of Tunnel + */ +static UINT32 ovsInternalIP; + + +/* + * FWD_ENTRY --------> IPFORWARD_ENTRY + * | + * |--------------------------------------> IPNEIGH_ENTRY + * + * IPFORWARD_ENTRY ------> FWD_ENTRY LIST with same IPFORWARD + * + * IPNEIGH_ENTRY ------> FWD_ENTRY LIST with same IPNEIGH + * + */ + +static PLIST_ENTRY ovsFwdHashTable; // based on DST IP +static PLIST_ENTRY ovsRouteHashTable; // based on DST PREFIX +static PLIST_ENTRY ovsNeighHashTable; // based on DST IP +static LIST_ENTRY ovsSortedIPNeighList; +static UINT32 ovsNumFwdEntries; + + +static PNDIS_RW_LOCK_EX ovsTableLock; +static NDIS_SPIN_LOCK ovsIpHelperLock; + +static LIST_ENTRY ovsIpHelperRequestList; +static UINT32 ovsNumIpHelperRequests; + +static HANDLE ipInterfaceNotificationHandle; +static HANDLE ipRouteNotificationHandle; +static HANDLE unicastIPNotificationHandle; + +static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext; + +static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix); +static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf); +static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr); +static VOID OvsCleanupIpHelperRequestList(VOID); +static VOID OvsCleanupFwdTable(VOID); +static VOID OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn);
+ +/* + * Logs the LUID, interface index, GUID and permanent MAC address of ifRow. + * The first two Data4 bytes are printed individually: reading them through + * a UINT16 pointer would swap them on little-endian targets. + */ +static VOID +OvsDumpIfRow(PMIB_IF_ROW2 ifRow) +{ + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ifRow->InterfaceLuid.Info.NetLuidIndex, + ifRow->InterfaceLuid.Info.IfType); + OVS_LOG_INFO("InterfaceIndex: %d", ifRow->InterfaceIndex); + + OVS_LOG_INFO("Interface GUID: %08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + ifRow->InterfaceGuid.Data1, + ifRow->InterfaceGuid.Data2, + ifRow->InterfaceGuid.Data3, + ifRow->InterfaceGuid.Data4[0], + ifRow->InterfaceGuid.Data4[1], + ifRow->InterfaceGuid.Data4[2], + ifRow->InterfaceGuid.Data4[3], + ifRow->InterfaceGuid.Data4[4], + ifRow->InterfaceGuid.Data4[5], + ifRow->InterfaceGuid.Data4[6], + ifRow->InterfaceGuid.Data4[7]); + OVS_LOG_INFO("Perm MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", + ifRow->PermanentPhysicalAddress[0], + ifRow->PermanentPhysicalAddress[1], + ifRow->PermanentPhysicalAddress[2], + ifRow->PermanentPhysicalAddress[3], + ifRow->PermanentPhysicalAddress[4], + ifRow->PermanentPhysicalAddress[5]); +} + + +/* Logs every row of ifTable via OvsDumpIfRow. */ +static VOID +OvsDumpIfTable(PMIB_IF_TABLE2 ifTable) +{ + PMIB_IF_ROW2 ifRow; + UINT32 i; + + OVS_LOG_INFO("======Number of entries: %d========", ifTable->NumEntries); + + for (i = 0; i < ifTable->NumEntries; i++) { + ifRow = &ifTable->Table[i]; + OvsDumpIfRow(ifRow); + } +} + + +/* + * Copies into ifEntry the interface-table row whose GUID equals + * *interfaceGuid. Returns STATUS_INVALID_PARAMETER for NULL arguments, the + * GetIfTable2Ex status if the table cannot be read, STATUS_NOT_FOUND if no + * row matches, and STATUS_SUCCESS otherwise. + */ +NTSTATUS +OvsGetIfEntry(GUID *interfaceGuid, PMIB_IF_ROW2 ifEntry) +{ + NTSTATUS status; + PMIB_IF_TABLE2 ifTable; + UINT32 i; + + if (interfaceGuid == NULL || ifEntry == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetIfTable2Ex(MibIfTableNormal, &ifTable); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get if table, status: %x", status); + return status; + } + status = STATUS_NOT_FOUND; + + for (i = 0; i < ifTable->NumEntries; i++) { + PMIB_IF_ROW2 ifRow; + + ifRow = &ifTable->Table[i]; + if (!memcmp(interfaceGuid, &ifRow->InterfaceGuid, sizeof (GUID))) { + RtlCopyMemory(ifEntry, ifRow, sizeof (MIB_IF_ROW2)); + status = STATUS_SUCCESS; + OvsDumpIfRow(ifEntry); + break; + } + } + + FreeMibTable(ifTable); + return
status; +} + + +/* Logs the LUID, interface index and max reassembly size of ipRow. */ +static VOID +OvsDumpIPInterfaceEntry(PMIB_IPINTERFACE_ROW ipRow) +{ + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType); + OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); + + OVS_LOG_INFO("MaxReassembleSize: %u", ipRow->MaxReassemblySize); +} + + +/* + * Fills ipRow with the IPv4 interface entry for luid. Returns + * STATUS_INVALID_PARAMETER when ipRow is NULL, otherwise the status of + * GetIpInterfaceEntry. + */ +NTSTATUS +OvsGetIPInterfaceEntry(NET_LUID luid, + PMIB_IPINTERFACE_ROW ipRow) +{ + NTSTATUS status; + + if (ipRow == NULL) { + return STATUS_INVALID_PARAMETER; + } + + ipRow->Family = AF_INET; + ipRow->InterfaceLuid.Value = luid.Value; + + status = GetIpInterfaceEntry(ipRow); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get internal IP Interface mib row, status: %x", + status); + return status; + } + OvsDumpIPInterfaceEntry(ipRow); + return status; +} + + +/* Logs the LUID, interface index and IPv4 unicast address of ipRow. */ +static VOID +OvsDumpIPEntry(PMIB_UNICASTIPADDRESS_ROW ipRow) +{ + UINT32 ipAddr; + + OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", + ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType); + + OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); + + ASSERT(ipRow->Address.si_family == AF_INET); + + ipAddr = ipRow->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Unicast Address: %d.%d.%d.%d\n", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, ipAddr >> 24); +} + + +/* + * Copies into ipEntry the first IPv4 unicast address row whose interface + * LUID equals interfaceLuid. Returns STATUS_INVALID_PARAMETER when ipEntry + * is NULL, the GetUnicastIpAddressTable status if the table cannot be read, + * STATUS_NOT_FOUND if no row matches, and STATUS_SUCCESS otherwise. + */ +NTSTATUS +OvsGetIPEntry(NET_LUID interfaceLuid, + PMIB_UNICASTIPADDRESS_ROW ipEntry) +{ + PMIB_UNICASTIPADDRESS_TABLE ipTable; + NTSTATUS status; + UINT32 i; + + /* interfaceLuid is passed by value, so ipEntry is the only pointer to validate. */ + if (ipEntry == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetUnicastIpAddressTable(AF_INET, &ipTable); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get unicast address table, status: %x", status); + return status; + } + + status = STATUS_NOT_FOUND; + + for (i = 0; i < ipTable->NumEntries; i++) { + PMIB_UNICASTIPADDRESS_ROW ipRow; + + ipRow = &ipTable->Table[i]; + if (ipRow->InterfaceLuid.Value == interfaceLuid.Value) { +
RtlCopyMemory(ipEntry, ipRow, sizeof (*ipRow)); + OvsDumpIPEntry(ipEntry); + status = STATUS_SUCCESS; + break; + } + } + + FreeMibTable(ipTable); + return status; +} + +#ifdef OVS_ENABLE_IPPATH +static VOID +OvsDumpIPPath(PMIB_IPPATH_ROW ipPath) +{ + UINT32 ipAddr = ipPath->Source.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Source: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Destination: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = ipPath->CurrentNextHop.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("NextHop: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); +} + + +NTSTATUS +OvsGetIPPathEntry(PMIB_IPPATH_ROW ipPath) +{ + NTSTATUS status; + UINT32 ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; + + status = GetIpPathEntry(ipPath); + + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to get IP path to %d.%d.%d.%d, status:%x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + OvsDumpIPPath(ipPath); + return status; +} +#endif + +static VOID +OvsDumpRoute(const SOCKADDR_INET *sourceAddress, + const SOCKADDR_INET *destinationAddress, + PMIB_IPFORWARD_ROW2 route) +{ + UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Destination: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = sourceAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Source: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + + ipAddr = route->NextHop.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("NextHop: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); +} + + +NTSTATUS +OvsGetRoute(NET_LUID interfaceLuid, + const 
SOCKADDR_INET *destinationAddress, + PMIB_IPFORWARD_ROW2 route, + SOCKADDR_INET *sourceAddress) +{ + NTSTATUS status; + + if (destinationAddress == NULL || route == NULL) { + return STATUS_INVALID_PARAMETER; + } + + status = GetBestRoute2(&interfaceLuid, 0, + NULL, destinationAddress, + 0, route, sourceAddress); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Fail to get route to %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + + OvsDumpRoute(sourceAddress, destinationAddress, route); + return status; +} + +static VOID +OvsDumpIPNeigh(PMIB_IPNET_ROW2 ipNeigh) +{ + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("Neigh: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + OVS_LOG_INFO("MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", + ipNeigh->PhysicalAddress[0], + ipNeigh->PhysicalAddress[1], + ipNeigh->PhysicalAddress[2], + ipNeigh->PhysicalAddress[3], + ipNeigh->PhysicalAddress[4], + ipNeigh->PhysicalAddress[5]); +} + + +NTSTATUS +OvsGetIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + + status = GetIpNetEntry2(ipNeigh); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("Fail to get ARP entry: %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + if (ipNeigh->State == NlnsReachable || + ipNeigh->State == NlnsPermanent) { + OvsDumpIPNeigh(ipNeigh); + return STATUS_SUCCESS; + } + return STATUS_FWP_TCPIP_NOT_READY; +} + + +NTSTATUS +OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + status = ResolveIpNetEntry2(ipNeigh, NULL); + + if (status != STATUS_SUCCESS) { + UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + 
OVS_LOG_INFO("Fail to resolve ARP entry: %d.%d.%d.%d, status: %x", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); + return status; + } + + if (ipNeigh->State == NlnsReachable || + ipNeigh->State == NlnsPermanent) { + OvsDumpIPNeigh(ipNeigh); + return STATUS_SUCCESS; + } + return STATUS_FWP_TCPIP_NOT_READY; +} + + +/* + * Looks up the neighbor entry for ipAddr on the internal interface and, if + * the lookup does not succeed, re-initializes ipNeigh and asks the stack to + * resolve it. Returns the status of the last step performed. + */ +NTSTATUS +OvsGetOrResolveIPNeigh(UINT32 ipAddr, + PMIB_IPNET_ROW2 ipNeigh) +{ + NTSTATUS status; + + ASSERT(ipNeigh); + + RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); + ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; + ipNeigh->Address.si_family = AF_INET; + ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + + status = OvsGetIPNeighEntry(ipNeigh); + + if (status != STATUS_SUCCESS) { + RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); + ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; + ipNeigh->Address.si_family = AF_INET; + ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; + status = OvsResolveIPNeighEntry(ipNeigh); + } + return status; +} + + +/* + * IP interface change callback. On add/modify of the internal interface the + * cached ovsInternalIPRow is refreshed and the IP is marked configured; on + * delete the flag is cleared and the pending requests and forwarding table + * are cleaned up. + */ +static VOID +OvsChangeCallbackIpInterface(PVOID context, + PMIB_IPINTERFACE_ROW ipRow, + MIB_NOTIFICATION_TYPE notificationType) +{ + UNREFERENCED_PARAMETER(context); + switch (notificationType) { + case MibParameterNotification: + case MibAddInstance: + if (ipRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + ipRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + /* + * Update the IP Interface Row. Copy the whole row: the size must be + * that of the structure, not of a pointer to it. + */ + NdisAcquireSpinLock(&ovsIpHelperLock); + RtlCopyMemory(&ovsInternalIPRow, ipRow, + sizeof (ovsInternalIPRow)); + ovsInternalIPConfigured = TRUE; + NdisReleaseSpinLock(&ovsIpHelperLock); + } + OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d is %s", +
ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType, + notificationType == MibAddInstance ? "added" : "modified"); + break; + case MibDeleteInstance: + OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d, deleted", + ipRow->InterfaceLuid.Info.NetLuidIndex, + ipRow->InterfaceLuid.Info.IfType); + if (ipRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + ipRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalIPConfigured = FALSE; + NdisReleaseSpinLock(&ovsIpHelperLock); + + OvsCleanupIpHelperRequestList(); + + OvsCleanupFwdTable(); + } + + break; + case MibInitialNotification: + OVS_LOG_INFO("Get Initial notification for IP Interface change."); + default: + return; + } +} + + +static VOID +OvsChangeCallbackIpRoute(PVOID context, + PMIB_IPFORWARD_ROW2 ipRoute, + MIB_NOTIFICATION_TYPE notificationType) +{ + UINT32 ipAddr, nextHop; + + UNREFERENCED_PARAMETER(context); + switch (notificationType) { + case MibAddInstance: + + ASSERT(ipRoute); + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff); + break; + + case MibParameterNotification: + case MibDeleteInstance: + ASSERT(ipRoute); + ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; + nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + + OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + ipRoute->DestinationPrefix.PrefixLength, + nextHop & 
0xff, (nextHop >> 8) & 0xff, + (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff, + notificationType == MibDeleteInstance ? "deleted" : + "modified"); + + if (ipRoute->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + ipRoute->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + ipRoute->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + + POVS_IPFORWARD_ENTRY ipf; + LOCK_STATE_EX lockState; + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipf = OvsLookupIPForwardEntry(&ipRoute->DestinationPrefix); + if (ipf != NULL) { + OvsRemoveIPForwardEntry(ipf); + } + NdisReleaseRWLock(ovsTableLock, &lockState); + } + break; + + case MibInitialNotification: + OVS_LOG_INFO("Get Initial notification for IP Route change."); + default: + return; + } +} + + +static VOID +OvsChangeCallbackUnicastIpAddress(PVOID context, + PMIB_UNICASTIPADDRESS_ROW unicastRow, + MIB_NOTIFICATION_TYPE notificationType) +{ + UINT32 ipAddr; + + UNREFERENCED_PARAMETER(context); + switch (notificationType) { + case MibParameterNotification: + case MibAddInstance: + ASSERT(unicastRow); + ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + if (unicastRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + unicastRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + ovsInternalIP = ipAddr; + } + OVS_LOG_INFO("IP Address: %d.%d.%d.%d is %s", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, + notificationType == MibAddInstance ? 
"added": "modified"); + break; + + case MibDeleteInstance: + ASSERT(unicastRow); + ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; + OVS_LOG_INFO("IP Address removed: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + if (unicastRow->InterfaceLuid.Info.NetLuidIndex == + ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && + unicastRow->InterfaceLuid.Info.IfType == + ovsInternalRow.InterfaceLuid.Info.IfType && + unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { + + LOCK_STATE_EX lockState; + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + OvsRemoveAllFwdEntriesWithSrc(ipAddr); + NdisReleaseRWLock(ovsTableLock, &lockState); + + } + break; + + case MibInitialNotification: + OVS_LOG_INFO("Get Initial notification for Unicast IP Address change."); + default: + return; + } +} + + +static VOID +OvsCancelChangeNotification() +{ + if (ipInterfaceNotificationHandle != NULL) { + CancelMibChangeNotify2(ipInterfaceNotificationHandle); + ipInterfaceNotificationHandle = NULL; + } + if (ipRouteNotificationHandle != NULL) { + CancelMibChangeNotify2(ipRouteNotificationHandle); + ipRouteNotificationHandle = NULL; + } + if (unicastIPNotificationHandle != NULL) { + CancelMibChangeNotify2(unicastIPNotificationHandle); + unicastIPNotificationHandle = NULL; + } +} + + +static NTSTATUS +OvsRegisterChangeNotification() +{ + NTSTATUS status; + + + status = NotifyIpInterfaceChange(AF_INET, OvsChangeCallbackIpInterface, + NULL, TRUE, + &ipInterfaceNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to register Notify IP interface change, status:%x.", + status); + return status; + } + + status = NotifyRouteChange2(AF_INET, OvsChangeCallbackIpRoute, NULL, + TRUE, &ipRouteNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to regiter ip route change, status: %x.", + status); + goto register_cleanup; + } + status = NotifyUnicastIpAddressChange(AF_INET, + 
OvsChangeCallbackUnicastIpAddress, + NULL, TRUE, + &unicastIPNotificationHandle); + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fail to regiter unicast ip change, status: %x.", status); + } +register_cleanup: + if (status != STATUS_SUCCESS) { + OvsCancelChangeNotification(); + } + + return status; +} + + +static POVS_IPNEIGH_ENTRY +OvsLookupIPNeighEntry(UINT32 ipAddr) +{ + PLIST_ENTRY link; + POVS_IPNEIGH_ENTRY entry; + UINT32 hash = OvsJhashWords(&ipAddr, 1, OVS_HASH_BASIS); + + LIST_FORALL(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], link) { + entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, link); + if (entry->ipAddr == ipAddr) { + return entry; + } + } + return NULL; +} + + +static UINT32 +OvsHashIPPrefix(PIP_ADDRESS_PREFIX prefix) +{ + UINT64 words = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32 | + (UINT32)prefix->PrefixLength; + return OvsJhashWords((UINT32 *)&words, 2, OVS_HASH_BASIS); +} + + +static POVS_IPFORWARD_ENTRY +OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix) +{ + + PLIST_ENTRY link; + POVS_IPFORWARD_ENTRY ipfEntry; + UINT32 hash; + ASSERT(prefix->Prefix.si_family == AF_INET); + + hash = RtlUlongByteSwap(prefix->Prefix.Ipv4.sin_addr.s_addr); + + ASSERT(prefix->PrefixLength >= 32 || + (hash & (((UINT32)1 << (32 - prefix->PrefixLength)) - 1)) == 0); + + hash = OvsHashIPPrefix(prefix); + LIST_FORALL(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], link) { + ipfEntry = CONTAINING_RECORD(link, OVS_IPFORWARD_ENTRY, link); + if (ipfEntry->prefix.PrefixLength == prefix->PrefixLength && + ipfEntry->prefix.Prefix.Ipv4.sin_addr.s_addr == + prefix->Prefix.Ipv4.sin_addr.s_addr) { + return ipfEntry; + } + } + return NULL; +} + + +static POVS_FWD_ENTRY +OvsLookupIPFwdEntry(UINT32 dstIp) +{ + PLIST_ENTRY link; + POVS_FWD_ENTRY entry; + UINT32 hash = OvsJhashWords(&dstIp, 1, OVS_HASH_BASIS); + + LIST_FORALL(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], link) { + entry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); + if 
(entry->info.dstIpAddr == dstIp) { + return entry; + } + } + return NULL; +} + + +NTSTATUS +OvsLookupIPFwdInfo(UINT32 dstIp, + POVS_FWD_INFO info) +{ + POVS_FWD_ENTRY entry; + LOCK_STATE_EX lockState; + NTSTATUS status = STATUS_NOT_FOUND; + + NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); + entry = OvsLookupIPFwdEntry(dstIp); + if (entry) { + info->value[0] = entry->info.value[0]; + info->value[1] = entry->info.value[1]; + info->value[2] = entry->info.value[2]; + status = STATUS_SUCCESS; + } + NdisReleaseRWLock(ovsTableLock, &lockState); + return status; +} + + +static POVS_IPNEIGH_ENTRY +OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) +{ + + POVS_IPNEIGH_ENTRY entry; + UINT64 timeVal; + + ASSERT(ipNeigh != NULL); + entry = (POVS_IPNEIGH_ENTRY)OvsAllocateMemory(sizeof (OVS_IPNEIGH_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_IPNEIGH_ENTRY)); + entry->ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + entry->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; + RtlCopyMemory(entry->macAddr, ipNeigh->PhysicalAddress, + MAC_ADDRESS_LEN); + InitializeListHead(&entry->fwdList); + + return entry; +} + + +static POVS_IPFORWARD_ENTRY +OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute) +{ + + POVS_IPFORWARD_ENTRY entry; + + ASSERT(ipRoute); + + entry = + (POVS_IPFORWARD_ENTRY)OvsAllocateMemory(sizeof (OVS_IPFORWARD_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_IPFORWARD_ENTRY)); + RtlCopyMemory(&entry->prefix, &ipRoute->DestinationPrefix, + sizeof (IP_ADDRESS_PREFIX)); + entry->nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; + InitializeListHead(&entry->fwdList); + + return entry; +} + + +static POVS_FWD_ENTRY +OvsCreateFwdEntry(POVS_FWD_INFO fwdInfo) +{ + POVS_FWD_ENTRY entry; + + entry = (POVS_FWD_ENTRY)OvsAllocateMemory(sizeof (OVS_FWD_ENTRY)); + if (entry == NULL) { + return NULL; + } + + RtlZeroMemory(entry, sizeof (OVS_FWD_ENTRY)); + 
RtlCopyMemory(&entry->info, fwdInfo, sizeof (OVS_FWD_INFO)); + return entry; +} + + +static VOID +OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry) +{ + POVS_IPFORWARD_ENTRY ipf; + POVS_IPNEIGH_ENTRY ipn; + + ipf = fwdEntry->ipf; + ipn = fwdEntry->ipn; + + RemoveEntryList(&fwdEntry->link); + ovsNumFwdEntries--; + + RemoveEntryList(&fwdEntry->ipfLink); + ipf->refCount--; + + RemoveEntryList(&fwdEntry->ipnLink); + ipn->refCount--; + + if (ipf->refCount == 0) { + ASSERT(IsListEmpty(&ipf->fwdList)); + RemoveEntryList(&ipf->link); + OvsFreeMemory(ipf); + } + + if (ipn->refCount == 0) { + ASSERT(IsListEmpty(&ipn->fwdList)); + RemoveEntryList(&ipn->link); + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(ipn); + } + + OvsFreeMemory(fwdEntry); +} + + +static VOID +OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf) +{ + POVS_FWD_ENTRY fwdEntry; + PLIST_ENTRY link, next; + + ipf->refCount++; + + LIST_FORALL_SAFE(&ipf->fwdList, link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); + OvsRemoveFwdEntry(fwdEntry); + } + ASSERT(ipf->refCount == 1); + + RemoveEntryList(&ipf->link); + OvsFreeMemory(ipf); +} + + +static VOID +OvsRemoveIPNeighEntry(POVS_IPNEIGH_ENTRY ipn) +{ + PLIST_ENTRY link, next; + POVS_FWD_ENTRY fwdEntry; + + ipn->refCount++; + + LIST_FORALL_SAFE(&ipn->fwdList, link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); + OvsRemoveFwdEntry(fwdEntry); + } + + if (ipn->refCount == 1) { + RemoveEntryList(&ipn->link); + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(ipn); + } +} + + +static VOID +OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn) +{ + PLIST_ENTRY link; + POVS_IPNEIGH_ENTRY entry; + + if (!IsListEmpty(&ovsSortedIPNeighList)) { + link = ovsSortedIPNeighList.Blink; + entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + if (entry->timeout > ipn->timeout) { + 
ipn->timeout++; + } + } + InsertTailList(&ovsSortedIPNeighList, &ipn->slink); +} + + +static VOID +OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry, + POVS_IPFORWARD_ENTRY ipf, + POVS_IPNEIGH_ENTRY ipn) + +{ + UINT32 hash; + + if (ipn->refCount == 0) { + NdisAcquireSpinLock(&ovsIpHelperLock); + OvsAddToSortedNeighList(ipn); + NdisReleaseSpinLock(&ovsIpHelperLock); + hash = OvsJhashWords(&ipn->ipAddr, 1, OVS_HASH_BASIS); + InsertHeadList(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], + &ipn->link); + } + if (ipf->refCount == 0) { + hash = OvsHashIPPrefix(&ipf->prefix); + InsertHeadList(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], + &ipf->link); + } + + InsertHeadList(&ipf->fwdList, &fwdEntry->ipfLink); + ipf->refCount++; + fwdEntry->ipf = ipf; + + InsertHeadList(&ipn->fwdList, &fwdEntry->ipnLink); + ipn->refCount++; + fwdEntry->ipn = ipn; + + hash = OvsJhashWords(&fwdEntry->info.dstIpAddr, 1, OVS_HASH_BASIS); + InsertHeadList(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], + &fwdEntry->link); + ovsNumFwdEntries++; +} + + +static VOID +OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) +{ + UINT32 i; + POVS_FWD_ENTRY fwdEntry; + PLIST_ENTRY link, next; + + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + LIST_FORALL_SAFE(&ovsFwdHashTable[i], link, next) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); + if (fwdEntry->info.srcIpAddr == ipAddr) { + OvsRemoveFwdEntry(fwdEntry); + } + } + } +} + + +static VOID +OvsCleanupFwdTable(VOID) +{ + PLIST_ENTRY link, next; + POVS_IPNEIGH_ENTRY ipn; + UINT32 i; + LOCK_STATE_EX lockState; + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + if (ovsNumFwdEntries) { + LIST_FORALL_SAFE(&ovsSortedIPNeighList, link, next) { + ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + OvsRemoveIPNeighEntry(ipn); + } + } + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + ASSERT(IsListEmpty(&ovsFwdHashTable[i])); + } + for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { + ASSERT(IsListEmpty(&ovsRouteHashTable[i])); + 
} + NdisReleaseRWLock(ovsTableLock, &lockState); +} + + +static VOID +OvsCleanupIpHelperRequestList(VOID) +{ + LIST_ENTRY list; + PLIST_ENTRY next, link; + POVS_IP_HELPER_REQUEST request; + + NdisAcquireSpinLock(&ovsIpHelperLock); + if (ovsNumIpHelperRequests == 0) { + NdisReleaseSpinLock(&ovsIpHelperLock); + return; + } + + InitializeListHead(&list); + OvsAppendList(&list, &ovsIpHelperRequestList); + ovsNumIpHelperRequests = 0; + NdisReleaseSpinLock(&ovsIpHelperLock); + + LIST_FORALL_SAFE(&list, link, next) { + request = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + + if (request->command == OVS_IP_HELPER_FWD_REQUEST && + request->fwdReq.cb) { + request->fwdReq.cb(request->fwdReq.nbl, + request->fwdReq.inPort, + &request->fwdReq.tunnelKey, + request->fwdReq.cbData1, + request->fwdReq.cbData2, + STATUS_DEVICE_NOT_READY, + NULL); + } + OvsFreeMemory(request); + } +} + + + +static VOID +OvsWakeupIPHelper(VOID) +{ + KeSetEvent(&ovsIpHelperThreadContext.event, 0, FALSE); +} + +VOID +OvsInternalAdapterDown(VOID) +{ + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalPortNo = OVS_DEFAULT_PORT_NO; + ovsInternalIPConfigured = FALSE; + NdisReleaseSpinLock(&ovsIpHelperLock); + + OvsCleanupIpHelperRequestList(); + + OvsCleanupFwdTable(); +} + + +VOID +OvsInternalAdapterUp(UINT32 portNo, + GUID *netCfgInstanceId) +{ + POVS_IP_HELPER_REQUEST request; + + RtlCopyMemory(&ovsInternalNetCfgId, netCfgInstanceId, sizeof (GUID)); + RtlZeroMemory(&ovsInternalRow, sizeof (MIB_IF_ROW2)); + + request = + (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); + if (request == NULL) { + OVS_LOG_ERROR("Fail to initialize Internal Adapter"); + return; + } + RtlZeroMemory(request, sizeof (OVS_IP_HELPER_REQUEST)); + request->command = OVS_IP_HELPER_INTERNAL_ADAPTER_UP; + + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalPortNo = portNo; + InsertHeadList(&ovsIpHelperRequestList, &request->link); + ovsNumIpHelperRequests++; + if (ovsNumIpHelperRequests == 1) { 
+ OvsWakeupIPHelper(); + } + NdisReleaseSpinLock(&ovsIpHelperLock); +} + + +static VOID +OvsHandleInternalAdapterUp(POVS_IP_HELPER_REQUEST request) +{ + NTSTATUS status; + MIB_UNICASTIPADDRESS_ROW ipEntry; + GUID *netCfgInstanceId = &ovsInternalNetCfgId; + + OvsFreeMemory(request); + + status = OvsGetIfEntry(&ovsInternalNetCfgId, &ovsInternalRow); + + if (status != STATUS_SUCCESS) { + OVS_LOG_ERROR("Fali to get IF entry for internal port with GUID" + " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", + netCfgInstanceId->Data1, + netCfgInstanceId->Data2, + netCfgInstanceId->Data3, + *(UINT16 *)netCfgInstanceId->Data4, + netCfgInstanceId->Data4[2], + netCfgInstanceId->Data4[3], + netCfgInstanceId->Data4[4], + netCfgInstanceId->Data4[5], + netCfgInstanceId->Data4[6], + netCfgInstanceId->Data4[7]); + return; + } + + status = OvsGetIPInterfaceEntry(ovsInternalRow.InterfaceLuid, + &ovsInternalIPRow); + + if (status == STATUS_SUCCESS) { + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsInternalIPConfigured = TRUE; + NdisReleaseSpinLock(&ovsIpHelperLock); + } else { + return; + } + + status = OvsGetIPEntry(ovsInternalRow.InterfaceLuid, &ipEntry); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fali to get IP entry for internal port with GUID" + " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", + netCfgInstanceId->Data1, + netCfgInstanceId->Data2, + netCfgInstanceId->Data3, + *(UINT16 *)netCfgInstanceId->Data4, + netCfgInstanceId->Data4[2], + netCfgInstanceId->Data4[3], + netCfgInstanceId->Data4[4], + netCfgInstanceId->Data4[5], + netCfgInstanceId->Data4[6], + netCfgInstanceId->Data4[7]); + } +} + + +static NTSTATUS +OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request) +{ + + NdisAcquireSpinLock(&ovsIpHelperLock); + + if (ovsInternalPortNo == OVS_DEFAULT_PORT_NO || + ovsInternalIPConfigured == FALSE) { + NdisReleaseSpinLock(&ovsIpHelperLock); + OvsFreeMemory(request); + return STATUS_NDIS_ADAPTER_NOT_READY; + } else { + InsertHeadList(&ovsIpHelperRequestList, 
&request->link); + ovsNumIpHelperRequests++; + if (ovsNumIpHelperRequests == 1) { + OvsWakeupIPHelper(); + } + NdisReleaseSpinLock(&ovsIpHelperLock); + return STATUS_SUCCESS; + } +} + + +NTSTATUS +OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, + UINT32 inPort, + const OvsIPv4TunnelKey *tunnelKey, + OvsIPHelperCallback cb, + PVOID cbData1, + PVOID cbData2) +{ + POVS_IP_HELPER_REQUEST request; + + request = + (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); + + if (request == NULL) { + return STATUS_INSUFFICIENT_RESOURCES; + } + request->command = OVS_IP_HELPER_FWD_REQUEST; + request->fwdReq.nbl = nbl; + request->fwdReq.inPort = inPort; + RtlCopyMemory(&request->fwdReq.tunnelKey, tunnelKey, + sizeof (*tunnelKey)); + request->fwdReq.cb = cb; + request->fwdReq.cbData1 = cbData1; + request->fwdReq.cbData2 = cbData2; + + return OvsEnqueueIpHelperRequest(request); +} + + +static VOID +OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) +{ + SOCKADDR_INET dst, src; + NTSTATUS status = STATUS_SUCCESS; + MIB_IPFORWARD_ROW2 ipRoute; + MIB_IPNET_ROW2 ipNeigh; + OVS_FWD_INFO fwdInfo; + UINT32 ipAddr; + UINT32 srcAddr; + POVS_FWD_ENTRY fwdEntry = NULL; + POVS_IPFORWARD_ENTRY ipf = NULL; + POVS_IPNEIGH_ENTRY ipn = NULL; + LOCK_STATE_EX lockState; + BOOLEAN newIPF = FALSE; + BOOLEAN newIPN = FALSE; + BOOLEAN newFWD = FALSE; + + status = OvsLookupIPFwdInfo(request->fwdReq.tunnelKey.dst, + &fwdInfo); + if (status == STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + + /* find IPRoute */ + RtlZeroMemory(&dst, sizeof(dst)); + RtlZeroMemory(&src, sizeof(src)); + RtlZeroMemory(&ipRoute, sizeof (MIB_IPFORWARD_ROW2)); + dst.si_family = AF_INET; + dst.Ipv4.sin_addr.s_addr = request->fwdReq.tunnelKey.dst; + + status = OvsGetRoute(ovsInternalRow.InterfaceLuid, &dst, &ipRoute, &src); + if (status != STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + srcAddr = src.Ipv4.sin_addr.s_addr; + + /* find IPNeigh */ + ipAddr = ipRoute.NextHop.Ipv4.sin_addr.s_addr; + if (ipAddr != 0) { + 
NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn) { + goto fwd_request_done; + } + NdisReleaseRWLock(ovsTableLock, &lockState); + } + RtlZeroMemory(&ipNeigh, sizeof (ipNeigh)); + ipNeigh.InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; + if (ipAddr == 0) { + ipAddr = request->fwdReq.tunnelKey.dst; + } + status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); + if (status != STATUS_SUCCESS) { + goto fwd_handle_nbl; + } + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + +fwd_request_done: + + /* + * Initialize ipf + */ + ipf = OvsLookupIPForwardEntry(&ipRoute.DestinationPrefix); + if (ipf == NULL) { + ipf = OvsCreateIPForwardEntry(&ipRoute); + if (ipf == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newIPF = TRUE; + } else { + PLIST_ENTRY link; + link = ipf->fwdList.Flink; + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); + srcAddr = fwdEntry->info.srcIpAddr; + } + + /* + * initialize ipn + */ + if (ipn == NULL) { + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn == NULL) { + ipn = OvsCreateIPNeighEntry(&ipNeigh); + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newIPN = TRUE; + } + } + + /* + * initialize fwdEntry + */ + fwdInfo.dstIpAddr = request->fwdReq.tunnelKey.dst; + fwdInfo.srcIpAddr = srcAddr; + RtlCopyMemory(fwdInfo.dstMacAddr, ipn->macAddr, MAC_ADDRESS_LEN); + RtlCopyMemory(fwdInfo.srcMacAddr, ovsInternalRow.PhysicalAddress, + MAC_ADDRESS_LEN); + fwdInfo.srcPortNo = request->fwdReq.inPort; + + fwdEntry = OvsCreateFwdEntry(&fwdInfo); + if (fwdEntry == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + status = STATUS_INSUFFICIENT_RESOURCES; + goto fwd_handle_nbl; + } + newFWD = TRUE; + /* + * Cache the result + */ + OvsAddIPFwdCache(fwdEntry, ipf, ipn); + NdisReleaseRWLock(ovsTableLock, &lockState); + 
+fwd_handle_nbl: + + if (status != STATUS_SUCCESS) { + if (newFWD) { + ASSERT(fwdEntry != NULL); + OvsFreeMemory(fwdEntry); + } + if (newIPF) { + ASSERT(ipf && ipf->refCount == 0); + OvsFreeMemory(ipf); + } + if (newIPN) { + ASSERT(ipn && ipn->refCount == 0); + OvsFreeMemory(ipn); + } + ipAddr = request->fwdReq.tunnelKey.dst; + OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d", + ipAddr & 0xff, (ipAddr >> 8) & 0xff, + (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); + } + if (request->fwdReq.cb) { + request->fwdReq.cb(request->fwdReq.nbl, + request->fwdReq.inPort, + &request->fwdReq.tunnelKey, + request->fwdReq.cbData1, + request->fwdReq.cbData2, + status, + status == STATUS_SUCCESS ? &fwdInfo : NULL); + } + OvsFreeMemory(request); +} + + +static VOID +OvsUpdateIPNeighEntry(UINT32 ipAddr, + PMIB_IPNET_ROW2 ipNeigh, + NTSTATUS status) +{ + UINT64 timeVal; + POVS_IPNEIGH_ENTRY ipn; + LOCK_STATE_EX lockState; + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + /* + * if mac changed, update all relevant fwdEntry + */ + if (status != STATUS_SUCCESS) { + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + } else { + NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); + } + ipn = OvsLookupIPNeighEntry(ipAddr); + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + if (status != STATUS_SUCCESS) { + OvsRemoveIPNeighEntry(ipn); + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + + if (memcmp((const PVOID)ipn->macAddr, + (const PVOID)ipNeigh->PhysicalAddress, + (size_t)MAC_ADDRESS_LEN)) { + PLIST_ENTRY link; + POVS_FWD_ENTRY fwdEntry; + NdisReleaseRWLock(ovsTableLock, &lockState); + /* + * need update, release and acquire write lock + * This is not the common case. 
+ */ + + NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); + ipn = OvsLookupIPNeighEntry(ipAddr); + + if (ipn == NULL) { + NdisReleaseRWLock(ovsTableLock, &lockState); + return; + } + + LIST_FORALL(&ipn->fwdList, link) { + fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); + RtlCopyMemory(fwdEntry->info.dstMacAddr, + ipNeigh->PhysicalAddress, MAC_ADDRESS_LEN); + } + } + /* + * update timeout and move to the end of + * the sorted list + */ + + NdisAcquireSpinLock(&ovsIpHelperLock); + RemoveEntryList(&ipn->slink); + ipn->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; + OvsAddToSortedNeighList(ipn); + NdisReleaseSpinLock(&ovsIpHelperLock); + NdisReleaseRWLock(ovsTableLock, &lockState); +} + + +static VOID +OvsHandleIPNeighTimeout(UINT32 ipAddr) +{ + MIB_IPNET_ROW2 ipNeigh; + NTSTATUS status; + + status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); + + OvsUpdateIPNeighEntry(ipAddr, &ipNeigh, status); +} + + +/* + *---------------------------------------------------------------------------- + * IP Helper system thread handles the following requests + * 1. Initialize Internal port row when internal port is connected + * 2. Handle FWD request + * 3. Handle IP Neigh timeout + * + * IP Interface, unicast address, and IP route change will be handled + * by the relevant callback. 
+ *---------------------------------------------------------------------------- + */ +VOID +OvsStartIpHelper(PVOID data) +{ + POVS_IP_HELPER_THREAD_CONTEXT context = (POVS_IP_HELPER_THREAD_CONTEXT)data; + POVS_IP_HELPER_REQUEST req; + POVS_IPNEIGH_ENTRY ipn; + PLIST_ENTRY link; + UINT64 timeVal, timeout; + + OVS_LOG_INFO("Start the IP Helper Thread, context: %p", context); + + NdisAcquireSpinLock(&ovsIpHelperLock); + while (!context->exit) { + + timeout = 0; + while (!IsListEmpty(&ovsIpHelperRequestList)) { + if (context->exit) { + goto ip_helper_wait; + } + link = ovsIpHelperRequestList.Flink; + RemoveEntryList(link); + NdisReleaseSpinLock(&ovsIpHelperLock); + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + switch (req->command) { + case OVS_IP_HELPER_INTERNAL_ADAPTER_UP: + OvsHandleInternalAdapterUp(req); + break; + case OVS_IP_HELPER_FWD_REQUEST: + OvsHandleFwdRequest(req); + break; + default: + OvsFreeMemory(req); + } + NdisAcquireSpinLock(&ovsIpHelperLock); + } + + /* for now, let us hold the lock here, if this cause any issue + * we will change to use IpHelper lock only to protect + * IPN + */ + while (!IsListEmpty(&ovsSortedIPNeighList)) { + UINT32 ipAddr; + if (context->exit) { + goto ip_helper_wait; + } + link = ovsSortedIPNeighList.Flink; + ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); + KeQuerySystemTime((LARGE_INTEGER *)&timeVal); + if (ipn->timeout > timeVal) { + timeout = ipn->timeout; + break; + } + ipAddr = ipn->ipAddr; + + NdisReleaseSpinLock(&ovsIpHelperLock); + + OvsHandleIPNeighTimeout(ipAddr); + + NdisAcquireSpinLock(&ovsIpHelperLock); + } + if (!IsListEmpty(&ovsIpHelperRequestList)) { + continue; + } + +ip_helper_wait: + if (context->exit) { + break; + } + + KeClearEvent(&context->event); + NdisReleaseSpinLock(&ovsIpHelperLock); + + KeWaitForSingleObject(&context->event, Executive, KernelMode, + FALSE, (LARGE_INTEGER *)&timeout); + NdisAcquireSpinLock(&ovsIpHelperLock); + } + NdisReleaseSpinLock(&ovsIpHelperLock); + 
+ OvsCleanupFwdTable(); + OvsCleanupIpHelperRequestList(); + + OVS_LOG_INFO("Terminating the OVS IP Helper system thread"); + + PsTerminateSystemThread(STATUS_SUCCESS); +} + + +/* + * Allocates the forwarding, route and neighbor hash tables and the locks, + * registers the IP change notifications and starts the IP helper thread. + * Returns STATUS_SUCCESS, or the failure status after releasing everything + * that was set up. + */ +NTSTATUS +OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle) +{ + NTSTATUS status; + HANDLE threadHandle; + UINT32 i; + + ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_FWD_HASH_TABLE_SIZE); + + ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_ROUTE_HASH_TABLE_SIZE); + + ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * + OVS_NEIGH_HASH_TABLE_SIZE); + + RtlZeroMemory(&ovsInternalRow, sizeof(MIB_IF_ROW2)); + RtlZeroMemory(&ovsInternalIPRow, sizeof (MIB_IPINTERFACE_ROW)); + ovsInternalIP = 0; + + ovsInternalPortNo = OVS_DEFAULT_PORT_NO; + + InitializeListHead(&ovsSortedIPNeighList); + + ovsTableLock = NdisAllocateRWLock(ndisFilterHandle); + NdisAllocateSpinLock(&ovsIpHelperLock); + + InitializeListHead(&ovsIpHelperRequestList); + ovsNumIpHelperRequests = 0; + ipInterfaceNotificationHandle = NULL; + ipRouteNotificationHandle = NULL; + unicastIPNotificationHandle = NULL; + + if (ovsFwdHashTable == NULL || + ovsRouteHashTable == NULL || + ovsNeighHashTable == NULL || + ovsTableLock == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto init_cleanup; + } + + for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsFwdHashTable[i]); + } + + for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsRouteHashTable[i]); + } + + for (i = 0; i < OVS_NEIGH_HASH_TABLE_SIZE; i++) { + InitializeListHead(&ovsNeighHashTable[i]); + } + + + KeInitializeEvent(&ovsIpHelperThreadContext.event, NotificationEvent, + FALSE); + status = OvsRegisterChangeNotification(); + ovsIpHelperThreadContext.exit = 0; + if (status == STATUS_SUCCESS) { + status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, + NULL, NULL, NULL, OvsStartIpHelper, + &ovsIpHelperThreadContext); + if (status != STATUS_SUCCESS) { + goto 
init_cleanup; + } + ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, + KernelMode, + &ovsIpHelperThreadContext.threadObject, + NULL); + ZwClose(threadHandle); + } + +init_cleanup: + + if (status != STATUS_SUCCESS) { + OvsCancelChangeNotification(); + if (ovsFwdHashTable) { + OvsFreeMemory(ovsFwdHashTable); + ovsFwdHashTable = NULL; + } + if (ovsRouteHashTable) { + OvsFreeMemory(ovsRouteHashTable); + ovsRouteHashTable = NULL; + } + if (ovsNeighHashTable) { + OvsFreeMemory(ovsNeighHashTable); + ovsNeighHashTable = NULL; + } + if (ovsTableLock) { + NdisFreeRWLock(ovsTableLock); + ovsTableLock = NULL; + } + NdisFreeSpinLock(&ovsIpHelperLock); + } + /* Propagate the failure: the tables and locks were released above, so + * the caller must not treat the IP helper as initialized. */ + return status; +} + + +VOID +OvsCleanupIpHelper(VOID) +{ + OvsCancelChangeNotification(); + + NdisAcquireSpinLock(&ovsIpHelperLock); + ovsIpHelperThreadContext.exit = 1; + OvsWakeupIPHelper(); + NdisReleaseSpinLock(&ovsIpHelperLock); + + KeWaitForSingleObject(ovsIpHelperThreadContext.threadObject, Executive, + KernelMode, FALSE, NULL); + ObDereferenceObject(ovsIpHelperThreadContext.threadObject); + + OvsFreeMemory(ovsFwdHashTable); + OvsFreeMemory(ovsRouteHashTable); + OvsFreeMemory(ovsNeighHashTable); + + NdisFreeRWLock(ovsTableLock); + NdisFreeSpinLock(&ovsIpHelperLock); +} + +VOID +OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl) +{ + PLIST_ENTRY link, next; + POVS_IP_HELPER_REQUEST req; + LIST_ENTRY list; + InitializeListHead(&list); + + NdisAcquireSpinLock(&ovsIpHelperLock); + LIST_FORALL_SAFE(&ovsIpHelperRequestList, link, next) { + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + if (req->command == OVS_IP_HELPER_FWD_REQUEST && + (nbl == NULL || req->fwdReq.nbl == nbl)) { + RemoveEntryList(link); + InsertHeadList(&list, link); + if (nbl != NULL) { + break; + } + } + } + NdisReleaseSpinLock(&ovsIpHelperLock); + + LIST_FORALL_SAFE(&list, link, next) { + req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); + if (req->fwdReq.cb) { + req->fwdReq.cb(req->fwdReq.nbl, req->fwdReq.inPort, 
+ &req->fwdReq.tunnelKey, + req->fwdReq.cbData1, + req->fwdReq.cbData2, + STATUS_DEVICE_NOT_READY, + NULL); + } + OvsFreeMemory(req); + } +} diff --git a/datapath-windows/ovsext/IpHelper.h b/datapath-windows/ovsext/IpHelper.h new file mode 100644 index 000000000..00f17f194 --- /dev/null +++ b/datapath-windows/ovsext/IpHelper.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __IP_HELPER_H_ +#define __IP_HELPER_H_ 1 + +#include +#include + +#define OVS_FWD_HASH_TABLE_SIZE ((UINT32)1 << 10) +#define OVS_FWD_HASH_TABLE_MASK (OVS_FWD_HASH_TABLE_SIZE - 1) + +#define OVS_ROUTE_HASH_TABLE_SIZE ((UINT32)1 << 8) +#define OVS_ROUTE_HASH_TABLE_MASK (OVS_ROUTE_HASH_TABLE_SIZE - 1) + +#define OVS_NEIGH_HASH_TABLE_SIZE ((UINT32)1 << 8) +#define OVS_NEIGH_HASH_TABLE_MASK (OVS_NEIGH_HASH_TABLE_SIZE - 1) + +#define OVS_IPNEIGH_TIMEOUT 100000000 // 10 s + + +typedef struct _OVS_IPNEIGH_ENTRY { + UINT8 macAddr[MAC_ADDRESS_LEN]; + UINT16 refCount; + UINT32 ipAddr; + UINT32 pad; + UINT64 timeout; + LIST_ENTRY link; + LIST_ENTRY slink; + LIST_ENTRY fwdList; +} OVS_IPNEIGH_ENTRY, *POVS_IPNEIGH_ENTRY; + +typedef struct _OVS_IPFORWARD_ENTRY { + IP_ADDRESS_PREFIX prefix; + UINT32 nextHop; + UINT16 refCount; + LIST_ENTRY link; + LIST_ENTRY fwdList; +} OVS_IPFORWARD_ENTRY, *POVS_IPFORWARD_ENTRY; + +typedef union _OVS_FWD_INFO { + struct { + UINT32 dstIpAddr; + UINT32 srcIpAddr; + UINT8 
dstMacAddr[MAC_ADDRESS_LEN]; + UINT8 srcMacAddr[MAC_ADDRESS_LEN]; + UINT32 srcPortNo; + }; + UINT64 value[3]; +} OVS_FWD_INFO, *POVS_FWD_INFO; + +typedef struct _OVS_FWD_ENTRY { + OVS_FWD_INFO info; + POVS_IPFORWARD_ENTRY ipf; + POVS_IPNEIGH_ENTRY ipn; + LIST_ENTRY link; + LIST_ENTRY ipfLink; + LIST_ENTRY ipnLink; +} OVS_FWD_ENTRY, *POVS_FWD_ENTRY; + + +enum { + OVS_IP_HELPER_INTERNAL_ADAPTER_UP, + OVS_IP_HELPER_FWD_REQUEST, +}; + +typedef VOID (*OvsIPHelperCallback)(PNET_BUFFER_LIST nbl, + UINT32 inPort, + PVOID tunnelKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS status, + POVS_FWD_INFO fwdInfo); + +typedef struct _OVS_FWD_REQUEST_INFO { + PNET_BUFFER_LIST nbl; + UINT32 inPort; + OvsIPv4TunnelKey tunnelKey; + OvsIPHelperCallback cb; + PVOID cbData1; + PVOID cbData2; +} OVS_FWD_REQUEST_INFO, *POVS_FWD_REQUEST_INFO; + + +typedef struct _OVS_IP_HELPER_REQUEST { + LIST_ENTRY link; + UINT32 command; + union { + OVS_FWD_REQUEST_INFO fwdReq; + UINT32 dummy; + }; +} OVS_IP_HELPER_REQUEST, *POVS_IP_HELPER_REQUEST; + + +typedef struct _OVS_IP_HELPER_THREAD_CONTEXT { + KEVENT event; + PVOID threadObject; + UINT32 exit; +} OVS_IP_HELPER_THREAD_CONTEXT, *POVS_IP_HELPER_THREAD_CONTEXT; + +NTSTATUS OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle); +VOID OvsCleanupIpHelper(VOID); + +VOID OvsInternalAdapterUp(UINT32 portNo, GUID *netCfgInstanceId); +VOID OvsInternalAdapterDown(VOID); + +NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort, + const PVOID tunnelKey, + OvsIPHelperCallback cb, + PVOID cbData1, + PVOID cbData2); +NTSTATUS OvsLookupIPFwdInfo(UINT32 dstIp, POVS_FWD_INFO info); +VOID OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl); + +#endif /* __IP_HELPER_H_ */ diff --git a/datapath-windows/ovsext/Jhash.c b/datapath-windows/ovsext/Jhash.c new file mode 100644 index 000000000..db08d0b46 --- /dev/null +++ b/datapath-windows/ovsext/Jhash.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" + +/* Reads four bytes at 'p_' one byte at a time, so 'p_' need not be aligned; + * the bytes are combined most-significant first and passed through ntohl(). + * Each byte is widened to UINT32 before shifting so that a byte >= 0x80 is + * never shifted into the sign bit of a promoted int. */ +static __inline UINT32 +GetUnalignedU32(const UINT32 *p_) +{ + const UINT8 *p = (const UINT8 *)p_; + return ntohl(((UINT32)p[0] << 24) | ((UINT32)p[1] << 16) | + ((UINT32)p[2] << 8) | (UINT32)p[3]); +} + +/* This is the public domain lookup3 hash by Bob Jenkins from + * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */ + +static __inline UINT32 +JhashRot(UINT32 x, INT k) +{ + return (x << k) | (x >> (32 - k)); +} + +static __inline VOID +JhashMix(UINT32 *a, UINT32 *b, UINT32 *c) +{ + *a -= *c; *a ^= JhashRot(*c, 4); *c += *b; + *b -= *a; *b ^= JhashRot(*a, 6); *a += *c; + *c -= *b; *c ^= JhashRot(*b, 8); *b += *a; + *a -= *c; *a ^= JhashRot(*c, 16); *c += *b; + *b -= *a; *b ^= JhashRot(*a, 19); *a += *c; + *c -= *b; *c ^= JhashRot(*b, 4); *b += *a; +} + +static __inline VOID +JhashFinal(UINT32 *a, UINT32 *b, UINT32 *c) +{ + *c ^= *b; *c -= JhashRot(*b, 14); + *a ^= *c; *a -= JhashRot(*c, 11); + *b ^= *a; *b -= JhashRot(*a, 25); + *c ^= *b; *c -= JhashRot(*b, 16); + *a ^= *c; *a -= JhashRot(*c, 4); + *b ^= *a; *b -= JhashRot(*a, 14); + *c ^= *b; *c -= JhashRot(*b, 24); +} + +/* Returns the Jenkins hash of the 'n' 32-bit words at 'p', starting from + * 'basis'. 'p' must be properly aligned. + * + * Use hash_words() instead, unless you're computing a hash function whose + * value is exposed "on the wire" so we don't want to change it. 
*/ +UINT32 +OvsJhashWords(const UINT32 *p, SIZE_T n, UINT32 basis) +{ + UINT32 a, b, c; + + a = b = c = 0xdeadbeef + (((UINT32) n) << 2) + basis; + + while (n > 3) { + a += p[0]; + b += p[1]; + c += p[2]; + JhashMix(&a, &b, &c); + n -= 3; + p += 3; + } + + switch (n) { + case 3: + c += p[2]; + /* fall through */ + case 2: + b += p[1]; + /* fall through */ + case 1: + a += p[0]; + JhashFinal(&a, &b, &c); + /* fall through */ + case 0: + break; + } + return c; +} + +/* Returns the Jenkins hash of the 'n' bytes at 'p', starting from 'basis'. + * + * Use hash_bytes() instead, unless you're computing a hash function whose + * value is exposed "on the wire" so we don't want to change it. */ +UINT32 +OvsJhashBytes(const VOID *p_, SIZE_T n, UINT32 basis) +{ + const UINT32 *p = p_; + UINT32 a, b, c; + + a = b = c = 0xdeadbeef + (UINT32)n + basis; + + while (n >= 12) { + a += GetUnalignedU32(p); + b += GetUnalignedU32(p + 1); + c += GetUnalignedU32(p + 2); + JhashMix(&a, &b, &c); + n -= 12; + p += 3; + } + + if (n) { + UINT32 tmp[3]; + + tmp[0] = tmp[1] = tmp[2] = 0; + memcpy(tmp, p, n); + a += tmp[0]; + b += tmp[1]; + c += tmp[2]; + JhashFinal(&a, &b, &c); + } + + return c; +} diff --git a/datapath-windows/ovsext/Jhash.h b/datapath-windows/ovsext/Jhash.h new file mode 100644 index 000000000..eb3d4cd1b --- /dev/null +++ b/datapath-windows/ovsext/Jhash.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __JHASH_H_ +#define __JHASH_H_ 1 + +/* This is the public domain lookup3 hash by Bob Jenkins from + * http://burtleburtle.net/bob/c/lookup3.c, modified for style. + * + * Use the functions in hash.h instead if you can. These are here just for + * places where we've exposed a hash function "on the wire" and don't want it + * to change. */ + +uint32_t OvsJhashWords(const uint32_t *, size_t n_word, uint32_t basis); +uint32_t OvsJhashBytes(const void *, size_t n_bytes, uint32_t basis); + +#endif /* __JHASH_H_ */ diff --git a/datapath-windows/ovsext/NetProto.h b/datapath-windows/ovsext/NetProto.h new file mode 100644 index 000000000..a364869b2 --- /dev/null +++ b/datapath-windows/ovsext/NetProto.h @@ -0,0 +1,369 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NET_PROTO_H_ +#define __NET_PROTO_H_ 1 + +#include "precomp.h" +#include "Ethernet.h" + +#define ETH_ADDR_LENGTH 6 +/* + * There is a more inclusive definition of ethernet header (Eth_Header) in + * Ethernet.h that is used for packet parsing. For simple cases, use the following definition. 
+ */ +typedef struct EthHdr { + UINT8 Destination[ETH_ADDR_LENGTH]; + UINT8 Source[ETH_ADDR_LENGTH]; + UINT16 Type; +} EthHdr, *PEthHdr; + +#define IPV4 4 +#define IPV6 6 + +#define IP_HDR_MIN_LENGTH 20 +#define TCP_HDR_MIN_LENGTH 20 +#define TCP_CSUM_OFFSET 16 +#define UDP_CSUM_OFFSET 6 +#define ICMP_CSUM_OFFSET 2 +#define INET_CSUM_LENGTH (sizeof(UINT16)) + +#define IP4_UNITS_TO_BYTES(x) ((x) << 2) +#define IP4_BYTES_TO_UNITS(x) ((x) >> 2) + +// length unit for ip->ihl, tcp->doff +typedef UINT32 IP4UnitLength; + +#define IP4_LENGTH_UNIT (sizeof(IP4UnitLength)) +#define IP4_HDR_MIN_LENGTH_IN_UNITS (IP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) +#define TCP_HDR_MIN_LENGTH_IN_UNITS (TCP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) + +#define IP4_IHL_NO_OPTIONS IP4_HDR_MIN_LENGTH_IN_UNITS +#define IP4_HDR_LEN(iph) IP4_UNITS_TO_BYTES((iph)->ihl) + +// length unit for ip->frag_off +typedef UINT64 IP4FragUnitLength; + +#define IP4_FRAG_UNIT_LENGTH (sizeof(IP4FragUnitLength)) + +// length UINT for ipv6 header length. +typedef UINT64 IP6UnitLength; + +#define TCP_HDR_LEN(tcph) IP4_UNITS_TO_BYTES((tcph)->doff) +#define TCP_DATA_LENGTH(iph, tcph) (ntohs(iph->tot_len) - \ + IP4_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) + +#define TCP_DATA_OFFSET_NO_OPTIONS TCP_HDR_MIN_LENGTH_IN_UNITS +#define TCP_DATA_OFFSET_WITH_TIMESTAMP 8 + +/* + * This is the maximum value for the length field in the IP header. 
The meaning + * varies with IP protocols: + * IPv4: the total ip length (including ip header and extension) + * IPv6: the IP payload length (including IP extensions) + */ +#define IP_MAX_PACKET 0xFFFF + +#define IPPROTO_ICMP 1 +#define IPPROTO_IGMP 2 +#define IPPROTO_UDP 17 +#define IPPROTO_GRE 47 +#define IPPROTO_TCP 6 +#define IPPROTO_RSVD 0xff + +#define IPPROTO_HOPOPTS 0 /* Hop-by-hop option header */ +#define IPPROTO_IPV6 41 /* IPv6 in IPv6 */ +#define IPPROTO_ROUTING 43 /* Routing header */ +#define IPPROTO_FRAGMENT 44 /* Fragmentation/reassembly header */ +#define IPPROTO_GRE 47 /* General Routing Encapsulation */ +#define IPPROTO_ESP 50 /* Encap. Security Payload */ +#define IPPROTO_AH 51 /* Authentication header */ +#define IPPROTO_ICMPV6 58 /* ICMP for IPv6 */ +#define IPPROTO_NONE 59 /* No next header */ +#define IPPROTO_DSTOPTS 60 /* Destination options header */ +#define IPPROTO_ETHERIP 97 /* etherIp tunneled protocol */ + +/* ICMPv6 types. */ +#define ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */ +#define ND_NEIGHBOR_ADVERT 136 /* neighbor advertisement */ + +/* IPv6 Neighbor discovery option header. */ +#define ND_OPT_SOURCE_LINKADDR 1 +#define ND_OPT_TARGET_LINKADDR 2 + +/* Collides with MS definition (opposite order) */ +#define IP6F_OFF_HOST_ORDER_MASK 0xfff8 + +#define ARPOP_REQUEST 1 /* ARP request. */ +#define ARPOP_REPLY 2 /* ARP reply. */ +#define RARPOP_REQUEST 3 /* RARP request. */ +#define RARPOP_REPLY 4 /* RARP reply. */ + + /* all ARP NBO's assume short ar_op */ +#define ARPOP_REQUEST_NBO 0x0100 /* NBO ARP request. */ +#define ARPOP_REPLY_NBO 0x0200 /* NBO ARP reply. */ +#define RARPOP_REQUEST_NBO 0x0300 /* NBO RARP request. */ +#define RARPOP_REPLY_NBO 0x0400 /* NBO RARP reply. 
*/ + +#define ICMP_ECHO 8 /* Echo Request */ +#define ICMP_ECHOREPLY 0 /* Echo Reply */ +#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ + +/* IGMP related constants */ +#define IGMP_UNKNOWN 0x00 /* For IGMP packets where we don't know the type */ + /* Eg: Fragmented packets without the header */ + +/* Constants from RFC 3376 */ +#define IGMP_QUERY 0x11 /* IGMP Host Membership Query. */ +#define IGMP_V1REPORT 0x12 /* IGMPv1 Host Membership Report. */ +#define IGMP_V2REPORT 0x16 /* IGMPv2 Host Membership Report. */ +#define IGMP_V3REPORT 0x22 /* IGMPv3 Host Membership Report. */ +#define IGMP_V2LEAVE 0x17 /* IGMPv2 Leave. */ + +/* Constants from RFC 2710 and RFC 3810 */ +#define MLD_QUERY 0x82 /* Multicast Listener Query. */ +#define MLD_V1REPORT 0x83 /* Multicast Listener V1 Report. */ +#define MLD_V2REPORT 0x8F /* Multicast Listener V2 Report. */ +#define MLD_DONE 0x84 /* Multicast Listener Done. */ + +/* IPv4 offset flags */ +#define IP_CE 0x8000 /* Flag: "Congestion" */ +#define IP_DF 0x4000 /* Flag: "Don't Fragment" */ +#define IP_MF 0x2000 /* Flag: "More Fragments" */ +#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ + +#define IP_OFFSET_NBO 0xFF1F /* "Fragment Offset" part, NBO */ +#define IP_DF_NBO 0x0040 /* NBO version of don't fragment */ +#define IP_MF_NBO 0x0020 /* NBO version of more fragments */ + +#define IPOPT_RTRALT 0x94 + +/* IP Explicit Congestion Notification bits (TOS field) */ +#define IP_ECN_NOT_ECT 0 +#define IP_ECN_ECT_1 1 +#define IP_ECN_ECT_0 2 +#define IP_ECN_CE 3 +#define IP_ECN_MASK 3 + +/* TCP options */ +#define TCP_OPT_NOP 1 /* Padding */ +#define TCP_OPT_EOL 0 /* End of options */ +#define TCP_OPT_MSS 2 /* Segment size negotiating */ +#define TCP_OPT_WINDOW 3 /* Window scaling */ +#define TCP_OPT_SACK_PERM 4 /* SACK Permitted */ +#define TCP_OPT_SACK 5 /* SACK Block */ +#define TCP_OPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ +#define TCP_OPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ + +#define TCP_OPT_LEN_MSS 4 
+#define TCP_OPT_LEN_WINDOW 3 +#define TCP_OPT_LEN_SACK_PERM 2 +#define TCP_OPT_LEN_TIMESTAMP 10 +#define TCP_OPT_LEN_MD5SIG 18 + +#define SOCKET_IPPROTO_HOPOPTS IPPROTO_HOPOPTS +#define SOCKET_IPPROTO_ROUTING IPPROTO_ROUTING +#define SOCKET_IPPROTO_FRAGMENT IPPROTO_FRAGMENT +#define SOCKET_IPPROTO_AH IPPROTO_AH +#define SOCKET_IPPROTO_ICMPV6 IPPROTO_ICMPV6 +#define SOCKET_IPPROTO_NONE IPPROTO_NONE +#define SOCKET_IPPROTO_DSTOPTS IPPROTO_DSTOPTS +#define SOCKET_IPPROTO_EON 80 +#define SOCKET_IPPROTO_ETHERIP IPPROTO_ETHERIP +#define SOCKET_IPPROTO_ENCAP 98 +#define SOCKET_IPPROTO_PIM 103 +#define SOCKET_IPPROTO_IPCOMP 108 +#define SOCKET_IPPROTO_CARP 112 +#define SOCKET_IPPROTO_PFSYNC 240 +#define SOCKET_IPPROTO_RAW IPPROTO_RSVD + +typedef union _OVS_PACKET_HDR_INFO { + struct { + UINT16 l3Offset; + UINT16 l4Offset; + union { + UINT16 l7Offset; + UINT16 l4PayLoad; + }; + UINT16 isIPv4:1; + UINT16 isIPv6:1; + UINT16 isTcp:1; + UINT16 isUdp:1; + UINT16 tcpCsumNeeded:1; + UINT16 udpCsumNeeded:1; + UINT16 udpCsumZero:1; + UINT16 pad:9; + } ; + UINT64 value; +} OVS_PACKET_HDR_INFO, *POVS_PACKET_HDR_INFO; + +typedef struct IPHdr { + UINT8 ihl:4, + version:4; + UINT8 tos; + UINT16 tot_len; + UINT16 id; + UINT16 frag_off; + UINT8 ttl; + UINT8 protocol; + UINT16 check; + UINT32 saddr; + UINT32 daddr; +} IPHdr; + + + /* + * IPv6 fixed header + * + * BEWARE, it is incorrect. The first 4 bits of flow_lbl + * are glued to priority now, forming "class". 
+ */ + +typedef struct IPv6Hdr { + UINT8 priority:4, + version:4; + UINT8 flow_lbl[3]; + + UINT16 payload_len; + UINT8 nexthdr; + UINT8 hop_limit; + + struct in6_addr saddr; + struct in6_addr daddr; +} IPv6Hdr; + +// Generic IPv6 extension header +typedef struct IPv6ExtHdr { + UINT8 nextHeader; // type of the next header + UINT8 hdrExtLen; // length of header extensions (beyond 8 bytes) + UINT16 optPad1; + UINT32 optPad2; +} IPv6ExtHdr; + +typedef struct IPv6FragHdr { + UINT8 nextHeader; + UINT8 reserved; + UINT16 offlg; + UINT32 ident; +} IPv6FragHdr; + +typedef struct IPv6NdOptHdr { + UINT8 type; + UINT8 len; +} IPv6NdOptHdr; + +typedef struct ICMPHdr { + UINT8 type; + UINT8 code; + UINT16 checksum; +} ICMPHdr; + +typedef struct ICMPEcho { + UINT16 id; + UINT16 seq; +} ICMPEcho; + +typedef struct UDPHdr { + UINT16 source; + UINT16 dest; + UINT16 len; + UINT16 check; +} UDPHdr; + +typedef struct TCPHdr { + UINT16 source; + UINT16 dest; + UINT32 seq; + UINT32 ack_seq; + UINT16 res1:4, + doff:4, + fin:1, + syn:1, + rst:1, + psh:1, + ack:1, + urg:1, + ece:1, + cwr:1; + UINT16 window; + UINT16 check; + UINT16 urg_ptr; +} TCPHdr; + +typedef struct PseudoHdr { + UINT32 sourceIPAddr; + UINT32 destIPAddr; + UINT8 zero; + UINT8 protocol; + UINT16 length; +} PseudoHdr; + +typedef struct PseudoHdrIPv6 { + UINT8 sourceIPAddr[16]; + UINT8 destIPAddr[16]; + UINT8 zero; + UINT8 protocol; + UINT16 length; +} PseudoHdrIPv6; + + +struct ArpHdr { + UINT16 ar_hrd; /* Format of hardware address. */ + UINT16 ar_pro; /* Format of protocol address. */ + UINT8 ar_hln; /* Length of hardware address. */ + UINT8 ar_pln; /* Length of protocol address. */ + UINT16 ar_op; /* ARP opcode (command). 
*/ +}; + +typedef struct EtherArp { + struct ArpHdr ea_hdr; /* fixed-size header */ + Eth_Address arp_sha; /* sender hardware address */ + UINT8 arp_spa[4]; /* sender protocol address */ + Eth_Address arp_tha; /* target hardware address */ + UINT8 arp_tpa[4]; /* target protocol address */ +} EtherArp; + +typedef struct IGMPHdr { + UINT8 type; + UINT8 maxResponseTime; + UINT16 csum; + UINT8 groupAddr[4]; +} IGMPHdr; + +typedef struct IGMPV3Trailer { + UINT8 qrv:3, + s:1, + resv:4; + UINT8 qqic; + UINT16 numSources; +} IGMPV3Trailer; + +typedef struct IPOpt { + UINT8 type; + UINT8 length; + UINT16 value; +} IPOpt; + +/* + * IP protocol types + */ +#define SOCKET_IPPROTO_IP 0 +#define SOCKET_IPPROTO_ICMP 1 +#define SOCKET_IPPROTO_TCP 6 +#define SOCKET_IPPROTO_UDP 17 +#define SOCKET_IPPROTO_GRE 47 + +#endif /* __NET_PROTO_H_ */ diff --git a/datapath-windows/ovsext/Netlink.c b/datapath-windows/ovsext/Netlink.c index 1c8d05406..90a633b7d 100644 --- a/datapath-windows/ovsext/Netlink.c +++ b/datapath-windows/ovsext/Netlink.c @@ -22,7 +22,7 @@ #undef OVS_DBG_MOD #endif #define OVS_DBG_MOD OVS_DBG_NETLINK -#include "OvsDebug.h" +#include "Debug.h" /* * --------------------------------------------------------------------------- diff --git a/datapath-windows/ovsext/Netlink.h b/datapath-windows/ovsext/Netlink.h index bc896662b..0edc2faa0 100644 --- a/datapath-windows/ovsext/Netlink.h +++ b/datapath-windows/ovsext/Netlink.h @@ -17,7 +17,7 @@ #ifndef __NETLINK_H_ #define __NETLINK_H_ 1 -#include "OvsTypes.h" +#include "Types.h" #include "NetlinkProto.h" /* Netlink attribute types. */ diff --git a/datapath-windows/ovsext/NetlinkProto.h b/datapath-windows/ovsext/NetlinkProto.h index 20ab750b7..399b286ea 100644 --- a/datapath-windows/ovsext/NetlinkProto.h +++ b/datapath-windows/ovsext/NetlinkProto.h @@ -27,7 +27,7 @@ * This header provides access to the Netlink message framing definitions * regardless of platform. 
*/ -#include "OvsTypes.h" +#include "Types.h" #define BUILD_ASSERT(EXPR) \ typedef char AssertOnCompileFailed[(EXPR) ? 1: -1] @@ -113,4 +113,4 @@ BUILD_ASSERT_DECL(sizeof(NL_ATTR) == 4); #define OVS_HDRLEN NLMSG_ALIGN(sizeof(OVS_HDR)) #define NLA_HDRLEN ((INT) NLA_ALIGN(sizeof(NL_ATTR))) -#endif /* NetlinProto.h */ +#endif /* __NETLINK_PROTO_H_ */ diff --git a/datapath-windows/ovsext/Oid.c b/datapath-windows/ovsext/Oid.c new file mode 100644 index 000000000..a67534714 --- /dev/null +++ b/datapath-windows/ovsext/Oid.c @@ -0,0 +1,854 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "Switch.h" +#include "Vport.h" +#include "NetProto.h" +#include "User.h" +#include "Flow.h" +#include "Event.h" +#include "User.h" +#include "Oid.h" + +/* Due to an imported header file */ +#pragma warning( disable:4505 ) + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DISPATCH +#include "Debug.h" + +typedef struct _OVS_OID_CONTEXT { + NDIS_EVENT oidComplete; + NDIS_STATUS status; +} OVS_OID_CONTEXT, *POVS_OID_CONTEXT; + + +VOID +OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); +static VOID +OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status); + +static NDIS_STATUS +OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); +static NDIS_STATUS +OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); +static NDIS_STATUS +OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest); + +__inline BOOLEAN +OvsCheckOidHeaderFunc(PNDIS_OBJECT_HEADER header, + LONG propRev, + LONG propSize) +{ + return header->Type != NDIS_OBJECT_TYPE_DEFAULT || + header->Revision < propRev || + header->Size < propSize; +} + +#define OvsCheckOidHeader(_hdr, _rev) \ + OvsCheckOidHeaderFunc(_hdr, _rev, ##NDIS_SIZEOF_##_rev) + +static __inline VOID +OvsOidSetOrigRequest(PNDIS_OID_REQUEST clonedRequest, + PNDIS_OID_REQUEST origRequest) +{ + *(PVOID*)(&clonedRequest->SourceReserved[0]) = origRequest; +} + +static __inline PNDIS_OID_REQUEST 
+OvsOidGetOrigRequest(PNDIS_OID_REQUEST clonedRequest) +{ + return *((PVOID*)(&clonedRequest->SourceReserved[0])); +} + +static __inline VOID +OvsOidSetContext(PNDIS_OID_REQUEST clonedRequest, + POVS_OID_CONTEXT origRequest) +{ + *(PVOID*)(&clonedRequest->SourceReserved[8]) = origRequest; +} + +static __inline POVS_OID_CONTEXT +OvsOidGetContext(PNDIS_OID_REQUEST clonedRequest) +{ + return *((PVOID*)(&clonedRequest->SourceReserved[8])); +} + +static NDIS_STATUS +OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_PORT_PROPERTY_PARAMETERS portPropParam = + setInfo->InformationBuffer; + BOOLEAN checkFailed = TRUE; + + UNREFERENCED_PARAMETER(switchObject); + + if (setInfo->Oid == OID_SWITCH_PORT_PROPERTY_DELETE) { + checkFailed = OvsCheckOidHeader( + (PNDIS_OBJECT_HEADER)portPropParam, + NDIS_SWITCH_PORT_PROPERTY_DELETE_PARAMETERS_REVISION_1); + } else { + /* it must be a add or update request */ + checkFailed = OvsCheckOidHeader( + (PNDIS_OBJECT_HEADER)portPropParam, + NDIS_SWITCH_PORT_PROPERTY_PARAMETERS_REVISION_1); + } + + if (checkFailed) { + status = NDIS_STATUS_INVALID_PARAMETER; + goto done; + } + + if (portPropParam->PropertyType == NdisSwitchPortPropertyTypeVlan) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + +done: + return status; +} + +static NDIS_STATUS +OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_PORT_PARAMETERS portParam = setInfo->InformationBuffer; + + if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)portParam, + NDIS_SWITCH_PORT_PARAMETERS_REVISION_1)) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_CREATE: + status = OvsCreatePort(switchObject, portParam); + 
break; + case OID_SWITCH_PORT_TEARDOWN: + OvsTeardownPort(switchObject, portParam); + break; + case OID_SWITCH_PORT_DELETE: + OvsDeletePort(switchObject, portParam); + break; + default: + break; + } + +done: + return status; +} + +static NDIS_STATUS +OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + PNDIS_SWITCH_NIC_PARAMETERS nicParam = setInfo->InformationBuffer; + + if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)nicParam, + NDIS_SWITCH_NIC_PARAMETERS_REVISION_1)) { + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_NIC_CREATE: + status = OvsCreateNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_CONNECT: + OvsConnectNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_UPDATED: + OvsUpdateNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_DISCONNECT: + OvsDisconnectNic(switchObject, nicParam); + break; + case OID_SWITCH_NIC_DELETE: + OvsDeleteNic(switchObject, nicParam); + break; + default: + break; + } + +done: + return status; + +} + +static NDIS_STATUS +OvsProcessSetOid(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PBOOLEAN complete) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + + *complete = FALSE; + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, setInfo->Oid); + + /* Verify the basic Oid paramters first */ + if (setInfo->InformationBufferLength && + (setInfo->InformationBufferLength < sizeof(NDIS_OBJECT_HEADER))) { + status = NDIS_STATUS_INVALID_OID; + OVS_LOG_INFO("Invalid input %d", setInfo->InformationBufferLength); + goto error; + } + + /* Documentation does not specify what should be done + * if informationBuffer is not present. 
Although it mentions the + * structure type informationBUffer points to for each oid request, + * but it does not explicitly mention that it is a MUST. + * hence we are following this scenario same way as what sample code + * mentions. */ + if (!(setInfo->InformationBufferLength)) { + /* We cannot do anything about this oid request, + * lets just pass it down. */ + OVS_LOG_INFO("Buffer Length Zero"); + goto done; + } + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_PROPERTY_ADD: + case OID_SWITCH_PORT_PROPERTY_UPDATE: + case OID_SWITCH_PORT_PROPERTY_DELETE: + status = OvsProcessSetOidPortProp(switchObject, oidRequest); + break; + + case OID_SWITCH_PORT_CREATE: + case OID_SWITCH_PORT_UPDATED: + case OID_SWITCH_PORT_TEARDOWN: + case OID_SWITCH_PORT_DELETE: + status = OvsProcessSetOidPort(switchObject, oidRequest); + break; + + case OID_SWITCH_NIC_CREATE: + case OID_SWITCH_NIC_CONNECT: + case OID_SWITCH_NIC_UPDATED: + case OID_SWITCH_NIC_DISCONNECT: + case OID_SWITCH_NIC_DELETE: + status = OvsProcessSetOidNic(switchObject, oidRequest); + break; + + default: + /* Non handled OID request */ + break; + } + + if (status != NDIS_STATUS_SUCCESS) { + goto error; + } + + goto done; + +error: + *complete = TRUE; +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +static NDIS_STATUS +OvsProcessMethodOid(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PBOOLEAN complete, + PULONG bytesNeededParam) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + struct _SET *nicReqSetInfo = NULL; + PNDIS_OBJECT_HEADER header = NULL; + PNDIS_OID_REQUEST nicOidRequest = NULL; + + UNREFERENCED_PARAMETER(switchObject); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, methodInfo->Oid); + + *complete = FALSE; + *bytesNeededParam = 0; + header = methodInfo->InformationBuffer; + + switch(methodInfo->Oid) { + /* We deal with only OID_SWITCH_NIC_REQUEST as of now */ + case 
OID_SWITCH_NIC_REQUEST: + if (OvsCheckOidHeader(header, + NDIS_SWITCH_NIC_OID_REQUEST_REVISION_1)) { + OVS_LOG_INFO("Check Header failed"); + status = NDIS_STATUS_NOT_SUPPORTED; + *complete = TRUE; + goto done; + } + + nicOidRequest = (((PNDIS_SWITCH_NIC_OID_REQUEST)header)->OidRequest); + nicReqSetInfo = &(nicOidRequest->DATA.SET_INFORMATION); + + /* Fail the SR-IOV VF case */ + if ((nicOidRequest->RequestType == NdisRequestSetInformation) && + (nicReqSetInfo->Oid == OID_NIC_SWITCH_ALLOCATE_VF)) { + OVS_LOG_INFO("We do not support Oid: " + "OID_NIC_SWITCH_ALLOCATE_VF"); + status = NDIS_STATUS_FAILURE; + *complete = TRUE; + } + break; + default: + /* No op */ + break; + } + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterOidRequest function. + * -------------------------------------------------------------------------- + */ + +NDIS_STATUS +OvsExtOidRequest(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest) +{ + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNDIS_OID_REQUEST clonedOidRequest = NULL; + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + BOOLEAN completeOid = FALSE; + ULONG bytesNeeded = 0; + + OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", + oidRequest, oidRequest->RequestType); + status = NdisAllocateCloneOidRequest(switchObject->NdisFilterHandle, + oidRequest, OVS_MEMORY_TAG, + &clonedOidRequest); + if (status != NDIS_STATUS_SUCCESS) { + goto done; + } + + NdisInterlockedIncrement(&(switchObject->pendingOidCount)); + + /* set the original oid request in cloned one. 
*/ + OvsOidSetOrigRequest(clonedOidRequest, oidRequest); + OvsOidSetContext(clonedOidRequest, NULL); + + switch(clonedOidRequest->RequestType) { + case NdisRequestSetInformation: + status = OvsProcessSetOid(switchObject, clonedOidRequest, + &completeOid); + break; + case NdisRequestMethod: + status = OvsProcessMethodOid(switchObject, clonedOidRequest, + &completeOid, &bytesNeeded); + break; + default: + /* We do not handle other request types as of now. + * We are just a passthrough for those. */ + break; + } + + if (completeOid == TRUE) { + /* dont leave any reference back to original request, + * even if we are freeing it up. */ + OVS_LOG_INFO("Complete True oidRequest %p.", oidRequest); + OvsOidSetOrigRequest(clonedOidRequest, NULL); + NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, + clonedOidRequest); + methodInfo->BytesNeeded = bytesNeeded; + NdisInterlockedDecrement(&switchObject->pendingOidCount); + goto done; + } + + /* pass the request down */ + status = NdisFOidRequest(switchObject->NdisFilterHandle, clonedOidRequest); + if (status != NDIS_STATUS_PENDING) { + OvsExtOidRequestComplete(switchObject, clonedOidRequest, status); + /* sample code says so */ + status = NDIS_STATUS_PENDING; + } + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterOidRequestComplete function. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, + PNDIS_OID_REQUEST oidRequest, + NDIS_STATUS status) +{ + POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; + PNDIS_OID_REQUEST origReq = OvsOidGetOrigRequest(oidRequest); + POVS_OID_CONTEXT oidContext = OvsOidGetContext(oidRequest); + + /* Exactly one of the two should be set */ + ASSERT(origReq != NULL || oidContext != NULL); + ASSERT(origReq == NULL || oidContext == NULL); + + OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", + oidRequest, oidRequest->RequestType); + + if (origReq == NULL) { + NdisInterlockedDecrement(&(switchObject->pendingOidCount)); + oidContext->status = status; + NdisSetEvent(&oidContext->oidComplete); + OVS_LOG_INFO("Internally generated request"); + goto done; + } + + switch(oidRequest->RequestType) { + case NdisRequestMethod: + OvsOidRequestCompleteMethod(switchObject, oidRequest, + origReq, status); + break; + + case NdisRequestSetInformation: + OvsOidRequestCompleteSetInfo(switchObject, oidRequest, + origReq, status); + break; + + case NdisRequestQueryInformation: + case NdisRequestQueryStatistics: + default: + OvsOidRequestCompleteQuery(switchObject, oidRequest, + origReq, status); + break; + } + + OvsOidSetOrigRequest(oidRequest, NULL); + + NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, oidRequest); + NdisFOidRequestComplete(switchObject->NdisFilterHandle, origReq, status); + NdisInterlockedDecrement(&(switchObject->pendingOidCount)); + +done: + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + UNREFERENCED_PARAMETER(status); + UNREFERENCED_PARAMETER(switchObject); + + struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); + struct _METHOD *origMethodInfo = &(origOidRequest->DATA. 
+ METHOD_INFORMATION); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, methodInfo->Oid); + + origMethodInfo->OutputBufferLength = methodInfo->OutputBufferLength; + origMethodInfo->BytesRead = methodInfo->BytesRead; + origMethodInfo->BytesNeeded = methodInfo->BytesNeeded; + origMethodInfo->BytesWritten = methodInfo->BytesWritten; + + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); + struct _SET *origSetInfo = &(origOidRequest->DATA.SET_INFORMATION); + PNDIS_OBJECT_HEADER origHeader = origSetInfo->InformationBuffer; + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, setInfo->Oid); + + origSetInfo->BytesRead = setInfo->BytesRead; + origSetInfo->BytesNeeded = setInfo->BytesNeeded; + + if (status != NDIS_STATUS_SUCCESS) { + + switch(setInfo->Oid) { + case OID_SWITCH_PORT_CREATE: + OvsDeletePort(switchObject, + (PNDIS_SWITCH_PORT_PARAMETERS)origHeader); + break; + + case OID_SWITCH_NIC_CREATE: + OvsDeleteNic(switchObject, + (PNDIS_SWITCH_NIC_PARAMETERS)origHeader); + break; + + default: + break; + } + } + + OVS_LOG_TRACE("Exit"); +} + +static VOID +OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, + PNDIS_OID_REQUEST oidRequest, + PNDIS_OID_REQUEST origOidRequest, + NDIS_STATUS status) +{ + UNREFERENCED_PARAMETER(switchObject); + UNREFERENCED_PARAMETER(status); + + struct _QUERY *queryInfo = &((oidRequest->DATA).QUERY_INFORMATION); + struct _QUERY *origQueryInfo = &((origOidRequest->DATA).QUERY_INFORMATION); + + OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", + oidRequest, queryInfo->Oid); + + origQueryInfo->BytesWritten = queryInfo->BytesWritten; + origQueryInfo->BytesNeeded = queryInfo->BytesNeeded; + + OVS_LOG_TRACE("Exit"); +} + +/* + * -------------------------------------------------------------------------- + * 
Implements filter driver's FilterCancelOidRequest function. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtCancelOidRequest(NDIS_HANDLE filterModuleContext, + PVOID requestId) +{ + OVS_LOG_TRACE("Enter: requestId: %p", requestId); + + UNREFERENCED_PARAMETER(filterModuleContext); + UNREFERENCED_PARAMETER(requestId); +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to issue the specified OID to the NDIS stack. The OID is + * directed towards the miniport edge of the extensible switch. + * An OID that gets issued may not complete immediately, and in such cases, the + * function waits for the OID to complete. Thus, this function must be + * called at PASSIVE_LEVEL. + * -------------------------------------------------------------------------- + */ +static NDIS_STATUS +OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext, + NDIS_REQUEST_TYPE oidType, + UINT32 oidRequestEnum, + PVOID oidInputBuffer, + UINT32 inputSize, + PVOID oidOutputBuffer, + UINT32 outputSize, + UINT32 *outputSizeNeeded) +{ + NDIS_STATUS status; + PNDIS_OID_REQUEST oidRequest; + POVS_OID_CONTEXT oidContext; + ULONG OvsExtOidRequestId = 'ISVO'; + + DBG_UNREFERENCED_PARAMETER(inputSize); + DBG_UNREFERENCED_PARAMETER(oidInputBuffer); + + OVS_LOG_TRACE("Enter: switchContext: %p, oidType: %d", + switchContext, oidType); + + ASSERT(oidInputBuffer == NULL || inputSize != 0); + ASSERT(oidOutputBuffer == NULL || outputSize != 0); + ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); + + oidRequest = OvsAllocateMemory(sizeof *oidRequest); + if (!oidRequest) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + oidContext = OvsAllocateMemory(sizeof *oidContext); + if (!oidContext) { + OvsFreeMemory(oidRequest); + status = NDIS_STATUS_RESOURCES; + goto done; + } + + RtlZeroMemory(oidRequest, sizeof *oidRequest); + RtlZeroMemory(oidContext, sizeof *oidContext); + + oidRequest->Header.Type = 
NDIS_OBJECT_TYPE_OID_REQUEST; + oidRequest->Header.Revision = NDIS_OID_REQUEST_REVISION_1; + oidRequest->Header.Size = NDIS_SIZEOF_OID_REQUEST_REVISION_1; + + oidRequest->RequestType = oidType; + oidRequest->PortNumber = 0; + oidRequest->Timeout = 0; + oidRequest->RequestId = (PVOID)OvsExtOidRequestId; + + switch(oidType) { + case NdisRequestQueryInformation: + oidRequest->DATA.QUERY_INFORMATION.Oid = oidRequestEnum; + oidRequest->DATA.QUERY_INFORMATION.InformationBuffer = oidOutputBuffer; + oidRequest->DATA.QUERY_INFORMATION.InformationBufferLength = outputSize; + break; + default: + ASSERT(FALSE); + status = NDIS_STATUS_INVALID_PARAMETER; + break; + } + + /* + * We make use of the SourceReserved field in the OID request to store + * pointers to the original OID (if any), and also context for completion + * (if any). + */ + oidContext->status = NDIS_STATUS_SUCCESS; + NdisInitializeEvent(&oidContext->oidComplete); + + OvsOidSetOrigRequest(oidRequest, NULL); + OvsOidSetContext(oidRequest, oidContext); + + NdisInterlockedIncrement(&(switchContext->pendingOidCount)); + status = NdisFOidRequest(switchContext->NdisFilterHandle, oidRequest); + if (status == NDIS_STATUS_PENDING) { + NdisWaitEvent(&oidContext->oidComplete, 0); + } else { + NdisInterlockedDecrement(&(switchContext->pendingOidCount)); + } + + if (status == NDIS_STATUS_INVALID_LENGTH || + oidContext->status == NDIS_STATUS_INVALID_LENGTH) { + switch(oidType) { + case NdisRequestQueryInformation: + *outputSizeNeeded = oidRequest->DATA.QUERY_INFORMATION.BytesNeeded; + } + } + + status = oidContext->status; + ASSERT(status != NDIS_STATUS_PENDING); + + OvsFreeMemory(oidRequest); + OvsFreeMemory(oidContext); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to query if the extensible switch has completed activation + * successfully. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, + BOOLEAN *switchActive) +{ + NDIS_STATUS status; + PNDIS_SWITCH_PARAMETERS switchParams; + UINT32 outputSizeNeeded; + + OVS_LOG_TRACE("Enter: switchContext: %p, switchActive: %p", + switchContext, switchActive); + + switchParams = OvsAllocateMemory(sizeof *switchParams); + if (!switchParams) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'switchParams' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(switchParams, sizeof *switchParams); + switchParams->Header.Revision = NDIS_SWITCH_PARAMETERS_REVISION_1; + switchParams->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + switchParams->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_PARAMETERS, NULL, 0, + (PVOID)switchParams, sizeof *switchParams, + &outputSizeNeeded); + + ASSERT(status != NDIS_STATUS_INVALID_LENGTH); + ASSERT(status != NDIS_STATUS_PENDING); + if (status == NDIS_STATUS_SUCCESS) { + ASSERT(switchParams->Header.Type == NDIS_OBJECT_TYPE_DEFAULT); + ASSERT(switchParams->Header.Revision == NDIS_SWITCH_PARAMETERS_REVISION_1); + ASSERT(switchParams->Header.Size == + NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1); + *switchActive = switchParams->IsActive; + } + + OvsFreeMemory(switchParams); + +done: + OVS_LOG_TRACE("Exit: status %8x, switchActive: %d.", + status, *switchActive); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to get the array of ports on the extensible switch. Upon + * success, the caller needs to free the returned array. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_ARRAY *portArrayOut) +{ + PNDIS_SWITCH_PORT_ARRAY portArray; + UINT32 arraySize = sizeof *portArray; + NDIS_STATUS status = NDIS_STATUS_FAILURE; + + OVS_LOG_TRACE("Enter: switchContext: %p, portArray: %p", + switchContext, portArrayOut); + do { + UINT32 reqdArraySize; + + portArray = OvsAllocateMemory(arraySize); + if (!portArray) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'portArray' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(portArray, sizeof *portArray); + portArray->Header.Revision = NDIS_SWITCH_PORT_ARRAY_REVISION_1; + portArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + portArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PORT_ARRAY_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_PORT_ARRAY, NULL, 0, + (PVOID)portArray, arraySize, + &reqdArraySize); + if (status == NDIS_STATUS_SUCCESS) { + *portArrayOut = portArray; + break; + } + + OvsFreeMemory(portArray); + arraySize = reqdArraySize; + if (status != NDIS_STATUS_INVALID_LENGTH) { + break; + } + } while(status == NDIS_STATUS_INVALID_LENGTH); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} + + +/* + * -------------------------------------------------------------------------- + * Utility function to get the array of nics on the extensible switch. Upon + * success, the caller needs to free the returned array. 
+ * -------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_ARRAY *nicArrayOut) +{ + PNDIS_SWITCH_NIC_ARRAY nicArray; + UINT32 arraySize = sizeof *nicArray; + NDIS_STATUS status = NDIS_STATUS_FAILURE; + + OVS_LOG_TRACE("Enter: switchContext: %p, nicArray: %p", + switchContext, nicArrayOut); + + do { + UINT32 reqdArraySize; + + nicArray = OvsAllocateMemory(arraySize); + if (!nicArray) { + status = NDIS_STATUS_RESOURCES; + goto done; + } + + /* + * Even though 'nicArray' is supposed to be populated by the OID, it + * needs to be initialized nevertheless. Otherwise, OID returns + * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. + */ + RtlZeroMemory(nicArray, sizeof *nicArray); + nicArray->Header.Revision = NDIS_SWITCH_NIC_ARRAY_REVISION_1; + nicArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; + nicArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_NIC_ARRAY_REVISION_1; + + status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, + OID_SWITCH_NIC_ARRAY, NULL, 0, + (PVOID)nicArray, arraySize, + &reqdArraySize); + if (status == NDIS_STATUS_SUCCESS) { + *nicArrayOut = nicArray; + break; + } + + OvsFreeMemory(nicArray); + arraySize = reqdArraySize; + if (status != NDIS_STATUS_INVALID_LENGTH) { + break; + } + } while(status == NDIS_STATUS_INVALID_LENGTH); + +done: + OVS_LOG_TRACE("Exit: status %8x.", status); + return status; +} diff --git a/datapath-windows/ovsext/Oid.h b/datapath-windows/ovsext/Oid.h new file mode 100644 index 000000000..88a3d7d04 --- /dev/null +++ b/datapath-windows/ovsext/Oid.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __OID_H_ +#define __OID_H_ 1 + +NDIS_STATUS OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, + BOOLEAN *switchActive); +NDIS_STATUS OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_ARRAY *portArrayOut); +NDIS_STATUS OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_ARRAY *nicArrayOut); +#endif /* __OID_H_ */ diff --git a/datapath-windows/ovsext/OvsActions.c b/datapath-windows/ovsext/OvsActions.c deleted file mode 100644 index d8fd295a7..000000000 --- a/datapath-windows/ovsext/OvsActions.c +++ /dev/null @@ -1,1548 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" - -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" -#include "OvsVxlan.h" -#include "OvsChecksum.h" -#include "OvsPacketIO.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_ACTION -#include "OvsDebug.h" - -typedef struct _OVS_ACTION_STATS { - UINT64 rxVxlan; - UINT64 txVxlan; - UINT64 flowMiss; - UINT64 flowUserspace; - UINT64 txTcp; - UINT32 failedFlowMiss; - UINT32 noVport; - UINT32 failedFlowExtract; - UINT32 noResource; - UINT32 noCopiedNbl; - UINT32 failedEncap; - UINT32 failedDecap; - UINT32 cannotGrowDest; - UINT32 zeroActionLen; - UINT32 failedChecksum; -} OVS_ACTION_STATS, *POVS_ACTION_STATS; - -OVS_ACTION_STATS ovsActionStats; - -/* - * There a lot of data that needs to be maintained while executing the pipeline - * as dictated by the actions of a flow, across different functions at different - * levels. Such data is put together in a 'context' structure. Care should be - * exercised while adding new members to the structure - only add ones that get - * used across multiple stages in the pipeline/get used in multiple functions. - */ -#define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2 -typedef struct OvsForwardingContext { - POVS_SWITCH_CONTEXT switchContext; - /* The NBL currently used in the pipeline. */ - PNET_BUFFER_LIST curNbl; - /* NDIS forwarding detail for 'curNbl'. */ - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - /* Array of destination ports for 'curNbl'. */ - PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts; - /* send flags while sending 'curNbl' into NDIS. */ - ULONG sendFlags; - /* Total number of output ports, used + unused, in 'curNbl'. */ - UINT32 destPortsSizeIn; - /* Total number of used output ports in 'curNbl'. */ - UINT32 destPortsSizeOut; - /* - * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to - * be freed/completed. 
- */ - OvsCompletionList *completionList; - /* - * vport number of 'curNbl' when it is passed from the PIF bridge to the INT - * bridge. ie. during tunneling on the Rx side. - */ - UINT32 srcVportNo; - - /* - * Tunnel key: - * - specified in actions during tunneling Tx - * - extracted from an NBL during tunneling Rx - */ - OvsIPv4TunnelKey tunKey; - - /* - * Tunneling - Tx: - * To store the output port, when it is a tunneled port. We don't foresee - * multiple tunneled ports as outport for any given NBL. - */ - POVS_VPORT_ENTRY tunnelTxNic; - - /* - * Tunneling - Rx: - * Points to the Internal port on the PIF Bridge, if the packet needs to be - * de-tunneled. - */ - POVS_VPORT_ENTRY tunnelRxNic; - - /* header information */ - OVS_PACKET_HDR_INFO layers; -} OvsForwardingContext; - - -/* - * -------------------------------------------------------------------------- - * OvsInitForwardingCtx -- - * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline - * is being executed. - * - * Result: - * NDIS_STATUS_SUCCESS on success - * Other NDIS_STATUS upon failure. Upon failure, it is safe to call - * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized - * enough for OvsCompleteNBLForwardingCtx() to do its work. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx, - POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - UINT32 srcVportNo, - ULONG sendFlags, - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail, - OvsCompletionList *completionList, - OVS_PACKET_HDR_INFO *layers, - BOOLEAN resetTunnelInfo) -{ - ASSERT(ovsFwdCtx); - ASSERT(switchContext); - ASSERT(curNbl); - ASSERT(fwdDetail); - - /* - * Set values for curNbl and switchContext so upon failures, we have enough - * information to do cleanup. 
- */ - ovsFwdCtx->curNbl = curNbl; - ovsFwdCtx->switchContext = switchContext; - ovsFwdCtx->completionList = completionList; - ovsFwdCtx->fwdDetail = fwdDetail; - - if (fwdDetail->NumAvailableDestinations > 0) { - /* - * XXX: even though MSDN says GetNetBufferListDestinations() returns - * NDIS_STATUS, the header files say otherwise. - */ - switchContext->NdisSwitchHandlers.GetNetBufferListDestinations( - switchContext->NdisSwitchContext, curNbl, - &ovsFwdCtx->destinationPorts); - - ASSERT(ovsFwdCtx->destinationPorts); - /* Ensure that none of the elements are consumed yet. */ - ASSERT(ovsFwdCtx->destinationPorts->NumElements == - fwdDetail->NumAvailableDestinations); - } else { - ovsFwdCtx->destinationPorts = NULL; - } - ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations; - ovsFwdCtx->destPortsSizeOut = 0; - ovsFwdCtx->srcVportNo = srcVportNo; - ovsFwdCtx->sendFlags = sendFlags; - if (layers) { - ovsFwdCtx->layers = *layers; - } else { - RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers); - } - if (resetTunnelInfo) { - ovsFwdCtx->tunnelTxNic = NULL; - ovsFwdCtx->tunnelRxNic = NULL; - RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey); - } - - return NDIS_STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsDetectTunnelRxPkt -- - * Utility function for an RX packet to detect its tunnel type. - * - * Result: - * True - if the tunnel type was detected. - * False - if not a tunnel packet or tunnel type not supported. - * -------------------------------------------------------------------------- - */ -static __inline BOOLEAN -OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx, - const OvsFlowKey *flowKey) -{ - POVS_VPORT_ENTRY tunnelVport = NULL; - - /* XXX: we should also check for the length of the UDP payload to pick - * packets only if they are at least VXLAN header size. 
- */ - if (!flowKey->ipKey.nwFrag && - flowKey->ipKey.nwProto == IPPROTO_UDP && - flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) { - tunnelVport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN); - ovsActionStats.rxVxlan++; - } - - // We might get tunnel packets even before the tunnel gets initialized. - if (tunnelVport) { - ASSERT(ovsFwdCtx->tunnelRxNic == NULL); - ovsFwdCtx->tunnelRxNic = tunnelVport; - return TRUE; - } - - return FALSE; -} - -/* - * -------------------------------------------------------------------------- - * OvsDetectTunnelPkt -- - * Utility function to detect if a packet is to be subjected to - * tunneling (Tx) or de-tunneling (Rx). Various factors such as source - * port, destination port, packet contents, and previously setup tunnel - * context are used. - * - * Result: - * True - If the packet is to be subjected to tunneling. - * In case of invalid tunnel context, the tunneling functionality is - * a no-op and is completed within this function itself by consuming - * all of the tunneling context. - * False - If not a tunnel packet or tunnel type not supported. Caller should - * process the packet as a non-tunnel packet. - * -------------------------------------------------------------------------- - */ -static __inline BOOLEAN -OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx, - const POVS_VPORT_ENTRY dstVport, - const OvsFlowKey *flowKey) -{ - if (OvsIsInternalVportType(dstVport->ovsType)) { - /* - * Rx: - * The source of NBL during tunneling Rx could be the external - * port or if it is being executed from userspace, the source port is - * default port. 
- */ - BOOLEAN validSrcPort = (ovsFwdCtx->fwdDetail->SourcePortId == - ovsFwdCtx->switchContext->externalPortId) || - (ovsFwdCtx->fwdDetail->SourcePortId == - NDIS_SWITCH_DEFAULT_PORT_ID); - - if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) { - ASSERT(ovsFwdCtx->tunnelTxNic == NULL); - ASSERT(ovsFwdCtx->tunnelRxNic != NULL); - return TRUE; - } - } else if (OvsIsTunnelVportType(dstVport->ovsType)) { - ASSERT(ovsFwdCtx->tunnelTxNic == NULL); - ASSERT(ovsFwdCtx->tunnelRxNic == NULL); - - /* - * Tx: - * The destination port is a tunnel port. Encapsulation must be - * performed only on packets that originate from a VIF port or from - * userspace (default port) - * - * If the packet will not be encapsulated, consume the tunnel context - * by clearing it. - */ - if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO && - !OvsIsVifVportNo(ovsFwdCtx->srcVportNo)) { - ovsFwdCtx->tunKey.dst = 0; - } - - /* Tunnel the packet only if tunnel context is set. */ - if (ovsFwdCtx->tunKey.dst != 0) { - ovsActionStats.txVxlan++; - ovsFwdCtx->tunnelTxNic = dstVport; - } - - return TRUE; - } - - return FALSE; -} - - -/* - * -------------------------------------------------------------------------- - * OvsAddPorts -- - * Add the specified destination vport into the forwarding context. If the - * vport is a VIF/external port, it is added directly to the NBL. If it is - * a tunneling port, it is NOT added to the NBL. - * - * Result: - * NDIS_STATUS_SUCCESS on success - * Other NDIS_STATUS upon failure. 
- * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsAddPorts(OvsForwardingContext *ovsFwdCtx, - OvsFlowKey *flowKey, - NDIS_SWITCH_PORT_ID dstPortId, - BOOLEAN preserveVLAN, - BOOLEAN preservePriority) -{ - POVS_VPORT_ENTRY vport; - PNDIS_SWITCH_PORT_DESTINATION fwdPort; - NDIS_STATUS status; - POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; - - /* - * We hold the dispatch lock that protects the list of vports, so vports - * validated here can be added as destinations safely before we call into - * NDIS. - * - * Some of the vports can be tunnelled ports as well in which case - * they should be added to a separate list of tunnelled destination ports - * instead of the VIF ports. The context for the tunnel is settable - * in OvsForwardingContext. - */ - vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId); - if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { - /* - * There may be some latency between a port disappearing, and userspace - * updating the recalculated flows. In the meantime, handle invalid - * ports gracefully. 
- */ - ovsActionStats.noVport++; - return NDIS_STATUS_SUCCESS; - } - ASSERT(vport->nicState == NdisSwitchNicStateConnected); - vport->stats.txPackets++; - vport->stats.txBytes += - NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl)); - - if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) { - return NDIS_STATUS_SUCCESS; - } - - if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) { - if (ovsFwdCtx->destPortsSizeIn == 0) { - ASSERT(ovsFwdCtx->destinationPorts == NULL); - ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0); - status = - switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations( - switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, - OVS_DEST_PORTS_ARRAY_MIN_SIZE, - &ovsFwdCtx->destinationPorts); - if (status != NDIS_STATUS_SUCCESS) { - ovsActionStats.cannotGrowDest++; - return status; - } - ovsFwdCtx->destPortsSizeIn = - ovsFwdCtx->fwdDetail->NumAvailableDestinations; - ASSERT(ovsFwdCtx->destinationPorts); - } else { - ASSERT(ovsFwdCtx->destinationPorts != NULL); - /* - * NumElements: - * A ULONG value that specifies the total number of - * NDIS_SWITCH_PORT_DESTINATION elements in the - * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure. - * - * NumDestinations: - * A ULONG value that specifies the number of - * NDIS_SWITCH_PORT_DESTINATION elements in the - * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that - * specify port destinations. - * - * NumAvailableDestinations: - * A value that specifies the number of unused extensible switch - * destination ports elements within an NET_BUFFER_LIST structure. - */ - ASSERT(ovsFwdCtx->destinationPorts->NumElements == - ovsFwdCtx->destPortsSizeIn); - ASSERT(ovsFwdCtx->destinationPorts->NumDestinations == - ovsFwdCtx->destPortsSizeOut - - ovsFwdCtx->fwdDetail->NumAvailableDestinations); - ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0); - /* - * Before we grow the array of destination ports, the current set - * of ports needs to be committed. 
Only the ports added since the - * last commit need to be part of the new update. - */ - status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( - switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, - ovsFwdCtx->fwdDetail->NumAvailableDestinations, - ovsFwdCtx->destinationPorts); - if (status != NDIS_STATUS_SUCCESS) { - ovsActionStats.cannotGrowDest++; - return status; - } - ASSERT(ovsFwdCtx->destinationPorts->NumElements == - ovsFwdCtx->destPortsSizeIn); - ASSERT(ovsFwdCtx->destinationPorts->NumDestinations == - ovsFwdCtx->destPortsSizeOut); - ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0); - - status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations( - switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, - ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts); - if (status != NDIS_STATUS_SUCCESS) { - ovsActionStats.cannotGrowDest++; - return status; - } - ASSERT(ovsFwdCtx->destinationPorts != NULL); - ovsFwdCtx->destPortsSizeIn <<= 1; - } - } - - ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn); - fwdPort = - NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts, - ovsFwdCtx->destPortsSizeOut); - - fwdPort->PortId = vport->portId; - fwdPort->NicIndex = vport->nicIndex; - fwdPort->IsExcluded = 0; - fwdPort->PreserveVLAN = preserveVLAN; - fwdPort->PreservePriority = preservePriority; - ovsFwdCtx->destPortsSizeOut += 1; - - return NDIS_STATUS_SUCCESS; -} - - -/* - * -------------------------------------------------------------------------- - * OvsClearTunTxCtx -- - * Utility function to clear tx tunneling context. 
- * -------------------------------------------------------------------------- - */ -static __inline VOID -OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx) -{ - ovsFwdCtx->tunnelTxNic = NULL; - ovsFwdCtx->tunKey.dst = 0; -} - - -/* - * -------------------------------------------------------------------------- - * OvsClearTunRxCtx -- - * Utility function to clear rx tunneling context. - * -------------------------------------------------------------------------- - */ -static __inline VOID -OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx) -{ - ovsFwdCtx->tunnelRxNic = NULL; - ovsFwdCtx->tunKey.dst = 0; -} - - -/* - * -------------------------------------------------------------------------- - * OvsCompleteNBLForwardingCtx -- - * This utility function is responsible for freeing/completing an NBL - either - * by adding it to a completion list or by freeing it. - * - * Side effects: - * It also resets the necessary fields in 'ovsFwdCtx'. - * -------------------------------------------------------------------------- - */ -static __inline VOID -OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx, - PCWSTR dropReason) -{ - NDIS_STRING filterReason; - - RtlInitUnicodeString(&filterReason, dropReason); - if (ovsFwdCtx->completionList) { - OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE, - ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1, - &filterReason); - ovsFwdCtx->curNbl = NULL; - } else { - /* If there is no completionList, we assume this is ovs created NBL */ - ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext, - ovsFwdCtx->curNbl, TRUE); - ASSERT(ovsFwdCtx->curNbl == NULL); - } - /* XXX: these can be made debug only to save cycles. Ideally the pipeline - * using these fields should reset the values at the end of the pipeline. 
*/ - ovsFwdCtx->destPortsSizeOut = 0; - ovsFwdCtx->tunnelTxNic = NULL; - ovsFwdCtx->tunnelRxNic = NULL; -} - -/* - * -------------------------------------------------------------------------- - * OvsDoFlowLookupOutput -- - * Function to be used for the second stage of a tunneling workflow, ie.: - * - On the encapsulated packet on Tx path, to do a flow extract, flow - * lookup and excuting the actions. - * - On the decapsulated packet on Rx path, to do a flow extract, flow - * lookup and excuting the actions. - * - * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is - * until the new buffer management framework is adopted. - * - * Side effects: - * The NBL in 'ovsFwdCtx' is consumed. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx) -{ - OvsFlowKey key; - OvsFlow *flow; - UINT64 hash; - NDIS_STATUS status; - POVS_VPORT_ENTRY vport = - OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); - if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { - ASSERT(FALSE); // XXX: let's catch this for now - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped due to internal/tunnel port removal"); - ovsActionStats.noVport++; - return NDIS_STATUS_SUCCESS; - } - ASSERT(vport->nicState == NdisSwitchNicStateConnected); - - /* Assert that in the Rx direction, key is always setup. */ - ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0); - status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo, - &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ? 
- &ovsFwdCtx->tunKey : NULL); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Flow extract failed"); - ovsActionStats.failedFlowExtract++; - return status; - } - - flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE); - if (flow) { - OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers); - ovsFwdCtx->switchContext->datapath.hits++; - status = OvsActionsExecute(ovsFwdCtx->switchContext, - ovsFwdCtx->completionList, ovsFwdCtx->curNbl, - ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags, - &key, &hash, &ovsFwdCtx->layers, - flow->actions, flow->actionsLen); - ovsFwdCtx->curNbl = NULL; - } else { - LIST_ENTRY missedPackets; - UINT32 num = 0; - ovsFwdCtx->switchContext->datapath.misses++; - InitializeListHead(&missedPackets); - status = OvsCreateAndAddPackets( - OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS, - ovsFwdCtx->srcVportNo, - key.tunKey.dst != 0 ? - (OvsIPv4TunnelKey *)&key.tunKey : NULL, - ovsFwdCtx->curNbl, - ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers, - ovsFwdCtx->switchContext, &missedPackets, &num); - if (num) { - OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num); - } - if (status == NDIS_STATUS_SUCCESS) { - /* Complete the packet since it was copied to user buffer. */ - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped since packet was copied to userspace"); - ovsActionStats.flowMiss++; - status = NDIS_STATUS_SUCCESS; - } else { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped due to failure to queue to userspace"); - status = NDIS_STATUS_FAILURE; - ovsActionStats.failedFlowMiss++; - } - } - - return status; -} - -/* - * -------------------------------------------------------------------------- - * OvsTunnelPortTx -- - * The start function for Tx tunneling - encapsulates the packet, and - * outputs the packet on the PIF bridge. - * - * Side effects: - * The NBL in 'ovsFwdCtx' is consumed. 
- * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx) -{ - NDIS_STATUS status = NDIS_STATUS_FAILURE; - PNET_BUFFER_LIST newNbl = NULL; - - /* - * Setup the source port to be the internal port to as to facilitate the - * second OvsLookupFlow. - */ - if (ovsFwdCtx->switchContext->internalVport == NULL) { - OvsClearTunTxCtx(ovsFwdCtx); - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped since internal port is absent"); - return NDIS_STATUS_FAILURE; - } - ovsFwdCtx->srcVportNo = - ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo; - - ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId; - ovsFwdCtx->fwdDetail->SourceNicIndex = - ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex; - - /* Do the encap. Encap function does not consume the NBL. */ - switch(ovsFwdCtx->tunnelTxNic->ovsType) { - case OVSWIN_VPORT_TYPE_VXLAN: - status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey, - ovsFwdCtx->switchContext, - (VOID *)ovsFwdCtx->completionList, - &ovsFwdCtx->layers, &newNbl); - break; - default: - ASSERT(! "Tx: Unhandled tunnel type"); - } - - /* Reset the tunnel context so that it doesn't get used after this point. */ - OvsClearTunTxCtx(ovsFwdCtx); - - if (status == NDIS_STATUS_SUCCESS) { - ASSERT(newNbl); - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"Complete after cloning NBL for encapsulation"); - ovsFwdCtx->curNbl = newNbl; - status = OvsDoFlowLookupOutput(ovsFwdCtx); - ASSERT(ovsFwdCtx->curNbl == NULL); - } else { - /* - * XXX: Temporary freeing of the packet until we register a - * callback to IP helper. 
- */ - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped due to encap failure"); - ovsActionStats.failedEncap++; - status = NDIS_STATUS_SUCCESS; - } - - return status; -} - -/* - * -------------------------------------------------------------------------- - * OvsTunnelPortRx -- - * Decapsulate the incoming NBL based on the tunnel type and goes through - * the flow lookup for the inner packet. - * - * Note: IP checksum is validate here, but L4 checksum validation needs - * to be done by the corresponding tunnel types. - * - * Side effects: - * The NBL in 'ovsFwdCtx' is consumed. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNET_BUFFER_LIST newNbl = NULL; - POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic; - - if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers) - != NDIS_STATUS_SUCCESS) { - ovsActionStats.failedChecksum++; - OVS_LOG_INFO("Packet dropped due to IP checksum failure."); - goto dropNbl; - } - - switch(tunnelRxVport->ovsType) { - case OVSWIN_VPORT_TYPE_VXLAN: - /* - * OvsDoDecapVxlan should return a new NBL if it was copied, and - * this new NBL should be setup as the ovsFwdCtx->curNbl. - */ - status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - &ovsFwdCtx->tunKey, &newNbl); - break; - default: - OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n", - tunnelRxVport->ovsType); - ASSERT(! "Rx: Unhandled tunnel type"); - status = NDIS_STATUS_NOT_SUPPORTED; - } - - if (status != NDIS_STATUS_SUCCESS) { - ovsActionStats.failedDecap++; - goto dropNbl; - } - - /* - * tunnelRxNic and other fields will be cleared, re-init the context - * before usage. 
- */ - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-dropped due to new decap packet"); - - /* Decapsulated packet is in a new NBL */ - ovsFwdCtx->tunnelRxNic = tunnelRxVport; - OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, - newNbl, tunnelRxVport->portNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), - ovsFwdCtx->completionList, - &ovsFwdCtx->layers, FALSE); - - /* - * Set the NBL's SourcePortId and SourceNicIndex to default values to - * keep NDIS happy when we forward the packet. - */ - ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; - ovsFwdCtx->fwdDetail->SourceNicIndex = 0; - - status = OvsDoFlowLookupOutput(ovsFwdCtx); - ASSERT(ovsFwdCtx->curNbl == NULL); - OvsClearTunRxCtx(ovsFwdCtx); - - return status; - -dropNbl: - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-dropped due to decap failure"); - OvsClearTunRxCtx(ovsFwdCtx); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * OvsOutputForwardingCtx -- - * This function outputs an NBL to NDIS or to a tunneling pipeline based on - * the ports added so far into 'ovsFwdCtx'. - * - * Side effects: - * This function consumes the NBL - either by forwarding it successfully to - * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it. - * - * Also makes sure that the list of destination ports - tunnel or otherwise is - * drained. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx) -{ - NDIS_STATUS status = STATUS_SUCCESS; - POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext; - - /* - * Handle the case where the some of the destination ports are tunneled - * ports - the non-tunneled ports get a unmodified copy of the NBL, and the - * tunneling pipeline starts when we output the packet to tunneled port. 
- */ - if (ovsFwdCtx->destPortsSizeOut > 0) { - PNET_BUFFER_LIST newNbl = NULL; - PNET_BUFFER nb; - UINT32 portsToUpdate = - ovsFwdCtx->fwdDetail->NumAvailableDestinations - - (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut); - - ASSERT(ovsFwdCtx->destinationPorts != NULL); - - /* - * Create a copy of the packet in order to do encap on it later. Also, - * don't copy the offload context since the encap'd packet has a - * different set of headers. This will change when we implement offloads - * before doing encapsulation. - */ - if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) { - nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - 0, 0, TRUE /*copy NBL info*/); - if (newNbl == NULL) { - status = NDIS_STATUS_RESOURCES; - ovsActionStats.noCopiedNbl++; - goto dropit; - } - } - - /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */ - ASSERT(portsToUpdate > 0); - status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations( - switchContext->NdisSwitchContext, ovsFwdCtx->curNbl, - portsToUpdate, ovsFwdCtx->destinationPorts); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); - ovsActionStats.cannotGrowDest++; - goto dropit; - } - - OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - ovsFwdCtx->sendFlags); - /* End this pipeline by resetting the corresponding context. 
*/ - ovsFwdCtx->destPortsSizeOut = 0; - ovsFwdCtx->curNbl = NULL; - if (newNbl) { - status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, - newNbl, ovsFwdCtx->srcVportNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), - ovsFwdCtx->completionList, - &ovsFwdCtx->layers, FALSE); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"Dropped due to resouces"); - goto dropit; - } - } - } - - if (ovsFwdCtx->tunnelTxNic != NULL) { - status = OvsTunnelPortTx(ovsFwdCtx); - ASSERT(ovsFwdCtx->tunnelTxNic == NULL); - ASSERT(ovsFwdCtx->tunKey.dst == 0); - } else if (ovsFwdCtx->tunnelRxNic != NULL) { - status = OvsTunnelPortRx(ovsFwdCtx); - ASSERT(ovsFwdCtx->tunnelRxNic == NULL); - ASSERT(ovsFwdCtx->tunKey.dst == 0); - } - ASSERT(ovsFwdCtx->curNbl == NULL); - - return status; - -dropit: - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX"); - } - - return status; -} - - -/* - * -------------------------------------------------------------------------- - * OvsLookupFlowOutput -- - * Utility function for external callers to do flow extract, lookup, - * actions execute on a given NBL. - * - * Note: If this is being used from a callback function, make sure that the - * arguments specified are still valid in the asynchronous context. - * - * Side effects: - * This function consumes the NBL. - * -------------------------------------------------------------------------- - */ -VOID -OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext, - VOID *compList, - PNET_BUFFER_LIST curNbl) -{ - NDIS_STATUS status; - OvsForwardingContext ovsFwdCtx; - POVS_VPORT_ENTRY internalVport = - (POVS_VPORT_ENTRY)switchContext->internalVport; - - /* XXX: make sure comp list was not a stack variable previously. */ - OvsCompletionList *completionList = (OvsCompletionList *)compList; - - /* - * XXX: can internal port disappear while we are busy doing ARP resolution? 
- * It could, but will we get this callback from IP helper in that case. Need - * to check. - */ - ASSERT(switchContext->internalVport); - status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, - internalVport->portNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl), - completionList, NULL, TRUE); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(&ovsFwdCtx, - L"OVS-Dropped due to resources"); - return; - } - - ASSERT(FALSE); - /* - * XXX: We need to acquire the dispatch lock and the datapath lock. - */ - - OvsDoFlowLookupOutput(&ovsFwdCtx); -} - - -/* - * -------------------------------------------------------------------------- - * OvsOutputBeforeSetAction -- - * Function to be called to complete one set of actions on an NBL, before - * we start the next one. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx) -{ - PNET_BUFFER_LIST newNbl; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNET_BUFFER nb; - - /* - * Create a copy and work on the copy after this point. The original NBL is - * forwarded. One reason to not use the copy for forwarding is that - * ports have already been added to the original NBL, and it might be - * inefficient/impossible to remove/re-add them to the copy. There's no - * notion of removing the ports, the ports need to be marked as - * "isExcluded". There's seems no real advantage to retaining the original - * and sending out the copy instead. - * - * XXX: We are copying the offload context here. This is to handle actions - * such as: - * outport, pop_vlan(), outport, push_vlan(), outport - * - * copy size needs to include inner ether + IP + TCP, need to revisit - * if we support IP options. - * XXX Head room needs to include the additional encap. - * XXX copySize check is not considering multiple NBs. 
- */ - nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - 0, 0, TRUE /*copy NBL info*/); - - ASSERT(ovsFwdCtx->destPortsSizeOut > 0 || - ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL); - - /* Send the original packet out */ - status = OvsOutputForwardingCtx(ovsFwdCtx); - ASSERT(ovsFwdCtx->curNbl == NULL); - ASSERT(ovsFwdCtx->destPortsSizeOut == 0); - ASSERT(ovsFwdCtx->tunnelRxNic == NULL); - ASSERT(ovsFwdCtx->tunnelTxNic == NULL); - - /* If we didn't make a copy, can't continue. */ - if (newNbl == NULL) { - ovsActionStats.noCopiedNbl++; - return NDIS_STATUS_RESOURCES; - } - - /* Finish the remaining actions with the new NBL */ - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE); - } else { - status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, - newNbl, ovsFwdCtx->srcVportNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), - ovsFwdCtx->completionList, - &ovsFwdCtx->layers, FALSE); - } - - return status; -} - - -/* - * -------------------------------------------------------------------------- - * OvsPopVlanInPktBuf -- - * Function to pop a VLAN tag when the tag is in the packet buffer. - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx) -{ - PNET_BUFFER curNb; - PMDL curMdl; - PUINT8 bufferStart; - ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48); - UINT32 packetLen, mdlLen; - PNET_BUFFER_LIST newNbl; - NDIS_STATUS status; - - /* - * Declare a dummy vlanTag structure since we need to compute the size - * of shiftLength. The NDIS one is a unionized structure. 
- */ - NDIS_PACKET_8021Q_INFO vlanTag = {0}; - ULONG shiftLength = sizeof (vlanTag.TagHeader); - PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)]; - - newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - 0, 0, TRUE /* copy NBL info */); - if (!newNbl) { - ovsActionStats.noCopiedNbl++; - return NDIS_STATUS_RESOURCES; - } - - /* Complete the original NBL and create a copy to modify. */ - OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy"); - - status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, - newNbl, ovsFwdCtx->srcVportNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), - NULL, &ovsFwdCtx->layers, FALSE); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"Dropped due to resouces"); - return NDIS_STATUS_RESOURCES; - } - - curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - packetLen = NET_BUFFER_DATA_LENGTH(curNb); - ASSERT(curNb->Next == NULL); - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); - if (!bufferStart) { - return NDIS_STATUS_RESOURCES; - } - mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */ - if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) { - ASSERT(FALSE); - return NDIS_STATUS_FAILURE; - } - bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - RtlCopyMemory(tempBuffer, bufferStart, dataLength); - RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength); - NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL); - - return NDIS_STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsTunnelAttrToIPv4TunnelKey -- - * Convert tunnel attribute to OvsIPv4TunnelKey. 
- * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr, - OvsIPv4TunnelKey *tunKey) -{ - PNL_ATTR a; - INT rem; - - tunKey->attr[0] = 0; - tunKey->attr[1] = 0; - tunKey->attr[2] = 0; - ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL); - - NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr), - NlAttrGetSize(attr)) { - switch (NlAttrType(a)) { - case OVS_TUNNEL_KEY_ATTR_ID: - tunKey->tunnelId = NlAttrGetBe64(a); - tunKey->flags |= OVS_TNL_F_KEY; - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: - tunKey->src = NlAttrGetBe32(a); - break; - case OVS_TUNNEL_KEY_ATTR_IPV4_DST: - tunKey->dst = NlAttrGetBe32(a); - break; - case OVS_TUNNEL_KEY_ATTR_TOS: - tunKey->tos = NlAttrGetU8(a); - break; - case OVS_TUNNEL_KEY_ATTR_TTL: - tunKey->ttl = NlAttrGetU8(a); - break; - case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT; - break; - case OVS_TUNNEL_KEY_ATTR_CSUM: - tunKey->flags |= OVS_TNL_F_CSUM; - break; - default: - ASSERT(0); - } - } - - return NDIS_STATUS_SUCCESS; -} - -/* - *---------------------------------------------------------------------------- - * OvsUpdateEthHeader -- - * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the - * specified key. 
- *---------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, - const struct ovs_key_ethernet *ethAttr) -{ - PNET_BUFFER curNb; - PMDL curMdl; - PUINT8 bufferStart; - EthHdr *ethHdr; - UINT32 packetLen, mdlLen; - - curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - ASSERT(curNb->Next == NULL); - packetLen = NET_BUFFER_DATA_LENGTH(curNb); - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); - if (!bufferStart) { - ovsActionStats.noResource++; - return NDIS_STATUS_RESOURCES; - } - mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - ASSERT(mdlLen > 0); - /* Bail out if the L2 header is not in a contiguous buffer. */ - if (MIN(packetLen, mdlLen) < sizeof *ethHdr) { - ASSERT(FALSE); - return NDIS_STATUS_FAILURE; - } - ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)); - - RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, - sizeof ethHdr->Destination); - RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source); - - return NDIS_STATUS_SUCCESS; -} - -/* - *---------------------------------------------------------------------------- - * OvsUpdateIPv4Header -- - * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the - * specified key. - *---------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, - const struct ovs_key_ipv4 *ipAttr) -{ - PNET_BUFFER curNb; - PMDL curMdl; - ULONG curMdlOffset; - PUINT8 bufferStart; - UINT32 mdlLen, hdrSize, packetLen; - OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers; - NDIS_STATUS status; - IPHdr *ipHdr; - TCPHdr *tcpHdr = NULL; - UDPHdr *udpHdr = NULL; - - ASSERT(layers->value != 0); - - /* - * Peek into the MDL to get a handle to the IP header and if required - * the TCP/UDP header as well. 
We check if the required headers are in one - * contiguous MDL, and if not, we copy them over to one MDL. - */ - curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - ASSERT(curNb->Next == NULL); - packetLen = NET_BUFFER_DATA_LENGTH(curNb); - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); - if (!bufferStart) { - ovsActionStats.noResource++; - return NDIS_STATUS_RESOURCES; - } - curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - mdlLen -= curMdlOffset; - ASSERT((INT)mdlLen >= 0); - - if (layers->isTcp || layers->isUdp) { - hdrSize = layers->l4Offset + - layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr); - } else { - hdrSize = layers->l3Offset + sizeof (*ipHdr); - } - - /* Count of number of bytes of valid data there are in the first MDL. */ - mdlLen = MIN(packetLen, mdlLen); - if (mdlLen < hdrSize) { - PNET_BUFFER_LIST newNbl; - newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl, - hdrSize, 0, TRUE /*copy NBL info*/); - if (!newNbl) { - ovsActionStats.noCopiedNbl++; - return NDIS_STATUS_RESOURCES; - } - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"Complete after partial copy."); - - status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, - newNbl, ovsFwdCtx->srcVportNo, 0, - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl), - NULL, &ovsFwdCtx->layers, FALSE); - if (status != NDIS_STATUS_SUCCESS) { - OvsCompleteNBLForwardingCtx(ovsFwdCtx, - L"OVS-Dropped due to resources"); - return NDIS_STATUS_RESOURCES; - } - - curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl); - ASSERT(curNb->Next == NULL); - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority); - if (!curMdl) { - ovsActionStats.noResource++; - return NDIS_STATUS_RESOURCES; - } - curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - mdlLen -= curMdlOffset; - ASSERT(mdlLen >= hdrSize); - } - - ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset); - - if 
(layers->isTcp) { - tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); - } else if (layers->isUdp) { - udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset); - } - - /* - * Adjust the IP header inline as dictated by the action, nad also update - * the IP and the TCP checksum for the data modified. - * - * In the future, this could be optimized to make one call to - * ChecksumUpdate32(). Ignoring this for now, since for the most common - * case, we only update the TTL. - */ - if (ipHdr->saddr != ipAttr->ipv4_src) { - if (tcpHdr) { - tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr, - ipAttr->ipv4_src); - } else if (udpHdr && udpHdr->check) { - udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr, - ipAttr->ipv4_src); - } - - if (ipHdr->check != 0) { - ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr, - ipAttr->ipv4_src); - } - ipHdr->saddr = ipAttr->ipv4_src; - } - if (ipHdr->daddr != ipAttr->ipv4_dst) { - if (tcpHdr) { - tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr, - ipAttr->ipv4_dst); - } else if (udpHdr && udpHdr->check) { - udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr, - ipAttr->ipv4_dst); - } - - if (ipHdr->check != 0) { - ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr, - ipAttr->ipv4_dst); - } - ipHdr->daddr = ipAttr->ipv4_dst; - } - if (ipHdr->protocol != ipAttr->ipv4_proto) { - UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00; - UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00; - if (tcpHdr) { - tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto); - } else if (udpHdr && udpHdr->check) { - udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto); - } - - if (ipHdr->check != 0) { - ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto); - } - ipHdr->protocol = ipAttr->ipv4_proto; - } - if (ipHdr->ttl != ipAttr->ipv4_ttl) { - UINT16 oldTtl = (ipHdr->ttl) & 0xff; - UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff; - if 
(ipHdr->check != 0) { - ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl); - } - ipHdr->ttl = ipAttr->ipv4_ttl; - } - - return NDIS_STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsExecuteSetAction -- - * Executes a set() action, but storing the actions into 'ovsFwdCtx' - * -------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, - OvsFlowKey *key, - UINT64 *hash, - const PNL_ATTR a) -{ - enum ovs_key_attr type = NlAttrType(a); - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - - switch (type) { - case OVS_KEY_ATTR_ETHERNET: - status = OvsUpdateEthHeader(ovsFwdCtx, - NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet))); - break; - - case OVS_KEY_ATTR_IPV4: - status = OvsUpdateIPv4Header(ovsFwdCtx, - NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4))); - break; - - case OVS_KEY_ATTR_TUNNEL: - { - OvsIPv4TunnelKey tunKey; - - status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey); - ASSERT(status == NDIS_STATUS_SUCCESS); - tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key)); - RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); - - break; - } - case OVS_KEY_ATTR_SKB_MARK: - /* XXX: Not relevant to Hyper-V. Return OK */ - break; - case OVS_KEY_ATTR_UNSPEC: - case OVS_KEY_ATTR_ENCAP: - case OVS_KEY_ATTR_ETHERTYPE: - case OVS_KEY_ATTR_IN_PORT: - case OVS_KEY_ATTR_VLAN: - case OVS_KEY_ATTR_ICMP: - case OVS_KEY_ATTR_ICMPV6: - case OVS_KEY_ATTR_ARP: - case OVS_KEY_ATTR_ND: - case __OVS_KEY_ATTR_MAX: - default: - OVS_LOG_INFO("Unhandled attribute %#x", type); - ASSERT(FALSE); - } - return status; -} - -/* - * -------------------------------------------------------------------------- - * OvsActionsExecute -- - * Interpret and execute the specified 'actions' on the specifed packet - * 'curNbl'. 
The expectation is that if the packet needs to be dropped - * (completed) for some reason, it is added to 'completionList' so that the - * caller can complete the packet. If 'completionList' is NULL, the NBL is - * assumed to be generated by OVS and freed up. Otherwise, the function - * consumes the NBL by generating a NDIS send indication for the packet. - * - * There are one or more of "clone" NBLs that may get generated while - * executing the actions. Upon any failures, the "cloned" NBLs are freed up, - * and the caller does not have to worry about them. - * - * Success or failure is returned based on whether the specified actions - * were executed successfully on the packet or not. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, - OvsCompletionList *completionList, - PNET_BUFFER_LIST curNbl, - UINT32 portNo, - ULONG sendFlags, - OvsFlowKey *key, - UINT64 *hash, - OVS_PACKET_HDR_INFO *layers, - const PNL_ATTR actions, - INT actionsLen) -{ - PNL_ATTR a; - INT rem; - UINT32 dstPortID; - OvsForwardingContext ovsFwdCtx; - PCWSTR dropReason = L""; - NDIS_STATUS status; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail = - NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); - - /* XXX: ASSERT that the flow table lock is held. 
*/ - status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo, - sendFlags, fwdDetail, completionList, - layers, TRUE); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-initing destination port list failed"; - goto dropit; - } - - if (actionsLen == 0) { - dropReason = L"OVS-Dropped due to Flow action"; - ovsActionStats.zeroActionLen++; - goto dropit; - } - - NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) { - switch(NlAttrType(a)) { - case OVS_ACTION_ATTR_OUTPUT: - dstPortID = NlAttrGetU32(a); - status = OvsAddPorts(&ovsFwdCtx, key, dstPortID, - TRUE, TRUE); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-adding destination port failed"; - goto dropit; - } - break; - - case OVS_ACTION_ATTR_PUSH_VLAN: - { - struct ovs_action_push_vlan *vlan; - PVOID vlanTagValue; - PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag; - - if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL - || ovsFwdCtx.tunnelRxNic != NULL) { - status = OvsOutputBeforeSetAction(&ovsFwdCtx); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-adding destination failed"; - goto dropit; - } - } - - vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, - Ieee8021QNetBufferListInfo); - if (vlanTagValue != NULL) { - /* - * XXX: We don't support double VLAN tag offload. In such cases, - * we need to insert the existing one into the packet buffer, - * and add the new one as offload. This will take care of - * guest tag-in-tag case as well as OVS rules that specify - * tag-in-tag. 
- */ - } else { - vlanTagValue = 0; - vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; - vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a); - vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff; - vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13; - - NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, - Ieee8021QNetBufferListInfo) = vlanTagValue; - } - break; - } - - case OVS_ACTION_ATTR_POP_VLAN: - { - if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL - || ovsFwdCtx.tunnelRxNic != NULL) { - status = OvsOutputBeforeSetAction(&ovsFwdCtx); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-adding destination failed"; - goto dropit; - } - } - - if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, - Ieee8021QNetBufferListInfo) != 0) { - NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl, - Ieee8021QNetBufferListInfo) = 0; - } else { - /* - * The VLAN tag is inserted into the packet buffer. Pop the tag - * by packet buffer modification. - */ - status = OvsPopVlanInPktBuf(&ovsFwdCtx); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-pop vlan action failed"; - goto dropit; - } - } - break; - } - - case OVS_ACTION_ATTR_USERSPACE: - { - PNL_ATTR userdataAttr; - PNL_ATTR queueAttr; - POVS_PACKET_QUEUE_ELEM elem; - UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE; - //XXX confusing that portNo is actually portId for external port. 
- BOOLEAN isRecv = (portNo == switchContext->externalPortId) - || OvsIsTunnelVportNo(portNo); - - queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID); - userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA); - - elem = OvsCreateQueuePacket(queueId, (PVOID)userdataAttr, - userdataAttr->nlaLen, - OVS_PACKET_CMD_ACTION, - portNo, (OvsIPv4TunnelKey *)&key->tunKey, - ovsFwdCtx.curNbl, - NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl), - isRecv, - layers); - if (elem) { - LIST_ENTRY missedPackets; - InitializeListHead(&missedPackets); - InsertTailList(&missedPackets, &elem->link); - OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); - dropReason = L"OVS-Completed since packet was copied to " - L"userspace"; - } else { - dropReason = L"OVS-Dropped due to failure to queue to " - L"userspace"; - goto dropit; - } - break; - } - case OVS_ACTION_ATTR_SET: - { - if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL - || ovsFwdCtx.tunnelRxNic != NULL) { - status = OvsOutputBeforeSetAction(&ovsFwdCtx); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-adding destination failed"; - goto dropit; - } - } - - status = OvsExecuteSetAction(&ovsFwdCtx, key, hash, - (const PNL_ATTR)NlAttrGet - ((const PNL_ATTR)a)); - if (status != NDIS_STATUS_SUCCESS) { - dropReason = L"OVS-set action failed"; - goto dropit; - } - break; - } - case OVS_ACTION_ATTR_SAMPLE: - break; - case OVS_ACTION_ATTR_UNSPEC: - case __OVS_ACTION_ATTR_MAX: - default: - break; - } - } - - if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL - || ovsFwdCtx.tunnelRxNic != NULL) { - status = OvsOutputForwardingCtx(&ovsFwdCtx); - ASSERT(ovsFwdCtx.curNbl == NULL); - } - - ASSERT(ovsFwdCtx.destPortsSizeOut == 0); - ASSERT(ovsFwdCtx.tunnelRxNic == NULL); - ASSERT(ovsFwdCtx.tunnelTxNic == NULL); - -dropit: - /* - * If curNbl != NULL, it implies the NBL has not been not freed up so far. 
- */ - if (ovsFwdCtx.curNbl) { - OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason); - } - - return status; -} diff --git a/datapath-windows/ovsext/OvsAtomic.h b/datapath-windows/ovsext/OvsAtomic.h deleted file mode 100644 index a94d1fb15..000000000 --- a/datapath-windows/ovsext/OvsAtomic.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_ATOMIC_H_ -#define __OVS_ATOMIC_H_ 1 - -static __inline UINT64 -atomic_add64(UINT64 *ptr, UINT32 val) -{ - return InterlockedAdd64((LONGLONG volatile *) ptr, (LONGLONG) val); -} - -static __inline UINT64 -atomic_inc64(UINT64 *ptr) -{ - return InterlockedIncrement64((LONGLONG volatile *) ptr); -} - -#endif /* __OVS_ATOMIC_H_ */ diff --git a/datapath-windows/ovsext/OvsBufferMgmt.c b/datapath-windows/ovsext/OvsBufferMgmt.c deleted file mode 100644 index 8aa806061..000000000 --- a/datapath-windows/ovsext/OvsBufferMgmt.c +++ /dev/null @@ -1,1535 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * **************************************************************************** - * - * Simple Buffer Management framework for OVS - * - * It introduces four NDIS buffer pools - * **Fix size net buffer list pool--this is used for small buffer - * One allocation will include NBL + NB + MDL + Data + CONTEXT. - * - * **Variable size net buffer list pool--this is used for variable size - * buffer. The allocation of net buffer list will include NBL + NB + - * CONTEXT, a separate allocation of MDL + data buffer is required. - * - * **NBL only net buffer list pool-- this is used for partial copy - * (or clone). In this case we can not allocate net buffer list and - * net buffer at the same time. - * - * **Net buffer pool-- this is required when net buffer need to be - * allocated separately. - * - * A Buffer context is defined to track the buffer specific information - * so that during NBL completion, proper action can be taken. Please see - * code for details. - * - * Here is the usage of the management API - * All external NBL should be initialized its NBL context by calling - * OvsInitExternalNBLContext() - * - * After the external NBL context is initialized, it can call the following - * API to allocate, copy or partial copy NBL. - * - * OvsAllocateFixSizeNBL() - * OvsAllocateVariableSizeNBL() - * - * OvsPartialCopyNBL() - * OvsPartialCopyToMultipleNBLs() - * - * OvsFullCopyNBL() - * OvsFullCopyToMultipleNBLs() - * - * See code comments for detail description of the functions. 
- * - * All NBLs is completed through - * OvsCompleteNBL() - * If this API return non NULL value, then the returned NBL should be - * returned to upper layer by calling - * NdisFSendNetBufferListsComplete() if the buffer is from upper - * layer. In case of WFP, it can call the corresponding completion routine - * to return the NBL to the framework. - * - * NOTE: - * 1. Copy or partial copy will not copy destination port array - * 2. Copy or partial copy will copy src port id and index - * 3. New Allocated NBL will have src port set to default port id - * 4. If original packet has direction flag set, the copied or partial - * copied NBL will still be in same direction. - * 5. When you advance or retreate the buffer, you may need to update - * relevant meta data to keep it consistent. - * - * **************************************************************************** - */ - -#include "precomp.h" -#include "OvsSwitch.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_BUFMGMT -#include "OvsDebug.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" -#include "OvsChecksum.h" -#include "OvsPacketParser.h" - -/* - * -------------------------------------------------------------------------- - * OvsInitBufferPool -- - * - * Allocate NBL and NB pool - * - * XXX: more optimization may be done for buffer management include local cache - * of NBL, NB, data, context, MDL. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsInitBufferPool(PVOID ovsContext) -{ - POVS_NBL_POOL ovsPool; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - NET_BUFFER_LIST_POOL_PARAMETERS nblParam; - NET_BUFFER_POOL_PARAMETERS nbParam; - - C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8); - - OVS_LOG_TRACE("Enter: context: %p", context); - - ovsPool = &context->ovsPool; - RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL)); - ovsPool->ndisHandle = context->NdisFilterHandle; - ovsPool->ndisContext = context->NdisSwitchContext; - /* - * fix size NBL pool includes - * NBL + NB + MDL + DATA + Context - * This is mainly used for Packet execute or slow path when copy is - * required and size is less than OVS_DEFAULT_DATA_SIZE. We expect - * Most of packet from user space will use this Pool. (This is - * true for all bfd and cfm packet. - */ - RtlZeroMemory(&nblParam, sizeof (nblParam)); - OVS_INIT_OBJECT_HEADER(&nblParam.Header, - NDIS_OBJECT_TYPE_DEFAULT, - NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, - NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); - nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; - nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG; - nblParam.fAllocateNetBuffer = TRUE; - nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE; - - ovsPool->fixSizePool = - NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); - if (ovsPool->fixSizePool == NULL) { - goto pool_cleanup; - } - - /* - * Zero Size NBL Pool includes - * NBL + NB + Context - * This is mainly for packet with large data Size, in this case MDL and - * Data will be allocate separately. 
- */ - RtlZeroMemory(&nblParam, sizeof (nblParam)); - OVS_INIT_OBJECT_HEADER(&nblParam.Header, - NDIS_OBJECT_TYPE_DEFAULT, - NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, - NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); - - nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; - nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG; - nblParam.fAllocateNetBuffer = TRUE; - nblParam.DataSize = 0; - - ovsPool->zeroSizePool = - NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); - if (ovsPool->zeroSizePool == NULL) { - goto pool_cleanup; - } - - /* - * NBL only pool just includes - * NBL (+ context) - * This is mainly used for clone and partial copy - */ - RtlZeroMemory(&nblParam, sizeof (nblParam)); - OVS_INIT_OBJECT_HEADER(&nblParam.Header, - NDIS_OBJECT_TYPE_DEFAULT, - NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1, - NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1); - - nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE; - nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG; - nblParam.fAllocateNetBuffer = FALSE; - nblParam.DataSize = 0; - - ovsPool->nblOnlyPool = - NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam); - if (ovsPool->nblOnlyPool == NULL) { - goto pool_cleanup; - } - - /* nb Pool - * NB only pool, used for copy - */ - - OVS_INIT_OBJECT_HEADER(&nbParam.Header, - NDIS_OBJECT_TYPE_DEFAULT, - NET_BUFFER_POOL_PARAMETERS_REVISION_1, - NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1); - nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG; - nbParam.DataSize = 0; - ovsPool->nbPool = - NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam); - if (ovsPool->nbPool == NULL) { - goto pool_cleanup; - } - OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p" - "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool, - ovsPool->nblOnlyPool, ovsPool->nbPool); - return NDIS_STATUS_SUCCESS; - -pool_cleanup: - OvsCleanupBufferPool(context); - OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool"); - return 
NDIS_STATUS_RESOURCES; -} - - -/* - * -------------------------------------------------------------------------- - * OvsCleanupBufferPool -- - * Free Buffer pool for NBL and NB. - * -------------------------------------------------------------------------- - */ -VOID -OvsCleanupBufferPool(PVOID ovsContext) -{ - POVS_NBL_POOL ovsPool; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - ovsPool = &context->ovsPool; - OVS_LOG_TRACE("Enter: context: %p", context); -#ifdef DBG - ASSERT(ovsPool->fixNBLCount == 0); - ASSERT(ovsPool->zeroNBLCount == 0); - ASSERT(ovsPool->nblOnlyCount == 0); - ASSERT(ovsPool->nbCount == 0); - ASSERT(ovsPool->sysNBLCount == 0); - ASSERT(ovsPool->fragNBLCount == 0); -#endif - - if (ovsPool->fixSizePool) { - NdisFreeNetBufferListPool(ovsPool->fixSizePool); - ovsPool->fixSizePool = NULL; - } - if (ovsPool->zeroSizePool) { - NdisFreeNetBufferListPool(ovsPool->zeroSizePool); - ovsPool->zeroSizePool = NULL; - } - if (ovsPool->nblOnlyPool) { - NdisFreeNetBufferListPool(ovsPool->nblOnlyPool); - ovsPool->nblOnlyPool = NULL; - } - if (ovsPool->nbPool) { - NdisFreeNetBufferPool(ovsPool->nbPool); - ovsPool->nbPool = NULL; - } - OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool"); -} - - -static VOID -OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx, - UINT16 flags, - UINT32 origDataLength, - UINT32 srcPortNo) -{ - ctx->magic = OVS_CTX_MAGIC; - ctx->refCount = 1; - ctx->flags = flags; - ctx->srcPortNo = srcPortNo; - ctx->origDataLength = origDataLength; -} - - -static VOID -OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl) -{ - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; - info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); - if (info == NULL) { - return; - } - OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d " - "isDataSafe: %s, safeDataSize: %d", - nbl, info->NumAvailableDestinations, info->SourcePortId, - info->SourceNicIndex, - info->IsPacketDataSafe ? "TRUE" : "FALSE", - info->IsPacketDataSafe ? 
0 : info->SafePacketDataSize); - -} - -static VOID -OvsDumpNBLContext(PNET_BUFFER_LIST nbl) -{ - PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context; - if (ctx == NULL) { - OVS_LOG_INFO("No Net Buffer List context"); - return; - } - while (ctx) { - OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d", - nbl, ctx, ctx->Size, ctx->Offset); - ctx = ctx->Next; - } -} - - -static VOID -OvsDumpMDLChain(PMDL mdl) -{ - PMDL tmp; - tmp = mdl; - while (tmp) { - OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p" - " ByteCount: %d, ByteOffset: %d", - tmp, tmp->Size, tmp->MappedSystemVa, - tmp->StartVa, tmp->ByteCount, tmp->ByteOffset); - tmp = tmp->Next; - } -} - - -static VOID -OvsDumpNetBuffer(PNET_BUFFER nb) -{ - OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p " - "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d", - nb, - NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle, - NET_BUFFER_FIRST_MDL(nb), - NET_BUFFER_CURRENT_MDL(nb), - NET_BUFFER_CURRENT_MDL_OFFSET(nb), - NET_BUFFER_DATA_LENGTH(nb), - NET_BUFFER_DATA_OFFSET(nb)); - OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb)); -} - - -static VOID -OvsDumpNetBufferList(PNET_BUFFER_LIST nbl) -{ - PNET_BUFFER nb; - OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d " - "poolHandle: %p", - nbl, nbl->ParentNetBufferList, - nbl->SourceHandle, nbl->ChildRefCount, - nbl->NdisPoolHandle); - OvsDumpNBLContext(nbl); - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - while (nb) { - OvsDumpNetBuffer(nb); - nb = NET_BUFFER_NEXT_NB(nb); - } -} - -/* - * -------------------------------------------------------------------------- - * OvsAllocateFixSizeNBL -- - * - * Allocate fix size NBL which include - * NBL + NB + MBL + Data + Context - * Please note: - * * Forwarding Context is allocated, but forwarding detail information - * is not initailized. - * * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128 - * byte). 
- * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsAllocateFixSizeNBL(PVOID ovsContext, - UINT32 size, - UINT32 headRoom) -{ - PNET_BUFFER_LIST nbl = NULL; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - POVS_BUFFER_CONTEXT ctx; - POVS_NBL_POOL ovsPool = &context->ovsPool; - NDIS_STATUS status; - UINT32 line; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; - - if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) { - line = __LINE__; - goto allocate_done; - } - - nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool, - (UINT16)sizeof (OVS_BUFFER_CONTEXT), - (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); - - if (nbl == NULL) { - line = __LINE__; - goto allocate_done; - } - - nbl->SourceHandle = ovsPool->ndisHandle; - status = context->NdisSwitchHandlers. - AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); - - if (status != NDIS_STATUS_SUCCESS) { - NdisFreeNetBufferList(nbl); - nbl = NULL; - line = __LINE__; - goto allocate_done; - } - info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); - ASSERT(info); - info->IsPacketDataSafe = TRUE; - info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; - - status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), - size, 0, NULL); - ASSERT(status == NDIS_STATUS_SUCCESS); - -#ifdef DBG - InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount); - OvsDumpNetBufferList(nbl); - OvsDumpForwardingDetails(nbl); -#endif - - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - ASSERT(ctx); - - OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL | - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size, - OVS_DEFAULT_PORT_NO); - line = __LINE__; -allocate_done: - OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line); - return nbl; -} - - -static PMDL -OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle, - UINT32 dataSize) -{ - PMDL mdl; - PVOID data; - - data = OvsAllocateMemory(dataSize); - if (data == NULL) { - return NULL; 
- } - - mdl = NdisAllocateMdl(ndisHandle, data, dataSize); - if (mdl == NULL) { - OvsFreeMemory(data); - } - - return mdl; -} - - -static VOID -OvsFreeMDLAndData(PMDL mdl) -{ - PVOID data; - - data = MmGetMdlVirtualAddress(mdl); - NdisFreeMdl(mdl); - OvsFreeMemory(data); -} - - -/* - * -------------------------------------------------------------------------- - * OvsAllocateVariableSizeNBL -- - * - * Allocate variable size NBL, the NBL looks like - * NBL + NB + Context - * MDL + Data - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsAllocateVariableSizeNBL(PVOID ovsContext, - UINT32 size, - UINT32 headRoom) -{ - PNET_BUFFER_LIST nbl = NULL; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - POVS_NBL_POOL ovsPool = &context->ovsPool; - POVS_BUFFER_CONTEXT ctx; - UINT32 realSize; - PMDL mdl; - NDIS_STATUS status; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info; - if (size == 0) { - return NULL; - } - realSize = MEM_ALIGN_SIZE(size + headRoom); - - mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize); - if (mdl == NULL) { - return NULL; - } - - nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool, - (UINT16)sizeof (OVS_BUFFER_CONTEXT), - (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL, - mdl, realSize, 0); - if (nbl == NULL) { - OvsFreeMDLAndData(mdl); - return NULL; - } - - nbl->SourceHandle = ovsPool->ndisHandle; - status = context->NdisSwitchHandlers. 
- AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); - - if (status != NDIS_STATUS_SUCCESS) { - /* - * do we need to remove mdl from nbl XXX - */ - OvsFreeMDLAndData(mdl); - NdisFreeNetBufferList(nbl); - return NULL; - } - - info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); - ASSERT(info); - info->IsPacketDataSafe = TRUE; - info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; - status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl), - size, 0, NULL); - ASSERT(status == NDIS_STATUS_SUCCESS); - -#ifdef DBG - InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount); - OvsDumpNetBufferList(nbl); - OvsDumpForwardingDetails(nbl); -#endif - - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - - OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | - OVS_BUFFER_FROM_ZERO_SIZE_POOL, - size, OVS_DEFAULT_PORT_NO); - - OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl); - return nbl; -} - - -/* - * -------------------------------------------------------------------------- - * OvsInitExternalNBLContext -- - * - * For NBL not allocated by OVS, it will allocate and initialize - * the NBL context. 
- * -------------------------------------------------------------------------- - */ -POVS_BUFFER_CONTEXT -OvsInitExternalNBLContext(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - BOOLEAN isRecv) -{ - NDIS_HANDLE poolHandle; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - POVS_BUFFER_CONTEXT ctx; - PNET_BUFFER nb; - NDIS_STATUS status; - UINT16 flags; - - poolHandle = NdisGetPoolFromNetBufferList(nbl); - - if (poolHandle == context->ovsPool.ndisHandle) { - return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - } - status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT), - OVS_DEFAULT_NBL_CONTEXT_FILL, - OVS_OTHER_POOL_TAG); - if (status != NDIS_STATUS_SUCCESS) { - return NULL; - } -#ifdef DBG - OvsDumpNBLContext(nbl); - InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount); -#endif - flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER; - flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT; - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - /* - * we use first nb to decide whether we need advance or retreat during - * complete. - */ - OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO); - return ctx; -} - -/* - * -------------------------------------------------------------------------- - * OvsAllocateNBLContext - * - * Create NBL buffer context and forwarding context. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context, - PNET_BUFFER_LIST nbl) -{ - POVS_NBL_POOL ovsPool = &context->ovsPool; - NDIS_STATUS status; - - status = NdisAllocateNetBufferListContext(nbl, - sizeof (OVS_BUFFER_CONTEXT), - OVS_DEFAULT_NBL_CONTEXT_FILL, - OVS_OTHER_POOL_TAG); - if (status != NDIS_STATUS_SUCCESS) { - return NDIS_STATUS_FAILURE; - } - - nbl->SourceHandle = ovsPool->ndisHandle; - status = context->NdisSwitchHandlers. - AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl); - - if (status != NDIS_STATUS_SUCCESS) { - NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); - return NDIS_STATUS_FAILURE; - } - return status; -} - -/* - * -------------------------------------------------------------------------- - * OvsFreeNBLContext - * - * Free the NBL buffer context and forwarding context. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsFreeNBLContext(POVS_SWITCH_CONTEXT context, - PNET_BUFFER_LIST nbl) -{ - POVS_NBL_POOL ovsPool = &context->ovsPool; - - context->NdisSwitchHandlers. 
- FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); - NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); - - return NDIS_STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsCopyNBLInfo - * - * Copy NBL info from src to dst - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl, - POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize, - BOOLEAN copyNblInfo) -{ - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - - srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl); - dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl); - if (srcInfo) { -#ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO - status = context->NdisSwitchHandlers. - CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0); - - if (status != NDIS_STATUS_SUCCESS) { - return status; - } -#else - dstInfo->SourcePortId = srcInfo->SourcePortId; - dstInfo->SourceNicIndex = srcInfo->SourceNicIndex; - if (copyNblInfo) { - if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) { - NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl); - } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) { - NdisCopySendNetBufferListInfo(dstNbl, srcNbl); - } - } -#endif - dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe; - if (!srcInfo->IsPacketDataSafe && copySize > - srcInfo->SafePacketDataSize) { - srcInfo->SafePacketDataSize = copySize; - } - } else { - /* - * Assume all data are safe - */ - dstInfo->IsPacketDataSafe = TRUE; - dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; - } - return status; -} - -/* - * -------------------------------------------------------------------------- - * OvsPartialCopyNBL -- - * - * Partial copy NBL, if there is multiple NB in NBL, each one will be - * copied. We also reserve headroom for the new NBL. 
- * - * Please note, - * NBL should have OVS_BUFFER_CONTEXT setup before calling - * this function. - * The NBL should already have ref to itself so that during copy - * it will not be freed. - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsPartialCopyNBL(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - UINT32 copySize, - UINT32 headRoom, - BOOLEAN copyNblInfo) -{ - PNET_BUFFER_LIST newNbl; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - NDIS_STATUS status; - PNET_BUFFER srcNb, dstNb; - ULONG byteCopied; - POVS_NBL_POOL ovsPool = &context->ovsPool; - POVS_BUFFER_CONTEXT srcCtx, dstCtx; - UINT16 flags; - - srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { - OVS_LOG_INFO("src nbl must have ctx initialized"); - ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); - return NULL; - } - - if (copySize) { - NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL); - } - newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool, - NULL, 0); - if (copySize) { - status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0, - NULL, NULL); - ASSERT(status == NDIS_STATUS_SUCCESS); - } - - if (newNbl == NULL) { - return NULL; - } - - /* - * Allocate private memory for copy - */ - if (copySize + headRoom) { - status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom, - 0, NULL, NULL); - if (status != NDIS_STATUS_SUCCESS) { - goto retreat_error; - } - - if (headRoom) { - NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL); - } - if (copySize) { - srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); - dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl); - - while (srcNb) { - status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize, - srcNb, 0, - &byteCopied); - if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) { - goto nbl_context_error; - } - srcNb = NET_BUFFER_NEXT_NB(srcNb); - dstNb = 
NET_BUFFER_NEXT_NB(dstNb); - } - } - } - - status = OvsAllocateNBLContext(context, newNbl); - if (status != NDIS_STATUS_SUCCESS) { - goto nbl_context_error; - } - - status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo); - if (status != NDIS_STATUS_SUCCESS) { - goto copy_list_info_error; - } - -#ifdef DBG - InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); -#endif - - newNbl->ParentNetBufferList = nbl; - - dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); - ASSERT(dstCtx != NULL); - - flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); - - flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT | - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; - - srcNb = NET_BUFFER_LIST_FIRST_NB(nbl); - OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize, - OVS_DEFAULT_PORT_NO); - - InterlockedIncrement((LONG volatile *)&srcCtx->refCount); -#ifdef DBG - OvsDumpNetBufferList(nbl); - OvsDumpForwardingDetails(nbl); - - OvsDumpNetBufferList(newNbl); - OvsDumpForwardingDetails(newNbl); -#endif - OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl); - return newNbl; - -copy_list_info_error: - OvsFreeNBLContext(context, newNbl); -nbl_context_error: - if (copySize) { - NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL); - } -retreat_error: - NdisFreeCloneNetBufferList(newNbl, 0); - return NULL; -} - -/* - * -------------------------------------------------------------------------- - * OvsPartialCopyToMultipleNBLs -- - * - * This is similar to OvsPartialCopyNBL() except that each NB will - * have its own NBL. 
- * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsPartialCopyToMultipleNBLs(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - UINT32 copySize, - UINT32 headRoom, - BOOLEAN copyNblInfo) -{ - PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL; - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - if (NET_BUFFER_NEXT_NB(nb) == NULL) { - return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo); - } - - firstNb = nb; - prevNb = nb; - - while (nb) { - nextNb = NET_BUFFER_NEXT_NB(nb); - NET_BUFFER_NEXT_NB(nb) = NULL; - - NET_BUFFER_LIST_FIRST_NB(nbl) = nb; - - newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom, - copyNblInfo); - if (newNbl == NULL) { - goto cleanup; - } - if (prevNbl == NULL) { - firstNbl = newNbl; - } else { - NET_BUFFER_LIST_NEXT_NBL(prevNbl) = nbl; - NET_BUFFER_NEXT_NB(prevNb) = nb; - } - prevNbl = newNbl; - prevNb = nb; - nb = nextNb; - } - NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; - return firstNbl; - -cleanup: - NET_BUFFER_NEXT_NB(prevNb) = nb; - NET_BUFFER_NEXT_NB(nb) = nextNb; - NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb; - - newNbl = firstNbl; - while (newNbl) { - firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl); - NET_BUFFER_LIST_NEXT_NBL(firstNbl) = NULL; - OvsCompleteNBL(context, newNbl, TRUE); - newNbl = firstNbl; - } - return NULL; -} - - -static PNET_BUFFER_LIST -OvsCopySinglePacketNBL(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - PNET_BUFFER nb, - UINT32 headRoom, - BOOLEAN copyNblInfo) -{ - UINT32 size; - ULONG copiedSize; - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - PNET_BUFFER_LIST newNbl; - PNET_BUFFER newNb; - NDIS_STATUS status; - POVS_BUFFER_CONTEXT srcCtx, dstCtx; - - size = NET_BUFFER_DATA_LENGTH(nb); - if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) { - newNbl = OvsAllocateFixSizeNBL(context, size, headRoom); - } else { - newNbl = 
OvsAllocateVariableSizeNBL(context, size, headRoom); - } - if (newNbl == NULL) { - return NULL; - } - newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); - status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, - &copiedSize); - - srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (status == NDIS_STATUS_SUCCESS) { - status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo); - } - - if (status != NDIS_STATUS_SUCCESS || copiedSize != size) { - OvsCompleteNBL(context, newNbl, TRUE); - return NULL; - } - - dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); - ASSERT(dstCtx && srcCtx); - ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC); - - dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | - OVS_BUFFER_SEND_BUFFER); -#ifdef DBG - OvsDumpNetBufferList(newNbl); - OvsDumpForwardingDetails(newNbl); -#endif - OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl); - return newNbl; -} - -/* - * -------------------------------------------------------------------------- - * OvsFullCopyNBL -- - * - * Copy the NBL to a new NBL including data. - * - * Notes: - * The NBL can have multiple NBs, but the final result is one NBL. 
- * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsFullCopyNBL(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - UINT32 headRoom, - BOOLEAN copyNblInfo) -{ - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - POVS_NBL_POOL ovsPool = &context->ovsPool; - PNET_BUFFER_LIST newNbl; - PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL; - POVS_BUFFER_CONTEXT dstCtx, srcCtx; - PMDL mdl; - NDIS_STATUS status; - UINT32 size, totalSize; - ULONG copiedSize; - UINT16 flags; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo; - - srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { - OVS_LOG_INFO("src nbl must have ctx initialized"); - ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); - return NULL; - } - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - - if (NET_BUFFER_NEXT_NB(nb) == NULL) { - return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); - } - - newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool, - (UINT16)sizeof (OVS_BUFFER_CONTEXT), - (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL); - if (newNbl == NULL) { - return NULL; - } - - while (nb) { - size = NET_BUFFER_DATA_LENGTH(nb); - totalSize = MEM_ALIGN_SIZE(size + headRoom); - mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize); - - if (mdl == NULL) { - goto nblcopy_error; - } - newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0); - if (newNb == NULL) { - OvsFreeMDLAndData(mdl); - goto nblcopy_error; - } - if (firstNb == NULL) { - firstNb = newNb; - } else { - NET_BUFFER_NEXT_NB(prevNb) = newNb; - } - prevNb = newNb; -#ifdef DBG - InterlockedIncrement((LONG volatile *)&ovsPool->nbCount); -#endif - status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL); - ASSERT(status == NDIS_STATUS_SUCCESS); - - status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0, - &copiedSize); - if (status != NDIS_STATUS_SUCCESS || size != copiedSize) { 
- goto nblcopy_error; - } - - nb = NET_BUFFER_NEXT_NB(nb); - } - - NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb; - - newNbl->SourceHandle = ovsPool->ndisHandle; - status = context->NdisSwitchHandlers. - AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl); - - if (status != NDIS_STATUS_SUCCESS) { - goto nblcopy_error; - } - - status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo); - if (status != NDIS_STATUS_SUCCESS) { - goto nblcopy_error; - } - - dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl); - dstInfo->IsPacketDataSafe = TRUE; - - dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); - - flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER); - - flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA | - OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL | - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT; - - OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb), - OVS_DEFAULT_PORT_NO); - -#ifdef DBG - OvsDumpNetBufferList(nbl); - OvsDumpForwardingDetails(nbl); - InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount); -#endif - OVS_LOG_LOUD("newNbl: %p", newNbl); - return newNbl; - -nblcopy_error: - while (firstNb) { -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); -#endif - prevNb = firstNb; - firstNb = NET_BUFFER_NEXT_NB(prevNb); - mdl = NET_BUFFER_FIRST_MDL(prevNb); - NET_BUFFER_FIRST_MDL(prevNb) = NULL; - NdisFreeNetBuffer(prevNb); - OvsFreeMDLAndData(mdl); - } - NdisFreeNetBufferList(newNbl); - OVS_LOG_ERROR("OvsFullCopyNBL failed"); - return NULL; -} - -/* - * -------------------------------------------------------------------------- - * GetSegmentHeaderInfo - * - * Extract header size and sequence number for the segment. 
- * -------------------------------------------------------------------------- - */ -static NDIS_STATUS -GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl, - const POVS_PACKET_HDR_INFO hdrInfo, - UINT32 *hdrSize, UINT32 *seqNumber) -{ - TCPHdr tcpStorage; - const TCPHdr *tcp; - - /* Parse the orginal Eth/IP/TCP header */ - tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage); - if (tcp == NULL) { - return NDIS_STATUS_FAILURE; - } - *seqNumber = ntohl(tcp->seq); - *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp); - - return NDIS_STATUS_SUCCESS; -} - - -/* - * -------------------------------------------------------------------------- - * FixSegmentHeader - * - * Fix IP length, IP checksum, TCP sequence number and TCP checksum - * in the segment. - * -------------------------------------------------------------------------- - */ -static NDIS_STATUS -FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber) -{ - EthHdr *dstEth; - IPHdr *dstIP; - TCPHdr *dstTCP; - PMDL mdl; - PUINT8 bufferStart; - - mdl = NET_BUFFER_FIRST_MDL(nb); - - bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority); - if (!bufferStart) { - return NDIS_STATUS_RESOURCES; - } - dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb)); - ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) - >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr)); - dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth); - dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4); - ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb) - >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); - - /* Fix IP length and checksum */ - ASSERT(dstIP->protocol == IPPROTO_TCP); - dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP)); - dstIP->check = 0; - dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0); - - /* Fix TCP checksum */ - dstTCP->seq = htonl(seqNumber); - dstTCP->check = - IPPseudoChecksum((UINT32 
*)&dstIP->saddr, - (UINT32 *)&dstIP->daddr, - IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP)); - dstTCP->check = CalculateChecksumNB(nb, - (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4), - sizeof *dstEth + dstIP->ihl * 4); - return STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsTcpSegmentyNBL -- - * - * Segment TCP payload, and prepend each segment with ether/IP/TCP header. - * Leave headRoom for additional encap. - * - * Please note, - * NBL should have OVS_BUFFER_CONTEXT setup before calling - * this function. - * The NBL should already have ref to itself so that during copy - * it will not be freed. - * Currently this API assert there is only one NB in an NBL, it needs - * to be fixed if we receive multiple NBs in an NBL. - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsTcpSegmentNBL(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - POVS_PACKET_HDR_INFO hdrInfo, - UINT32 mss, - UINT32 headRoom) -{ - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; -#ifdef DBG - POVS_NBL_POOL ovsPool = &context->ovsPool; -#endif - POVS_BUFFER_CONTEXT dstCtx, srcCtx; - UINT32 size, hdrSize, seqNumber; - PNET_BUFFER_LIST newNbl; - PNET_BUFFER nb, newNb; - NDIS_STATUS status; - UINT16 segmentSize; - ULONG copiedSize; - - srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { - OVS_LOG_INFO("src nbl must have ctx initialized"); - ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); - return NULL; - } - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL); - - /* Figure out the segment header size */ - status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber); - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_INFO("Cannot parse NBL header"); - return NULL; - } - - size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize; - - /* XXX add to 
ovsPool counters? */ - newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL, - NULL, hdrSize, mss, hdrSize + headRoom , 0, 0); - if (newNbl == NULL) { - return NULL; - } - - /* Now deal with TCP payload */ - for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL; - newNb = NET_BUFFER_NEXT_NB(newNb)) { - segmentSize = (size > mss ? mss : size) & 0xffff; - if (headRoom) { - NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL); - } - - /* Now copy the eth/IP/TCP header and fix up */ - status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0, - &copiedSize); - if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) { - goto nblcopy_error; - } - - status = FixSegmentHeader(newNb, segmentSize, seqNumber); - if (status != NDIS_STATUS_SUCCESS) { - goto nblcopy_error; - } - - - /* Move on to the next segment */ - size -= segmentSize; - seqNumber += segmentSize; - } - - status = OvsAllocateNBLContext(context, newNbl); - if (status != NDIS_STATUS_SUCCESS) { - goto nblcopy_error; - } - - status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE); - if (status != NDIS_STATUS_SUCCESS) { - goto nbl_context_error; - } - - newNbl->ParentNetBufferList = nbl; - - /* Remember it's a fragment NBL so we can free it properly */ - dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl); - ASSERT(dstCtx != NULL); - dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT | - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER; - dstCtx->refCount = 1; - dstCtx->magic = OVS_CTX_MAGIC; - dstCtx->dataOffsetDelta = hdrSize + headRoom; - - InterlockedIncrement((LONG volatile *)&srcCtx->refCount); -#ifdef DBG - InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount); - - OvsDumpNetBufferList(nbl); - OvsDumpForwardingDetails(nbl); - - OvsDumpNetBufferList(newNbl); - OvsDumpForwardingDetails(newNbl); -#endif - OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl); - return newNbl; - -nbl_context_error: - 
OvsFreeNBLContext(context, newNbl); -nblcopy_error: -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); -#endif - NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0); - return NULL; -} - - -/* - * -------------------------------------------------------------------------- - * OvsFullCopyToMultipleNBLs -- - * - * Copy NBL to multiple NBLs, each NB will have its own NBL - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsFullCopyToMultipleNBLs(PVOID ovsContext, - PNET_BUFFER_LIST nbl, - UINT32 headRoom, - BOOLEAN copyNblInfo) -{ - - POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext; - PNET_BUFFER_LIST firstNbl, currNbl, newNbl; - PNET_BUFFER nb; - POVS_BUFFER_CONTEXT srcCtx; - - srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) { - OVS_LOG_INFO("src nbl must have ctx initialized"); - ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC); - return NULL; - } - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo); - - if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) { - return newNbl; - } else { - firstNbl = newNbl; - currNbl = newNbl; - } - - while (nb) { - newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, - copyNblInfo); - if (newNbl == NULL) { - goto copymultiple_error; - } - NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl; - currNbl = newNbl; - nb = NET_BUFFER_NEXT_NB(nb); - } - return firstNbl; - -copymultiple_error: - while (firstNbl) { - currNbl = firstNbl; - firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl); - NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL; - OvsCompleteNBL(context, currNbl, TRUE); - } - return NULL; - -} - - -/* - * -------------------------------------------------------------------------- - * OvsCompleteNBL -- - * - * This function tries to free the NBL allocated by OVS buffer - * management module. 
If it trigger the completion of the parent - * NBL, it will recursively call itself. If it trigger the completion - * of external NBL, it will be returned to the caller. The caller - * is responsible to call API to return to upper layer. - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsCompleteNBL(POVS_SWITCH_CONTEXT context, - PNET_BUFFER_LIST nbl, - BOOLEAN updateRef) -{ - POVS_BUFFER_CONTEXT ctx; - UINT16 flags; - PNET_BUFFER_LIST parent; - NDIS_STATUS status; - NDIS_HANDLE poolHandle; - LONG value; - POVS_NBL_POOL ovsPool = &context->ovsPool; - PNET_BUFFER nb; - - - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - - ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); - - OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d", - nbl, ctx, ctx->refCount, updateRef); - - if (updateRef) { - value = InterlockedDecrement((LONG volatile *)&ctx->refCount); - if (value != 0) { - return NULL; - } - } else { - /* - * This is a special case, the refCount must be zero - */ - ASSERT(ctx->refCount == 0); - } - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - - flags = ctx->flags; - if (!(flags & OVS_BUFFER_FRAGMENT) && - NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) { - UINT32 diff; - if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) { - diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb); - status = NdisRetreatNetBufferListDataStart(nbl, diff, 0, - NULL, NULL); - ASSERT(status == NDIS_STATUS_SUCCESS); - } else { - diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength; - NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL); - } - } - - if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) { - NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT)); - } - - if (flags & OVS_BUFFER_NEED_COMPLETE) { - /* - * return to caller for completion - */ -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount); -#endif - return nbl; - } - - if (flags & 
OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) { - context->NdisSwitchHandlers. - FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl); - } - - if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) { - PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl); - while (nb) { - PMDL mdl = NET_BUFFER_FIRST_MDL(nb); - NET_BUFFER_FIRST_MDL(nb) = NULL; - ASSERT(mdl->Next == NULL); - OvsFreeMDLAndData(mdl); - nb = NET_BUFFER_NEXT_NB(nb); - } - } - - if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) { - PNET_BUFFER nb, nextNb; - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - while (nb) { - nextNb = NET_BUFFER_NEXT_NB(nb); - NdisFreeNetBuffer(nb); -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->nbCount); -#endif - nb = nextNb; - } - NET_BUFFER_LIST_FIRST_NB(nbl) = NULL; - } - - parent = nbl->ParentNetBufferList; - - poolHandle = NdisGetPoolFromNetBufferList(nbl); - if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) { - ASSERT(poolHandle == ovsPool->fixSizePool); -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount); -#endif - NdisFreeNetBufferList(nbl); - } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) { - ASSERT(poolHandle == ovsPool->zeroSizePool); -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount); -#endif - NdisFreeNetBufferList(nbl); - } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) { - ASSERT(poolHandle == ovsPool->nblOnlyPool); -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount); -#endif - NdisFreeCloneNetBufferList(nbl, 0); - } else if (flags & OVS_BUFFER_FRAGMENT) { - OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent); -#ifdef DBG - InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount); -#endif - NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0); - } - - if (parent != NULL) { - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent); - ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); - value = InterlockedDecrement((LONG volatile *)&ctx->refCount); - if (value == 0) { - 
return OvsCompleteNBL(context, parent, FALSE); - } - } - return NULL; -} - -/* - * -------------------------------------------------------------------------- - * OvsSetCtxSourcePortNo -- - * Setter function which stores the source port of an NBL in the NBL - * Context Info. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, - UINT32 portNo) -{ - POVS_BUFFER_CONTEXT ctx; - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (ctx == NULL) { - ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); - return STATUS_INVALID_PARAMETER; - } - - ctx->srcPortNo = portNo; - return NDIS_STATUS_SUCCESS; -} - -/* - * -------------------------------------------------------------------------- - * OvsGetCtxSourcePortNo -- - * Get source port of an NBL from its Context Info. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, - UINT32 *portNo) -{ - POVS_BUFFER_CONTEXT ctx; - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl); - if (ctx == NULL || portNo == NULL) { - ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); - return STATUS_INVALID_PARAMETER; - } - *portNo = ctx->srcPortNo; - return NDIS_STATUS_SUCCESS; -} diff --git a/datapath-windows/ovsext/OvsBufferMgmt.h b/datapath-windows/ovsext/OvsBufferMgmt.h deleted file mode 100644 index 9c00b1b5c..000000000 --- a/datapath-windows/ovsext/OvsBufferMgmt.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_BUFFER_MGMT_H_ -#define __OVS_BUFFER_MGMT_H_ 1 - -#define MEM_ALIGN MEMORY_ALLOCATION_ALIGNMENT -#define MEM_ALIGN_SIZE(_x) ((MEM_ALIGN - 1 + (_x))/MEM_ALIGN * MEM_ALIGN) -#define OVS_CTX_MAGIC 0xabcd - -#define OVS_DEFAULT_NBL_CONTEXT_SIZE MEM_ALIGN_SIZE(64) -#define OVS_DEFAULT_NBL_CONTEXT_FILL \ - (OVS_DEFAULT_NBL_CONTEXT_SIZE - sizeof (OVS_BUFFER_CONTEXT)) - -#define OVS_DEFAULT_DATA_SIZE 256 -#define OVS_DEFAULT_HEADROOM_SIZE 128 -#define OVS_FIX_NBL_DATA_SIZE (OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE) - -/* Default we copy 18 bytes, to make sure ethernet header and vlan is in - * continuous buffer */ -#define OVS_DEFAULT_COPY_SIZE 18 - -enum { - OVS_BUFFER_NEED_COMPLETE = BIT16(0), - OVS_BUFFER_PRIVATE_MDL = BIT16(1), - OVS_BUFFER_PRIVATE_DATA = BIT16(2), - OVS_BUFFER_PRIVATE_NET_BUFFER = BIT16(3), - OVS_BUFFER_PRIVATE_FORWARD_CONTEXT = BIT16(4), - OVS_BUFFER_PRIVATE_CONTEXT = BIT16(5), - OVS_BUFFER_FROM_FIX_SIZE_POOL = BIT16(6), - OVS_BUFFER_FROM_ZERO_SIZE_POOL = BIT16(7), - OVS_BUFFER_FROM_NBL_ONLY_POOL = BIT16(8), - OVS_BUFFER_RECV_BUFFER = BIT16(9), - OVS_BUFFER_SEND_BUFFER = BIT16(10), - OVS_BUFFER_FRAGMENT = BIT16(11), -}; - -typedef union _OVS_BUFFER_CONTEXT { - struct { - UINT16 magic; - UINT16 flags; - UINT32 srcPortNo; - UINT32 refCount; - union { - UINT32 origDataLength; - UINT32 dataOffsetDelta; - }; - }; - - UINT64 value[MEM_ALIGN_SIZE(16) >> 3]; -} OVS_BUFFER_CONTEXT, *POVS_BUFFER_CONTEXT; - - -typedef struct _OVS_NBL_POOL { - NDIS_SWITCH_CONTEXT ndisContext; - NDIS_HANDLE ndisHandle; - 
NDIS_HANDLE fixSizePool; // data size of 256 - NDIS_HANDLE zeroSizePool; // no data, NBL + NB + Context - NDIS_HANDLE nblOnlyPool; // NBL + context for clone - NDIS_HANDLE nbPool; // NB for clone -#ifdef DBG - LONG fixNBLCount; - LONG zeroNBLCount; - LONG nblOnlyCount; - LONG nbCount; - LONG sysNBLCount; - LONG fragNBLCount; -#endif -} OVS_NBL_POOL, *POVS_NBL_POOL; - - -NDIS_STATUS OvsInitBufferPool(PVOID context); -VOID OvsCleanupBufferPool(PVOID context); - -PNET_BUFFER_LIST OvsAllocateFixSizeNBL(PVOID context, - UINT32 size, - UINT32 headRoom); -PNET_BUFFER_LIST OvsAllocateVariableSizeNBL(PVOID context, - UINT32 size, - UINT32 headRoom); - -POVS_BUFFER_CONTEXT OvsInitExternalNBLContext(PVOID context, - PNET_BUFFER_LIST nbl, - BOOLEAN isRecv); - -PNET_BUFFER_LIST OvsPartialCopyNBL(PVOID context, - PNET_BUFFER_LIST nbl, - UINT32 copySize, - UINT32 headRoom, - BOOLEAN copyNblInfo); -PNET_BUFFER_LIST OvsPartialCopyToMultipleNBLs(PVOID context, - PNET_BUFFER_LIST nbl, - UINT32 copySize, - UINT32 headRoom, - BOOLEAN copyNblInfo); -PNET_BUFFER_LIST OvsFullCopyNBL(PVOID context, PNET_BUFFER_LIST nbl, - UINT32 headRoom, BOOLEAN copyNblInfo); -PNET_BUFFER_LIST OvsTcpSegmentNBL(PVOID context, - PNET_BUFFER_LIST nbl, - POVS_PACKET_HDR_INFO hdrInfo, - UINT32 MSS, - UINT32 headRoom); -PNET_BUFFER_LIST OvsFullCopyToMultipleNBLs(PVOID context, - PNET_BUFFER_LIST nbl, UINT32 headRoom, BOOLEAN copyNblInfo); -PNET_BUFFER_LIST OvsCompleteNBL(PVOID context, PNET_BUFFER_LIST nbl, - BOOLEAN updateRef); -NDIS_STATUS OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 portNo); - -NDIS_STATUS OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl, UINT32 *portNo); - -#endif /* __OVS_BUFFER_MGMT_H_ */ diff --git a/datapath-windows/ovsext/OvsChecksum.c b/datapath-windows/ovsext/OvsChecksum.c deleted file mode 100644 index e19237389..000000000 --- a/datapath-windows/ovsext/OvsChecksum.c +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" -#include "OvsChecksum.h" -#include "OvsFlow.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_CHECKSUM -#include "OvsDebug.h" -#include "OvsPacketParser.h" - -#ifndef htons -#define htons(_x) (((UINT16)(_x) >> 8) + (((UINT16)(_x) << 8) & 0xff00)) -#endif - -#ifndef swap64 -#define swap64(_x) ((((UINT64)(_x) >> 8) & 0x00ff00ff00ff00ff) + \ - (((UINT64)(_x) << 8) & 0xff00ff00ff00ff00)) -#endif - -#define fold64(_x) \ - _x = ((_x) >> 32) + ((_x) & 0xffffffff); \ - _x = (UINT32)(((_x) >> 32) + (_x)); \ - _x = ((_x) >> 16) + ((_x) & 0xffff); \ - _x = (UINT16)(((_x) >> 16) + (_x)) - -#define fold32(_x) \ - _x = ((_x) >> 16) + ((_x) & 0xffff); \ - _x = (UINT16)(((_x) >> 16) + (_x)) - - -/* - *---------------------------------------------------------------------------- - * CalculateOnesComplement -- - * - * Given the start address and buffer length, calculate the 1's complement - * This routine can be used when multiple buffers are used for a packets. - * - * PLEASE NOTE, even though the last parameter is UINT64, but the assumption - * is it will not overflowed after adding the extra data. 
- * ------------------------------------------------ - * - * Result: - * As name indicate, the final data is not 1's complemnent - *---------------------------------------------------------------------------- - */ -UINT64 -CalculateOnesComplement(UINT8 *start, - UINT16 totalLength, - UINT64 initial, - BOOLEAN isEvenStart) -{ - UINT64 sum = 0, val; - UINT64 *src = (UINT64 *)start; - union { - UINT32 val; - UINT8 b8[4]; - } tmp; - - while (totalLength > 7) { - val = *src; - sum += (val >> 32) + (val & 0xffffffff); - src++; - totalLength -= 8; - } - if (totalLength > 3) { - sum += *(UINT32 *)src; - src = (UINT64 *)((UINT8 *)src + 4); - totalLength -= 4; - } - start = (UINT8 *)src; - tmp.val = 0; - switch (totalLength) { - case 3: - tmp.b8[2] = start[2]; - case 2: - tmp.b8[1] = start[1]; - case 1: - tmp.b8[0] = start[0]; - sum += tmp.val; - } - sum = (isEvenStart ? sum : swap64(sum)) + initial; - return sum; -} - -/* - *---------------------------------------------------------------------------- - * CalculateChecksum -- - * - * Given the start point, and length, calculate the checksum - * as 1's complement of 1's comlement. - * - * This assume the checksum field is initailized properly. - * - * Input Parameter: - * ptr: point to the data to be checksumed - * totalLength: total length of the data - * initial: inital value to remit the checksum. Please note this - * value should be network byte order value. - * - * The last parameter may be useful where you don't want to set - * checksum field to zero, in that case you can pass ~checksum, - * this is equivalent of set checksum field to zero. - * - * Result: - * The result can be assigned to checksum field directly. 
- *---------------------------------------------------------------------------- - */ -UINT16 -CalculateChecksum(UINT8 *ptr, - UINT16 totalLength, - UINT16 initial) -{ - UINT64 sum = CalculateOnesComplement(ptr, totalLength, initial, TRUE); - fold64(sum); - return (UINT16)~sum; -} - -/* - *---------------------------------------------------------------------------- - * CopyAndCalculateOnesComplement -- - * - * Given the start address and buffer length, calculate the 1's complement - * at same time, copt the data from src to dst. - * - * This routine can be used when multiple buffers are used for a packets. - * - * PLEASE NOTE, even though the last parameter is UINT64, but the assumption - * is it will not overflowed after adding the extra data. - * ------------------------------------------------ - * - * Result: - * As name indicate, the final data is not 1's complemnent - *---------------------------------------------------------------------------- - */ -UINT64 -CopyAndCalculateOnesComplement(UINT8 *dst, - UINT8 *src, - UINT16 length, - UINT64 initial, - BOOLEAN isEvenStart) -{ - UINT64 sum =0, val; - UINT64 *src64, *dst64; - union { - UINT32 val; - UINT8 b8[4]; - } tmp; - - src64 = (UINT64 *)src; - dst64 = (UINT64 *)dst; - - while (length > 7) { - val = *src64; - *dst64 = val; - sum += (val >> 32) + (val & 0xffffffff); - src64++; - dst64++; - length -= 8; - } - - if (length > 3) { - val = *(UINT32 *)src64; - *(UINT32 *)dst64 = (UINT32)val; - sum += (UINT32)val; - dst64 = (UINT64 *)((UINT8 *)dst64 + 4); - src64 = (UINT64 *)((UINT8 *)src64 + 4); - length -= 4; - } - src = (UINT8 *)src64; - dst = (UINT8 *)dst64; - tmp.val = 0; - switch (length) { - case 3: - dst[2] = src[2]; - tmp.b8[2] = src[2]; - case 2: - dst[1] = src[1]; - tmp.b8[1] = src[1]; - case 1: - dst[0] = src[0]; - tmp.b8[0] = src[0]; - sum += tmp.val; - } - sum = (isEvenStart ? 
sum : swap64(sum)) + initial; - return sum; -} - -/* - *---------------------------------------------------------------------------- - * CopyAndCalculateChecksum -- - * - * This is similar to CalculateChecksum, except it will also copy data to - * destination address. - *---------------------------------------------------------------------------- - */ -UINT16 -CopyAndCalculateChecksum(UINT8 *dst, - UINT8 *src, - UINT16 length, - UINT16 initial) -{ - - UINT64 sum = CopyAndCalculateOnesComplement(dst, src, length, initial, - TRUE); - fold64(sum); - return (UINT16)~sum; -} - - -/* - *---------------------------------------------------------------------------- - * IPChecksum -- - * - * Give IP header, calculate the IP checksum. - * We assume IP checksum field is initialized properly - * - * Input Pramater: - * ipHdr: IP header start point - * length: IP header length (potentially include IP options) - * initial: same as CalculateChecksum - * - * Result: - * The result is already 1's complement, so can be assigned - * to checksum field directly - *---------------------------------------------------------------------------- - */ -UINT16 -IPChecksum(UINT8 *ipHdr, - UINT16 length, - UINT16 initial) -{ - UINT32 sum = initial; - UINT16 *ptr = (UINT16 *)ipHdr; - ASSERT((length & 0x3) == 0); - while (length > 1) { - sum += ptr[0]; - ptr++; - length -= 2; - } - fold32(sum); - return (UINT16)~sum; -} - -/* - *---------------------------------------------------------------------------- - * IPPseudoChecksum -- - * - * Give src and dst IP address, protocol value and total - * upper layer length(not include IP header, but include - * upller layer protocol header, for example it include - * TCP header for TCP checksum), calculate the pseudo - * checksum, please note this checksum is just 1's complement - * addition. 
- * - * Input Parameter: - * src: please note it is in network byte order - * dst: same as src - * protocol: protocol value in IP header - * totalLength: total length of upper layer data including - * header. - * - * Result: - * - * This value should be put in TCP checksum field before - * calculating TCP checksum using CalculateChecksum with - * initial value of 0. - *---------------------------------------------------------------------------- - */ -UINT16 -IPPseudoChecksum(UINT32 *src, - UINT32 *dst, - UINT8 protocol, - UINT16 totalLength) -{ - UINT32 sum = (UINT32)htons(totalLength) + htons(protocol); - sum += (*src >> 16) + (*src & 0xffff); - sum += (*dst >> 16) + (*dst & 0xffff); - fold32(sum); - return (UINT16)sum; -} - -/* - *---------------------------------------------------------------------------- - * IPv6PseudoChecksum -- - * - * Given IPv6 src and dst address, upper layer protocol and total - * upper layer protocol data length including upper layer header - * part, calculate the pseudo checksum for upper layer protocol - * checksum. - * - * please note this checksum is just 1's complement addition. - * - * Input Parameter: - * src: src IPv6 address in network byte order - * dst: dst IPv6 address. - * protocol: upper layer protocol - * totalLength: total length of upper layer data. Please note this is - * in host byte order. - * - * Result: - * - * Place in upper layer checksum field before calculate upper layer - * checksum. 
- *---------------------------------------------------------------------------- - */ -UINT16 -IPv6PseudoChecksum(UINT32 *src, - UINT32 *dst, - UINT8 protocol, - UINT16 totalLength) -{ - UINT64 sum = (UINT32)htons(totalLength) + htons(protocol); - sum += (UINT64)src[0] + src[1] + src[2] + src[3]; - sum += (UINT64)dst[0] + dst[1] + dst[2] + dst[3]; - fold64(sum); - return (UINT16)sum; -} - -/* - *---------------------------------------------------------------------------- - * ChecksumUpdate32 -- - * - * Given old checksum value (as it is in checksum field), - * prev value of the relevant field in network byte order - * new value of the relevant field in the network byte order - * calculate the new checksum. - * Please check relevant RFC for reference. - * - * Input Pramater: - * oldSum: old checksum value in checksum field - * prev: previous value of relevant 32 bit feld in network - * byte order. - * new: new value of the relevant 32 bit field in network - * byte order. - * - * Result: - * new checksum value to be placed in the checksum field. - *---------------------------------------------------------------------------- - */ -UINT16 -ChecksumUpdate32(UINT16 oldSum, - UINT32 prev, - UINT32 newValue) -{ - UINT32 sum = ~prev; - sum = (sum >> 16) + (sum & 0xffff); - sum += (newValue >> 16) + (newValue & 0xffff); - sum += (UINT16)~oldSum; - fold32(sum); - return (UINT16)~sum; -} - - -/* - *---------------------------------------------------------------------------- - * ChecksumUpdate16 -- - * - * Given old checksum value (as it is in checksum field), - * prev value of the relevant field in network byte order - * new value of the relevant field in the network byte order - * calculate the new checksum. - * Please check relevant RFC for reference. - * - * Input Pramater: - * oldSum: old checksum value in checksum field - * prev: previous value of relevant 32 bit feld in network - * byte order. - * new: new value of the relevant 32 bit field in network - * byte order. 
- * - * Result: - * new checksum value to be placed in the checksum field. - *---------------------------------------------------------------------------- - */ -UINT16 -ChecksumUpdate16(UINT16 oldSum, - UINT16 prev, - UINT16 newValue) -{ - UINT32 sum = (UINT16)~oldSum; - sum += (UINT32)((UINT16)~prev) + newValue; - fold32(sum); - return (UINT16)~sum; -} - -/* - *---------------------------------------------------------------------------- - * CalculateChecksumNB -- - * - * Calculates checksum over a length of bytes contained in an NB. - * - * nb : NB which contains the packet bytes. - * csumDataLen : Length of bytes to be checksummed. - * offset : offset to the first bytes of the data stream to be - * checksumed. - * - * Result: - * return 0, if there is a failure. - *---------------------------------------------------------------------------- - */ -UINT16 -CalculateChecksumNB(const PNET_BUFFER nb, - UINT16 csumDataLen, - UINT32 offset) -{ - ULONG mdlLen; - UINT16 csLen; - PUCHAR src; - UINT64 csum = 0; - PMDL currentMdl; - ULONG firstMdlLen; - /* Running count of bytes in remainder of the MDLs including current. */ - ULONG packetLen; - - if ((nb == NULL) || (csumDataLen == 0) - || (offset >= NET_BUFFER_DATA_LENGTH(nb)) - || (offset + csumDataLen > NET_BUFFER_DATA_LENGTH(nb))) { - OVS_LOG_ERROR("Invalid parameters - csum length %u, offset %u," - "pkt%s len %u", csumDataLen, offset, nb? "":"(null)", - nb? 
NET_BUFFER_DATA_LENGTH(nb) : 0); - return 0; - } - - currentMdl = NET_BUFFER_CURRENT_MDL(nb); - packetLen = NET_BUFFER_DATA_LENGTH(nb); - firstMdlLen = - MmGetMdlByteCount(currentMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb); - - firstMdlLen = MIN(firstMdlLen, packetLen); - if (offset < firstMdlLen) { - src = (PUCHAR) MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); - if (!src) { - return 0; - } - src += (NET_BUFFER_CURRENT_MDL_OFFSET(nb) + offset); - mdlLen = firstMdlLen - offset; - packetLen -= firstMdlLen; - ASSERT((INT)packetLen >= 0); - } else { - offset -= firstMdlLen; - packetLen -= firstMdlLen; - ASSERT((INT)packetLen >= 0); - currentMdl = NDIS_MDL_LINKAGE(currentMdl); - mdlLen = MmGetMdlByteCount(currentMdl); - mdlLen = MIN(mdlLen, packetLen); - - while (offset >= mdlLen) { - offset -= mdlLen; - packetLen -= mdlLen; - ASSERT((INT)packetLen >= 0); - currentMdl = NDIS_MDL_LINKAGE(currentMdl); - mdlLen = MmGetMdlByteCount(currentMdl); - mdlLen = MIN(mdlLen, packetLen); - } - - src = (PUCHAR)MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); - if (!src) { - return 0; - } - - src += offset; - mdlLen -= offset; - } - - while (csumDataLen && (currentMdl != NULL)) { - ASSERT(mdlLen < 65536); - csLen = MIN((UINT16) mdlLen, csumDataLen); - //XXX Not handling odd bytes yet. - ASSERT(((csLen & 0x1) == 0) || csumDataLen <= mdlLen); - - csum = CalculateOnesComplement(src, csLen, csum, TRUE); - fold64(csum); - - csumDataLen -= csLen; - currentMdl = NDIS_MDL_LINKAGE(currentMdl); - if (csumDataLen && currentMdl) { - src = MmGetSystemAddressForMdlSafe(currentMdl, LowPagePriority); - if (!src) { - return 0; - } - - mdlLen = MmGetMdlByteCount(currentMdl); - mdlLen = MIN(mdlLen, packetLen); - /* packetLen does not include the current MDL from here on. 
*/ - packetLen -= mdlLen; - ASSERT((INT)packetLen >= 0); - } - } - - ASSERT(csumDataLen == 0); - ASSERT((csum & ~0xffff) == 0); - return (UINT16) ~csum; -} - -/* - * -------------------------------------------------------------------------- - * OvsValidateIPChecksum - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, - POVS_PACKET_HDR_INFO hdrInfo) -{ - NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; - uint16_t checksum, hdrChecksum; - struct IPHdr ip_storage; - const IPHdr *ipHdr; - - if (!hdrInfo->isIPv4) { - return NDIS_STATUS_SUCCESS; - } - - /* First check if NIC has indicated checksum failure. */ - csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, - TcpIpChecksumNetBufferListInfo); - if (csumInfo.Receive.IpChecksumFailed) { - return NDIS_STATUS_FAILURE; - } - - /* Next, check if the NIC did not validate the RX checksum. */ - if (!csumInfo.Receive.IpChecksumSucceeded) { - ipHdr = OvsGetIp(curNbl, hdrInfo->l3Offset, &ip_storage); - if (ipHdr) { - ip_storage = *ipHdr; - hdrChecksum = ipHdr->check; - ip_storage.check = 0; - checksum = IPChecksum((uint8 *)&ip_storage, ipHdr->ihl * 4, 0); - if (checksum != hdrChecksum) { - return NDIS_STATUS_FAILURE; - } - } - } - return NDIS_STATUS_SUCCESS; -} - -/* - *---------------------------------------------------------------------------- - * OvsValidateUDPChecksum - *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, BOOLEAN udpCsumZero) -{ - NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; - - csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); - - if (udpCsumZero) { - /* Zero is valid checksum. 
*/ - csumInfo.Receive.UdpChecksumFailed = 0; - NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; - return NDIS_STATUS_SUCCESS; - } - - /* First check if NIC has indicated UDP checksum failure. */ - if (csumInfo.Receive.UdpChecksumFailed) { - return NDIS_STATUS_INVALID_PACKET; - } - - return NDIS_STATUS_SUCCESS; -} diff --git a/datapath-windows/ovsext/OvsChecksum.h b/datapath-windows/ovsext/OvsChecksum.h deleted file mode 100644 index d0070d2f5..000000000 --- a/datapath-windows/ovsext/OvsChecksum.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_CHECKSUM_H_ -#define __OVS_CHECKSUM_H_ 1 - -typedef union _OVS_PACKET_HDR_INFO *POVS_PACKET_HDR_INFO; - -UINT16 CalculateChecksum(UINT8 *ptr, UINT16 length, UINT16 initial); -UINT16 CopyAndCalculateChecksum(UINT8 *dst, UINT8 *src, UINT16 length, - UINT16 initial); -UINT16 IPChecksum(UINT8 *ipHdr, UINT16 length, UINT16 initial); -UINT16 IPPseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, - UINT16 totalLength); -UINT16 IPv6PseudoChecksum(UINT32 *src, UINT32 *dst, UINT8 protocol, - UINT16 totalLength); -UINT16 ChecksumUpdate32(UINT16 oldSum, UINT32 prev, UINT32 newValue); -UINT16 ChecksumUpdate16(UINT16 oldSum, UINT16 prev, UINT16 newValue); -UINT16 CalculateChecksumNB(const PNET_BUFFER nb, UINT16 csumDataLen, - UINT32 offset); -NDIS_STATUS OvsValidateIPChecksum(PNET_BUFFER_LIST curNbl, - POVS_PACKET_HDR_INFO hdrInfo); -NDIS_STATUS OvsValidateUDPChecksum(PNET_BUFFER_LIST curNbl, - BOOLEAN udpCsumZero); - -#endif /* __OVS_CHECKSUM_H_ */ diff --git a/datapath-windows/ovsext/OvsDebug.c b/datapath-windows/ovsext/OvsDebug.c deleted file mode 100644 index 8610008df..000000000 --- a/datapath-windows/ovsext/OvsDebug.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" - -#include "OvsDebug.h" -#ifdef DBG -#define OVS_DBG_DEFAULT OVS_DBG_INFO -#else -#define OVS_DBG_DEFAULT OVS_DBG_ERROR -#endif - -UINT32 ovsLogFlags = 0xffffffff; -UINT32 ovsLogLevel = OVS_DBG_DEFAULT; - -#define OVS_LOG_BUFFER_SIZE 384 - -/* - * -------------------------------------------------------------------------- - * OvsLog -- - * Utility function to log to the Windows debug console. - * -------------------------------------------------------------------------- - */ -VOID -OvsLog(UINT32 level, - UINT32 flag, - CHAR *funcName, - UINT32 line, - CHAR *format, - ...) -{ - va_list args; - CHAR buf[OVS_LOG_BUFFER_SIZE]; - - if (level > ovsLogLevel || (ovsLogFlags & flag) == 0) { - return; - } - - buf[0] = 0; - va_start(args, format); - RtlStringCbVPrintfA(buf, sizeof (buf), format, args); - va_end(args); - - DbgPrintEx(DPFLTR_IHVNETWORK_ID, level, "%s:%lu %s\n", funcName, line, buf); -} diff --git a/datapath-windows/ovsext/OvsDebug.h b/datapath-windows/ovsext/OvsDebug.h deleted file mode 100644 index a57e73e41..000000000 --- a/datapath-windows/ovsext/OvsDebug.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_DEBUG_H_ -#define __OVS_DEBUG_H_ 1 - -#define OVS_DBG_INIT BIT32(0) -#define OVS_DBG_SWITCH BIT32(1) -#define OVS_DBG_VPORT BIT32(2) -#define OVS_DBG_FLOW BIT32(3) -#define OVS_DBG_QOS BIT32(4) -#define OVS_DBG_USER BIT32(5) -#define OVS_DBG_EXECUTE BIT32(6) -#define OVS_DBG_EVENT BIT32(7) -#define OVS_DBG_DISPATCH BIT32(8) -#define OVS_DBG_OID BIT32(9) -#define OVS_DBG_STATUS BIT32(10) -#define OVS_DBG_CHECKSUM BIT32(11) -#define OVS_DBG_VXLAN BIT32(12) -#define OVS_DBG_GRE BIT32(13) -#define OVS_DBG_GRE64 BIT32(14) -#define OVS_DBG_ACTION BIT32(15) -#define OVS_DBG_DATAPATH BIT32(16) -#define OVS_DBG_PROPERTY BIT32(17) -#define OVS_DBG_IPHELPER BIT32(18) -#define OVS_DBG_BUFMGMT BIT32(19) -#define OVS_DBG_OTHERS BIT32(21) -#define OVS_DBG_NETLINK BIT32(22) - -#define OVS_DBG_RESERVED BIT32(31) -//Please add above OVS_DBG_RESERVED. - -#define OVS_DBG_ERROR DPFLTR_ERROR_LEVEL -#define OVS_DBG_WARN DPFLTR_WARNING_LEVEL -#define OVS_DBG_TRACE DPFLTR_TRACE_LEVEL -#define OVS_DBG_INFO DPFLTR_INFO_LEVEL -#define OVS_DBG_LOUD (DPFLTR_INFO_LEVEL + 1) - - - -VOID OvsLog(UINT32 level, UINT32 flag, CHAR *funcName, - UINT32 line, CHAR *format, ...); - - -#define OVS_LOG_LOUD(_format, ...) \ - OvsLog(OVS_DBG_LOUD, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) - -#define OVS_LOG_INFO(_format, ...) \ - OvsLog(OVS_DBG_INFO, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) - -#define OVS_LOG_TRACE(_format, ...) \ - OvsLog(OVS_DBG_TRACE, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) - -#define OVS_LOG_ERROR(_format, ...) \ - OvsLog(OVS_DBG_ERROR, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) - -#define OVS_LOG_WARN(_format, ...) 
\ - OvsLog(OVS_DBG_WARN, OVS_DBG_MOD, __FUNCTION__, __LINE__, _format, __VA_ARGS__) - -#if DBG -#define OVS_VERIFY_IRQL(_x) \ - if (KeGetCurrentIrql() != (KIRQL)_x) { \ - OVS_LOG_WARN("expected IRQL %u, actual IRQL: %u", \ - _x, KeGetCurrentIrql()); \ - } - -#define OVS_VERIFY_IRQL_LE(_x) \ - if (KeGetCurrentIrql() > (KIRQL)_x) { \ - OVS_LOG_WARN("expected IRQL <= %u, actual IRQL: %u", \ - _x, KeGetCurrentIrql()); \ - } - -#else -#define OVS_VERIFY_IRQL(_x) -#define OVS_VERIFY_IRQL_LE(_x) -#endif - -#endif /* __OVS_DEBUG_H_ */ diff --git a/datapath-windows/ovsext/OvsDriver.c b/datapath-windows/ovsext/OvsDriver.c deleted file mode 100644 index 11632217a..000000000 --- a/datapath-windows/ovsext/OvsDriver.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" -#include "OvsSwitch.h" -#include "Datapath.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_DRIVER -#include "OvsDebug.h" - -/* Global handles. XXX: Some of them need not be global. */ -/* - * Maps to DriverObject and FilterDriverContext parameters in the NDIS filter - * driver functions. - * DriverObject is specified by NDIS. - * FilterDriverContext is specified by the filter driver. - */ -NDIS_HANDLE gOvsExtDriverObject; - -/* - * Maps to NdisFilterHandle parameter in the NDIS filter driver functions. - * NdisFilterHandle is returned by NDISFRegisterFilterDriver. 
- */ -NDIS_HANDLE gOvsExtDriverHandle; - -/* - * Maps to FilterModuleContext parameter in the NDIS filter driver functions. - * FilterModuleContext is a allocated by the driver in the FilterAttach - * function. - */ -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; - -static PWCHAR ovsExtFriendlyName = L"Open vSwitch Extension"; -static PWCHAR ovsExtServiceName = L"OVSExt"; -NDIS_STRING ovsExtGuidUC; -NDIS_STRING ovsExtFriendlyNameUC; - -static PWCHAR ovsExtGuidStr = L"{583CC151-73EC-4A6A-8B47-578297AD7623}"; -static const GUID ovsExtGuid = { - 0x583cc151, - 0x73ec, - 0x4a6a, - {0x8b, 0x47, 0x57, 0x82, 0x97, 0xad, 0x76, 0x23} -}; - -/* Declarations of callback functions for the filter driver. */ -DRIVER_UNLOAD OvsExtUnload; -FILTER_NET_PNP_EVENT OvsExtNetPnPEvent; -FILTER_STATUS OvsExtStatus; - -FILTER_ATTACH OvsExtAttach; -FILTER_DETACH OvsExtDetach; -FILTER_RESTART OvsExtRestart; -FILTER_PAUSE OvsExtPause; - -FILTER_SEND_NET_BUFFER_LISTS OvsExtSendNBL; -FILTER_SEND_NET_BUFFER_LISTS_COMPLETE OvsExtSendNBLComplete; -FILTER_CANCEL_SEND_NET_BUFFER_LISTS OvsExtCancelSendNBL; -FILTER_RECEIVE_NET_BUFFER_LISTS OvsExtReceiveNBL; -FILTER_RETURN_NET_BUFFER_LISTS OvsExtReturnNBL; - -FILTER_OID_REQUEST OvsExtOidRequest; -FILTER_OID_REQUEST_COMPLETE OvsExtOidRequestComplete; -FILTER_CANCEL_OID_REQUEST OvsExtCancelOidRequest; - - -/* - * -------------------------------------------------------------------------- - * Init/Load function for the OVSEXT filter Driver. 
- * --------------------------------------------------------------------------
- */
-NTSTATUS
-DriverEntry(PDRIVER_OBJECT driverObject,
-            PUNICODE_STRING registryPath)
-{
-    NDIS_STATUS status;
-    NDIS_FILTER_DRIVER_CHARACTERISTICS driverChars;
-
-    UNREFERENCED_PARAMETER(registryPath);
-
-    gOvsExtDriverObject = driverObject;
-
-    /* Describe the filter driver: header, versions and identifying names. */
-    RtlZeroMemory(&driverChars, sizeof driverChars);
-    driverChars.Header.Type = NDIS_OBJECT_TYPE_FILTER_DRIVER_CHARACTERISTICS;
-    driverChars.Header.Size = sizeof driverChars;
-    driverChars.Header.Revision = NDIS_FILTER_CHARACTERISTICS_REVISION_2;
-    driverChars.MajorNdisVersion = NDIS_FILTER_MAJOR_VERSION;
-    driverChars.MinorNdisVersion = NDIS_FILTER_MINOR_VERSION;
-    driverChars.MajorDriverVersion = 1;
-    driverChars.MinorDriverVersion = 0;
-    driverChars.Flags = 0;
-
-    RtlInitUnicodeString(&driverChars.ServiceName, ovsExtServiceName);
-    RtlInitUnicodeString(&ovsExtFriendlyNameUC, ovsExtFriendlyName);
-    RtlInitUnicodeString(&ovsExtGuidUC, ovsExtGuidStr);
-
-    driverChars.FriendlyName = ovsExtFriendlyNameUC;
-    driverChars.UniqueName = ovsExtGuidUC;
-
-    /* Register the callbacks; handlers set to NULL are not provided. */
-    driverChars.AttachHandler = OvsExtAttach;
-    driverChars.DetachHandler = OvsExtDetach;
-    driverChars.RestartHandler = OvsExtRestart;
-    driverChars.PauseHandler = OvsExtPause;
-
-    driverChars.SendNetBufferListsHandler = OvsExtSendNBL;
-    driverChars.SendNetBufferListsCompleteHandler = OvsExtSendNBLComplete;
-    driverChars.CancelSendNetBufferListsHandler = OvsExtCancelSendNBL;
-    driverChars.ReceiveNetBufferListsHandler = NULL;
-    driverChars.ReturnNetBufferListsHandler = NULL;
-
-    driverChars.OidRequestHandler = OvsExtOidRequest;
-    driverChars.OidRequestCompleteHandler = OvsExtOidRequestComplete;
-    driverChars.CancelOidRequestHandler = OvsExtCancelOidRequest;
-
-    driverChars.DevicePnPEventNotifyHandler = NULL;
-    driverChars.NetPnPEventHandler = OvsExtNetPnPEvent;
-    driverChars.StatusHandler = NULL;
-
-    driverObject->DriverUnload = OvsExtUnload;
-
-    status = NdisFRegisterFilterDriver(driverObject,
-                                       (NDIS_HANDLE) gOvsExtDriverObject,
-                                       &driverChars, &gOvsExtDriverHandle);
-    if (status != NDIS_STATUS_SUCCESS) {
-        return status;
-    }
-
-    /* Create the communication channel for userspace. */
-    status = OvsCreateDeviceObject(gOvsExtDriverHandle);
-    if (status != NDIS_STATUS_SUCCESS) {
-        /* Roll back the filter registration done above. */
-        NdisFDeregisterFilterDriver(gOvsExtDriverHandle);
-        gOvsExtDriverHandle = NULL;
-    }
-
-    return status;
-}
-
-
-/*
- * --------------------------------------------------------------------------
- * Un-init/Unload function for the OVS intermediate Driver.
- *
- * Deletes the device object and deregisters the filter driver, undoing the
- * two steps performed by DriverEntry in reverse order.
- * --------------------------------------------------------------------------
- */
-VOID
-OvsExtUnload(struct _DRIVER_OBJECT *driverObject)
-{
-    UNREFERENCED_PARAMETER(driverObject);
-
-    OvsDeleteDeviceObject();
-    NdisFDeregisterFilterDriver(gOvsExtDriverHandle);
-}
-
-
-/*
- * --------------------------------------------------------------------------
- * Implements filter driver's FilterStatus function.
- *
- * Forwards 'statusIndication' unchanged through the NDIS filter handle
- * stored in the switch context passed as 'filterModuleContext'.
- * --------------------------------------------------------------------------
- */
-VOID
-OvsExtStatus(NDIS_HANDLE filterModuleContext,
-             PNDIS_STATUS_INDICATION statusIndication)
-{
-    POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext;
-
-    NdisFIndicateStatus(switchObject->NdisFilterHandle, statusIndication);
-}
diff --git a/datapath-windows/ovsext/OvsEth.h b/datapath-windows/ovsext/OvsEth.h
deleted file mode 100644
index 271fd85eb..000000000
--- a/datapath-windows/ovsext/OvsEth.h
+++ /dev/null
@@ -1,450 +0,0 @@
-/*
- * Copyright (c) 2014 VMware, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_ETH_H_ -#define __OVS_ETH_H_ 1 - -#define ETH_LADRF_LEN 2 -#define ETH_ADDR_LENGTH 6 - -typedef UINT8 Eth_Address[ETH_ADDR_LENGTH]; - -#define ETH_ADDR_FMT_STR "%02x:%02x:%02x:%02x:%02x:%02x" -#define ETH_ADDR_FMT_ARGS(a) ((UINT8 *)a)[0], ((UINT8 *)a)[1], ((UINT8 *)a)[2], \ - ((UINT8 *)a)[3], ((UINT8 *)a)[4], ((UINT8 *)a)[5] - -#define ETH_MAX_EXACT_MULTICAST_ADDRS 32 - -typedef enum Eth_RxMode { - ETH_FILTER_UNICAST = 0x0001, /* pass unicast (directed) frames */ - ETH_FILTER_MULTICAST = 0x0002, /* pass some multicast frames */ - ETH_FILTER_ALLMULTI = 0x0004, /* pass *all* multicast frames */ - ETH_FILTER_BROADCAST = 0x0008, /* pass broadcast frames */ - ETH_FILTER_PROMISC = 0x0010, /* pass all frames (ie no filter) */ - ETH_FILTER_USE_LADRF = 0x0020, /* use the LADRF for multicast filtering */ - ETH_FILTER_SINK = 0x10000 /* pass not-matched unicast frames */ -} Eth_RxMode; - -/* filter flags printf helpers */ -#define ETH_FILTER_FLAG_FMT_STR "%s%s%s%s%s%s%s" -#define ETH_FILTER_FLAG_FMT_ARGS(f) (f) & ETH_FILTER_UNICAST ? " UNICAST" : "", \ - (f) & ETH_FILTER_MULTICAST ? " MULTICAST" : "", \ - (f) & ETH_FILTER_ALLMULTI ? " ALLMULTI" : "", \ - (f) & ETH_FILTER_BROADCAST ? " BROADCAST" : "", \ - (f) & ETH_FILTER_PROMISC ? " PROMISC" : "", \ - (f) & ETH_FILTER_USE_LADRF ? " USE_LADRF" : "", \ - (f) & ETH_FILTER_SINK ? 
" SINK" : "" - -/* Ethernet header type */ -typedef enum { - ETH_HEADER_TYPE_DIX, - ETH_HEADER_TYPE_802_1PQ, - ETH_HEADER_TYPE_802_3, - ETH_HEADER_TYPE_802_1PQ_802_3, -} Eth_HdrType; - -/* DIX type fields we care about */ -typedef enum { - ETH_TYPE_IPV4 = 0x0800, - ETH_TYPE_IPV6 = 0x86DD, - ETH_TYPE_ARP = 0x0806, - ETH_TYPE_RARP = 0x8035, - ETH_TYPE_LLDP = 0x88CC, - ETH_TYPE_CDP = 0x2000, - ETH_TYPE_802_1PQ = 0x8100, // not really a DIX type, but used as such - ETH_TYPE_LLC = 0xFFFF, // 0xFFFF is IANA reserved, used to mark LLC -} Eth_DixType; - -typedef enum { - ETH_TYPE_IPV4_NBO = 0x0008, - ETH_TYPE_IPV6_NBO = 0xDD86, - ETH_TYPE_ARP_NBO = 0x0608, - ETH_TYPE_RARP_NBO = 0x3580, - ETH_TYPE_LLDP_NBO = 0xCC88, - ETH_TYPE_CDP_NBO = 0x0020, - ETH_TYPE_AKIMBI_NBO = 0xDE88, - ETH_TYPE_802_1PQ_NBO = 0x0081, // not really a DIX type, but used as such -} Eth_DixTypeNBO; - -/* low two bits of the LLC control byte */ -typedef enum { - ETH_LLC_CONTROL_IFRAME = 0x0, // both 0x0 and 0x2, only low bit of 0 needed - ETH_LLC_CONTROL_SFRAME = 0x1, - ETH_LLC_CONTROL_UFRAME = 0x3, -} Eth_LLCControlBits; - -#define ETH_LLC_CONTROL_UFRAME_MASK (0x3) - -typedef struct Eth_DIX { - UINT16 typeNBO; // indicates the higher level protocol -} Eth_DIX; - -/* - * LLC header come in two varieties: 8 bit control and 16 bit control. - * when the lower two bits of the first byte's control are '11', this - * indicated the 8 bit control field. 
- */ -typedef struct Eth_LLC8 { - UINT8 dsap; - UINT8 ssap; - UINT8 control; -} Eth_LLC8; - -typedef struct Eth_LLC16 { - UINT8 dsap; - UINT8 ssap; - UINT16 control; -} Eth_LLC16; - -typedef struct Eth_SNAP { - UINT8 snapOrg[3]; - Eth_DIX snapType; -} Eth_SNAP; - -typedef struct Eth_802_3 { - UINT16 lenNBO; // length of the frame - Eth_LLC8 llc; // LLC header - Eth_SNAP snap; // SNAP header -} Eth_802_3; - -// 802.1p QOS/priority tags -enum { - ETH_802_1_P_BEST_EFFORT = 0, - ETH_802_1_P_BACKGROUND = 1, - ETH_802_1_P_EXCELLENT_EFFORT = 2, - ETH_802_1_P_CRITICAL_APPS = 3, - ETH_802_1_P_VIDEO = 4, - ETH_802_1_P_VOICE = 5, - ETH_802_1_P_INTERNETWORK_CONROL = 6, - ETH_802_1_P_NETWORK_CONTROL = 7 -}; - -typedef struct Eth_802_1pq_Tag { - UINT16 typeNBO; // always ETH_TYPE_802_1PQ - UINT16 vidHi:4, // 802.1q vlan ID high nibble - canonical:1, // bit order? (should always be 0) - priority:3, // 802.1p priority tag - vidLo:8; // 802.1q vlan ID low byte -} Eth_802_1pq_Tag; - -typedef struct Eth_802_1pq { - Eth_802_1pq_Tag tag; // VLAN/QOS tag - union { - Eth_DIX dix; // DIX header follows - Eth_802_3 e802_3; // or 802.3 header follows - }; -} Eth_802_1pq; - -typedef struct Eth_Header { - Eth_Address dst; // all types of ethernet frame have dst first - Eth_Address src; // and the src next (at least all the ones we'll see) - union { - Eth_DIX dix; // followed by a DIX header... - Eth_802_3 e802_3; // ...or an 802.3 header - Eth_802_1pq e802_1pq; // ...or an 802.1[pq] tag and a header - }; -} Eth_Header; - -#define ETH_BROADCAST_ADDRESS { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } - -static Eth_Address netEthBroadcastAddr = ETH_BROADCAST_ADDRESS; - -/* - * simple predicate for 1536 boundary. 
- * the parameter is a network ordered UINT16, which is compared to 0x06, - * testing for "length" values greater than or equal to 0x0600 (1536) - */ - -#define ETH_TYPENOT8023(x) (((x) & 0xff) >= 0x06) - -/* - * header length macros - * - * first two are typical: ETH_HEADER_LEN_DIX, ETH_HEADER_LEN_802_1PQ - * last two are suspicious, due to 802.3 incompleteness - */ - -#define ETH_HEADER_LEN_DIX (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_DIX)) -#define ETH_HEADER_LEN_802_1PQ (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_802_1pq_Tag) + \ - sizeof(Eth_DIX)) -#define ETH_HEADER_LEN_802_2_LLC (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(UINT16) + \ - sizeof(Eth_LLC8)) -#define ETH_HEADER_LEN_802_2_LLC16 (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(UINT16) + \ - sizeof(Eth_LLC16)) -#define ETH_HEADER_LEN_802_3 (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_802_3)) -#define ETH_HEADER_LEN_802_1PQ_LLC (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_802_1pq_Tag) + \ - sizeof(UINT16) + \ - sizeof(Eth_LLC8)) -#define ETH_HEADER_LEN_802_1PQ_LLC16 (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_802_1pq_Tag) + \ - sizeof(UINT16) + \ - sizeof(Eth_LLC16)) -#define ETH_HEADER_LEN_802_1PQ_802_3 (sizeof(Eth_Address) + \ - sizeof(Eth_Address) + \ - sizeof(Eth_802_1pq_Tag) + \ - sizeof(Eth_802_3)) - -#define ETH_MIN_HEADER_LEN (ETH_HEADER_LEN_DIX) -#define ETH_MAX_HEADER_LEN (ETH_HEADER_LEN_802_1PQ_802_3) - -#define ETH_MIN_FRAME_LEN 60 -#define ETH_MAX_STD_MTU 1500 -#define ETH_MAX_STD_FRAMELEN (ETH_MAX_STD_MTU + ETH_MAX_HEADER_LEN) -#define ETH_MAX_JUMBO_MTU 9000 -#define ETH_MAX_JUMBO_FRAMELEN (ETH_MAX_JUMBO_MTU + ETH_MAX_HEADER_LEN) - -#define ETH_DEFAULT_MTU 1500 - -#define ETH_FCS_LEN 4 -#define ETH_VLAN_LEN sizeof(Eth_802_1pq_Tag) - - -/* - *---------------------------------------------------------------------------- - * Do the two ethernet addresses match? 
- *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsAddrMatch(const Eth_Address addr1, const Eth_Address addr2) -{ - return !memcmp(addr1, addr2, ETH_ADDR_LENGTH); -} - - -/* - *---------------------------------------------------------------------------- - * Is the address the broadcast address? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsBroadcastAddr(const Eth_Address addr) -{ - return Eth_IsAddrMatch(addr, netEthBroadcastAddr); -} - - -/* - *---------------------------------------------------------------------------- - * Is the address a unicast address? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsUnicastAddr(const Eth_Address addr) -{ - // broadcast and multicast frames always have the low bit set in byte 0 - return !(((CHAR *)addr)[0] & 0x1); -} - -/* - *---------------------------------------------------------------------------- - * Is the address the all-zeros address? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsNullAddr(const Eth_Address addr) -{ - return ((addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]) == 0); -} - -/* - *---------------------------------------------------------------------------- - * - * Eth_HeaderType -- - * return an Eth_HdrType depending on the eth header - * contents. will not work in all cases, especially since it - * requres ETH_HEADER_LEN_802_1PQ bytes to determine the type - * - * HeaderType isn't sufficient to determine the length of - * the eth header. 
for 802.3 header, its not clear without - * examination, whether a SNAP is included - * - * returned type: - * - * ETH_HEADER_TYPE_DIX: typical 14 byte eth header - * ETH_HEADER_TYPE_802_1PQ: DIX+vlan tagging - * ETH_HEADER_TYPE_802_3: 802.3 eth header - * ETH_HEADER_TYPE_802_1PQ_802_3: 802.3 + vlan tag - * - * the test for DIX was moved from a 1500 boundary to a 1536 - * boundary, since the vmxnet2 MTU was updated to 1514. when - * W2K8 attempted to send LLC frames, these were interpreted - * as DIX frames instead of the correct 802.3 type - * - * these links may help if they're valid: - * - * http://standards.ieee.org/regauth/ethertype/type-tut.html - * http://standards.ieee.org/regauth/ethertype/type-pub.html - * - * Results: - * Eth_HdrType value - * - *---------------------------------------------------------------------------- - */ -static __inline Eth_HdrType -Eth_HeaderType(const Eth_Header *eh) -{ - /* - * we use 1536 (IEEE 802.3-std mentions 1536, but iana indicates - * type of 0-0x5dc are 802.3) instead of some #def symbol to prevent - * inadvertant reuse of the same macro for buffer size decls. - */ - if (ETH_TYPENOT8023(eh->dix.typeNBO)) { - if (eh->dix.typeNBO != ETH_TYPE_802_1PQ_NBO) { - /* typical case */ - return ETH_HEADER_TYPE_DIX; - } - - /* some type of 802.1pq tagged frame */ - if (ETH_TYPENOT8023(eh->e802_1pq.dix.typeNBO)) { - /* vlan tagging with dix style type */ - return ETH_HEADER_TYPE_802_1PQ; - } - - /* vlan tagging with 802.3 header */ - return ETH_HEADER_TYPE_802_1PQ_802_3; - } - - /* assume 802.3 */ - return ETH_HEADER_TYPE_802_3; -} - - -/* - *---------------------------------------------------------------------------- - * - * Eth_EncapsulatedPktType -- - * Get the encapsulated (layer 3) frame type. - * for LLC frames without SNAP, we don't have - * an encapsulated type, and return ETH_TYPE_LLC. - * - * IANA reserves 0xFFFF, which we reuse to indicate - * ETH_TYPE_LLC. - * - * Results: - * NBO frame type. 
- * - *---------------------------------------------------------------------------- - */ -static __inline UINT16 -Eth_EncapsulatedPktType(const Eth_Header *eh) -{ - Eth_HdrType type = Eth_HeaderType(eh); - - switch (type) { - case ETH_HEADER_TYPE_DIX: return eh->dix.typeNBO; - case ETH_HEADER_TYPE_802_1PQ: return eh->e802_1pq.dix.typeNBO; - case ETH_HEADER_TYPE_802_3: - /* - * Documentation describes SNAP headers as having ONLY - * 0x03 as the control fields, not just the lower two bits - * This prevents the use of Eth_IsLLCControlUFormat. - */ - if ((eh->e802_3.llc.dsap == 0xaa) && (eh->e802_3.llc.ssap == 0xaa) && - (eh->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { - return eh->e802_3.snap.snapType.typeNBO; - } else { - // LLC, no snap header, then no type - return ETH_TYPE_LLC; - } - - case ETH_HEADER_TYPE_802_1PQ_802_3: - if ((eh->e802_1pq.e802_3.llc.dsap == 0xaa) && - (eh->e802_1pq.e802_3.llc.ssap == 0xaa) && - (eh->e802_1pq.e802_3.llc.control == ETH_LLC_CONTROL_UFRAME)) { - return eh->e802_1pq.e802_3.snap.snapType.typeNBO; - } else { - // tagged LLC, no snap header, then no type - return ETH_TYPE_LLC; - } - } - - ASSERT(FALSE); - return 0; -} - -/* - *---------------------------------------------------------------------------- - * Is the frame of the requested protocol type or is it an 802.1[pq] - * encapsulation of such a frame? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsDixType(const Eth_Header *eh, const Eth_DixTypeNBO type) -{ - return Eth_EncapsulatedPktType(eh) == type; -} - - -/* - *---------------------------------------------------------------------------- - * Is the frame an IPV4 frame? 
- *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsIPV4(const Eth_Header *eh) -{ - return Eth_IsDixType(eh, ETH_TYPE_IPV4_NBO); -} - - -/* - *---------------------------------------------------------------------------- - * Is the frame an IPV6 frame? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsIPV6(const Eth_Header *eh) -{ - return Eth_IsDixType(eh, ETH_TYPE_IPV6_NBO); -} - - -/* - *---------------------------------------------------------------------------- - * Is the frame an ARP frame? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsARP(const Eth_Header *eh) -{ - return Eth_IsDixType(eh, ETH_TYPE_ARP_NBO); -} - - -/* - *---------------------------------------------------------------------------- - * Does the frame contain an 802.1[pq] tag? - *---------------------------------------------------------------------------- - */ -static __inline BOOLEAN -Eth_IsFrameTagged(const Eth_Header *eh) -{ - return (eh->dix.typeNBO == ETH_TYPE_802_1PQ_NBO); -} -#endif /* __OVS_ETH_H_ */ diff --git a/datapath-windows/ovsext/OvsEvent.c b/datapath-windows/ovsext/OvsEvent.c deleted file mode 100644 index d324bc606..000000000 --- a/datapath-windows/ovsext/OvsEvent.c +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" - -#include "Datapath.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_EVENT -#include "OvsDebug.h" - -LIST_ENTRY ovsEventQueue; -UINT32 ovsNumEventQueue; -UINT32 ovsNumPollAll; - -extern PNDIS_SPIN_LOCK gOvsCtrlLock; - -NTSTATUS -OvsInitEventQueue() -{ - InitializeListHead(&ovsEventQueue); - return STATUS_SUCCESS; -} - -VOID -OvsCleanupEventQueue() -{ - ASSERT(IsListEmpty(&ovsEventQueue)); - ASSERT(ovsNumEventQueue == 0); -} - -static __inline VOID -OvsAcquireEventQueueLock() -{ - NdisAcquireSpinLock(gOvsCtrlLock); -} - -static __inline VOID -OvsReleaseEventQueueLock() -{ - NdisReleaseSpinLock(gOvsCtrlLock); -} - -/* - * -------------------------------------------------------------------------- - * Cleanup the event queue of the OpenInstance. - * -------------------------------------------------------------------------- - */ -VOID -OvsCleanupEvent(POVS_OPEN_INSTANCE instance) -{ - POVS_EVENT_QUEUE queue; - PIRP irp = NULL; - queue = (POVS_EVENT_QUEUE)instance->eventQueue; - if (queue) { - POVS_EVENT_QUEUE_ELEM elem; - PLIST_ENTRY link, next; - - OvsAcquireEventQueueLock(); - RemoveEntryList(&queue->queueLink); - ovsNumEventQueue--; - if (queue->pendingIrp) { - PDRIVER_CANCEL cancelRoutine; - irp = queue->pendingIrp; - cancelRoutine = IoSetCancelRoutine(irp, NULL); - queue->pendingIrp = NULL; - if (cancelRoutine == NULL) { - irp = NULL; - } - } - instance->eventQueue = NULL; - OvsReleaseEventQueueLock(); - if (irp) { - OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); - } - - LIST_FORALL_SAFE(&queue->elemList, link, next) { - elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link); - OvsFreeMemory(elem); - } - OvsFreeMemory(queue); - } -} - -/* - * -------------------------------------------------------------------------- - * When 
event is generated, we need to post the event to all
- * the event queues. If there is pending Irp waiting for event
- * complete the Irp to wakeup the user thread.
- *
- * Side effects: User thread may be woken up.
- * --------------------------------------------------------------------------
- */
-VOID
-OvsPostEvent(UINT32 portNo,
-             UINT32 status)
-{
-    POVS_EVENT_QUEUE_ELEM elem;
-    POVS_EVENT_QUEUE queue;
-    PLIST_ENTRY link;
-    BOOLEAN triggerPollAll = FALSE;
-    LIST_ENTRY list;
-    PLIST_ENTRY entry;
-    PIRP irp;
-
-    /* IRPs to complete are collected here and completed after unlocking. */
-    InitializeListHead(&list);
-
-    OVS_LOG_TRACE("Enter: portNo: %#x, status: %#x", portNo, status);
-
-    OvsAcquireEventQueueLock();
-
-    LIST_FORALL(&ovsEventQueue, link) {
-        queue = CONTAINING_RECORD(link, OVS_EVENT_QUEUE, queueLink);
-        /* Skip queues not subscribed to this status or already in pollAll. */
-        if ((status & queue->mask) == 0 ||
-            queue->pollAll) {
-            continue;
-        }
-        if (queue->numElems > (OVS_MAX_VPORT_ARRAY_SIZE >> 1) ||
-            portNo == OVS_DEFAULT_PORT_NO) {
-            queue->pollAll = TRUE;
-        } else {
-            elem = (POVS_EVENT_QUEUE_ELEM)OvsAllocateMemory(sizeof(*elem));
-            if (elem == NULL) {
-                /* Cannot queue the event; fall back to pollAll. */
-                queue->pollAll = TRUE;
-            } else {
-                elem->portNo = portNo;
-                elem->status = (status & queue->mask);
-                InsertTailList(&queue->elemList, &elem->link);
-                queue->numElems++;
-                OVS_LOG_INFO("Queue: %p, numElems: %d",
-                             queue, queue->numElems);
-            }
-        }
-        if (queue->pollAll) {
-            PLIST_ENTRY curr, next;
-            triggerPollAll = TRUE;
-            ovsNumPollAll++;
-            /* Individual entries are discarded once pollAll is set. */
-            LIST_FORALL_SAFE(&queue->elemList, curr, next) {
-                RemoveEntryList(curr);
-                elem = CONTAINING_RECORD(curr, OVS_EVENT_QUEUE_ELEM, link);
-                OvsFreeMemory(elem);
-            }
-            queue->numElems = 0;
-        }
-        if (queue->pendingIrp != NULL) {
-            PDRIVER_CANCEL cancelRoutine;
-            irp = queue->pendingIrp;
-            queue->pendingIrp = NULL;
-            cancelRoutine = IoSetCancelRoutine(irp, NULL);
-            /* Only queue the IRP when a cancel routine was still set. */
-            if (cancelRoutine) {
-                InsertTailList(&list, &irp->Tail.Overlay.ListEntry);
-            }
-        }
-    }
-    OvsReleaseEventQueueLock();
-    while (!IsListEmpty(&list)) {
-        entry = RemoveHeadList(&list);
-        irp = CONTAINING_RECORD(entry, IRP, Tail.Overlay.ListEntry);
-        OVS_LOG_INFO("Wakeup thread with IRP: %p", irp);
-        OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
-    }
-    OVS_LOG_TRACE("Exit: triggered pollAll: %s",
-                  (triggerPollAll ? "TRUE" : "FALSE"));
-}
-
-
-/*
- * --------------------------------------------------------------------------
- * Subscribe for, or unsubscribe from, event notification.
- *
- * Results:
- *     STATUS_SUCCESS for valid request and enough resource.
- *     STATUS_NO_MEMORY for queue allocation failure
- *     STATUS_INVALID_PARAMETER for invalid request, an unknown open
- *     instance, or an attempt to change the mask of an existing subscription
- *
- * Side effects:
- *     On subscribe, an event queue is created for the current open instance.
- *     On unsubscribe, the queue is removed and freed and a pending wait IRP,
- *     if any, is completed.
- * --------------------------------------------------------------------------
- */
-NTSTATUS
-OvsSubscribeEventIoctl(PFILE_OBJECT fileObject,
-                       PVOID inputBuffer,
-                       UINT32 inputLength)
-{
-    POVS_EVENT_SUBSCRIBE request = (POVS_EVENT_SUBSCRIBE)inputBuffer;
-    NTSTATUS status = STATUS_SUCCESS;
-    POVS_OPEN_INSTANCE instance;
-    POVS_EVENT_QUEUE queue = NULL;
-
-    OVS_LOG_TRACE("Enter: fileObject: %p, inputLength: %d", fileObject,
-                  inputLength);
-
-    if (inputLength < sizeof (OVS_EVENT_SUBSCRIBE) ||
-        (request->mask & OVS_EVENT_MASK_ALL) == 0) {
-        OVS_LOG_TRACE("Exit: subscribe failed with invalid request.");
-        return STATUS_INVALID_PARAMETER;
-    }
-
-    OvsAcquireEventQueueLock();
-
-    instance = OvsGetOpenInstance(fileObject, request->dpNo);
-
-    if (instance == NULL) {
-        status = STATUS_INVALID_PARAMETER;
-        OVS_LOG_WARN("can not find open instance");
-        goto done_event_subscribe;
-    }
-
-    /*
-     * XXX for now, we don't allow change mask.
-     */
-    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
-    if (request->subscribe && queue) {
-        /* Already subscribed: success only when the mask is unchanged. */
-        if (queue->mask != request->mask) {
-            status = STATUS_INVALID_PARAMETER;
-            OVS_LOG_WARN("Can not chnage mask when the queue is subscribed");
-        }
-        goto done_event_subscribe;
-    } else if (!request->subscribe && queue == NULL) {
-        /* Nothing to unsubscribe. */
-        status = STATUS_SUCCESS;
-        goto done_event_subscribe;
-    }
-
-    if (request->subscribe) {
-        queue = (POVS_EVENT_QUEUE)OvsAllocateMemory(sizeof (OVS_EVENT_QUEUE));
-        if (queue == NULL) {
-            status = STATUS_NO_MEMORY;
-            OVS_LOG_WARN("Fail to allocate event queue");
-            goto done_event_subscribe;
-        }
-        InitializeListHead(&queue->elemList);
-        queue->mask = request->mask;
-        queue->pendingIrp = NULL;
-        queue->numElems = 0;
-        queue->pollAll = TRUE; /* always poll all in the beginning */
-        InsertHeadList(&ovsEventQueue, &queue->queueLink);
-        ovsNumEventQueue++;
-        instance->eventQueue = queue;
-        queue->instance = instance;
-    } else {
-        queue = (POVS_EVENT_QUEUE)instance->eventQueue;
-        RemoveEntryList(&queue->queueLink);
-        ovsNumEventQueue--;
-        instance->eventQueue = NULL;
-    }
-done_event_subscribe:
-    if (!request->subscribe && queue) {
-        POVS_EVENT_QUEUE_ELEM elem;
-        PLIST_ENTRY link, next;
-        PIRP irp = NULL;
-        if (queue->pendingIrp) {
-            PDRIVER_CANCEL cancelRoutine;
-            irp = queue->pendingIrp;
-            queue->pendingIrp = NULL;
-            cancelRoutine = IoSetCancelRoutine(irp, NULL);
-            if (cancelRoutine == NULL) {
-                /* No cancel routine was set any more; do not complete it. */
-                irp = NULL;
-            }
-        }
-        OvsReleaseEventQueueLock();
-        if (irp) {
-            /*
-             * Complete the saved IRP; queue->pendingIrp was cleared above
-             * and must not be used here.
-             */
-            OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS);
-        }
-        LIST_FORALL_SAFE(&queue->elemList, link, next) {
-            elem = CONTAINING_RECORD(link, OVS_EVENT_QUEUE_ELEM, link);
-            OvsFreeMemory(elem);
-        }
-        OvsFreeMemory(queue);
-    } else {
-        OvsReleaseEventQueueLock();
-    }
-    OVS_LOG_TRACE("Exit: subscribe event with status: %#x.", status);
-    return status;
-}
-
-/*
- * --------------------------------------------------------------------------
- * Poll event queued in the
event queue. always synchronous.
- *
- * Results:
- *     STATUS_SUCCESS for valid request
- *     STATUS_BUFFER_TOO_SMALL if outputBuffer is too small.
- *     STATUS_INVALID_PARAMETER for invalid request, an unknown open
- *     instance, or an open instance without an event subscription
- *
- * Side effects:
- *     Event will be removed from event queue.
- * --------------------------------------------------------------------------
- */
-NTSTATUS
-OvsPollEventIoctl(PFILE_OBJECT fileObject,
-                  PVOID inputBuffer,
-                  UINT32 inputLength,
-                  PVOID outputBuffer,
-                  UINT32 outputLength,
-                  UINT32 *replyLen)
-{
-    POVS_EVENT_POLL poll;
-    POVS_EVENT_STATUS eventStatus;
-    POVS_EVENT_ENTRY entry;
-    POVS_EVENT_QUEUE queue;
-    POVS_EVENT_QUEUE_ELEM elem;
-    POVS_OPEN_INSTANCE instance;
-    UINT32 numEntry, i;
-
-    OVS_LOG_TRACE("Enter: inputLength:%d, outputLength: %d",
-                  inputLength, outputLength);
-
-    ASSERT(replyLen);
-    if (inputLength < sizeof (OVS_EVENT_POLL)) {
-        OVS_LOG_TRACE("Exit: input buffer too small");
-        return STATUS_INVALID_PARAMETER;
-    }
-    /* The output buffer must hold the status header plus one entry. */
-    *replyLen = sizeof (OVS_EVENT_STATUS) + sizeof (OVS_EVENT_ENTRY);
-    if (outputLength < *replyLen) {
-        OVS_LOG_TRACE("Exit: output buffer too small");
-        return STATUS_BUFFER_TOO_SMALL;
-    }
-    poll = (POVS_EVENT_POLL)inputBuffer;
-
-    OvsAcquireEventQueueLock();
-    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
-    if (instance == NULL) {
-        OvsReleaseEventQueueLock();
-        *replyLen = 0;
-        OVS_LOG_TRACE("Exit: can not find Open instance");
-        return STATUS_INVALID_PARAMETER;
-    }
-
-    /* eventQueue is NULL when the open instance has no subscription. */
-    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
-    if (queue == NULL) {
-        OvsReleaseEventQueueLock();
-        *replyLen = 0;
-        OVS_LOG_TRACE("Exit: Event queue does not exist");
-        return STATUS_INVALID_PARAMETER;
-    }
-
-    eventStatus = (POVS_EVENT_STATUS)outputBuffer;
-    numEntry =
-        (outputLength - sizeof (OVS_EVENT_STATUS)) / sizeof (OVS_EVENT_ENTRY);
-    if (queue->pollAll) {
-        /* Report one default entry and clear the pollAll state. */
-        eventStatus->numberEntries = 1;
-        numEntry = 1;
-        entry = &eventStatus->eventEntries[0];
-        entry->portNo = OVS_DEFAULT_PORT_NO;
-        entry->status = OVS_DEFAULT_EVENT_STATUS;
-        queue->pollAll = FALSE;
-        goto event_poll_done;
-    }
-    numEntry = MIN(numEntry, queue->numElems);
-    eventStatus->numberEntries = numEntry;
-
-    for (i = 0; i < numEntry; i++) {
-        elem = (POVS_EVENT_QUEUE_ELEM)RemoveHeadList(&queue->elemList);
-        entry = &eventStatus->eventEntries[i];
-        entry->portNo = elem->portNo;
-        entry->status = elem->status;
-        OvsFreeMemory(elem);
-        queue->numElems--;
-    }
-event_poll_done:
-    OvsReleaseEventQueueLock();
-    *replyLen = sizeof (OVS_EVENT_STATUS) +
-        numEntry * sizeof (OVS_EVENT_ENTRY);
-    OVS_LOG_TRACE("Exit: numEventPolled: %d", numEntry);
-    return STATUS_SUCCESS;
-}
-
-
-/*
- * --------------------------------------------------------------------------
- * Cancel wait IRP for event
- *
- * Please note, when this routine is called, it is always guaranteed that
- * IRP is valid.
- *
- * Side effects: Pending IRP is completed.
- * --------------------------------------------------------------------------
- */
-VOID
-OvsCancelIrp(PDEVICE_OBJECT deviceObject,
-             PIRP irp)
-{
-    PIO_STACK_LOCATION irpSp;
-    PFILE_OBJECT fileObject;
-    POVS_EVENT_QUEUE queue;
-    POVS_OPEN_INSTANCE instance;
-
-    UNREFERENCED_PARAMETER(deviceObject);
-
-    IoReleaseCancelSpinLock(irp->CancelIrql);
-
-    irpSp = IoGetCurrentIrpStackLocation(irp);
-    fileObject = irpSp->FileObject;
-
-    if (fileObject == NULL) {
-        goto done;
-    }
-    OvsAcquireEventQueueLock();
-    instance = (POVS_OPEN_INSTANCE)fileObject->FsContext;
-    if (instance == NULL || instance->eventQueue == NULL) {
-        OvsReleaseEventQueueLock();
-        goto done;
-    }
-    queue = instance->eventQueue;
-    /* Detach the IRP from the queue only if it is still the pending one. */
-    if (queue->pendingIrp == irp) {
-        queue->pendingIrp = NULL;
-    }
-    OvsReleaseEventQueueLock();
-done:
-    OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
-}
-
-/*
- * --------------------------------------------------------------------------
- * Wait for event.
- *
- * Results:
- *     STATUS_SUCCESS for valid request
- *     STATUS_DEVICE_BUSY if already in waiting state.
- *     STATUS_INVALID_PARAMETER for invalid request
- *     STATUS_PENDING wait for event
- *
- * Side effects:
- *     May return pending to IO manager.
- * --------------------------------------------------------------------------
- */
-NTSTATUS
-OvsWaitEventIoctl(PIRP irp,
-                  PFILE_OBJECT fileObject,
-                  PVOID inputBuffer,
-                  UINT32 inputLength)
-{
-    NTSTATUS status;
-    POVS_EVENT_POLL poll;
-    POVS_EVENT_QUEUE queue;
-    POVS_OPEN_INSTANCE instance;
-    BOOLEAN cancelled = FALSE;
-    OVS_LOG_TRACE("Enter: inputLength: %u", inputLength);
-
-    if (inputLength < sizeof (OVS_EVENT_POLL)) {
-        OVS_LOG_TRACE("Exit: Invalid input buffer length.");
-        return STATUS_INVALID_PARAMETER;
-    }
-    poll = (POVS_EVENT_POLL)inputBuffer;
-
-    OvsAcquireEventQueueLock();
-
-    instance = OvsGetOpenInstance(fileObject, poll->dpNo);
-    if (instance == NULL) {
-        OvsReleaseEventQueueLock();
-        OVS_LOG_TRACE("Exit: Can not find open instance, dpNo: %d", poll->dpNo);
-        return STATUS_INVALID_PARAMETER;
-    }
-
-    /* eventQueue is NULL when the open instance has no subscription. */
-    queue = (POVS_EVENT_QUEUE)instance->eventQueue;
-    if (queue == NULL) {
-        OvsReleaseEventQueueLock();
-        OVS_LOG_TRACE("Exit: Event queue does not exist");
-        return STATUS_INVALID_PARAMETER;
-    }
-    if (queue->pendingIrp) {
-        OvsReleaseEventQueueLock();
-        OVS_LOG_TRACE("Exit: Event queue already in pending state");
-        return STATUS_DEVICE_BUSY;
-    }
-
-    /* Return at once if events are queued or pollAll is set; else pend. */
-    status = (queue->numElems != 0 || queue->pollAll) ?
-                        STATUS_SUCCESS : STATUS_PENDING;
-    if (status == STATUS_PENDING) {
-        PDRIVER_CANCEL cancelRoutine;
-        IoMarkIrpPending(irp);
-        IoSetCancelRoutine(irp, OvsCancelIrp);
-        if (irp->Cancel) {
-            /*
-             * The IRP is already flagged as cancelled. Complete it below
-             * only if the cancel routine was still set when cleared here.
-             */
-            cancelRoutine = IoSetCancelRoutine(irp, NULL);
-            if (cancelRoutine) {
-                cancelled = TRUE;
-            }
-        } else {
-            queue->pendingIrp = irp;
-        }
-    }
-    OvsReleaseEventQueueLock();
-    if (cancelled) {
-        OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED);
-        OVS_LOG_INFO("Event IRP cancelled: %p", irp);
-    }
-    OVS_LOG_TRACE("Exit: return status: %#x", status);
-    return status;
-}
diff --git a/datapath-windows/ovsext/OvsEvent.h b/datapath-windows/ovsext/OvsEvent.h
deleted file mode 100644
index 4ae2ba29d..000000000
--- a/datapath-windows/ovsext/OvsEvent.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2014 VMware, Inc.
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_EVENT_H_ -#define __OVS_EVENT_H_ 1 - -typedef struct _OVS_EVENT_QUEUE_ELEM { - LIST_ENTRY link; - UINT32 portNo; - UINT32 status; -} OVS_EVENT_QUEUE_ELEM, *POVS_EVENT_QUEUE_ELEM; - -typedef struct _OVS_EVENT_QUEUE { - LIST_ENTRY queueLink; - LIST_ENTRY elemList; - UINT32 mask; - UINT16 numElems; - BOOLEAN pollAll; - PIRP pendingIrp; - PVOID instance; -} OVS_EVENT_QUEUE, *POVS_EVENT_QUEUE; - -NTSTATUS OvsInitEventQueue(VOID); -VOID OvsCleanupEventQueue(VOID); - -struct _OVS_OPEN_INSTANCE; - -VOID OvsCleanupEvent(struct _OVS_OPEN_INSTANCE *instance); -VOID OvsPostEvent(UINT32 portNo, UINT32 status); -NTSTATUS OvsSubscribeEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, - UINT32 inputLength); -NTSTATUS OvsPollEventIoctl(PFILE_OBJECT fileObject, PVOID inputBuffer, - UINT32 inputLength, PVOID outputBuffer, - UINT32 outputLength, UINT32 *replyLen); -NTSTATUS OvsWaitEventIoctl(PIRP irp, PFILE_OBJECT fileObject, - PVOID inputBuffer, UINT32 inputLength); -#endif /* __OVS_EVENT_H_ */ diff --git a/datapath-windows/ovsext/OvsFlow.c b/datapath-windows/ovsext/OvsFlow.c deleted file mode 100644 index 4e31f17c3..000000000 --- a/datapath-windows/ovsext/OvsFlow.c +++ /dev/null @@ -1,978 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" -#include "OvsNetProto.h" -#include "OvsUtil.h" -#include "OvsJhash.h" -#include "OvsFlow.h" -#include "OvsPacketParser.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_FLOW -#include "OvsDebug.h" - -#pragma warning( push ) -#pragma warning( disable:4127 ) - -extern PNDIS_SPIN_LOCK gOvsCtrlLock; -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -extern UINT64 ovsTimeIncrementPerTick; - -static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags, - UINT32 getActionsLen, OvsFlowInfo *info); -static NTSTATUS HandleFlowPut(OvsFlowPut *put, - OVS_DATAPATH *datapath, - struct OvsFlowStats *stats); -static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put, - UINT64 hash); -static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow); -static VOID DeleteAllFlows(OVS_DATAPATH *datapath); -static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow); -static VOID FreeFlow(OvsFlow *flow); -static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB); - -#define OVS_FLOW_TABLE_SIZE 2048 -#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1) -#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK) - -/* - *---------------------------------------------------------------------------- - * OvsDeleteFlowTable -- - * Results: - * NDIS_STATUS_SUCCESS always. 
- *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsDeleteFlowTable(OVS_DATAPATH *datapath) -{ - if (datapath == NULL || datapath->flowTable == NULL) { - return NDIS_STATUS_SUCCESS; - } - - DeleteAllFlows(datapath); - OvsFreeMemory(datapath->flowTable); - datapath->flowTable = NULL; - NdisFreeRWLock(datapath->lock); - - return NDIS_STATUS_SUCCESS; -} - -/* - *---------------------------------------------------------------------------- - * OvsAllocateFlowTable -- - * Results: - * NDIS_STATUS_SUCCESS on success. - * NDIS_STATUS_RESOURCES if memory couldn't be allocated - *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsAllocateFlowTable(OVS_DATAPATH *datapath, - POVS_SWITCH_CONTEXT switchContext) -{ - PLIST_ENTRY bucket; - int i; - - datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE * - sizeof (LIST_ENTRY)); - if (!datapath->flowTable) { - return NDIS_STATUS_RESOURCES; - } - for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { - bucket = &(datapath->flowTable[i]); - InitializeListHead(bucket); - } - datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle); - - return NDIS_STATUS_SUCCESS; -} - - -/* - *---------------------------------------------------------------------------- - * GetStartAddrNBL -- - * Get the virtual address of the frame. - * - * Results: - * Virtual address of the frame. - *---------------------------------------------------------------------------- - */ -static __inline VOID * -GetStartAddrNBL(const NET_BUFFER_LIST *_pNB) -{ - PMDL curMdl; - PUINT8 curBuffer; - PEthHdr curHeader; - - ASSERT(_pNB); - - // Ethernet Header is a guaranteed safe access. 
- curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl; - curBuffer = MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); - if (!curBuffer) { - return NULL; - } - - curHeader = (PEthHdr) - (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset); - - return (VOID *) curHeader; -} - -VOID -OvsFlowUsed(OvsFlow *flow, - const NET_BUFFER_LIST *packet, - const POVS_PACKET_HDR_INFO layers) -{ - LARGE_INTEGER tickCount; - - KeQueryTickCount(&tickCount); - flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick; - flow->packetCount++; - flow->byteCount += OvsPacketLenNBL(packet); - flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers); -} - - -VOID -DeleteAllFlows(OVS_DATAPATH *datapath) -{ - INT i; - PLIST_ENTRY bucket; - - for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) { - PLIST_ENTRY next; - bucket = &(datapath->flowTable[i]); - while (!IsListEmpty(bucket)) { - OvsFlow *flow; - next = bucket->Flink; - flow = CONTAINING_RECORD(next, OvsFlow, ListEntry); - RemoveFlow(datapath, &flow); - } - } -} - -/* - *---------------------------------------------------------------------------- - * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and - * 'ofp_in_port'. - * - * Initializes 'packet' header pointers as follows: - * - * - packet->l2 to the start of the Ethernet header. - * - * - packet->l3 to just past the Ethernet header, or just past the - * vlan_header if one is present, to the first byte of the payload of the - * Ethernet frame. - * - * - packet->l4 to just past the IPv4 header, if one is present and has a - * correct length, and otherwise NULL. - * - * - packet->l7 to just past the TCP or UDP or ICMP header, if one is - * present and has a correct length, and otherwise NULL. - * - * Returns NDIS_STATUS_SUCCESS normally. Fails only if packet data cannot be accessed - * (e.g. if Pkt_CopyBytesOut() returns an error). 
- *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsExtractFlow(const NET_BUFFER_LIST *packet, - UINT32 inPort, - OvsFlowKey *flow, - POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey) -{ - struct Eth_Header *eth; - UINT8 offset = 0; - PVOID vlanTagValue; - - layers->value = 0; - - if (tunKey) { - ASSERT(tunKey->dst != 0); - RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey); - flow->l2.offset = 0; - } else { - flow->tunKey.dst = 0; - flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE; - } - - flow->l2.inPort = inPort; - - if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) { - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset; - return NDIS_STATUS_SUCCESS; - } - - /* Link layer. */ - eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet); - memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH); - memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH); - - /* - * vlan_tci. - */ - vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo); - if (vlanTagValue) { - PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag = - (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue; - flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI | - (vlanTag->TagHeader.UserPriority << 13)); - } else { - if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) { - Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)ð->dix.typeNBO; - flow->l2.vlanTci = ((UINT16)tag->priority << 13) | - OVSWIN_VLAN_CFI | - ((UINT16)tag->vidHi << 8) | tag->vidLo; - offset = sizeof (Eth_802_1pq_Tag); - } else { - flow->l2.vlanTci = 0; - } - /* - * XXX - * Please note after this point, src mac and dst mac should - * not be accessed through eth - */ - eth = (Eth_Header *)((UINT8 *)eth + offset); - } - - /* - * dl_type. - * - * XXX assume that at least the first - * 12 bytes of received packets are mapped. This code has the stronger - * assumption that at least the first 22 bytes of 'packet' is mapped (if my - * arithmetic is right). 
- */ - if (ETH_TYPENOT8023(eth->dix.typeNBO)) { - flow->l2.dlType = eth->dix.typeNBO; - layers->l3Offset = ETH_HEADER_LEN_DIX + offset; - } else if (OvsPacketLenNBL(packet) >= ETH_HEADER_LEN_802_3 && - eth->e802_3.llc.dsap == 0xaa && - eth->e802_3.llc.ssap == 0xaa && - eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME && - eth->e802_3.snap.snapOrg[0] == 0x00 && - eth->e802_3.snap.snapOrg[1] == 0x00 && - eth->e802_3.snap.snapOrg[2] == 0x00) { - flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO; - layers->l3Offset = ETH_HEADER_LEN_802_3 + offset; - } else { - flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE); - layers->l3Offset = ETH_HEADER_LEN_DIX + offset; - } - - flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset; - /* Network layer. */ - if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) { - struct IPHdr ip_storage; - const struct IPHdr *nh; - IpKey *ipKey = &flow->ipKey; - - flow->l2.keyLen += OVS_IP_KEY_SIZE; - layers->isIPv4 = 1; - nh = OvsGetIp(packet, layers->l3Offset, &ip_storage); - if (nh) { - layers->l4Offset = layers->l3Offset + nh->ihl * 4; - - ipKey->nwSrc = nh->saddr; - ipKey->nwDst = nh->daddr; - ipKey->nwProto = nh->protocol; - - ipKey->nwTos = nh->tos; - if (nh->frag_off & htons(IP_MF | IP_OFFSET)) { - ipKey->nwFrag = OVSWIN_NW_FRAG_ANY; - if (nh->frag_off & htons(IP_OFFSET)) { - ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER; - } - } else { - ipKey->nwFrag = 0; - } - - ipKey->nwTtl = nh->ttl; - ipKey->l4.tpSrc = 0; - ipKey->l4.tpDst = 0; - - if (!(nh->frag_off & htons(IP_OFFSET))) { - if (ipKey->nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &ipKey->l4, layers); - } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &ipKey->l4, layers); - } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) { - ICMPHdr icmpStorage; - const ICMPHdr *icmp; - - icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage); - if (icmp) { - ipKey->l4.tpSrc = htons(icmp->type); - ipKey->l4.tpDst = htons(icmp->code); - layers->l7Offset = 
layers->l4Offset + sizeof *icmp; - } - } - } - } else { - ((UINT64 *)ipKey)[0] = 0; - ((UINT64 *)ipKey)[1] = 0; - } - } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) { - NDIS_STATUS status; - flow->l2.keyLen += OVS_IPV6_KEY_SIZE; - status = OvsParseIPv6(packet, flow, layers); - if (status != NDIS_STATUS_SUCCESS) { - memset(&flow->ipv6Key, 0, sizeof (Ipv6Key)); - return status; - } - layers->isIPv6 = 1; - flow->ipv6Key.l4.tpSrc = 0; - flow->ipv6Key.l4.tpDst = 0; - flow->ipv6Key.pad = 0; - - if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) { - OvsParseTcp(packet, &(flow->ipv6Key.l4), layers); - } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) { - OvsParseUdp(packet, &(flow->ipv6Key.l4), layers); - } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) { - OvsParseIcmpV6(packet, flow, layers); - flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE); - } - } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) { - EtherArp arpStorage; - const EtherArp *arp; - ArpKey *arpKey = &flow->arpKey; - ((UINT64 *)arpKey)[0] = 0; - ((UINT64 *)arpKey)[1] = 0; - ((UINT64 *)arpKey)[2] = 0; - flow->l2.keyLen += OVS_ARP_KEY_SIZE; - arp = OvsGetArp(packet, layers->l3Offset, &arpStorage); - if (arp && arp->ea_hdr.ar_hrd == htons(1) && - arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) && - arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH && - arp->ea_hdr.ar_pln == 4) { - /* We only match on the lower 8 bits of the opcode. 
*/ - if (ntohs(arp->ea_hdr.ar_op) <= 0xff) { - arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op); - } - if (arpKey->nwProto == ARPOP_REQUEST - || arpKey->nwProto == ARPOP_REPLY) { - memcpy(&arpKey->nwSrc, arp->arp_spa, 4); - memcpy(&arpKey->nwDst, arp->arp_tpa, 4); - memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH); - memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH); - } - } - } - - return NDIS_STATUS_SUCCESS; -} - -__inline BOOLEAN -FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size) -{ - UINT32 i; - ASSERT((size & 0x7) == 0); - ASSERT(((UINT64)src & 0x7) == 0); - ASSERT(((UINT64)dst & 0x7) == 0); - for (i = 0; i < (size >> 3); i++) { - if (src[i] != dst[i]) { - return FALSE; - } - } - return TRUE; -} - - -/* - * ---------------------------------------------------------------------------- - * AddFlow -- - * Add a flow to flow table. - * - * Results: - * NDIS_STATUS_SUCCESS if no same flow in the flow table. - * ---------------------------------------------------------------------------- - */ -NTSTATUS -AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow) -{ - PLIST_ENTRY head; - - if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) { - return STATUS_INVALID_HANDLE; - } - - head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]); - /* - * We need fence here to make sure flow's nextPtr is updated before - * head->nextPtr is updated. 
- */ - KeMemoryBarrier(); - - //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql); - InsertTailList(head, &flow->ListEntry); - //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql); - - datapath->nFlows++; - - return STATUS_SUCCESS; -} - - -/* ---------------------------------------------------------------------------- - * RemoveFlow -- - * Remove a flow from flow table, and added to wait list - * ---------------------------------------------------------------------------- - */ -VOID -RemoveFlow(OVS_DATAPATH *datapath, - OvsFlow **flow) -{ - OvsFlow *f = *flow; - *flow = NULL; - UNREFERENCED_PARAMETER(datapath); - - ASSERT(datapath->nFlows); - datapath->nFlows--; - // Remove the flow from queue - RemoveEntryList(&f->ListEntry); - FreeFlow(f); -} - - -/* - * ---------------------------------------------------------------------------- - * OvsLookupFlow -- - * - * Find flow from flow table based on flow key. - * Caller should either hold portset handle or should - * have a flowRef in datapath or Acquired datapath. - * - * Results: - * Flow pointer if lookup successful. - * NULL if not exists. 
- * ---------------------------------------------------------------------------- - */ -OvsFlow * -OvsLookupFlow(OVS_DATAPATH *datapath, - const OvsFlowKey *key, - UINT64 *hash, - BOOLEAN hashValid) -{ - PLIST_ENTRY link, head; - UINT16 offset = key->l2.offset; - UINT16 size = key->l2.keyLen; - UINT8 *start; - - ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); - ASSERT(!key->tunKey.dst || offset == 0); - - start = (UINT8 *)key + offset; - - if (!hashValid) { - *hash = OvsJhashBytes(start, size, 0); - } - - head = &datapath->flowTable[HASH_BUCKET(*hash)]; - link = head->Flink; - while (link != head) { - OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry); - - if (flow->hash == *hash && - flow->key.l2.val == key->l2.val && - FlowEqual((UINT64 *)((uint8 *)&flow->key + offset), - (UINT64 *)start, size)) { - return flow; - } - link = link->Flink; - } - return NULL; -} - - -/* - * ---------------------------------------------------------------------------- - * OvsHashFlow -- - * Calculate the hash for the given flow key. - * ---------------------------------------------------------------------------- - */ -UINT64 -OvsHashFlow(const OvsFlowKey *key) -{ - UINT16 offset = key->l2.offset; - UINT16 size = key->l2.keyLen; - UINT8 *start; - - ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey)); - ASSERT(!key->tunKey.dst || offset == 0); - start = (UINT8 *)key + offset; - return OvsJhashBytes(start, size, 0); -} - - -/* - * ---------------------------------------------------------------------------- - * FreeFlow -- - * Free a flow and its actions. 
- * ---------------------------------------------------------------------------- - */ -VOID -FreeFlow(OvsFlow *flow) -{ - ASSERT(flow); - OvsFreeMemory(flow); -} - -NTSTATUS -OvsDoDumpFlows(OvsFlowDumpInput *dumpInput, - OvsFlowDumpOutput *dumpOutput, - UINT32 *replyLen) -{ - UINT32 dpNo; - OVS_DATAPATH *datapath = NULL; - OvsFlow *flow; - PLIST_ENTRY node, head; - UINT32 column = 0; - UINT32 rowIndex, columnIndex; - LOCK_STATE_EX dpLockState; - NTSTATUS status = STATUS_SUCCESS; - BOOLEAN findNextNonEmpty = FALSE; - - dpNo = dumpInput->dpNo; - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - status = STATUS_INVALID_PARAMETER; - goto unlock; - } - - rowIndex = dumpInput->position[0]; - if (rowIndex >= OVS_FLOW_TABLE_SIZE) { - dumpOutput->n = 0; - *replyLen = sizeof(*dumpOutput); - goto unlock; - } - - columnIndex = dumpInput->position[1]; - - datapath = &gOvsSwitchContext->datapath; - ASSERT(datapath); - OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); - - head = &datapath->flowTable[rowIndex]; - node = head->Flink; - - while (column < columnIndex) { - if (node == head) { - break; - } - node = node->Flink; - column++; - } - - if (node == head) { - findNextNonEmpty = TRUE; - columnIndex = 0; - } - - if (findNextNonEmpty) { - while (head == node) { - if (++rowIndex >= OVS_FLOW_TABLE_SIZE) { - dumpOutput->n = 0; - goto dp_unlock; - } - head = &datapath->flowTable[rowIndex]; - node = head->Flink; - } - } - - ASSERT(node != head); - ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE); - - flow = CONTAINING_RECORD(node, OvsFlow, ListEntry); - status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen, - &dumpOutput->flow); - - if (status == STATUS_BUFFER_TOO_SMALL) { - dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen; - *replyLen = sizeof(*dumpOutput); - } else { - dumpOutput->n = 1; //one flow reported. 
- *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen; - } - - dumpOutput->position[0] = rowIndex; - dumpOutput->position[1] = ++columnIndex; - -dp_unlock: - OvsReleaseDatapath(datapath, &dpLockState); - -unlock: - NdisReleaseSpinLock(gOvsCtrlLock); - return status; -} - -NTSTATUS -OvsDumpFlowIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer; - OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer; - - if (inputBuffer == NULL || outputBuffer == NULL) { - return STATUS_INVALID_PARAMETER; - } - - if ((inputLength != sizeof(OvsFlowDumpInput)) - || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen); -} - -static NTSTATUS -ReportFlowInfo(OvsFlow *flow, - UINT32 getFlags, - UINT32 getActionsLen, - OvsFlowInfo *info) -{ - NTSTATUS status = STATUS_SUCCESS; - - if (getFlags & FLOW_GET_KEY) { - // always copy the tunnel key part - RtlCopyMemory(&info->key, &flow->key, - flow->key.l2.keyLen + flow->key.l2.offset); - } - - if (getFlags & FLOW_GET_STATS) { - OvsFlowStats *stats = &info->stats; - stats->packetCount = flow->packetCount; - stats->byteCount = flow->byteCount; - stats->used = (UINT32)flow->used; - stats->tcpFlags = flow->tcpFlags; - } - - if (getFlags & FLOW_GET_ACTIONS) { - if (flow->actionsLen == 0) { - info->actionsLen = 0; - } else if (flow->actionsLen > getActionsLen) { - info->actionsLen = 0; - status = STATUS_BUFFER_TOO_SMALL; - } else { - RtlCopyMemory(info->actions, flow->actions, flow->actionsLen); - info->actionsLen = flow->actionsLen; - } - } - - return status; -} - -NTSTATUS -OvsPutFlowIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - NTSTATUS status = STATUS_SUCCESS; - OVS_DATAPATH *datapath = NULL; - struct OvsFlowStats 
stats; - ULONG actionsLen; - OvsFlowPut *put; - UINT32 dpNo; - LOCK_STATE_EX dpLockState; - - if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - if ((outputLength != sizeof(stats)) || (outputBuffer == NULL)) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - put = (OvsFlowPut *)inputBuffer; - if (put->actionsLen > 0) { - actionsLen = put->actionsLen; - } else { - actionsLen = 0; - } - if (inputLength != actionsLen + sizeof(*put)) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - dpNo = put->dpNo; - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - status = STATUS_INVALID_PARAMETER; - goto unlock; - } - - datapath = &gOvsSwitchContext->datapath; - ASSERT(datapath); - RtlZeroMemory(&stats, sizeof(stats)); - OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); - status = HandleFlowPut(put, datapath, &stats); - OvsReleaseDatapath(datapath, &dpLockState); - - if (status == STATUS_SUCCESS) { - // Copy stats to User mode app - NdisMoveMemory(outputBuffer, (PVOID)&stats, sizeof(stats)); - *replyLen = sizeof stats; - } - -unlock: - NdisReleaseSpinLock(gOvsCtrlLock); - return status; -} - - -/* Handles flow add, modify as well as delete */ -static NTSTATUS -HandleFlowPut(OvsFlowPut *put, - OVS_DATAPATH *datapath, - struct OvsFlowStats *stats) -{ - BOOLEAN mayCreate, mayModify, mayDelete; - OvsFlow *KernelFlow; - UINT64 hash; - NTSTATUS status = STATUS_SUCCESS; - - mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0; - mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0; - mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0; - - if ((mayCreate || mayModify) == mayDelete) { - return STATUS_INVALID_PARAMETER; - } - - KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE); - if (!KernelFlow) { - if (!mayCreate) { - return STATUS_INVALID_PARAMETER; - } - - status = OvsPrepareFlow(&KernelFlow, put, hash); - if (status != STATUS_SUCCESS) { - 
FreeFlow(KernelFlow); - return STATUS_UNSUCCESSFUL; - } - - status = AddFlow(datapath, KernelFlow); - if (status != STATUS_SUCCESS) { - FreeFlow(KernelFlow); - return STATUS_UNSUCCESSFUL; - } - - /* Validate the flow addition */ - { - UINT64 newHash; - OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash, - FALSE); - ASSERT(flow); - ASSERT(newHash == hash); - if (!flow || newHash != hash) { - return STATUS_UNSUCCESSFUL; - } - } - } else { - stats->packetCount = KernelFlow->packetCount; - stats->byteCount = KernelFlow->byteCount; - stats->tcpFlags = KernelFlow->tcpFlags; - stats->used = (UINT32)KernelFlow->used; - - if (mayModify) { - OvsFlow *newFlow; - status = OvsPrepareFlow(&newFlow, put, hash); - if (status != STATUS_SUCCESS) { - return STATUS_UNSUCCESSFUL; - } - - KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE); - if (KernelFlow) { - if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { - newFlow->packetCount = KernelFlow->packetCount; - newFlow->byteCount = KernelFlow->byteCount; - newFlow->tcpFlags = KernelFlow->tcpFlags; - } - RemoveFlow(datapath, &KernelFlow); - } else { - if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) { - newFlow->packetCount = stats->packetCount; - newFlow->byteCount = stats->byteCount; - newFlow->tcpFlags = stats->tcpFlags; - } - } - status = AddFlow(datapath, newFlow); - ASSERT(status == STATUS_SUCCESS); - - /* Validate the flow addition */ - { - UINT64 newHash; - OvsFlow *testflow = OvsLookupFlow(datapath, &put->key, - &newHash, FALSE); - ASSERT(testflow); - ASSERT(newHash == hash); - if (!testflow || newHash != hash) { - FreeFlow(newFlow); - return STATUS_UNSUCCESSFUL; - } - } - } else { - if (mayDelete) { - if (KernelFlow) { - RemoveFlow(datapath, &KernelFlow); - } - } else { - return STATUS_UNSUCCESSFUL; - } - } - } - return STATUS_SUCCESS; -} - -static NTSTATUS -OvsPrepareFlow(OvsFlow **flow, - const OvsFlowPut *put, - UINT64 hash) -{ - OvsFlow *localFlow = *flow; - NTSTATUS status = STATUS_SUCCESS; - - do { - *flow 
= localFlow = - OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen); - if (localFlow == NULL) { - status = STATUS_NO_MEMORY; - break; - } - - localFlow->key = put->key; - localFlow->actionsLen = put->actionsLen; - if (put->actionsLen) { - NdisMoveMemory((PUCHAR)localFlow->actions, put->actions, - put->actionsLen); - } - localFlow->userActionsLen = 0; // 0 indicate no conversion is made - localFlow->used = 0; - localFlow->packetCount = 0; - localFlow->byteCount = 0; - localFlow->tcpFlags = 0; - localFlow->hash = hash; - } while(FALSE); - - return status; -} - -NTSTATUS -OvsGetFlowIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - NTSTATUS status = STATUS_SUCCESS; - OVS_DATAPATH *datapath = NULL; - OvsFlow *flow; - UINT32 getFlags, getActionsLen; - OvsFlowGetInput *getInput; - OvsFlowGetOutput *getOutput; - UINT64 hash; - UINT32 dpNo; - LOCK_STATE_EX dpLockState; - - if (inputLength != sizeof(OvsFlowGetInput) - || inputBuffer == NULL) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - getInput = (OvsFlowGetInput *) inputBuffer; - getFlags = getInput->getFlags; - getActionsLen = getInput->actionsLen; - if (getInput->getFlags & FLOW_GET_KEY) { - return STATUS_INVALID_PARAMETER; - } - - if (outputBuffer == NULL - || outputLength != (sizeof *getOutput + - getInput->actionsLen)) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - dpNo = getInput->dpNo; - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - status = STATUS_INVALID_PARAMETER; - goto unlock; - } - - datapath = &gOvsSwitchContext->datapath; - ASSERT(datapath); - OvsAcquireDatapathRead(datapath, &dpLockState, FALSE); - flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE); - if (!flow) { - status = STATUS_INVALID_PARAMETER; - goto dp_unlock; - } - - // XXX: can be optimized to return only how much is written out - *replyLen = outputLength; - getOutput = (OvsFlowGetOutput *)outputBuffer; - 
ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info); - -dp_unlock: - OvsReleaseDatapath(datapath, &dpLockState); -unlock: - NdisReleaseSpinLock(gOvsCtrlLock); - return status; -} - -NTSTATUS -OvsFlushFlowIoctl(PVOID inputBuffer, - UINT32 inputLength) -{ - NTSTATUS status = STATUS_SUCCESS; - OVS_DATAPATH *datapath = NULL; - UINT32 dpNo; - LOCK_STATE_EX dpLockState; - - if (inputLength != sizeof(UINT32) || inputBuffer == NULL) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - dpNo = *(UINT32 *)inputBuffer; - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - status = STATUS_INVALID_PARAMETER; - goto unlock; - } - - datapath = &gOvsSwitchContext->datapath; - ASSERT(datapath); - OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE); - DeleteAllFlows(datapath); - OvsReleaseDatapath(datapath, &dpLockState); - -unlock: - NdisReleaseSpinLock(gOvsCtrlLock); - return status; -} - -#pragma warning( pop ) diff --git a/datapath-windows/ovsext/OvsFlow.h b/datapath-windows/ovsext/OvsFlow.h deleted file mode 100644 index fa29c6838..000000000 --- a/datapath-windows/ovsext/OvsFlow.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_FLOW_H_ -#define __OVS_FLOW_H_ 1 - -#include "precomp.h" -#include "OvsSwitch.h" -#include "OvsUser.h" -#include "OvsNetProto.h" - -typedef struct _OvsFlow { - LIST_ENTRY ListEntry; // In Datapath's flowTable. - OvsFlowKey key; - UINT64 hash; - UINT32 actionsLen; - UINT8 tcpFlags; - UINT64 used; - UINT64 packetCount; - UINT64 byteCount; - UINT32 userActionsLen; // used for flow query - UINT32 actionBufferLen; // used for flow reuse - NL_ATTR actions[1]; -} OvsFlow; - - -typedef struct _OvsLayers { - UINT32 l3Ofs; // IPv4, IPv6, ARP, or other L3 header. - UINT32 l4Ofs; // TCP, UDP, ICMP, ICMPv6, or other L4 header. - UINT32 l7Ofs; // L4 protocol's payload. -} OvsLayers; - -extern UINT64 ovsUserTimestampDelta; -extern UINT64 ovsTimeIncrementPerTick; - -NDIS_STATUS OvsDeleteFlowTable(OVS_DATAPATH *datapath); -NDIS_STATUS OvsAllocateFlowTable(OVS_DATAPATH *datapath, - POVS_SWITCH_CONTEXT switchContext); - -NDIS_STATUS OvsExtractFlow(const NET_BUFFER_LIST *pkt, UINT32 inPort, - OvsFlowKey *flow, POVS_PACKET_HDR_INFO layers, - OvsIPv4TunnelKey *tunKey); -OvsFlow *OvsLookupFlow(OVS_DATAPATH *datapath, const OvsFlowKey *key, - UINT64 *hash, BOOLEAN hashValid); -UINT64 OvsHashFlow(const OvsFlowKey *key); -VOID OvsFlowUsed(OvsFlow *flow, const NET_BUFFER_LIST *pkt, - const POVS_PACKET_HDR_INFO layers); - -NTSTATUS OvsDumpFlowIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsPutFlowIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsGetFlowIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsFlushFlowIoctl(PVOID inputBuffer, UINT32 inputLength); - -/* Flags for tunneling */ -#define OVS_TNL_F_DONT_FRAGMENT (1 << 0) -#define OVS_TNL_F_CSUM (1 << 1) -#define OVS_TNL_F_KEY (1 << 2) - -#endif /* __OVS_FLOW_H_ */ diff --git 
a/datapath-windows/ovsext/OvsIoctl.c b/datapath-windows/ovsext/OvsIoctl.c deleted file mode 100644 index ef4864dd8..000000000 --- a/datapath-windows/ovsext/OvsIoctl.c +++ /dev/null @@ -1,768 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface - * alive while we transition over to the netlink based interface. - * OVS_USE_NL_INTERFACE = 0 => legacy inteface to use with dpif-windows.c - * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c - */ -#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 0 - -#include "precomp.h" -#include "OvsIoctl.h" -#include "OvsJhash.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsPacketIO.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" -#include "OvsUser.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_DATAPATH -#include "OvsDebug.h" - -/* Handles to the device object for communication with userspace. */ -NDIS_HANDLE gOvsDeviceHandle; -PDEVICE_OBJECT gOvsDeviceObject; - -/* - * There seems to be a skew between the kernel's version of current time and - * the userspace's version of current time. The skew was seen to - * monotonically increase as well. 
- * - * In order to deal with the situation, we pass down the userspace's version - * of the timestamp to the kernel, and let the kernel calculate the delta. - */ -UINT64 ovsUserTimestampDelta; -UINT64 ovsTimeIncrementPerTick; - -_Dispatch_type_(IRP_MJ_CREATE) -_Dispatch_type_(IRP_MJ_CLOSE) -DRIVER_DISPATCH OvsOpenCloseDevice; - -_Dispatch_type_(IRP_MJ_CLEANUP) -DRIVER_DISPATCH OvsCleanupDevice; - -_Dispatch_type_(IRP_MJ_DEVICE_CONTROL) -DRIVER_DISPATCH OvsDeviceControl; - -#ifdef ALLOC_PRAGMA -#pragma alloc_text(INIT, OvsCreateDeviceObject) -#pragma alloc_text(PAGE, OvsOpenCloseDevice) -#pragma alloc_text(PAGE, OvsCleanupDevice) -#pragma alloc_text(PAGE, OvsDeviceControl) -#endif // ALLOC_PRAGMA - - -#define OVS_MAX_OPEN_INSTANCES 128 - -POVS_OPEN_INSTANCE ovsOpenInstanceArray[OVS_MAX_OPEN_INSTANCES]; -UINT32 ovsNumberOfOpenInstances; -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; - -NDIS_SPIN_LOCK ovsCtrlLockObj; -NDIS_SPIN_LOCK ovsFlowLockObj; -PNDIS_SPIN_LOCK gOvsCtrlLock; -PNDIS_SPIN_LOCK ovsFlowLock; - -VOID -OvsInitIoctl() -{ - gOvsCtrlLock = &ovsCtrlLockObj; - ovsFlowLock = &ovsFlowLockObj; - NdisAllocateSpinLock(ovsFlowLock); - NdisAllocateSpinLock(gOvsCtrlLock); -} - -VOID -OvsCleanupIoctl() -{ - if (ovsFlowLock) { - NdisFreeSpinLock(ovsFlowLock); - NdisFreeSpinLock(gOvsCtrlLock); - gOvsCtrlLock = NULL; - gOvsCtrlLock = NULL; - } -} - -VOID -OvsInit() -{ - OvsInitIoctl(); - OvsInitEventQueue(); - OvsUserInit(); -} - -VOID -OvsCleanup() -{ - OvsCleanupEventQueue(); - OvsCleanupIoctl(); - OvsUserCleanup(); -} - -VOID -OvsAcquireCtrlLock() -{ - NdisAcquireSpinLock(gOvsCtrlLock); -} -VOID -OvsReleaseCtrlLock() -{ - NdisReleaseSpinLock(gOvsCtrlLock); -} - - -/* - * -------------------------------------------------------------------------- - * Creates the communication device between user and kernel, and also - * initializes the data associated data structures. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - UNICODE_STRING deviceName; - UNICODE_STRING symbolicDeviceName; - PDRIVER_DISPATCH dispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1]; - NDIS_DEVICE_OBJECT_ATTRIBUTES deviceAttributes; - OVS_LOG_TRACE("ovsExtDriverHandle: %p", ovsExtDriverHandle); - - RtlZeroMemory(dispatchTable, - (IRP_MJ_MAXIMUM_FUNCTION + 1) * sizeof (PDRIVER_DISPATCH)); - dispatchTable[IRP_MJ_CREATE] = OvsOpenCloseDevice; - dispatchTable[IRP_MJ_CLOSE] = OvsOpenCloseDevice; - dispatchTable[IRP_MJ_CLEANUP] = OvsCleanupDevice; - dispatchTable[IRP_MJ_DEVICE_CONTROL] = OvsDeviceControl; - - NdisInitUnicodeString(&deviceName, OVS_NT_DEVICE_NAME); - NdisInitUnicodeString(&symbolicDeviceName, OVS_DOS_DEVICE_NAME); - - RtlZeroMemory(&deviceAttributes, sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); - - OVS_INIT_OBJECT_HEADER(&deviceAttributes.Header, - NDIS_OBJECT_TYPE_DEVICE_OBJECT_ATTRIBUTES, - NDIS_DEVICE_OBJECT_ATTRIBUTES_REVISION_1, - sizeof (NDIS_DEVICE_OBJECT_ATTRIBUTES)); - - deviceAttributes.DeviceName = &deviceName; - deviceAttributes.SymbolicName = &symbolicDeviceName; - deviceAttributes.MajorFunctions = dispatchTable; - deviceAttributes.ExtensionSize = sizeof (OVS_DEVICE_EXTENSION); - - status = NdisRegisterDeviceEx(ovsExtDriverHandle, - &deviceAttributes, - &gOvsDeviceObject, - &gOvsDeviceHandle); - if (status != NDIS_STATUS_SUCCESS) { - POVS_DEVICE_EXTENSION ovsExt = - (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(gOvsDeviceObject); - ASSERT(gOvsDeviceObject != NULL); - ASSERT(gOvsDeviceHandle != NULL); - - if (ovsExt) { - ovsExt->numberOpenInstance = 0; - } - } else { - /* Initialize the associated data structures. 
*/ - OvsInit(); - } - OVS_LOG_TRACE("DeviceObject: %p", gOvsDeviceObject); - return status; -} - - -VOID -OvsDeleteDeviceObject() -{ - if (gOvsDeviceHandle) { -#ifdef DBG - POVS_DEVICE_EXTENSION ovsExt = (POVS_DEVICE_EXTENSION) - NdisGetDeviceReservedExtension(gOvsDeviceObject); - if (ovsExt) { - ASSERT(ovsExt->numberOpenInstance == 0); - } -#endif - - ASSERT(gOvsDeviceObject); - NdisDeregisterDeviceEx(gOvsDeviceHandle); - gOvsDeviceHandle = NULL; - gOvsDeviceObject = NULL; - } - OvsCleanup(); -} - -POVS_OPEN_INSTANCE -OvsGetOpenInstance(PFILE_OBJECT fileObject, - UINT32 dpNo) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - ASSERT(instance); - ASSERT(instance->fileObject == fileObject); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - return NULL; - } - return instance; -} - - -POVS_OPEN_INSTANCE -OvsFindOpenInstance(PFILE_OBJECT fileObject) -{ - UINT32 i, j; - for (i = 0, j = 0; i < OVS_MAX_OPEN_INSTANCES && - j < ovsNumberOfOpenInstances; i++) { - if (ovsOpenInstanceArray[i]) { - if (ovsOpenInstanceArray[i]->fileObject == fileObject) { - return ovsOpenInstanceArray[i]; - } - j++; - } - } - return NULL; -} - -NTSTATUS -OvsAddOpenInstance(PFILE_OBJECT fileObject) -{ - POVS_OPEN_INSTANCE instance = - (POVS_OPEN_INSTANCE) OvsAllocateMemory(sizeof (OVS_OPEN_INSTANCE)); - UINT32 i; - - if (instance == NULL) { - return STATUS_NO_MEMORY; - } - OvsAcquireCtrlLock(); - ASSERT(OvsFindOpenInstance(fileObject) == NULL); - - if (ovsNumberOfOpenInstances >= OVS_MAX_OPEN_INSTANCES) { - OvsReleaseCtrlLock(); - OvsFreeMemory(instance); - return STATUS_INSUFFICIENT_RESOURCES; - } - RtlZeroMemory(instance, sizeof (OVS_OPEN_INSTANCE)); - - for (i = 0; i < OVS_MAX_OPEN_INSTANCES; i++) { - if (ovsOpenInstanceArray[i] == NULL) { - ovsOpenInstanceArray[i] = instance; - instance->cookie = i; - break; - } - } - ASSERT(i < OVS_MAX_OPEN_INSTANCES); - instance->fileObject = fileObject; - ASSERT(fileObject->FsContext == NULL); - 
fileObject->FsContext = instance; - OvsReleaseCtrlLock(); - return STATUS_SUCCESS; -} - -static VOID -OvsCleanupOpenInstance(PFILE_OBJECT fileObject) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - ASSERT(instance); - ASSERT(fileObject == instance->fileObject); - OvsCleanupEvent(instance); - OvsCleanupPacketQueue(instance); -} - -VOID -OvsRemoveOpenInstance(PFILE_OBJECT fileObject) -{ - POVS_OPEN_INSTANCE instance; - ASSERT(fileObject->FsContext); - instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - ASSERT(instance->cookie < OVS_MAX_OPEN_INSTANCES); - - OvsAcquireCtrlLock(); - fileObject->FsContext = NULL; - ASSERT(ovsOpenInstanceArray[instance->cookie] == instance); - ovsOpenInstanceArray[instance->cookie] = NULL; - OvsReleaseCtrlLock(); - ASSERT(instance->eventQueue == NULL); - ASSERT (instance->packetQueue == NULL); - OvsFreeMemory(instance); -} - -NTSTATUS -OvsCompleteIrpRequest(PIRP irp, - ULONG_PTR infoPtr, - NTSTATUS status) -{ - irp->IoStatus.Information = infoPtr; - irp->IoStatus.Status = status; - IoCompleteRequest(irp, IO_NO_INCREMENT); - return status; -} - - -NTSTATUS -OvsOpenCloseDevice(PDEVICE_OBJECT deviceObject, - PIRP irp) -{ - PIO_STACK_LOCATION irpSp; - NTSTATUS status = STATUS_SUCCESS; - PFILE_OBJECT fileObject; - POVS_DEVICE_EXTENSION ovsExt = - (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); - - ASSERT(deviceObject == gOvsDeviceObject); - ASSERT(ovsExt != NULL); - - irpSp = IoGetCurrentIrpStackLocation(irp); - fileObject = irpSp->FileObject; - OVS_LOG_TRACE("DeviceObject: %p, fileObject:%p, instance: %u", - deviceObject, fileObject, - ovsExt->numberOpenInstance); - - switch (irpSp->MajorFunction) { - case IRP_MJ_CREATE: - status = OvsAddOpenInstance(fileObject); - if (STATUS_SUCCESS == status) { - InterlockedIncrement((LONG volatile *)&ovsExt->numberOpenInstance); - } - break; - case IRP_MJ_CLOSE: - ASSERT(ovsExt->numberOpenInstance > 0); - OvsRemoveOpenInstance(fileObject); - 
InterlockedDecrement((LONG volatile *)&ovsExt->numberOpenInstance); - break; - default: - ASSERT(0); - } - return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); -} - -_Use_decl_annotations_ -NTSTATUS -OvsCleanupDevice(PDEVICE_OBJECT deviceObject, - PIRP irp) -{ - - PIO_STACK_LOCATION irpSp; - PFILE_OBJECT fileObject; - - NTSTATUS status = STATUS_SUCCESS; -#ifdef DBG - POVS_DEVICE_EXTENSION ovsExt = - (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); - if (ovsExt) { - ASSERT(ovsExt->numberOpenInstance > 0); - } -#else - UNREFERENCED_PARAMETER(deviceObject); -#endif - ASSERT(deviceObject == gOvsDeviceObject); - irpSp = IoGetCurrentIrpStackLocation(irp); - fileObject = irpSp->FileObject; - - ASSERT(irpSp->MajorFunction == IRP_MJ_CLEANUP); - - OvsCleanupOpenInstance(fileObject); - - return OvsCompleteIrpRequest(irp, (ULONG_PTR)0, status); -} - -/* - *---------------------------------------------------------------------------- - * OvsGetVersionIoctl -- - * - * On entry None - * On exit Driver version - * - * Result: - * STATUS_SUCCESS - * STATUS_BUFFER_TOO_SMALL - *---------------------------------------------------------------------------- - */ -NTSTATUS -OvsGetVersionIoctl(PVOID outputBuffer, - uint32 outputLength, - uint32 *replyLen) -{ - POVS_VERSION driverOut = (POVS_VERSION)outputBuffer; - - if (outputLength < sizeof (*driverOut)) { - return STATUS_BUFFER_TOO_SMALL; - } - *replyLen = sizeof (*driverOut); - driverOut->mjrDrvVer = OVS_DRIVER_MAJOR_VER; - driverOut->mnrDrvVer = OVS_DRIVER_MINOR_VER; - - return STATUS_SUCCESS; -} - - -/* - *---------------------------------------------------------------------------- - * OvsDpDumpIoctl -- - * Get All Datapath. For now, we only support one datapath. 
- * - * Result: - * STATUS_SUCCESS - * STATUS_BUFFER_TOO_SMALL - *---------------------------------------------------------------------------- - */ -NTSTATUS -OvsDpDumpIoctl(PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - *replyLen = sizeof (UINT32); - if (outputLength < sizeof (UINT32)) { - return STATUS_BUFFER_TOO_SMALL; - } - OvsAcquireCtrlLock(); - if (gOvsSwitchContext) { - *(UINT32 *)outputBuffer = gOvsSwitchContext->dpNo; - } else { - *replyLen = 0; - } - OvsReleaseCtrlLock(); - - return STATUS_SUCCESS; -} - - -/* - *---------------------------------------------------------------------------- - * OvsDpGetIoctl -- - * Given dpNo, get all datapath info as defined in OVS_DP_INFO. - * - * Result: - * STATUS_SUCCESS - * STATUS_BUFFER_TOO_SMALL - * STATUS_INVALID_PARAMETER - *---------------------------------------------------------------------------- - */ -NTSTATUS -OvsDpGetIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - UINT32 dpNo; - POVS_DP_INFO info; - OVS_DATAPATH *datapath; - - if (inputLength < sizeof (UINT32)) { - return STATUS_INVALID_PARAMETER; - } - - if (outputLength < sizeof (OVS_DP_INFO)) { - *replyLen = sizeof (OVS_DP_INFO); - return STATUS_BUFFER_TOO_SMALL; - } - - dpNo = *(UINT32 *)inputBuffer; - OvsAcquireCtrlLock(); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - OvsReleaseCtrlLock(); - return STATUS_INVALID_PARAMETER; - } - *replyLen = sizeof (OVS_DP_INFO); - RtlZeroMemory(outputBuffer, sizeof (OVS_DP_INFO)); - info = (POVS_DP_INFO)outputBuffer; - RtlCopyMemory(info->name, "ovs-system", sizeof ("ovs-system")); - datapath = &gOvsSwitchContext->datapath; - info->nMissed = datapath->misses; - info->nHit = datapath->hits; - info->nLost = datapath->lost; - info->nFlows = datapath->nFlows; - OvsReleaseCtrlLock(); - return STATUS_SUCCESS; -} - -NTSTATUS -OvsDeviceControl(PDEVICE_OBJECT deviceObject, - PIRP irp) -{ - - PIO_STACK_LOCATION 
irpSp; - NTSTATUS status = STATUS_SUCCESS; - PFILE_OBJECT fileObject; - PVOID inputBuffer; - PVOID outputBuffer; - UINT32 inputBufferLen, outputBufferLen, mdlBufferLen; - UINT32 code, replyLen = 0; -#ifdef DBG - POVS_DEVICE_EXTENSION ovsExt = - (POVS_DEVICE_EXTENSION)NdisGetDeviceReservedExtension(deviceObject); - ASSERT(deviceObject == gOvsDeviceObject); - ASSERT(ovsExt); - ASSERT(ovsExt->numberOpenInstance > 0); -#else - UNREFERENCED_PARAMETER(deviceObject); -#endif - - irpSp = IoGetCurrentIrpStackLocation(irp); - - - ASSERT(irpSp->MajorFunction == IRP_MJ_DEVICE_CONTROL); - ASSERT(irpSp->FileObject != NULL); - - fileObject = irpSp->FileObject; - code = irpSp->Parameters.DeviceIoControl.IoControlCode; - inputBufferLen = irpSp->Parameters.DeviceIoControl.InputBufferLength; - outputBufferLen = irpSp->Parameters.DeviceIoControl.OutputBufferLength; - /* - * In case of an IRP with METHOD_IN_DIRECT or METHOD_OUT_DIRECT, the size - * of the MDL is stored in Parameters.DeviceIoControl.OutputBufferLength. 
- */ - mdlBufferLen = outputBufferLen; - outputBuffer = inputBuffer = irp->AssociatedIrp.SystemBuffer; - - switch(code) { - case OVS_IOCTL_VERSION_GET: - status = OvsGetVersionIoctl(outputBuffer, outputBufferLen, - &replyLen); - break; - case OVS_IOCTL_DP_DUMP: - status = OvsDpDumpIoctl(outputBuffer, outputBufferLen, &replyLen); - break; - case OVS_IOCTL_DP_GET: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer == NULL) { - status = STATUS_INSUFFICIENT_RESOURCES; - } else { - status = OvsDpGetIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, &replyLen); - } - break; - case OVS_IOCTL_DP_SET: - status = STATUS_NOT_IMPLEMENTED; - break; - case OVS_IOCTL_VPORT_DUMP: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsDumpVportIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_VPORT_GET: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsGetVportIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_VPORT_SET: - status = STATUS_NOT_IMPLEMENTED; - break; - case OVS_IOCTL_VPORT_ADD: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsAddVportIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - status = 
STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_VPORT_DEL: - status = OvsDelVportIoctl(inputBuffer, inputBufferLen, - &replyLen); - break; - case OVS_IOCTL_VPORT_EXT_INFO: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsGetExtInfoIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - OVS_LOG_INFO("ExtInfo: fail to get outputBuffer address"); - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_FLOW_DUMP: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsDumpFlowIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_FLOW_GET: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsGetFlowIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_FLOW_PUT: - // XXX: This is not really working - mapping the input buffer - // XXX: inputBufferLen = mdlBufferLen; - // inputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - // NormalPagePriority); - status = OvsPutFlowIoctl(inputBuffer, inputBufferLen, - outputBuffer, outputBufferLen, - &replyLen); - break; - case OVS_IOCTL_FLOW_FLUSH: - status = OvsFlushFlowIoctl(inputBuffer, inputBufferLen); - break; - case OVS_IOCTL_QOS_QUEUE_DUMP: - case OVS_IOCTL_QOS_QUEUE_GET: - case OVS_IOCTL_QOS_QUEUE_SET: - status = STATUS_NOT_IMPLEMENTED; - break; - case OVS_IOCTL_DATAPATH_SUBSCRIBE: - 
status = OvsSubscribeDpIoctl(fileObject, inputBuffer, - inputBufferLen); - break; - case OVS_IOCTL_DATAPATH_READ: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer) { - status = OvsReadDpIoctl(fileObject, outputBuffer, - outputBufferLen, &replyLen); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - break; - case OVS_IOCTL_DATAPATH_OPERATE: - status = STATUS_NOT_IMPLEMENTED; - break; - case OVS_IOCTL_DATAPATH_EXECUTE: - // XXX: need to make the input direct - status = OvsExecuteDpIoctl(inputBuffer, inputBufferLen, - outputBufferLen); - break; - case OVS_IOCTL_DATAPATH_PURGE: - status = OvsPurgeDpIoctl(fileObject); - break; - case OVS_IOCTL_DATAPATH_WAIT: - status = OvsWaitDpIoctl(irp, fileObject); - break; - case OVS_IOCTL_EVENT_SUBSCRIBE: - status = OvsSubscribeEventIoctl(fileObject, inputBuffer, - inputBufferLen); - break; - case OVS_IOCTL_EVENT_POLL: - if (irp->MdlAddress == NULL) { - status = STATUS_INVALID_PARAMETER; - break; - } - outputBuffer = MmGetSystemAddressForMdlSafe(irp->MdlAddress, - NormalPagePriority); - if (outputBuffer == NULL) { - status = STATUS_INSUFFICIENT_RESOURCES; - } else { - status = OvsPollEventIoctl(fileObject, inputBuffer, - inputBufferLen, outputBuffer, - outputBufferLen, &replyLen); - } - break; - case OVS_IOCTL_EVENT_WAIT: - status = OvsWaitEventIoctl(irp, fileObject, - inputBuffer, inputBufferLen); - break; - case OVS_IOCTL_DP_TIMESTAMP_SET: - if (inputBufferLen != sizeof (ovsUserTimestampDelta)) { - status = STATUS_INFO_LENGTH_MISMATCH; - } else { - int64 currentUserTS = *(int64 *)inputBuffer; - LARGE_INTEGER tickCount; - - /* So many ticks since system booted. 
*/ - KeQueryTickCount(&tickCount); - ovsUserTimestampDelta = currentUserTS - - (tickCount.QuadPart * ovsTimeIncrementPerTick); - status = STATUS_SUCCESS; - } - break; - default: - status = STATUS_INVALID_DEVICE_REQUEST; - break; - } - - if (status == STATUS_PENDING) { - return status; - } else { - /* - * When the system-address-space mapping that is returned by - * MmGetSystemAddressForMdlSafe is no longer needed, it must be - * released. - * http://msdn.microsoft.com/en-us/library/windows/hardware/ff554559(v=vs.85).aspx - * - * We might have to release the MDL here. - */ - return OvsCompleteIrpRequest(irp, (ULONG_PTR)replyLen, status); - } -} - -#endif /* OVS_USE_NL_INTERFACE */ diff --git a/datapath-windows/ovsext/OvsIoctl.h b/datapath-windows/ovsext/OvsIoctl.h deleted file mode 100644 index 5487694c7..000000000 --- a/datapath-windows/ovsext/OvsIoctl.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * XXX: OVS_USE_NL_INTERFACE is being used to keep the legacy DPIF interface - * alive while we transition over to the netlink based interface. 
- * OVS_USE_NL_INTERFACE = 0 => legacy inteface to use with dpif-windows.c - * OVS_USE_NL_INTERFACE = 1 => netlink inteface to use with ported dpif-linux.c - */ -#if defined OVS_USE_NL_INTERFACE && OVS_USE_NL_INTERFACE == 0 - -#ifndef __OVS_IOCTL_H_ -#define __OVS_IOCTL_H_ 1 - -typedef struct _OVS_DEVICE_EXTENSION { - INT numberOpenInstance; -} OVS_DEVICE_EXTENSION, *POVS_DEVICE_EXTENSION; - - -typedef struct _OVS_OPEN_INSTANCE { - UINT32 cookie; - PFILE_OBJECT fileObject; - PVOID eventQueue; - PVOID packetQueue; -} OVS_OPEN_INSTANCE, *POVS_OPEN_INSTANCE; - -NDIS_STATUS OvsCreateDeviceObject(NDIS_HANDLE ovsExtDriverHandle); -VOID OvsDeleteDeviceObject(); - -POVS_OPEN_INSTANCE OvsGetOpenInstance(PFILE_OBJECT fileObject, - UINT32 dpNo); - -NTSTATUS OvsCompleteIrpRequest(PIRP irp, ULONG_PTR infoPtr, NTSTATUS status); - -#endif /* __OVS_IOCTL_H_ */ - -#endif /* OVS_USE_NL_INTERFACE */ diff --git a/datapath-windows/ovsext/OvsIpHelper.c b/datapath-windows/ovsext/OvsIpHelper.c deleted file mode 100644 index cd2625a30..000000000 --- a/datapath-windows/ovsext/OvsIpHelper.c +++ /dev/null @@ -1,1689 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" -#include "OvsIpHelper.h" -#include "OvsSwitch.h" -#include "OvsJhash.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_IPHELPER -#include "OvsDebug.h" - -/* - * Fow now, we assume only one internal adapter - */ - -KSTART_ROUTINE OvsStartIpHelper; - - -/* - * Only when the internal IP is configured and virtual - * internal port is connected, the IP helper request can be - * queued. - */ -static BOOLEAN ovsInternalIPConfigured; -static UINT32 ovsInternalPortNo; -static GUID ovsInternalNetCfgId; -static MIB_IF_ROW2 ovsInternalRow; -static MIB_IPINTERFACE_ROW ovsInternalIPRow; - -/* we only keep one internal IP for reference, it will not be used for - * determining SRC IP of Tunnel - */ -static UINT32 ovsInternalIP; - - -/* - * FWD_ENTRY --------> IPFORWARD_ENTRY - * | - * |--------------------------------------> IPENIGH_ENTRY - * - * IPFORWARD_ENTRY ------> FWD_ENTRY LIST with same IPFORWARD - * - * IPNEIGH_ENTRY ------> FWD_ENTRY LIST with same IPNEIGH - * - */ - -static PLIST_ENTRY ovsFwdHashTable; // based on DST IP -static PLIST_ENTRY ovsRouteHashTable; // based on DST PREFIX -static PLIST_ENTRY ovsNeighHashTable; // based on DST IP -static LIST_ENTRY ovsSortedIPNeighList; -static UINT32 ovsNumFwdEntries; - - -static PNDIS_RW_LOCK_EX ovsTableLock; -static NDIS_SPIN_LOCK ovsIpHelperLock; - -static LIST_ENTRY ovsIpHelperRequestList; -static UINT32 ovsNumIpHelperRequests; - -static HANDLE ipInterfaceNotificationHandle; -static HANDLE ipRouteNotificationHandle; -static HANDLE unicastIPNotificationHandle; - -static OVS_IP_HELPER_THREAD_CONTEXT ovsIpHelperThreadContext; - -static POVS_IPFORWARD_ENTRY OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix); -static VOID OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf); -static VOID OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr); -static VOID OvsCleanupIpHelperRequestList(VOID); -static VOID OvsCleanupFwdTable(VOID); -static VOID 
OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn); - -static VOID -OvsDumpIfRow(PMIB_IF_ROW2 ifRow) -{ - OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", - ifRow->InterfaceLuid.Info.NetLuidIndex, - ifRow->InterfaceLuid.Info.IfType); - OVS_LOG_INFO("InterfaceIndex: %d", ifRow->InterfaceIndex); - - OVS_LOG_INFO("Interface GUID: %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", - ifRow->InterfaceGuid.Data1, - ifRow->InterfaceGuid.Data2, - ifRow->InterfaceGuid.Data3, - *(UINT16 *)ifRow->InterfaceGuid.Data4, - ifRow->InterfaceGuid.Data4[2], - ifRow->InterfaceGuid.Data4[3], - ifRow->InterfaceGuid.Data4[4], - ifRow->InterfaceGuid.Data4[5], - ifRow->InterfaceGuid.Data4[6], - ifRow->InterfaceGuid.Data4[7]); - OVS_LOG_INFO("Perm MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", - ifRow->PermanentPhysicalAddress[0], - ifRow->PermanentPhysicalAddress[1], - ifRow->PermanentPhysicalAddress[2], - ifRow->PermanentPhysicalAddress[3], - ifRow->PermanentPhysicalAddress[4], - ifRow->PermanentPhysicalAddress[5]); -} - - -static VOID -OvsDumpIfTable(PMIB_IF_TABLE2 ifTable) -{ - PMIB_IF_ROW2 ifRow; - UINT32 i; - - OVS_LOG_INFO("======Number of entries: %d========", ifTable->NumEntries); - - for (i = 0; i < ifTable->NumEntries; i++) { - ifRow = &ifTable->Table[i]; - OvsDumpIfRow(ifRow); - } -} - - -NTSTATUS -OvsGetIfEntry(GUID *interfaceGuid, PMIB_IF_ROW2 ifEntry) -{ - NTSTATUS status; - PMIB_IF_TABLE2 ifTable; - UINT32 i; - - if (interfaceGuid == NULL || ifEntry == NULL) { - return STATUS_INVALID_PARAMETER; - } - - status = GetIfTable2Ex(MibIfTableNormal, &ifTable); - - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to get if table, status: %x", status); - return status; - } - status = STATUS_NOT_FOUND; - - for (i = 0; i < ifTable->NumEntries; i++) { - PMIB_IF_ROW2 ifRow; - - ifRow = &ifTable->Table[i]; - if (!memcmp(interfaceGuid, &ifRow->InterfaceGuid, sizeof (GUID))) { - RtlCopyMemory(ifEntry, ifRow, sizeof (MIB_IF_ROW2)); - status = STATUS_SUCCESS; - OvsDumpIfRow(ifEntry); - 
break; - } - } - - FreeMibTable(ifTable); - return status; -} - - -static VOID -OvsDumpIPInterfaceEntry(PMIB_IPINTERFACE_ROW ipRow) -{ - OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", - ipRow->InterfaceLuid.Info.NetLuidIndex, - ipRow->InterfaceLuid.Info.IfType); - OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); - - OVS_LOG_INFO("MaxReassembleSize: %u", ipRow->MaxReassemblySize); -} - - -NTSTATUS -OvsGetIPInterfaceEntry(NET_LUID luid, - PMIB_IPINTERFACE_ROW ipRow) -{ - NTSTATUS status; - - if (ipRow == NULL) { - return STATUS_INVALID_PARAMETER; - } - - ipRow->Family = AF_INET; - ipRow->InterfaceLuid.Value = luid.Value; - - status = GetIpInterfaceEntry(ipRow); - - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to get internal IP Interface mib row, status: %x", - status); - return status; - } - OvsDumpIPInterfaceEntry(ipRow); - return status; -} - - -static VOID -OvsDumpIPEntry(PMIB_UNICASTIPADDRESS_ROW ipRow) -{ - UINT32 ipAddr; - - OVS_LOG_INFO("InterfaceLuid: NetLuidIndex: %d, type: %d", - ipRow->InterfaceLuid.Info.NetLuidIndex, - ipRow->InterfaceLuid.Info.IfType); - - OVS_LOG_INFO("InterfaceIndex: %d", ipRow->InterfaceIndex); - - ASSERT(ipRow->Address.si_family == AF_INET); - - ipAddr = ipRow->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Unicast Address: %d.%d.%d.%d\n", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, ipAddr >> 24); -} - - -NTSTATUS -OvsGetIPEntry(NET_LUID interfaceLuid, - PMIB_UNICASTIPADDRESS_ROW ipEntry) -{ - PMIB_UNICASTIPADDRESS_TABLE ipTable; - NTSTATUS status; - UINT32 i; - - if (ipEntry == NULL || ipEntry == NULL) { - return STATUS_INVALID_PARAMETER; - } - - status = GetUnicastIpAddressTable(AF_INET, &ipTable); - - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to get unicast address table, status: %x", status); - return status; - } - - status = STATUS_NOT_FOUND; - - for (i = 0; i < ipTable->NumEntries; i++) { - PMIB_UNICASTIPADDRESS_ROW ipRow; - - ipRow = &ipTable->Table[i]; - if 
(ipRow->InterfaceLuid.Value == interfaceLuid.Value) { - RtlCopyMemory(ipEntry, ipRow, sizeof (*ipRow)); - OvsDumpIPEntry(ipEntry); - status = STATUS_SUCCESS; - break; - } - } - - FreeMibTable(ipTable); - return status; -} - -#ifdef OVS_ENABLE_IPPATH -static VOID -OvsDumpIPPath(PMIB_IPPATH_ROW ipPath) -{ - UINT32 ipAddr = ipPath->Source.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("Source: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Destination: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = ipPath->CurrentNextHop.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("NextHop: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); -} - - -NTSTATUS -OvsGetIPPathEntry(PMIB_IPPATH_ROW ipPath) -{ - NTSTATUS status; - UINT32 ipAddr = ipPath->Destination.Ipv4.sin_addr.s_addr; - - status = GetIpPathEntry(ipPath); - - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to get IP path to %d.%d.%d.%d, status:%x", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); - return status; - } - OvsDumpIPPath(ipPath); - return status; -} -#endif - -static VOID -OvsDumpRoute(const SOCKADDR_INET *sourceAddress, - const SOCKADDR_INET *destinationAddress, - PMIB_IPFORWARD_ROW2 route) -{ - UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("Destination: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = sourceAddress->Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Source: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - - ipAddr = route->NextHop.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("NextHop: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); -} - 
- -NTSTATUS -OvsGetRoute(NET_LUID interfaceLuid, - const SOCKADDR_INET *destinationAddress, - PMIB_IPFORWARD_ROW2 route, - SOCKADDR_INET *sourceAddress) -{ - NTSTATUS status; - - if (destinationAddress == NULL || route == NULL) { - return STATUS_INVALID_PARAMETER; - } - - status = GetBestRoute2(&interfaceLuid, 0, - NULL, destinationAddress, - 0, route, sourceAddress); - - if (status != STATUS_SUCCESS) { - UINT32 ipAddr = destinationAddress->Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to get route to %d.%d.%d.%d, status: %x", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); - return status; - } - - OvsDumpRoute(sourceAddress, destinationAddress, route); - return status; -} - -static VOID -OvsDumpIPNeigh(PMIB_IPNET_ROW2 ipNeigh) -{ - UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("Neigh: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - OVS_LOG_INFO("MAC Address: %02x:%02x:%02x:%02x:%02x:%02x", - ipNeigh->PhysicalAddress[0], - ipNeigh->PhysicalAddress[1], - ipNeigh->PhysicalAddress[2], - ipNeigh->PhysicalAddress[3], - ipNeigh->PhysicalAddress[4], - ipNeigh->PhysicalAddress[5]); -} - - -NTSTATUS -OvsGetIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) -{ - NTSTATUS status; - - ASSERT(ipNeigh); - - status = GetIpNetEntry2(ipNeigh); - - if (status != STATUS_SUCCESS) { - UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to get ARP entry: %d.%d.%d.%d, status: %x", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); - return status; - } - if (ipNeigh->State == NlnsReachable || - ipNeigh->State == NlnsPermanent) { - OvsDumpIPNeigh(ipNeigh); - return STATUS_SUCCESS; - } - return STATUS_FWP_TCPIP_NOT_READY; -} - - -NTSTATUS -OvsResolveIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) -{ - NTSTATUS status; - - ASSERT(ipNeigh); - status = ResolveIpNetEntry2(ipNeigh, NULL); - - if (status != STATUS_SUCCESS) { - 
UINT32 ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("Fail to resolve ARP entry: %d.%d.%d.%d, status: %x", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, status); - return status; - } - - if (ipNeigh->State == NlnsReachable || - ipNeigh->State == NlnsPermanent) { - OvsDumpIPNeigh(ipNeigh); - return STATUS_SUCCESS; - } - return STATUS_FWP_TCPIP_NOT_READY; -} - - -NTSTATUS -OvsGetOrResolveIPNeigh(UINT32 ipAddr, - PMIB_IPNET_ROW2 ipNeigh) -{ - NTSTATUS status; - - ASSERT(ipNeigh); - - RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); - ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; - ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; - ipNeigh->Address.si_family = AF_INET; - ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; - - status = OvsGetIPNeighEntry(ipNeigh); - - if (status != STATUS_SUCCESS) { - RtlZeroMemory(ipNeigh, sizeof (*ipNeigh)); - ipNeigh->InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; - ipNeigh->InterfaceIndex = ovsInternalRow.InterfaceIndex; - ipNeigh->Address.si_family = AF_INET; - ipNeigh->Address.Ipv4.sin_addr.s_addr = ipAddr; - status = OvsResolveIPNeighEntry(ipNeigh); - } - return status; -} - - -static VOID -OvsChangeCallbackIpInterface(PVOID context, - PMIB_IPINTERFACE_ROW ipRow, - MIB_NOTIFICATION_TYPE notificationType) -{ - UNREFERENCED_PARAMETER(context); - switch (notificationType) { - case MibParameterNotification: - case MibAddInstance: - if (ipRow->InterfaceLuid.Info.NetLuidIndex == - ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && - ipRow->InterfaceLuid.Info.IfType == - ovsInternalRow.InterfaceLuid.Info.IfType && - ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { - /* - * Update the IP Interface Row - */ - NdisAcquireSpinLock(&ovsIpHelperLock); - RtlCopyMemory(&ovsInternalIPRow, ipRow, - sizeof (PMIB_IPINTERFACE_ROW)); - ovsInternalIPConfigured = TRUE; - NdisReleaseSpinLock(&ovsIpHelperLock); - } - OVS_LOG_INFO("IP Interface with 
NetLuidIndex: %d, type: %d is %s", - ipRow->InterfaceLuid.Info.NetLuidIndex, - ipRow->InterfaceLuid.Info.IfType, - notificationType == MibAddInstance ? "added" : "modified"); - break; - case MibDeleteInstance: - OVS_LOG_INFO("IP Interface with NetLuidIndex: %d, type: %d, deleted", - ipRow->InterfaceLuid.Info.NetLuidIndex, - ipRow->InterfaceLuid.Info.IfType); - if (ipRow->InterfaceLuid.Info.NetLuidIndex == - ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && - ipRow->InterfaceLuid.Info.IfType == - ovsInternalRow.InterfaceLuid.Info.IfType && - ipRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { - - NdisAcquireSpinLock(&ovsIpHelperLock); - ovsInternalIPConfigured = FALSE; - NdisReleaseSpinLock(&ovsIpHelperLock); - - OvsCleanupIpHelperRequestList(); - - OvsCleanupFwdTable(); - } - - break; - case MibInitialNotification: - OVS_LOG_INFO("Get Initial notification for IP Interface change."); - default: - return; - } -} - - -static VOID -OvsChangeCallbackIpRoute(PVOID context, - PMIB_IPFORWARD_ROW2 ipRoute, - MIB_NOTIFICATION_TYPE notificationType) -{ - UINT32 ipAddr, nextHop; - - UNREFERENCED_PARAMETER(context); - switch (notificationType) { - case MibAddInstance: - - ASSERT(ipRoute); - ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; - nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d added", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - ipRoute->DestinationPrefix.PrefixLength, - nextHop & 0xff, (nextHop >> 8) & 0xff, - (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff); - break; - - case MibParameterNotification: - case MibDeleteInstance: - ASSERT(ipRoute); - ipAddr = ipRoute->DestinationPrefix.Prefix.Ipv4.sin_addr.s_addr; - nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; - - OVS_LOG_INFO("IPRoute: To %d.%d.%d.%d/%d through %d.%d.%d.%d %s.", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - 
ipRoute->DestinationPrefix.PrefixLength, - nextHop & 0xff, (nextHop >> 8) & 0xff, - (nextHop >> 16) & 0xff, (nextHop >> 24) & 0xff, - notificationType == MibDeleteInstance ? "deleted" : - "modified"); - - if (ipRoute->InterfaceLuid.Info.NetLuidIndex == - ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && - ipRoute->InterfaceLuid.Info.IfType == - ovsInternalRow.InterfaceLuid.Info.IfType && - ipRoute->InterfaceIndex == ovsInternalRow.InterfaceIndex) { - - POVS_IPFORWARD_ENTRY ipf; - LOCK_STATE_EX lockState; - - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - ipf = OvsLookupIPForwardEntry(&ipRoute->DestinationPrefix); - if (ipf != NULL) { - OvsRemoveIPForwardEntry(ipf); - } - NdisReleaseRWLock(ovsTableLock, &lockState); - } - break; - - case MibInitialNotification: - OVS_LOG_INFO("Get Initial notification for IP Route change."); - default: - return; - } -} - - -static VOID -OvsChangeCallbackUnicastIpAddress(PVOID context, - PMIB_UNICASTIPADDRESS_ROW unicastRow, - MIB_NOTIFICATION_TYPE notificationType) -{ - UINT32 ipAddr; - - UNREFERENCED_PARAMETER(context); - switch (notificationType) { - case MibParameterNotification: - case MibAddInstance: - ASSERT(unicastRow); - ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; - if (unicastRow->InterfaceLuid.Info.NetLuidIndex == - ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && - unicastRow->InterfaceLuid.Info.IfType == - ovsInternalRow.InterfaceLuid.Info.IfType && - unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { - ovsInternalIP = ipAddr; - } - OVS_LOG_INFO("IP Address: %d.%d.%d.%d is %s", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff, - notificationType == MibAddInstance ? 
"added": "modified"); - break; - - case MibDeleteInstance: - ASSERT(unicastRow); - ipAddr = unicastRow->Address.Ipv4.sin_addr.s_addr; - OVS_LOG_INFO("IP Address removed: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - if (unicastRow->InterfaceLuid.Info.NetLuidIndex == - ovsInternalRow.InterfaceLuid.Info.NetLuidIndex && - unicastRow->InterfaceLuid.Info.IfType == - ovsInternalRow.InterfaceLuid.Info.IfType && - unicastRow->InterfaceIndex == ovsInternalRow.InterfaceIndex) { - - LOCK_STATE_EX lockState; - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - OvsRemoveAllFwdEntriesWithSrc(ipAddr); - NdisReleaseRWLock(ovsTableLock, &lockState); - - } - break; - - case MibInitialNotification: - OVS_LOG_INFO("Get Initial notification for Unicast IP Address change."); - default: - return; - } -} - - -static VOID -OvsCancelChangeNotification() -{ - if (ipInterfaceNotificationHandle != NULL) { - CancelMibChangeNotify2(ipInterfaceNotificationHandle); - ipInterfaceNotificationHandle = NULL; - } - if (ipRouteNotificationHandle != NULL) { - CancelMibChangeNotify2(ipRouteNotificationHandle); - ipRouteNotificationHandle = NULL; - } - if (unicastIPNotificationHandle != NULL) { - CancelMibChangeNotify2(unicastIPNotificationHandle); - unicastIPNotificationHandle = NULL; - } -} - - -static NTSTATUS -OvsRegisterChangeNotification() -{ - NTSTATUS status; - - - status = NotifyIpInterfaceChange(AF_INET, OvsChangeCallbackIpInterface, - NULL, TRUE, - &ipInterfaceNotificationHandle); - if (status != STATUS_SUCCESS) { - OVS_LOG_ERROR("Fail to register Notify IP interface change, status:%x.", - status); - return status; - } - - status = NotifyRouteChange2(AF_INET, OvsChangeCallbackIpRoute, NULL, - TRUE, &ipRouteNotificationHandle); - if (status != STATUS_SUCCESS) { - OVS_LOG_ERROR("Fail to regiter ip route change, status: %x.", - status); - goto register_cleanup; - } - status = NotifyUnicastIpAddressChange(AF_INET, - 
OvsChangeCallbackUnicastIpAddress, - NULL, TRUE, - &unicastIPNotificationHandle); - if (status != STATUS_SUCCESS) { - OVS_LOG_ERROR("Fail to regiter unicast ip change, status: %x.", status); - } -register_cleanup: - if (status != STATUS_SUCCESS) { - OvsCancelChangeNotification(); - } - - return status; -} - - -static POVS_IPNEIGH_ENTRY -OvsLookupIPNeighEntry(UINT32 ipAddr) -{ - PLIST_ENTRY link; - POVS_IPNEIGH_ENTRY entry; - UINT32 hash = OvsJhashWords(&ipAddr, 1, OVS_HASH_BASIS); - - LIST_FORALL(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], link) { - entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, link); - if (entry->ipAddr == ipAddr) { - return entry; - } - } - return NULL; -} - - -static UINT32 -OvsHashIPPrefix(PIP_ADDRESS_PREFIX prefix) -{ - UINT64 words = (UINT64)prefix->Prefix.Ipv4.sin_addr.s_addr << 32 | - (UINT32)prefix->PrefixLength; - return OvsJhashWords((UINT32 *)&words, 2, OVS_HASH_BASIS); -} - - -static POVS_IPFORWARD_ENTRY -OvsLookupIPForwardEntry(PIP_ADDRESS_PREFIX prefix) -{ - - PLIST_ENTRY link; - POVS_IPFORWARD_ENTRY ipfEntry; - UINT32 hash; - ASSERT(prefix->Prefix.si_family == AF_INET); - - hash = RtlUlongByteSwap(prefix->Prefix.Ipv4.sin_addr.s_addr); - - ASSERT(prefix->PrefixLength >= 32 || - (hash & (((UINT32)1 << (32 - prefix->PrefixLength)) - 1)) == 0); - - hash = OvsHashIPPrefix(prefix); - LIST_FORALL(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], link) { - ipfEntry = CONTAINING_RECORD(link, OVS_IPFORWARD_ENTRY, link); - if (ipfEntry->prefix.PrefixLength == prefix->PrefixLength && - ipfEntry->prefix.Prefix.Ipv4.sin_addr.s_addr == - prefix->Prefix.Ipv4.sin_addr.s_addr) { - return ipfEntry; - } - } - return NULL; -} - - -static POVS_FWD_ENTRY -OvsLookupIPFwdEntry(UINT32 dstIp) -{ - PLIST_ENTRY link; - POVS_FWD_ENTRY entry; - UINT32 hash = OvsJhashWords(&dstIp, 1, OVS_HASH_BASIS); - - LIST_FORALL(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], link) { - entry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); - if 
(entry->info.dstIpAddr == dstIp) { - return entry; - } - } - return NULL; -} - - -NTSTATUS -OvsLookupIPFwdInfo(UINT32 dstIp, - POVS_FWD_INFO info) -{ - POVS_FWD_ENTRY entry; - LOCK_STATE_EX lockState; - NTSTATUS status = STATUS_NOT_FOUND; - - NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); - entry = OvsLookupIPFwdEntry(dstIp); - if (entry) { - info->value[0] = entry->info.value[0]; - info->value[1] = entry->info.value[1]; - info->value[2] = entry->info.value[2]; - status = STATUS_SUCCESS; - } - NdisReleaseRWLock(ovsTableLock, &lockState); - return status; -} - - -static POVS_IPNEIGH_ENTRY -OvsCreateIPNeighEntry(PMIB_IPNET_ROW2 ipNeigh) -{ - - POVS_IPNEIGH_ENTRY entry; - UINT64 timeVal; - - ASSERT(ipNeigh != NULL); - entry = (POVS_IPNEIGH_ENTRY)OvsAllocateMemory(sizeof (OVS_IPNEIGH_ENTRY)); - if (entry == NULL) { - return NULL; - } - - RtlZeroMemory(entry, sizeof (OVS_IPNEIGH_ENTRY)); - entry->ipAddr = ipNeigh->Address.Ipv4.sin_addr.s_addr; - KeQuerySystemTime((LARGE_INTEGER *)&timeVal); - entry->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; - RtlCopyMemory(entry->macAddr, ipNeigh->PhysicalAddress, - MAC_ADDRESS_LEN); - InitializeListHead(&entry->fwdList); - - return entry; -} - - -static POVS_IPFORWARD_ENTRY -OvsCreateIPForwardEntry(PMIB_IPFORWARD_ROW2 ipRoute) -{ - - POVS_IPFORWARD_ENTRY entry; - - ASSERT(ipRoute); - - entry = - (POVS_IPFORWARD_ENTRY)OvsAllocateMemory(sizeof (OVS_IPFORWARD_ENTRY)); - if (entry == NULL) { - return NULL; - } - - RtlZeroMemory(entry, sizeof (OVS_IPFORWARD_ENTRY)); - RtlCopyMemory(&entry->prefix, &ipRoute->DestinationPrefix, - sizeof (IP_ADDRESS_PREFIX)); - entry->nextHop = ipRoute->NextHop.Ipv4.sin_addr.s_addr; - InitializeListHead(&entry->fwdList); - - return entry; -} - - -static POVS_FWD_ENTRY -OvsCreateFwdEntry(POVS_FWD_INFO fwdInfo) -{ - POVS_FWD_ENTRY entry; - - entry = (POVS_FWD_ENTRY)OvsAllocateMemory(sizeof (OVS_FWD_ENTRY)); - if (entry == NULL) { - return NULL; - } - - RtlZeroMemory(entry, sizeof (OVS_FWD_ENTRY)); - 
RtlCopyMemory(&entry->info, fwdInfo, sizeof (OVS_FWD_INFO)); - return entry; -} - - -static VOID -OvsRemoveFwdEntry(POVS_FWD_ENTRY fwdEntry) -{ - POVS_IPFORWARD_ENTRY ipf; - POVS_IPNEIGH_ENTRY ipn; - - ipf = fwdEntry->ipf; - ipn = fwdEntry->ipn; - - RemoveEntryList(&fwdEntry->link); - ovsNumFwdEntries--; - - RemoveEntryList(&fwdEntry->ipfLink); - ipf->refCount--; - - RemoveEntryList(&fwdEntry->ipnLink); - ipn->refCount--; - - if (ipf->refCount == 0) { - ASSERT(IsListEmpty(&ipf->fwdList)); - RemoveEntryList(&ipf->link); - OvsFreeMemory(ipf); - } - - if (ipn->refCount == 0) { - ASSERT(IsListEmpty(&ipn->fwdList)); - RemoveEntryList(&ipn->link); - NdisAcquireSpinLock(&ovsIpHelperLock); - RemoveEntryList(&ipn->slink); - NdisReleaseSpinLock(&ovsIpHelperLock); - OvsFreeMemory(ipn); - } - - OvsFreeMemory(fwdEntry); -} - - -static VOID -OvsRemoveIPForwardEntry(POVS_IPFORWARD_ENTRY ipf) -{ - POVS_FWD_ENTRY fwdEntry; - PLIST_ENTRY link, next; - - ipf->refCount++; - - LIST_FORALL_SAFE(&ipf->fwdList, link, next) { - fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); - OvsRemoveFwdEntry(fwdEntry); - } - ASSERT(ipf->refCount == 1); - - RemoveEntryList(&ipf->link); - OvsFreeMemory(ipf); -} - - -static VOID -OvsRemoveIPNeighEntry(POVS_IPNEIGH_ENTRY ipn) -{ - PLIST_ENTRY link, next; - POVS_FWD_ENTRY fwdEntry; - - ipn->refCount++; - - LIST_FORALL_SAFE(&ipn->fwdList, link, next) { - fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); - OvsRemoveFwdEntry(fwdEntry); - } - - if (ipn->refCount == 1) { - RemoveEntryList(&ipn->link); - NdisAcquireSpinLock(&ovsIpHelperLock); - RemoveEntryList(&ipn->slink); - NdisReleaseSpinLock(&ovsIpHelperLock); - OvsFreeMemory(ipn); - } -} - - -static VOID -OvsAddToSortedNeighList(POVS_IPNEIGH_ENTRY ipn) -{ - PLIST_ENTRY link; - POVS_IPNEIGH_ENTRY entry; - - if (!IsListEmpty(&ovsSortedIPNeighList)) { - link = ovsSortedIPNeighList.Blink; - entry = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); - if (entry->timeout > ipn->timeout) { - 
ipn->timeout++; - } - } - InsertTailList(&ovsSortedIPNeighList, &ipn->slink); -} - - -static VOID -OvsAddIPFwdCache(POVS_FWD_ENTRY fwdEntry, - POVS_IPFORWARD_ENTRY ipf, - POVS_IPNEIGH_ENTRY ipn) - -{ - UINT32 hash; - - if (ipn->refCount == 0) { - NdisAcquireSpinLock(&ovsIpHelperLock); - OvsAddToSortedNeighList(ipn); - NdisReleaseSpinLock(&ovsIpHelperLock); - hash = OvsJhashWords(&ipn->ipAddr, 1, OVS_HASH_BASIS); - InsertHeadList(&ovsNeighHashTable[hash & OVS_NEIGH_HASH_TABLE_MASK], - &ipn->link); - } - if (ipf->refCount == 0) { - hash = OvsHashIPPrefix(&ipf->prefix); - InsertHeadList(&ovsRouteHashTable[hash & OVS_ROUTE_HASH_TABLE_MASK], - &ipf->link); - } - - InsertHeadList(&ipf->fwdList, &fwdEntry->ipfLink); - ipf->refCount++; - fwdEntry->ipf = ipf; - - InsertHeadList(&ipn->fwdList, &fwdEntry->ipnLink); - ipn->refCount++; - fwdEntry->ipn = ipn; - - hash = OvsJhashWords(&fwdEntry->info.dstIpAddr, 1, OVS_HASH_BASIS); - InsertHeadList(&ovsFwdHashTable[hash & OVS_FWD_HASH_TABLE_MASK], - &fwdEntry->link); - ovsNumFwdEntries++; -} - - -static VOID -OvsRemoveAllFwdEntriesWithSrc(UINT32 ipAddr) -{ - UINT32 i; - POVS_FWD_ENTRY fwdEntry; - PLIST_ENTRY link, next; - - for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { - LIST_FORALL_SAFE(&ovsFwdHashTable[i], link, next) { - fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, link); - if (fwdEntry->info.srcIpAddr == ipAddr) { - OvsRemoveFwdEntry(fwdEntry); - } - } - } -} - - -static VOID -OvsCleanupFwdTable(VOID) -{ - PLIST_ENTRY link, next; - POVS_IPNEIGH_ENTRY ipn; - UINT32 i; - LOCK_STATE_EX lockState; - - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - if (ovsNumFwdEntries) { - LIST_FORALL_SAFE(&ovsSortedIPNeighList, link, next) { - ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); - OvsRemoveIPNeighEntry(ipn); - } - } - for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { - ASSERT(IsListEmpty(&ovsFwdHashTable[i])); - } - for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { - ASSERT(IsListEmpty(&ovsRouteHashTable[i])); - 
} - NdisReleaseRWLock(ovsTableLock, &lockState); -} - - -static VOID -OvsCleanupIpHelperRequestList(VOID) -{ - LIST_ENTRY list; - PLIST_ENTRY next, link; - POVS_IP_HELPER_REQUEST request; - - NdisAcquireSpinLock(&ovsIpHelperLock); - if (ovsNumIpHelperRequests == 0) { - NdisReleaseSpinLock(&ovsIpHelperLock); - return; - } - - InitializeListHead(&list); - OvsAppendList(&list, &ovsIpHelperRequestList); - ovsNumIpHelperRequests = 0; - NdisReleaseSpinLock(&ovsIpHelperLock); - - LIST_FORALL_SAFE(&list, link, next) { - request = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); - - if (request->command == OVS_IP_HELPER_FWD_REQUEST && - request->fwdReq.cb) { - request->fwdReq.cb(request->fwdReq.nbl, - request->fwdReq.inPort, - &request->fwdReq.tunnelKey, - request->fwdReq.cbData1, - request->fwdReq.cbData2, - STATUS_DEVICE_NOT_READY, - NULL); - } - OvsFreeMemory(request); - } -} - - - -static VOID -OvsWakeupIPHelper(VOID) -{ - KeSetEvent(&ovsIpHelperThreadContext.event, 0, FALSE); -} - -VOID -OvsInternalAdapterDown(VOID) -{ - NdisAcquireSpinLock(&ovsIpHelperLock); - ovsInternalPortNo = OVS_DEFAULT_PORT_NO; - ovsInternalIPConfigured = FALSE; - NdisReleaseSpinLock(&ovsIpHelperLock); - - OvsCleanupIpHelperRequestList(); - - OvsCleanupFwdTable(); -} - - -VOID -OvsInternalAdapterUp(UINT32 portNo, - GUID *netCfgInstanceId) -{ - POVS_IP_HELPER_REQUEST request; - - RtlCopyMemory(&ovsInternalNetCfgId, netCfgInstanceId, sizeof (GUID)); - RtlZeroMemory(&ovsInternalRow, sizeof (MIB_IF_ROW2)); - - request = - (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); - if (request == NULL) { - OVS_LOG_ERROR("Fail to initialize Internal Adapter"); - return; - } - RtlZeroMemory(request, sizeof (OVS_IP_HELPER_REQUEST)); - request->command = OVS_IP_HELPER_INTERNAL_ADAPTER_UP; - - NdisAcquireSpinLock(&ovsIpHelperLock); - ovsInternalPortNo = portNo; - InsertHeadList(&ovsIpHelperRequestList, &request->link); - ovsNumIpHelperRequests++; - if (ovsNumIpHelperRequests == 1) { 
- OvsWakeupIPHelper(); - } - NdisReleaseSpinLock(&ovsIpHelperLock); -} - - -static VOID -OvsHandleInternalAdapterUp(POVS_IP_HELPER_REQUEST request) -{ - NTSTATUS status; - MIB_UNICASTIPADDRESS_ROW ipEntry; - GUID *netCfgInstanceId = &ovsInternalNetCfgId; - - OvsFreeMemory(request); - - status = OvsGetIfEntry(&ovsInternalNetCfgId, &ovsInternalRow); - - if (status != STATUS_SUCCESS) { - OVS_LOG_ERROR("Fali to get IF entry for internal port with GUID" - " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", - netCfgInstanceId->Data1, - netCfgInstanceId->Data2, - netCfgInstanceId->Data3, - *(UINT16 *)netCfgInstanceId->Data4, - netCfgInstanceId->Data4[2], - netCfgInstanceId->Data4[3], - netCfgInstanceId->Data4[4], - netCfgInstanceId->Data4[5], - netCfgInstanceId->Data4[6], - netCfgInstanceId->Data4[7]); - return; - } - - status = OvsGetIPInterfaceEntry(ovsInternalRow.InterfaceLuid, - &ovsInternalIPRow); - - if (status == STATUS_SUCCESS) { - NdisAcquireSpinLock(&ovsIpHelperLock); - ovsInternalIPConfigured = TRUE; - NdisReleaseSpinLock(&ovsIpHelperLock); - } else { - return; - } - - status = OvsGetIPEntry(ovsInternalRow.InterfaceLuid, &ipEntry); - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fali to get IP entry for internal port with GUID" - " %08x-%04x-%04x-%04x-%02x%02x%02x%02x%02x%02x", - netCfgInstanceId->Data1, - netCfgInstanceId->Data2, - netCfgInstanceId->Data3, - *(UINT16 *)netCfgInstanceId->Data4, - netCfgInstanceId->Data4[2], - netCfgInstanceId->Data4[3], - netCfgInstanceId->Data4[4], - netCfgInstanceId->Data4[5], - netCfgInstanceId->Data4[6], - netCfgInstanceId->Data4[7]); - } -} - - -static NTSTATUS -OvsEnqueueIpHelperRequest(POVS_IP_HELPER_REQUEST request) -{ - - NdisAcquireSpinLock(&ovsIpHelperLock); - - if (ovsInternalPortNo == OVS_DEFAULT_PORT_NO || - ovsInternalIPConfigured == FALSE) { - NdisReleaseSpinLock(&ovsIpHelperLock); - OvsFreeMemory(request); - return STATUS_NDIS_ADAPTER_NOT_READY; - } else { - InsertHeadList(&ovsIpHelperRequestList, 
&request->link); - ovsNumIpHelperRequests++; - if (ovsNumIpHelperRequests == 1) { - OvsWakeupIPHelper(); - } - NdisReleaseSpinLock(&ovsIpHelperLock); - return STATUS_SUCCESS; - } -} - - -NTSTATUS -OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, - UINT32 inPort, - const OvsIPv4TunnelKey *tunnelKey, - OvsIPHelperCallback cb, - PVOID cbData1, - PVOID cbData2) -{ - POVS_IP_HELPER_REQUEST request; - - request = - (POVS_IP_HELPER_REQUEST)OvsAllocateMemory(sizeof (OVS_IP_HELPER_REQUEST)); - - if (request == NULL) { - return STATUS_INSUFFICIENT_RESOURCES; - } - request->command = OVS_IP_HELPER_FWD_REQUEST; - request->fwdReq.nbl = nbl; - request->fwdReq.inPort = inPort; - RtlCopyMemory(&request->fwdReq.tunnelKey, tunnelKey, - sizeof (*tunnelKey)); - request->fwdReq.cb = cb; - request->fwdReq.cbData1 = cbData1; - request->fwdReq.cbData2 = cbData2; - - return OvsEnqueueIpHelperRequest(request); -} - - -static VOID -OvsHandleFwdRequest(POVS_IP_HELPER_REQUEST request) -{ - SOCKADDR_INET dst, src; - NTSTATUS status = STATUS_SUCCESS; - MIB_IPFORWARD_ROW2 ipRoute; - MIB_IPNET_ROW2 ipNeigh; - OVS_FWD_INFO fwdInfo; - UINT32 ipAddr; - UINT32 srcAddr; - POVS_FWD_ENTRY fwdEntry = NULL; - POVS_IPFORWARD_ENTRY ipf = NULL; - POVS_IPNEIGH_ENTRY ipn = NULL; - LOCK_STATE_EX lockState; - BOOLEAN newIPF = FALSE; - BOOLEAN newIPN = FALSE; - BOOLEAN newFWD = FALSE; - - status = OvsLookupIPFwdInfo(request->fwdReq.tunnelKey.dst, - &fwdInfo); - if (status == STATUS_SUCCESS) { - goto fwd_handle_nbl; - } - - /* find IPRoute */ - RtlZeroMemory(&dst, sizeof(dst)); - RtlZeroMemory(&src, sizeof(src)); - RtlZeroMemory(&ipRoute, sizeof (MIB_IPFORWARD_ROW2)); - dst.si_family = AF_INET; - dst.Ipv4.sin_addr.s_addr = request->fwdReq.tunnelKey.dst; - - status = OvsGetRoute(ovsInternalRow.InterfaceLuid, &dst, &ipRoute, &src); - if (status != STATUS_SUCCESS) { - goto fwd_handle_nbl; - } - srcAddr = src.Ipv4.sin_addr.s_addr; - - /* find IPNeigh */ - ipAddr = ipRoute.NextHop.Ipv4.sin_addr.s_addr; - if (ipAddr != 0) { - 
NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - ipn = OvsLookupIPNeighEntry(ipAddr); - if (ipn) { - goto fwd_request_done; - } - NdisReleaseRWLock(ovsTableLock, &lockState); - } - RtlZeroMemory(&ipNeigh, sizeof (ipNeigh)); - ipNeigh.InterfaceLuid.Value = ovsInternalRow.InterfaceLuid.Value; - if (ipAddr == 0) { - ipAddr = request->fwdReq.tunnelKey.dst; - } - status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); - if (status != STATUS_SUCCESS) { - goto fwd_handle_nbl; - } - - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - -fwd_request_done: - - /* - * Initialize ipf - */ - ipf = OvsLookupIPForwardEntry(&ipRoute.DestinationPrefix); - if (ipf == NULL) { - ipf = OvsCreateIPForwardEntry(&ipRoute); - if (ipf == NULL) { - NdisReleaseRWLock(ovsTableLock, &lockState); - status = STATUS_INSUFFICIENT_RESOURCES; - goto fwd_handle_nbl; - } - newIPF = TRUE; - } else { - PLIST_ENTRY link; - link = ipf->fwdList.Flink; - fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipfLink); - srcAddr = fwdEntry->info.srcIpAddr; - } - - /* - * initialize ipn - */ - if (ipn == NULL) { - ipn = OvsLookupIPNeighEntry(ipAddr); - if (ipn == NULL) { - ipn = OvsCreateIPNeighEntry(&ipNeigh); - if (ipn == NULL) { - NdisReleaseRWLock(ovsTableLock, &lockState); - status = STATUS_INSUFFICIENT_RESOURCES; - goto fwd_handle_nbl; - } - newIPN = TRUE; - } - } - - /* - * initialize fwdEntry - */ - fwdInfo.dstIpAddr = request->fwdReq.tunnelKey.dst; - fwdInfo.srcIpAddr = srcAddr; - RtlCopyMemory(fwdInfo.dstMacAddr, ipn->macAddr, MAC_ADDRESS_LEN); - RtlCopyMemory(fwdInfo.srcMacAddr, ovsInternalRow.PhysicalAddress, - MAC_ADDRESS_LEN); - fwdInfo.srcPortNo = request->fwdReq.inPort; - - fwdEntry = OvsCreateFwdEntry(&fwdInfo); - if (fwdEntry == NULL) { - NdisReleaseRWLock(ovsTableLock, &lockState); - status = STATUS_INSUFFICIENT_RESOURCES; - goto fwd_handle_nbl; - } - newFWD = TRUE; - /* - * Cache the result - */ - OvsAddIPFwdCache(fwdEntry, ipf, ipn); - NdisReleaseRWLock(ovsTableLock, &lockState); - 
-fwd_handle_nbl: - - if (status != STATUS_SUCCESS) { - if (newFWD) { - ASSERT(fwdEntry != NULL); - OvsFreeMemory(fwdEntry); - } - if (newIPF) { - ASSERT(ipf && ipf->refCount == 0); - OvsFreeMemory(ipf); - } - if (newIPN) { - ASSERT(ipn && ipn->refCount == 0); - OvsFreeMemory(ipn); - } - ipAddr = request->fwdReq.tunnelKey.dst; - OVS_LOG_INFO("Fail to handle IP helper request for dst: %d.%d.%d.%d", - ipAddr & 0xff, (ipAddr >> 8) & 0xff, - (ipAddr >> 16) & 0xff, (ipAddr >> 24) & 0xff); - } - if (request->fwdReq.cb) { - request->fwdReq.cb(request->fwdReq.nbl, - request->fwdReq.inPort, - &request->fwdReq.tunnelKey, - request->fwdReq.cbData1, - request->fwdReq.cbData2, - status, - status == STATUS_SUCCESS ? &fwdInfo : NULL); - } - OvsFreeMemory(request); -} - - -static VOID -OvsUpdateIPNeighEntry(UINT32 ipAddr, - PMIB_IPNET_ROW2 ipNeigh, - NTSTATUS status) -{ - UINT64 timeVal; - POVS_IPNEIGH_ENTRY ipn; - LOCK_STATE_EX lockState; - KeQuerySystemTime((LARGE_INTEGER *)&timeVal); - /* - * if mac changed, update all relevant fwdEntry - */ - if (status != STATUS_SUCCESS) { - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - } else { - NdisAcquireRWLockRead(ovsTableLock, &lockState, 0); - } - ipn = OvsLookupIPNeighEntry(ipAddr); - if (ipn == NULL) { - NdisReleaseRWLock(ovsTableLock, &lockState); - return; - } - if (status != STATUS_SUCCESS) { - OvsRemoveIPNeighEntry(ipn); - NdisReleaseRWLock(ovsTableLock, &lockState); - return; - } - - if (memcmp((const PVOID)ipn->macAddr, - (const PVOID)ipNeigh->PhysicalAddress, - (size_t)MAC_ADDRESS_LEN)) { - PLIST_ENTRY link; - POVS_FWD_ENTRY fwdEntry; - NdisReleaseRWLock(ovsTableLock, &lockState); - /* - * need update, release and acquire write lock - * This is not the common case. 
- */ - - NdisAcquireRWLockWrite(ovsTableLock, &lockState, 0); - ipn = OvsLookupIPNeighEntry(ipAddr); - - if (ipn == NULL) { - NdisReleaseRWLock(ovsTableLock, &lockState); - return; - } - - LIST_FORALL(&ipn->fwdList, link) { - fwdEntry = CONTAINING_RECORD(link, OVS_FWD_ENTRY, ipnLink); - RtlCopyMemory(fwdEntry->info.dstMacAddr, - ipNeigh->PhysicalAddress, MAC_ADDRESS_LEN); - } - } - /* - * update timeout and move to the end of - * the sorted list - */ - - NdisAcquireSpinLock(&ovsIpHelperLock); - RemoveEntryList(&ipn->slink); - ipn->timeout = timeVal + OVS_IPNEIGH_TIMEOUT; - OvsAddToSortedNeighList(ipn); - NdisReleaseSpinLock(&ovsIpHelperLock); - NdisReleaseRWLock(ovsTableLock, &lockState); -} - - -static VOID -OvsHandleIPNeighTimeout(UINT32 ipAddr) -{ - MIB_IPNET_ROW2 ipNeigh; - NTSTATUS status; - - status = OvsGetOrResolveIPNeigh(ipAddr, &ipNeigh); - - OvsUpdateIPNeighEntry(ipAddr, &ipNeigh, status); -} - - -/* - *---------------------------------------------------------------------------- - * IP Helper system threash handle following request - * 1. Intialize Internal port row when internal port is connected - * 2. Handle FWD request - * 3. Handle IP Neigh timeout - * - * IP Interface, unicast address, and IP route change will be handled - * by the revelant callback. 
- *---------------------------------------------------------------------------- - */ -VOID -OvsStartIpHelper(PVOID data) -{ - POVS_IP_HELPER_THREAD_CONTEXT context = (POVS_IP_HELPER_THREAD_CONTEXT)data; - POVS_IP_HELPER_REQUEST req; - POVS_IPNEIGH_ENTRY ipn; - PLIST_ENTRY link; - UINT64 timeVal, timeout; - - OVS_LOG_INFO("Start the IP Helper Thread, context: %p", context); - - NdisAcquireSpinLock(&ovsIpHelperLock); - while (!context->exit) { - - timeout = 0; - while (!IsListEmpty(&ovsIpHelperRequestList)) { - if (context->exit) { - goto ip_helper_wait; - } - link = ovsIpHelperRequestList.Flink; - RemoveEntryList(link); - NdisReleaseSpinLock(&ovsIpHelperLock); - req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); - switch (req->command) { - case OVS_IP_HELPER_INTERNAL_ADAPTER_UP: - OvsHandleInternalAdapterUp(req); - break; - case OVS_IP_HELPER_FWD_REQUEST: - OvsHandleFwdRequest(req); - break; - default: - OvsFreeMemory(req); - } - NdisAcquireSpinLock(&ovsIpHelperLock); - } - - /* for now, let us hold the lock here, if this cause any issue - * we will change to use IpHelper lock only to protect - * IPN - */ - while (!IsListEmpty(&ovsSortedIPNeighList)) { - UINT32 ipAddr; - if (context->exit) { - goto ip_helper_wait; - } - link = ovsSortedIPNeighList.Flink; - ipn = CONTAINING_RECORD(link, OVS_IPNEIGH_ENTRY, slink); - KeQuerySystemTime((LARGE_INTEGER *)&timeVal); - if (ipn->timeout > timeVal) { - timeout = ipn->timeout; - break; - } - ipAddr = ipn->ipAddr; - - NdisReleaseSpinLock(&ovsIpHelperLock); - - OvsHandleIPNeighTimeout(ipAddr); - - NdisAcquireSpinLock(&ovsIpHelperLock); - } - if (!IsListEmpty(&ovsIpHelperRequestList)) { - continue; - } - -ip_helper_wait: - if (context->exit) { - break; - } - - KeClearEvent(&context->event); - NdisReleaseSpinLock(&ovsIpHelperLock); - - KeWaitForSingleObject(&context->event, Executive, KernelMode, - FALSE, (LARGE_INTEGER *)&timeout); - NdisAcquireSpinLock(&ovsIpHelperLock); - } - NdisReleaseSpinLock(&ovsIpHelperLock); - 
OvsCleanupFwdTable(); - OvsCleanupIpHelperRequestList(); - - OVS_LOG_INFO("Terminating the OVS IP Helper system thread"); - - PsTerminateSystemThread(STATUS_SUCCESS); -} - - -NTSTATUS -OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle) -{ - NTSTATUS status; - HANDLE threadHandle; - UINT32 i; - - ovsFwdHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * - OVS_FWD_HASH_TABLE_SIZE); - - ovsRouteHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * - OVS_ROUTE_HASH_TABLE_SIZE); - - ovsNeighHashTable = (PLIST_ENTRY)OvsAllocateMemory(sizeof(LIST_ENTRY) * - OVS_NEIGH_HASH_TABLE_SIZE); - - RtlZeroMemory(&ovsInternalRow, sizeof(MIB_IF_ROW2)); - RtlZeroMemory(&ovsInternalIPRow, sizeof (MIB_IPINTERFACE_ROW)); - ovsInternalIP = 0; - - ovsInternalPortNo = OVS_DEFAULT_PORT_NO; - - InitializeListHead(&ovsSortedIPNeighList); - - ovsTableLock = NdisAllocateRWLock(ndisFilterHandle); - NdisAllocateSpinLock(&ovsIpHelperLock); - - InitializeListHead(&ovsIpHelperRequestList); - ovsNumIpHelperRequests = 0; - ipInterfaceNotificationHandle = NULL; - ipRouteNotificationHandle = NULL; - unicastIPNotificationHandle = NULL; - - if (ovsFwdHashTable == NULL || - ovsRouteHashTable == NULL || - ovsNeighHashTable == NULL || - ovsTableLock == NULL) { - status = STATUS_INSUFFICIENT_RESOURCES; - goto init_cleanup; - } - - for (i = 0; i < OVS_FWD_HASH_TABLE_SIZE; i++) { - InitializeListHead(&ovsFwdHashTable[i]); - } - - for (i = 0; i < OVS_ROUTE_HASH_TABLE_SIZE; i++) { - InitializeListHead(&ovsRouteHashTable[i]); - } - - for (i = 0; i < OVS_NEIGH_HASH_TABLE_SIZE; i++) { - InitializeListHead(&ovsNeighHashTable[i]); - } - - - KeInitializeEvent(&ovsIpHelperThreadContext.event, NotificationEvent, - FALSE); - status = OvsRegisterChangeNotification(); - ovsIpHelperThreadContext.exit = 0; - if (status == STATUS_SUCCESS) { - status = PsCreateSystemThread(&threadHandle, SYNCHRONIZE, - NULL, NULL, NULL, OvsStartIpHelper, - &ovsIpHelperThreadContext); - if (status != STATUS_SUCCESS) { - goto 
init_cleanup; - } - ObReferenceObjectByHandle(threadHandle, SYNCHRONIZE, NULL, - KernelMode, - &ovsIpHelperThreadContext.threadObject, - NULL); - ZwClose(threadHandle); - } - -init_cleanup: - - if (status != STATUS_SUCCESS) { - OvsCancelChangeNotification(); - if (ovsFwdHashTable) { - OvsFreeMemory(ovsFwdHashTable); - ovsFwdHashTable = NULL; - } - if (ovsRouteHashTable) { - OvsFreeMemory(ovsRouteHashTable); - ovsRouteHashTable = NULL; - } - if (ovsNeighHashTable) { - OvsFreeMemory(ovsNeighHashTable); - ovsNeighHashTable = NULL; - } - if (ovsTableLock) { - NdisFreeRWLock(ovsTableLock); - ovsTableLock = NULL; - } - NdisFreeSpinLock(&ovsIpHelperLock); - } - return STATUS_SUCCESS; -} - - -VOID -OvsCleanupIpHelper(VOID) -{ - OvsCancelChangeNotification(); - - NdisAcquireSpinLock(&ovsIpHelperLock); - ovsIpHelperThreadContext.exit = 1; - OvsWakeupIPHelper(); - NdisReleaseSpinLock(&ovsIpHelperLock); - - KeWaitForSingleObject(ovsIpHelperThreadContext.threadObject, Executive, - KernelMode, FALSE, NULL); - ObDereferenceObject(ovsIpHelperThreadContext.threadObject); - - OvsFreeMemory(ovsFwdHashTable); - OvsFreeMemory(ovsRouteHashTable); - OvsFreeMemory(ovsNeighHashTable); - - NdisFreeRWLock(ovsTableLock); - NdisFreeSpinLock(&ovsIpHelperLock); -} - -VOID -OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl) -{ - PLIST_ENTRY link, next; - POVS_IP_HELPER_REQUEST req; - LIST_ENTRY list; - InitializeListHead(&list); - - NdisAcquireSpinLock(&ovsIpHelperLock); - LIST_FORALL_SAFE(&ovsIpHelperRequestList, link, next) { - req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); - if (req->command == OVS_IP_HELPER_FWD_REQUEST && - (nbl == NULL || req->fwdReq.nbl == nbl)) { - RemoveEntryList(link); - InsertHeadList(&list, link); - if (nbl != NULL) { - break; - } - } - } - NdisReleaseSpinLock(&ovsIpHelperLock); - - LIST_FORALL_SAFE(&list, link, next) { - req = CONTAINING_RECORD(link, OVS_IP_HELPER_REQUEST, link); - if (req->fwdReq.cb) { - req->fwdReq.cb(req->fwdReq.nbl, req->fwdReq.inPort, 
- &req->fwdReq.tunnelKey, - req->fwdReq.cbData1, - req->fwdReq.cbData2, - STATUS_DEVICE_NOT_READY, - NULL); - } - OvsFreeMemory(req); - } -} diff --git a/datapath-windows/ovsext/OvsIpHelper.h b/datapath-windows/ovsext/OvsIpHelper.h deleted file mode 100644 index dc2602bde..000000000 --- a/datapath-windows/ovsext/OvsIpHelper.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_IP_HELPER_H_ -#define __OVS_IP_HELPER_H_ 1 - -#include -#include - -#define OVS_FWD_HASH_TABLE_SIZE ((UINT32)1 << 10) -#define OVS_FWD_HASH_TABLE_MASK (OVS_FWD_HASH_TABLE_SIZE - 1) - -#define OVS_ROUTE_HASH_TABLE_SIZE ((UINT32)1 << 8) -#define OVS_ROUTE_HASH_TABLE_MASK (OVS_ROUTE_HASH_TABLE_SIZE - 1) - -#define OVS_NEIGH_HASH_TABLE_SIZE ((UINT32)1 << 8) -#define OVS_NEIGH_HASH_TABLE_MASK (OVS_NEIGH_HASH_TABLE_SIZE - 1) - -#define OVS_IPNEIGH_TIMEOUT 100000000 // 10 s - - -typedef struct _OVS_IPNEIGH_ENTRY { - UINT8 macAddr[MAC_ADDRESS_LEN]; - UINT16 refCount; - UINT32 ipAddr; - UINT32 pad; - UINT64 timeout; - LIST_ENTRY link; - LIST_ENTRY slink; - LIST_ENTRY fwdList; -} OVS_IPNEIGH_ENTRY, *POVS_IPNEIGH_ENTRY; - -typedef struct _OVS_IPFORWARD_ENTRY { - IP_ADDRESS_PREFIX prefix; - UINT32 nextHop; - UINT16 refCount; - LIST_ENTRY link; - LIST_ENTRY fwdList; -} OVS_IPFORWARD_ENTRY, *POVS_IPFORWARD_ENTRY; - -typedef union _OVS_FWD_INFO { - struct { - UINT32 dstIpAddr; - UINT32 srcIpAddr; - UINT8 
dstMacAddr[MAC_ADDRESS_LEN]; - UINT8 srcMacAddr[MAC_ADDRESS_LEN]; - UINT32 srcPortNo; - }; - UINT64 value[3]; -} OVS_FWD_INFO, *POVS_FWD_INFO; - -typedef struct _OVS_FWD_ENTRY { - OVS_FWD_INFO info; - POVS_IPFORWARD_ENTRY ipf; - POVS_IPNEIGH_ENTRY ipn; - LIST_ENTRY link; - LIST_ENTRY ipfLink; - LIST_ENTRY ipnLink; -} OVS_FWD_ENTRY, *POVS_FWD_ENTRY; - - -enum { - OVS_IP_HELPER_INTERNAL_ADAPTER_UP, - OVS_IP_HELPER_FWD_REQUEST, -}; - -typedef VOID (*OvsIPHelperCallback)(PNET_BUFFER_LIST nbl, - UINT32 inPort, - PVOID tunnelKey, - PVOID cbData1, - PVOID cbData2, - NTSTATUS status, - POVS_FWD_INFO fwdInfo); - -typedef struct _OVS_FWD_REQUEST_INFO { - PNET_BUFFER_LIST nbl; - UINT32 inPort; - OvsIPv4TunnelKey tunnelKey; - OvsIPHelperCallback cb; - PVOID cbData1; - PVOID cbData2; -} OVS_FWD_REQUEST_INFO, *POVS_FWD_REQUEST_INFO; - - -typedef struct _OVS_IP_HELPER_REQUEST { - LIST_ENTRY link; - UINT32 command; - union { - OVS_FWD_REQUEST_INFO fwdReq; - UINT32 dummy; - }; -} OVS_IP_HELPER_REQUEST, *POVS_IP_HELPER_REQUEST; - - -typedef struct _OVS_IP_HELPER_THREAD_CONTEXT { - KEVENT event; - PVOID threadObject; - UINT32 exit; -} OVS_IP_HELPER_THREAD_CONTEXT, *POVS_IP_HELPER_THREAD_CONTEXT; - -NTSTATUS OvsInitIpHelper(NDIS_HANDLE ndisFilterHandle); -VOID OvsCleanupIpHelper(VOID); - -VOID OvsInternalAdapterUp(UINT32 portNo, GUID *netCfgInstanceId); -VOID OvsInternalAdapterDown(VOID); - -NTSTATUS OvsFwdIPHelperRequest(PNET_BUFFER_LIST nbl, UINT32 inPort, - const PVOID tunnelKey, - OvsIPHelperCallback cb, - PVOID cbData1, - PVOID cbData2); -NTSTATUS OvsLookupIPFwdInfo(UINT32 dstIp, POVS_FWD_INFO info); -VOID OvsCancelFwdIpHelperRequest(PNET_BUFFER_LIST nbl); - -#endif /* __OVS_IP_HELPER_H_ */ diff --git a/datapath-windows/ovsext/OvsJhash.c b/datapath-windows/ovsext/OvsJhash.c deleted file mode 100644 index db08d0b46..000000000 --- a/datapath-windows/ovsext/OvsJhash.c +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" - -static __inline UINT32 -GetUnalignedU32(const UINT32 *p_) -{ - const UINT8 *p = (const UINT8 *)p_; - return ntohl((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]); -} - -/* This is the public domain lookup3 hash by Bob Jenkins from - * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */ - -static __inline UINT32 -JhashRot(UINT32 x, INT k) -{ - return (x << k) | (x >> (32 - k)); -} - -static __inline VOID -JhashMix(UINT32 *a, UINT32 *b, UINT32 *c) -{ - *a -= *c; *a ^= JhashRot(*c, 4); *c += *b; - *b -= *a; *b ^= JhashRot(*a, 6); *a += *c; - *c -= *b; *c ^= JhashRot(*b, 8); *b += *a; - *a -= *c; *a ^= JhashRot(*c, 16); *c += *b; - *b -= *a; *b ^= JhashRot(*a, 19); *a += *c; - *c -= *b; *c ^= JhashRot(*b, 4); *b += *a; -} - -static __inline VOID -JhashFinal(UINT32 *a, UINT32 *b, UINT32 *c) -{ - *c ^= *b; *c -= JhashRot(*b, 14); - *a ^= *c; *a -= JhashRot(*c, 11); - *b ^= *a; *b -= JhashRot(*a, 25); - *c ^= *b; *c -= JhashRot(*b, 16); - *a ^= *c; *a -= JhashRot(*c, 4); - *b ^= *a; *b -= JhashRot(*a, 14); - *c ^= *b; *c -= JhashRot(*b, 24); -} - -/* Returns the Jenkins hash of the 'n' 32-bit words at 'p', starting from - * 'basis'. 'p' must be properly aligned. - * - * Use hash_words() instead, unless you're computing a hash function whose - * value is exposed "on the wire" so we don't want to change it. 
*/ -UINT32 -OvsJhashWords(const UINT32 *p, SIZE_T n, UINT32 basis) -{ - UINT32 a, b, c; - - a = b = c = 0xdeadbeef + (((UINT32) n) << 2) + basis; - - while (n > 3) { - a += p[0]; - b += p[1]; - c += p[2]; - JhashMix(&a, &b, &c); - n -= 3; - p += 3; - } - - switch (n) { - case 3: - c += p[2]; - /* fall through */ - case 2: - b += p[1]; - /* fall through */ - case 1: - a += p[0]; - JhashFinal(&a, &b, &c); - /* fall through */ - case 0: - break; - } - return c; -} - -/* Returns the Jenkins hash of the 'n' bytes at 'p', starting from 'basis'. - * - * Use hash_bytes() instead, unless you're computing a hash function whose - * value is exposed "on the wire" so we don't want to change it. */ -UINT32 -OvsJhashBytes(const VOID *p_, SIZE_T n, UINT32 basis) -{ - const UINT32 *p = p_; - UINT32 a, b, c; - - a = b = c = 0xdeadbeef + (UINT32)n + basis; - - while (n >= 12) { - a += GetUnalignedU32(p); - b += GetUnalignedU32(p + 1); - c += GetUnalignedU32(p + 2); - JhashMix(&a, &b, &c); - n -= 12; - p += 3; - } - - if (n) { - UINT32 tmp[3]; - - tmp[0] = tmp[1] = tmp[2] = 0; - memcpy(tmp, p, n); - a += tmp[0]; - b += tmp[1]; - c += tmp[2]; - JhashFinal(&a, &b, &c); - } - - return c; -} diff --git a/datapath-windows/ovsext/OvsJhash.h b/datapath-windows/ovsext/OvsJhash.h deleted file mode 100644 index a12be8e27..000000000 --- a/datapath-windows/ovsext/OvsJhash.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2008, 2009, 2010, 2012, 2014 Nicira, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_JHASH_H_ -#define __OVS_JHASH_H_ 1 - -/* This is the public domain lookup3 hash by Bob Jenkins from - * http://burtleburtle.net/bob/c/lookup3.c, modified for style. - * - * Use the functions in hash.h instead if you can. These are here just for - * places where we've exposed a hash function "on the wire" and don't want it - * to change. */ - -uint32_t OvsJhashWords(const uint32_t *, size_t n_word, uint32_t basis); -uint32_t OvsJhashBytes(const void *, size_t n_bytes, uint32_t basis); - -#endif /* __OVS_JHASH_H */ diff --git a/datapath-windows/ovsext/OvsNetProto.h b/datapath-windows/ovsext/OvsNetProto.h deleted file mode 100644 index a21ab5cfd..000000000 --- a/datapath-windows/ovsext/OvsNetProto.h +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_NET_PROTO_H_ -#define __OVS_NET_PROTO_H_ 1 - -#include "precomp.h" -#include "OvsEth.h" - -#define ETH_ADDR_LENGTH 6 -/* - * There is a more inclusive definition of ethernet header (Eth_Header) in - * OvsEth.h that is used for packet parsing. For simple cases, , use the following definition. 
- */ -typedef struct EthHdr { - UINT8 Destination[ETH_ADDR_LENGTH]; - UINT8 Source[ETH_ADDR_LENGTH]; - UINT16 Type; -} EthHdr, *PEthHdr; - -#define IPV4 4 -#define IPV6 6 - -#define IP_HDR_MIN_LENGTH 20 -#define TCP_HDR_MIN_LENGTH 20 -#define TCP_CSUM_OFFSET 16 -#define UDP_CSUM_OFFSET 6 -#define ICMP_CSUM_OFFSET 2 -#define INET_CSUM_LENGTH (sizeof(UINT16)) - -#define IP4_UNITS_TO_BYTES(x) ((x) << 2) -#define IP4_BYTES_TO_UNITS(x) ((x) >> 2) - -// length unit for ip->ihl, tcp->doff -typedef UINT32 IP4UnitLength; - -#define IP4_LENGTH_UNIT (sizeof(IP4UnitLength)) -#define IP4_HDR_MIN_LENGTH_IN_UNITS (IP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) -#define TCP_HDR_MIN_LENGTH_IN_UNITS (TCP_HDR_MIN_LENGTH / IP4_LENGTH_UNIT) - -#define IP4_IHL_NO_OPTIONS IP4_HDR_MIN_LENGTH_IN_UNITS -#define IP4_HDR_LEN(iph) IP4_UNITS_TO_BYTES((iph)->ihl) - -// length unit for ip->frag_off -typedef UINT64 IP4FragUnitLength; - -#define IP4_FRAG_UNIT_LENGTH (sizeof(IP4FragUnitLength)) - -// length UINT for ipv6 header length. -typedef UINT64 IP6UnitLength; - -#define TCP_HDR_LEN(tcph) IP4_UNITS_TO_BYTES((tcph)->doff) -#define TCP_DATA_LENGTH(iph, tcph) (ntohs(iph->tot_len) - \ - IP4_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) - -#define TCP_DATA_OFFSET_NO_OPTIONS TCP_HDR_MIN_LENGTH_IN_UNITS -#define TCP_DATA_OFFSET_WITH_TIMESTAMP 8 - -/* - * This is the maximum value for the length field in the IP header. 
The meaning - * varies with IP protocols: - * IPv4: the total ip length (including ip header and extention) - * IPv6: the IP payload length (including IP extensions) - */ -#define IP_MAX_PACKET 0xFFFF - -#define IPPROTO_ICMP 1 -#define IPPROTO_IGMP 2 -#define IPPROTO_UDP 17 -#define IPPROTO_GRE 47 -#define IPPROTO_TCP 6 -#define IPPROTO_RSVD 0xff - -#define IPPROTO_HOPOPTS 0 /* Hop-by-hop option header */ -#define IPPROTO_IPV6 41 /* IPv6 in IPv6 */ -#define IPPROTO_ROUTING 43 /* Routing header */ -#define IPPROTO_FRAGMENT 44 /* Fragmentation/reassembly header */ -#define IPPROTO_GRE 47 /* General Routing Encapsulation */ -#define IPPROTO_ESP 50 /* Encap. Security Payload */ -#define IPPROTO_AH 51 /* Authentication header */ -#define IPPROTO_ICMPV6 58 /* ICMP for IPv6 */ -#define IPPROTO_NONE 59 /* No next header */ -#define IPPROTO_DSTOPTS 60 /* Destination options header */ -#define IPPROTO_ETHERIP 97 /* etherIp tunneled protocol */ - -/* ICMPv6 types. */ -#define ND_NEIGHBOR_SOLICIT 135 /* neighbor solicitation */ -#define ND_NEIGHBOR_ADVERT 136 /* neighbor advertisment */ - -/* IPv6 Neighbor discovery option header. */ -#define ND_OPT_SOURCE_LINKADDR 1 -#define ND_OPT_TARGET_LINKADDR 2 - -/* Collides with MS definition (opposite order) */ -#define IP6F_OFF_HOST_ORDER_MASK 0xfff8 - -#define ARPOP_REQUEST 1 /* ARP request. */ -#define ARPOP_REPLY 2 /* ARP reply. */ -#define RARPOP_REQUEST 3 /* RARP request. */ -#define RARPOP_REPLY 4 /* RARP reply. */ - - /* all ARP NBO's assume short ar_op */ -#define ARPOP_REQUEST_NBO 0x0100 /* NBO ARP request. */ -#define ARPOP_REPLY_NBO 0x0200 /* NBO ARP reply. */ -#define RARPOP_REQUEST_NBO 0x0300 /* NBO RARP request. */ -#define RARPOP_REPLY_NBO 0x0300 /* NBO RARP reply. 
*/ - -#define ICMP_ECHO 8 /* Echo Request */ -#define ICMP_ECHOREPLY 0 /* Echo Reply */ -#define ICMP_DEST_UNREACH 3 /* Destination Unreachable */ - -/* IGMP related constants */ -#define IGMP_UNKNOWN 0x00 /* For IGMP packets where we don't know the type */ - /* Eg: Fragmented packets without the header */ - -/* Constants from RFC 3376 */ -#define IGMP_QUERY 0x11 /* IGMP Host Membership Query. */ -#define IGMP_V1REPORT 0x12 /* IGMPv1 Host Membership Report. */ -#define IGMP_V2REPORT 0x16 /* IGMPv2 Host Membership Report. */ -#define IGMP_V3REPORT 0x22 /* IGMPv3 Host Membership Report. */ -#define IGMP_V2LEAVE 0x17 /* IGMPv2 Leave. */ - -/* Constants from RFC 2710 and RFC 3810 */ -#define MLD_QUERY 0x82 /* Multicast Listener Query. */ -#define MLD_V1REPORT 0x83 /* Multicast Listener V1 Report. */ -#define MLD_V2REPORT 0x8F /* Multicast Listener V2 Report. */ -#define MLD_DONE 0x84 /* Multicast Listener Done. */ - -/* IPv4 offset flags */ -#define IP_CE 0x8000 /* Flag: "Congestion" */ -#define IP_DF 0x4000 /* Flag: "Don't Fragment" */ -#define IP_MF 0x2000 /* Flag: "More Fragments" */ -#define IP_OFFSET 0x1FFF /* "Fragment Offset" part */ - -#define IP_OFFSET_NBO 0xFF1F /* "Fragment Offset" part, NBO */ -#define IP_DF_NBO 0x0040 /* NBO version of don't fragment */ -#define IP_MF_NBO 0x0020 /* NBO version of more fragments */ - -#define IPOPT_RTRALT 0x94 - -/* IP Explicit Congestion Notification bits (TOS field) */ -#define IP_ECN_NOT_ECT 0 -#define IP_ECN_ECT_1 1 -#define IP_ECN_ECT_0 2 -#define IP_ECN_CE 3 -#define IP_ECN_MASK 3 - -/* TCP options */ -#define TCP_OPT_NOP 1 /* Padding */ -#define TCP_OPT_EOL 0 /* End of options */ -#define TCP_OPT_MSS 2 /* Segment size negotiating */ -#define TCP_OPT_WINDOW 3 /* Window scaling */ -#define TCP_OPT_SACK_PERM 4 /* SACK Permitted */ -#define TCP_OPT_SACK 5 /* SACK Block */ -#define TCP_OPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */ -#define TCP_OPT_MD5SIG 19 /* MD5 Signature (RFC2385) */ - -#define TCP_OPT_LEN_MSS 4 
-#define TCP_OPT_LEN_WINDOW 3 -#define TCP_OPT_LEN_SACK_PERM 2 -#define TCP_OPT_LEN_TIMESTAMP 10 -#define TCP_OPT_LEN_MD5SIG 18 - -#define SOCKET_IPPROTO_HOPOPTS IPPROTO_HOPOPTS -#define SOCKET_IPPROTO_ROUTING IPPROTO_ROUTING -#define SOCKET_IPPROTO_FRAGMENT IPPROTO_FRAGMENT -#define SOCKET_IPPROTO_AH IPPROTO_AH -#define SOCKET_IPPROTO_ICMPV6 IPPROTO_ICMPV6 -#define SOCKET_IPPROTO_NONE IPPROTO_NONE -#define SOCKET_IPPROTO_DSTOPTS IPPROTO_DSTOPTS -#define SOCKET_IPPROTO_EON 80 -#define SOCKET_IPPROTO_ETHERIP IPPROTO_ETHERIP -#define SOCKET_IPPROTO_ENCAP 98 -#define SOCKET_IPPROTO_PIM 103 -#define SOCKET_IPPROTO_IPCOMP 108 -#define SOCKET_IPPROTO_CARP 112 -#define SOCKET_IPPROTO_PFSYNC 240 -#define SOCKET_IPPROTO_RAW IPPROTO_RSVD - -typedef union _OVS_PACKET_HDR_INFO { - struct { - UINT16 l3Offset; - UINT16 l4Offset; - union { - UINT16 l7Offset; - UINT16 l4PayLoad; - }; - UINT16 isIPv4:1; - UINT16 isIPv6:1; - UINT16 isTcp:1; - UINT16 isUdp:1; - UINT16 tcpCsumNeeded:1; - UINT16 udpCsumNeeded:1; - UINT16 udpCsumZero:1; - UINT16 pad:9; - } ; - UINT64 value; -} OVS_PACKET_HDR_INFO, *POVS_PACKET_HDR_INFO; - -typedef struct IPHdr { - UINT8 ihl:4, - version:4; - UINT8 tos; - UINT16 tot_len; - UINT16 id; - UINT16 frag_off; - UINT8 ttl; - UINT8 protocol; - UINT16 check; - UINT32 saddr; - UINT32 daddr; -} IPHdr; - - - /* - * IPv6 fixed header - * - * BEWARE, it is incorrect. The first 4 bits of flow_lbl - * are glued to priority now, forming "class". 
- */ - -typedef struct IPv6Hdr { - UINT8 priority:4, - version:4; - UINT8 flow_lbl[3]; - - UINT16 payload_len; - UINT8 nexthdr; - UINT8 hop_limit; - - struct in6_addr saddr; - struct in6_addr daddr; -} IPv6Hdr; - -// Generic IPv6 extension header -typedef struct IPv6ExtHdr { - UINT8 nextHeader; // type of the next header - UINT8 hdrExtLen; // length of header extensions (beyond 8 bytes) - UINT16 optPad1; - UINT32 optPad2; -} IPv6ExtHdr; - -typedef struct IPv6FragHdr { - UINT8 nextHeader; - UINT8 reserved; - UINT16 offlg; - UINT32 ident; -} IPv6FragHdr; - -typedef struct IPv6NdOptHdr { - UINT8 type; - UINT8 len; -} IPv6NdOptHdr; - -typedef struct ICMPHdr { - UINT8 type; - UINT8 code; - UINT16 checksum; -} ICMPHdr; - -typedef struct ICMPEcho { - UINT16 id; - UINT16 seq; -} ICMPEcho; - -typedef struct UDPHdr { - UINT16 source; - UINT16 dest; - UINT16 len; - UINT16 check; -} UDPHdr; - -typedef struct TCPHdr { - UINT16 source; - UINT16 dest; - UINT32 seq; - UINT32 ack_seq; - UINT16 res1:4, - doff:4, - fin:1, - syn:1, - rst:1, - psh:1, - ack:1, - urg:1, - ece:1, - cwr:1; - UINT16 window; - UINT16 check; - UINT16 urg_ptr; -} TCPHdr; - -typedef struct PseudoHdr { - UINT32 sourceIPAddr; - UINT32 destIPAddr; - UINT8 zero; - UINT8 protocol; - UINT16 length; -} PseudoHdr; - -typedef struct PseudoHdrIPv6 { - UINT8 sourceIPAddr[16]; - UINT8 destIPAddr[16]; - UINT8 zero; - UINT8 protocol; - UINT16 length; -} PseudoHdrIPv6; - - -struct ArpHdr { - UINT16 ar_hrd; /* Format of hardware address. */ - UINT16 ar_pro; /* Format of protocol address. */ - UINT8 ar_hln; /* Length of hardware address. */ - UINT8 ar_pln; /* Length of protocol address. */ - UINT16 ar_op; /* ARP opcode (command). 
*/ -}; - -typedef struct EtherArp { - struct ArpHdr ea_hdr; /* fixed-size header */ - Eth_Address arp_sha; /* sender hardware address */ - UINT8 arp_spa[4]; /* sender protocol address */ - Eth_Address arp_tha; /* target hardware address */ - UINT8 arp_tpa[4]; /* target protocol address */ -} EtherArp; - -typedef struct IGMPHdr { - UINT8 type; - UINT8 maxResponseTime; - UINT16 csum; - UINT8 groupAddr[4]; -} IGMPHdr; - -typedef struct IGMPV3Trailer { - UINT8 qrv:3, - s:1, - resv:4; - UINT8 qqic; - UINT16 numSources; -} IGMPV3Trailer; - -typedef struct IPOpt { - UINT8 type; - UINT8 length; - UINT16 value; -} IPOpt; - -/* - * IP protocol types - */ -#define SOCKET_IPPROTO_IP 0 -#define SOCKET_IPPROTO_ICMP 1 -#define SOCKET_IPPROTO_TCP 6 -#define SOCKET_IPPROTO_UDP 17 -#define SOCKET_IPPROTO_GRE 47 - -#endif /* __OVS_NET_PROTO_H_ */ diff --git a/datapath-windows/ovsext/OvsOid.c b/datapath-windows/ovsext/OvsOid.c deleted file mode 100644 index ad66be4c0..000000000 --- a/datapath-windows/ovsext/OvsOid.c +++ /dev/null @@ -1,854 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsNetProto.h" -#include "OvsUser.h" -#include "OvsFlow.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsOid.h" - -/* Due to an imported header file */ -#pragma warning( disable:4505 ) - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_DISPATCH -#include "OvsDebug.h" - -typedef struct _OVS_OID_CONTEXT { - NDIS_EVENT oidComplete; - NDIS_STATUS status; -} OVS_OID_CONTEXT, *POVS_OID_CONTEXT; - - -VOID -OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, - PNDIS_OID_REQUEST oidRequest, - NDIS_STATUS status); -static VOID -OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status); -static VOID -OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status); -static VOID -OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status); - -static NDIS_STATUS -OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest); -static NDIS_STATUS -OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest); -static NDIS_STATUS -OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest); - -__inline BOOLEAN -OvsCheckOidHeaderFunc(PNDIS_OBJECT_HEADER header, - LONG propRev, - LONG propSize) -{ - return header->Type != NDIS_OBJECT_TYPE_DEFAULT || - header->Revision < propRev || - header->Size < propSize; -} - -#define OvsCheckOidHeader(_hdr, _rev) \ - OvsCheckOidHeaderFunc(_hdr, _rev, ##NDIS_SIZEOF_##_rev) - -static __inline VOID -OvsOidSetOrigRequest(PNDIS_OID_REQUEST clonedRequest, - PNDIS_OID_REQUEST origRequest) -{ - *(PVOID*)(&clonedRequest->SourceReserved[0]) = origRequest; -} - -static __inline 
PNDIS_OID_REQUEST -OvsOidGetOrigRequest(PNDIS_OID_REQUEST clonedRequest) -{ - return *((PVOID*)(&clonedRequest->SourceReserved[0])); -} - -static __inline VOID -OvsOidSetContext(PNDIS_OID_REQUEST clonedRequest, - POVS_OID_CONTEXT origRequest) -{ - *(PVOID*)(&clonedRequest->SourceReserved[8]) = origRequest; -} - -static __inline POVS_OID_CONTEXT -OvsOidGetContext(PNDIS_OID_REQUEST clonedRequest) -{ - return *((PVOID*)(&clonedRequest->SourceReserved[8])); -} - -static NDIS_STATUS -OvsProcessSetOidPortProp(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); - PNDIS_SWITCH_PORT_PROPERTY_PARAMETERS portPropParam = - setInfo->InformationBuffer; - BOOLEAN checkFailed = TRUE; - - UNREFERENCED_PARAMETER(switchObject); - - if (setInfo->Oid == OID_SWITCH_PORT_PROPERTY_DELETE) { - checkFailed = OvsCheckOidHeader( - (PNDIS_OBJECT_HEADER)portPropParam, - NDIS_SWITCH_PORT_PROPERTY_DELETE_PARAMETERS_REVISION_1); - } else { - /* it must be a add or update request */ - checkFailed = OvsCheckOidHeader( - (PNDIS_OBJECT_HEADER)portPropParam, - NDIS_SWITCH_PORT_PROPERTY_PARAMETERS_REVISION_1); - } - - if (checkFailed) { - status = NDIS_STATUS_INVALID_PARAMETER; - goto done; - } - - if (portPropParam->PropertyType == NdisSwitchPortPropertyTypeVlan) { - status = NDIS_STATUS_NOT_SUPPORTED; - goto done; - } - -done: - return status; -} - -static NDIS_STATUS -OvsProcessSetOidPort(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); - PNDIS_SWITCH_PORT_PARAMETERS portParam = setInfo->InformationBuffer; - - if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)portParam, - NDIS_SWITCH_PORT_PARAMETERS_REVISION_1)) { - status = NDIS_STATUS_NOT_SUPPORTED; - goto done; - } - - switch(setInfo->Oid) { - case OID_SWITCH_PORT_CREATE: - status = OvsCreatePort(switchObject, 
portParam); - break; - case OID_SWITCH_PORT_TEARDOWN: - OvsTeardownPort(switchObject, portParam); - break; - case OID_SWITCH_PORT_DELETE: - OvsDeletePort(switchObject, portParam); - break; - default: - break; - } - -done: - return status; -} - -static NDIS_STATUS -OvsProcessSetOidNic(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); - PNDIS_SWITCH_NIC_PARAMETERS nicParam = setInfo->InformationBuffer; - - if (OvsCheckOidHeader((PNDIS_OBJECT_HEADER)nicParam, - NDIS_SWITCH_NIC_PARAMETERS_REVISION_1)) { - status = NDIS_STATUS_NOT_SUPPORTED; - goto done; - } - - switch(setInfo->Oid) { - case OID_SWITCH_NIC_CREATE: - status = OvsCreateNic(switchObject, nicParam); - break; - case OID_SWITCH_NIC_CONNECT: - OvsConnectNic(switchObject, nicParam); - break; - case OID_SWITCH_NIC_UPDATED: - OvsUpdateNic(switchObject, nicParam); - break; - case OID_SWITCH_NIC_DISCONNECT: - OvsDisconnectNic(switchObject, nicParam); - break; - case OID_SWITCH_NIC_DELETE: - OvsDeleteNic(switchObject, nicParam); - break; - default: - break; - } - -done: - return status; - -} - -static NDIS_STATUS -OvsProcessSetOid(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PBOOLEAN complete) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); - - *complete = FALSE; - - OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", - oidRequest, setInfo->Oid); - - /* Verify the basic Oid paramters first */ - if (setInfo->InformationBufferLength && - (setInfo->InformationBufferLength < sizeof(NDIS_OBJECT_HEADER))) { - status = NDIS_STATUS_INVALID_OID; - OVS_LOG_INFO("Invalid input %d", setInfo->InformationBufferLength); - goto error; - } - - /* Documentation does not specify what should be done - * if informationBuffer is not present. 
Although it mentions the - * structure type informationBUffer points to for each oid request, - * but it does not explicitly mention that it is a MUST. - * hence we are following this scenario same way as what sample code - * mentions. */ - if (!(setInfo->InformationBufferLength)) { - /* We cannot do anything about this oid request, - * lets just pass it down. */ - OVS_LOG_INFO("Buffer Length Zero"); - goto done; - } - - switch(setInfo->Oid) { - case OID_SWITCH_PORT_PROPERTY_ADD: - case OID_SWITCH_PORT_PROPERTY_UPDATE: - case OID_SWITCH_PORT_PROPERTY_DELETE: - status = OvsProcessSetOidPortProp(switchObject, oidRequest); - break; - - case OID_SWITCH_PORT_CREATE: - case OID_SWITCH_PORT_UPDATED: - case OID_SWITCH_PORT_TEARDOWN: - case OID_SWITCH_PORT_DELETE: - status = OvsProcessSetOidPort(switchObject, oidRequest); - break; - - case OID_SWITCH_NIC_CREATE: - case OID_SWITCH_NIC_CONNECT: - case OID_SWITCH_NIC_UPDATED: - case OID_SWITCH_NIC_DISCONNECT: - case OID_SWITCH_NIC_DELETE: - status = OvsProcessSetOidNic(switchObject, oidRequest); - break; - - default: - /* Non handled OID request */ - break; - } - - if (status != NDIS_STATUS_SUCCESS) { - goto error; - } - - goto done; - -error: - *complete = TRUE; -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} - -static NDIS_STATUS -OvsProcessMethodOid(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PBOOLEAN complete, - PULONG bytesNeededParam) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); - struct _SET *nicReqSetInfo = NULL; - PNDIS_OBJECT_HEADER header = NULL; - PNDIS_OID_REQUEST nicOidRequest = NULL; - - UNREFERENCED_PARAMETER(switchObject); - - OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", - oidRequest, methodInfo->Oid); - - *complete = FALSE; - *bytesNeededParam = 0; - header = methodInfo->InformationBuffer; - - switch(methodInfo->Oid) { - /* We deal with only OID_SWITCH_NIC_REQUEST as of now */ - case 
OID_SWITCH_NIC_REQUEST: - if (OvsCheckOidHeader(header, - NDIS_SWITCH_NIC_OID_REQUEST_REVISION_1)) { - OVS_LOG_INFO("Check Header failed"); - status = NDIS_STATUS_NOT_SUPPORTED; - *complete = TRUE; - goto done; - } - - nicOidRequest = (((PNDIS_SWITCH_NIC_OID_REQUEST)header)->OidRequest); - nicReqSetInfo = &(nicOidRequest->DATA.SET_INFORMATION); - - /* Fail the SR-IOV VF case */ - if ((nicOidRequest->RequestType == NdisRequestSetInformation) && - (nicReqSetInfo->Oid == OID_NIC_SWITCH_ALLOCATE_VF)) { - OVS_LOG_INFO("We do not support Oid: " - "OID_NIC_SWITCH_ALLOCATE_VF"); - status = NDIS_STATUS_FAILURE; - *complete = TRUE; - } - break; - default: - /* No op */ - break; - } - -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterOidRequest function. - * -------------------------------------------------------------------------- - */ - -NDIS_STATUS -OvsExtOidRequest(NDIS_HANDLE filterModuleContext, - PNDIS_OID_REQUEST oidRequest) -{ - POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNDIS_OID_REQUEST clonedOidRequest = NULL; - struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); - BOOLEAN completeOid = FALSE; - ULONG bytesNeeded = 0; - - OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", - oidRequest, oidRequest->RequestType); - status = NdisAllocateCloneOidRequest(switchObject->NdisFilterHandle, - oidRequest, OVS_MEMORY_TAG, - &clonedOidRequest); - if (status != NDIS_STATUS_SUCCESS) { - goto done; - } - - NdisInterlockedIncrement(&(switchObject->pendingOidCount)); - - /* set the original oid request in cloned one. 
*/ - OvsOidSetOrigRequest(clonedOidRequest, oidRequest); - OvsOidSetContext(clonedOidRequest, NULL); - - switch(clonedOidRequest->RequestType) { - case NdisRequestSetInformation: - status = OvsProcessSetOid(switchObject, clonedOidRequest, - &completeOid); - break; - case NdisRequestMethod: - status = OvsProcessMethodOid(switchObject, clonedOidRequest, - &completeOid, &bytesNeeded); - break; - default: - /* We do not handle other request types as of now. - * We are just a passthrough for those. */ - break; - } - - if (completeOid == TRUE) { - /* dont leave any reference back to original request, - * even if we are freeing it up. */ - OVS_LOG_INFO("Complete True oidRequest %p.", oidRequest); - OvsOidSetOrigRequest(clonedOidRequest, NULL); - NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, - clonedOidRequest); - methodInfo->BytesNeeded = bytesNeeded; - NdisInterlockedDecrement(&switchObject->pendingOidCount); - goto done; - } - - /* pass the request down */ - status = NdisFOidRequest(switchObject->NdisFilterHandle, clonedOidRequest); - if (status != NDIS_STATUS_PENDING) { - OvsExtOidRequestComplete(switchObject, clonedOidRequest, status); - /* sample code says so */ - status = NDIS_STATUS_PENDING; - } - -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterOidRequestComplete function. 
- * -------------------------------------------------------------------------- - */ -VOID -OvsExtOidRequestComplete(NDIS_HANDLE filterModuleContext, - PNDIS_OID_REQUEST oidRequest, - NDIS_STATUS status) -{ - POVS_SWITCH_CONTEXT switchObject = (POVS_SWITCH_CONTEXT)filterModuleContext; - PNDIS_OID_REQUEST origReq = OvsOidGetOrigRequest(oidRequest); - POVS_OID_CONTEXT oidContext = OvsOidGetContext(oidRequest); - - /* Only one of the two should be set */ - ASSERT(origReq != NULL || oidContext != NULL); - ASSERT(oidContext != NULL || origReq != NULL); - - OVS_LOG_TRACE("Enter: oidRequest %p, reqType: %d", - oidRequest, oidRequest->RequestType); - - if (origReq == NULL) { - NdisInterlockedDecrement(&(switchObject->pendingOidCount)); - oidContext->status = status; - NdisSetEvent(&oidContext->oidComplete); - OVS_LOG_INFO("Internally generated request"); - goto done; - } - - switch(oidRequest->RequestType) { - case NdisRequestMethod: - OvsOidRequestCompleteMethod(switchObject, oidRequest, - origReq, status); - break; - - case NdisRequestSetInformation: - OvsOidRequestCompleteSetInfo(switchObject, oidRequest, - origReq, status); - break; - - case NdisRequestQueryInformation: - case NdisRequestQueryStatistics: - default: - OvsOidRequestCompleteQuery(switchObject, oidRequest, - origReq, status); - break; - } - - OvsOidSetOrigRequest(oidRequest, NULL); - - NdisFreeCloneOidRequest(switchObject->NdisFilterHandle, oidRequest); - NdisFOidRequestComplete(switchObject->NdisFilterHandle, origReq, status); - NdisInterlockedDecrement(&(switchObject->pendingOidCount)); - -done: - OVS_LOG_TRACE("Exit"); -} - -static VOID -OvsOidRequestCompleteMethod(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status) -{ - UNREFERENCED_PARAMETER(status); - UNREFERENCED_PARAMETER(switchObject); - - struct _METHOD *methodInfo = &(oidRequest->DATA.METHOD_INFORMATION); - struct _METHOD *origMethodInfo = &(origOidRequest->DATA. 
- METHOD_INFORMATION); - - OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", - oidRequest, methodInfo->Oid); - - origMethodInfo->OutputBufferLength = methodInfo->OutputBufferLength; - origMethodInfo->BytesRead = methodInfo->BytesRead; - origMethodInfo->BytesNeeded = methodInfo->BytesNeeded; - origMethodInfo->BytesWritten = methodInfo->BytesWritten; - - OVS_LOG_TRACE("Exit"); -} - -static VOID -OvsOidRequestCompleteSetInfo(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status) -{ - struct _SET *setInfo = &(oidRequest->DATA.SET_INFORMATION); - struct _SET *origSetInfo = &(origOidRequest->DATA.SET_INFORMATION); - PNDIS_OBJECT_HEADER origHeader = origSetInfo->InformationBuffer; - - OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", - oidRequest, setInfo->Oid); - - origSetInfo->BytesRead = setInfo->BytesRead; - origSetInfo->BytesNeeded = setInfo->BytesNeeded; - - if (status != NDIS_STATUS_SUCCESS) { - - switch(setInfo->Oid) { - case OID_SWITCH_PORT_CREATE: - OvsDeletePort(switchObject, - (PNDIS_SWITCH_PORT_PARAMETERS)origHeader); - break; - - case OID_SWITCH_NIC_CREATE: - OvsDeleteNic(switchObject, - (PNDIS_SWITCH_NIC_PARAMETERS)origHeader); - break; - - default: - break; - } - } - - OVS_LOG_TRACE("Exit"); -} - -static VOID -OvsOidRequestCompleteQuery(POVS_SWITCH_CONTEXT switchObject, - PNDIS_OID_REQUEST oidRequest, - PNDIS_OID_REQUEST origOidRequest, - NDIS_STATUS status) -{ - UNREFERENCED_PARAMETER(switchObject); - UNREFERENCED_PARAMETER(status); - - struct _QUERY *queryInfo = &((oidRequest->DATA).QUERY_INFORMATION); - struct _QUERY *origQueryInfo = &((origOidRequest->DATA).QUERY_INFORMATION); - - OVS_LOG_TRACE("Enter: oidRequest %p, Oid: %lu", - oidRequest, queryInfo->Oid); - - origQueryInfo->BytesWritten = queryInfo->BytesWritten; - origQueryInfo->BytesNeeded = queryInfo->BytesNeeded; - - OVS_LOG_TRACE("Exit"); -} - -/* - * -------------------------------------------------------------------------- - * 
Implements filter driver's FilterCancelOidRequest function. - * -------------------------------------------------------------------------- - */ -VOID -OvsExtCancelOidRequest(NDIS_HANDLE filterModuleContext, - PVOID requestId) -{ - OVS_LOG_TRACE("Enter: requestId: %p", requestId); - - UNREFERENCED_PARAMETER(filterModuleContext); - UNREFERENCED_PARAMETER(requestId); -} - - -/* - * -------------------------------------------------------------------------- - * Utility function to issue the specified OID to the NDIS stack. The OID is - * directed towards the miniport edge of the extensible switch. - * An OID that gets issued may not complete immediately, and in such cases, the - * function waits for the OID to complete. Thus, this function must not be - * called at the PASSIVE_LEVEL. - * -------------------------------------------------------------------------- - */ -static NDIS_STATUS -OvsIssueOidRequest(POVS_SWITCH_CONTEXT switchContext, - NDIS_REQUEST_TYPE oidType, - UINT32 oidRequestEnum, - PVOID oidInputBuffer, - UINT32 inputSize, - PVOID oidOutputBuffer, - UINT32 outputSize, - UINT32 *outputSizeNeeded) -{ - NDIS_STATUS status; - PNDIS_OID_REQUEST oidRequest; - POVS_OID_CONTEXT oidContext; - ULONG OvsExtOidRequestId = 'ISVO'; - - DBG_UNREFERENCED_PARAMETER(inputSize); - DBG_UNREFERENCED_PARAMETER(oidInputBuffer); - - OVS_LOG_TRACE("Enter: switchContext: %p, oidType: %d", - switchContext, oidType); - - ASSERT(oidInputBuffer == NULL || inputSize != 0); - ASSERT(oidOutputBuffer == NULL || outputSize != 0); - ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL); - - oidRequest = OvsAllocateMemory(sizeof *oidRequest); - if (!oidRequest) { - status = NDIS_STATUS_RESOURCES; - goto done; - } - - oidContext = OvsAllocateMemory(sizeof *oidContext); - if (!oidContext) { - OvsFreeMemory(oidRequest); - status = NDIS_STATUS_RESOURCES; - goto done; - } - - RtlZeroMemory(oidRequest, sizeof *oidRequest); - RtlZeroMemory(oidContext, sizeof *oidContext); - - oidRequest->Header.Type = 
NDIS_OBJECT_TYPE_OID_REQUEST; - oidRequest->Header.Revision = NDIS_OID_REQUEST_REVISION_1; - oidRequest->Header.Size = NDIS_SIZEOF_OID_REQUEST_REVISION_1; - - oidRequest->RequestType = oidType; - oidRequest->PortNumber = 0; - oidRequest->Timeout = 0; - oidRequest->RequestId = (PVOID)OvsExtOidRequestId; - - switch(oidType) { - case NdisRequestQueryInformation: - oidRequest->DATA.QUERY_INFORMATION.Oid = oidRequestEnum; - oidRequest->DATA.QUERY_INFORMATION.InformationBuffer = oidOutputBuffer; - oidRequest->DATA.QUERY_INFORMATION.InformationBufferLength = outputSize; - break; - default: - ASSERT(FALSE); - status = NDIS_STATUS_INVALID_PARAMETER; - break; - } - - /* - * We make use of the SourceReserved field in the OID request to store - * pointers to the original OID (if any), and also context for completion - * (if any). - */ - oidContext->status = NDIS_STATUS_SUCCESS; - NdisInitializeEvent(&oidContext->oidComplete); - - OvsOidSetOrigRequest(oidRequest, NULL); - OvsOidSetContext(oidRequest, oidContext); - - NdisInterlockedIncrement(&(switchContext->pendingOidCount)); - status = NdisFOidRequest(switchContext->NdisFilterHandle, oidRequest); - if (status == NDIS_STATUS_PENDING) { - NdisWaitEvent(&oidContext->oidComplete, 0); - } else { - NdisInterlockedDecrement(&(switchContext->pendingOidCount)); - } - - if (status == NDIS_STATUS_INVALID_LENGTH || - oidContext->status == NDIS_STATUS_INVALID_LENGTH) { - switch(oidType) { - case NdisRequestQueryInformation: - *outputSizeNeeded = oidRequest->DATA.QUERY_INFORMATION.BytesNeeded; - } - } - - status = oidContext->status; - ASSERT(status != NDIS_STATUS_PENDING); - - OvsFreeMemory(oidRequest); - OvsFreeMemory(oidContext); - -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * Utility function to query if the extensible switch has completed activation - * successfully. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, - BOOLEAN *switchActive) -{ - NDIS_STATUS status; - PNDIS_SWITCH_PARAMETERS switchParams; - UINT32 outputSizeNeeded; - - OVS_LOG_TRACE("Enter: switchContext: %p, switchActive: %p", - switchContext, switchActive); - - switchParams = OvsAllocateMemory(sizeof *switchParams); - if (!switchParams) { - status = NDIS_STATUS_RESOURCES; - goto done; - } - - /* - * Even though 'switchParms' is supposed to be populated by the OID, it - * needs to be initialized nevertheless. Otherwise, OID returns - * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. - */ - RtlZeroMemory(switchParams, sizeof *switchParams); - switchParams->Header.Revision = NDIS_SWITCH_PARAMETERS_REVISION_1; - switchParams->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; - switchParams->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1; - - status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, - OID_SWITCH_PARAMETERS, NULL, 0, - (PVOID)switchParams, sizeof *switchParams, - &outputSizeNeeded); - - ASSERT(status != NDIS_STATUS_INVALID_LENGTH); - ASSERT(status != NDIS_STATUS_PENDING); - if (status == NDIS_STATUS_SUCCESS) { - ASSERT(switchParams->Header.Type == NDIS_OBJECT_TYPE_DEFAULT); - ASSERT(switchParams->Header.Revision == NDIS_SWITCH_PARAMETERS_REVISION_1); - ASSERT(switchParams->Header.Size == - NDIS_SIZEOF_NDIS_SWITCH_PARAMETERS_REVISION_1); - *switchActive = switchParams->IsActive; - } - - OvsFreeMemory(switchParams); - -done: - OVS_LOG_TRACE("Exit: status %8x, switchActive: %d.", - status, *switchActive); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * Utility function to get the array of ports on the extensible switch. Upon - * success, the caller needs to free the returned array. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_ARRAY *portArrayOut) -{ - PNDIS_SWITCH_PORT_ARRAY portArray; - UINT32 arraySize = sizeof *portArray; - NDIS_STATUS status = NDIS_STATUS_FAILURE; - - OVS_LOG_TRACE("Enter: switchContext: %p, portArray: %p", - switchContext, portArrayOut); - do { - UINT32 reqdArraySize; - - portArray = OvsAllocateMemory(arraySize); - if (!portArray) { - status = NDIS_STATUS_RESOURCES; - goto done; - } - - /* - * Even though 'portArray' is supposed to be populated by the OID, it - * needs to be initialized nevertheless. Otherwise, OID returns - * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. - */ - RtlZeroMemory(portArray, sizeof *portArray); - portArray->Header.Revision = NDIS_SWITCH_PORT_ARRAY_REVISION_1; - portArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; - portArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_PORT_ARRAY_REVISION_1; - - status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, - OID_SWITCH_PORT_ARRAY, NULL, 0, - (PVOID)portArray, arraySize, - &reqdArraySize); - if (status == NDIS_STATUS_SUCCESS) { - *portArrayOut = portArray; - break; - } - - OvsFreeMemory(portArray); - arraySize = reqdArraySize; - if (status != NDIS_STATUS_INVALID_LENGTH) { - break; - } - } while(status == NDIS_STATUS_INVALID_LENGTH); - -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * Utility function to get the array of nics on the extensible switch. Upon - * success, the caller needs to free the returned array. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_ARRAY *nicArrayOut) -{ - PNDIS_SWITCH_NIC_ARRAY nicArray; - UINT32 arraySize = sizeof *nicArray; - NDIS_STATUS status = NDIS_STATUS_FAILURE; - - OVS_LOG_TRACE("Enter: switchContext: %p, nicArray: %p", - switchContext, nicArrayOut); - - do { - UINT32 reqdArraySize; - - nicArray = OvsAllocateMemory(arraySize); - if (!nicArray) { - status = NDIS_STATUS_RESOURCES; - goto done; - } - - /* - * Even though 'nicArray' is supposed to be populated by the OID, it - * needs to be initialized nevertheless. Otherwise, OID returns - * NDIS_STATUS_INVALID_PARAMETER. This is not clear in the documentation. - */ - RtlZeroMemory(nicArray, sizeof *nicArray); - nicArray->Header.Revision = NDIS_SWITCH_NIC_ARRAY_REVISION_1; - nicArray->Header.Type = NDIS_OBJECT_TYPE_DEFAULT; - nicArray->Header.Size = NDIS_SIZEOF_NDIS_SWITCH_NIC_ARRAY_REVISION_1; - - status = OvsIssueOidRequest(switchContext, NdisRequestQueryInformation, - OID_SWITCH_NIC_ARRAY, NULL, 0, - (PVOID)nicArray, arraySize, - &reqdArraySize); - if (status == NDIS_STATUS_SUCCESS) { - *nicArrayOut = nicArray; - break; - } - - OvsFreeMemory(nicArray); - arraySize = reqdArraySize; - if (status != NDIS_STATUS_INVALID_LENGTH) { - break; - } - } while(status == NDIS_STATUS_INVALID_LENGTH); - -done: - OVS_LOG_TRACE("Exit: status %8x.", status); - return status; -} diff --git a/datapath-windows/ovsext/OvsOid.h b/datapath-windows/ovsext/OvsOid.h deleted file mode 100644 index 40a5ec69a..000000000 --- a/datapath-windows/ovsext/OvsOid.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_OID_H_ -#define __OVS_OID_H_ 1 - -NDIS_STATUS OvsQuerySwitchActivationComplete(POVS_SWITCH_CONTEXT switchContext, - BOOLEAN *switchActive); -NDIS_STATUS OvsGetPortsOnSwitch(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_ARRAY *portArrayOut); -NDIS_STATUS OvsGetNicsOnSwitch(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_ARRAY *nicArrayOut); -#endif /* __OVS_OID_H_ */ diff --git a/datapath-windows/ovsext/OvsPacketIO.c b/datapath-windows/ovsext/OvsPacketIO.c deleted file mode 100644 index 6017f3081..000000000 --- a/datapath-windows/ovsext/OvsPacketIO.c +++ /dev/null @@ -1,440 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains the implementation of the datapath/forwarding - * functionality of the OVS. 
- */ - -#include "precomp.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsNetProto.h" -#include "OvsUser.h" -#include "OvsPacketIO.h" -#include "OvsFlow.h" -#include "OvsEvent.h" -#include "OvsUser.h" - -/* Due to an imported header file */ -#pragma warning( disable:4505 ) - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_DISPATCH -#include "OvsDebug.h" - -extern NDIS_STRING ovsExtGuidUC; -extern NDIS_STRING ovsExtFriendlyNameUC; - -static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); -static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); - -__inline VOID -OvsInitCompletionList(OvsCompletionList *completionList, - POVS_SWITCH_CONTEXT switchContext, - ULONG sendCompleteFlags) -{ - ASSERT(completionList); - completionList->dropNbl = NULL; - completionList->dropNblNext = &completionList->dropNbl; - completionList->switchContext = switchContext; - completionList->sendCompleteFlags = sendCompleteFlags; -} - -/* Utility function used to complete an NBL. */ -__inline VOID -OvsAddPktCompletionList(OvsCompletionList *completionList, - BOOLEAN incoming, - NDIS_SWITCH_PORT_ID sourcePort, - PNET_BUFFER_LIST netBufferList, - UINT32 netBufferListCount, - PNDIS_STRING filterReason) -{ - POVS_BUFFER_CONTEXT ctx; - - /* XXX: We handle one NBL at a time. */ - ASSERT(netBufferList->Next == NULL); - - /* Make sure it has a context. */ - ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferList); - ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); - - completionList->switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( - completionList->switchContext->NdisSwitchContext, &ovsExtGuidUC, - &ovsExtFriendlyNameUC, sourcePort, - incoming ? 
NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING : 0, - netBufferListCount, netBufferList, filterReason); - - *completionList->dropNblNext = netBufferList; - completionList->dropNblNext = &netBufferList->Next; - ASSERT(completionList->dropNbl); -} - -static __inline VOID -OvsReportNBLIngressError(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST nblList, - PNDIS_STRING filterReason, - NDIS_STATUS error) -{ - PNET_BUFFER_LIST nbl = nblList; - while (nbl) { - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); - - nbl->Status = error; - - /* This can be optimized by batching NBL's from the same - * SourcePortId. */ - switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( - switchContext->NdisSwitchContext, &ovsExtGuidUC, - &ovsExtFriendlyNameUC, fwdDetail->SourcePortId, - NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING, - 1 /*Nbl count.*/, nbl, filterReason); - - nbl = NET_BUFFER_LIST_NEXT_NBL(nbl); - } -} - -static __inline ULONG -OvsGetSendCompleteFlags(ULONG sendFlags) -{ - BOOLEAN dispatch, sameSource; - ULONG sendCompleteFlags; - - dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags); - sendCompleteFlags = (dispatch ? - NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL : 0); - sameSource = NDIS_TEST_SEND_FLAG(sendFlags, - NDIS_SEND_FLAGS_SWITCH_SINGLE_SOURCE); - sendCompleteFlags |= (sameSource ? - NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE : 0); - - return sendCompleteFlags; -} - -VOID -OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, - ULONG sendFlags) -{ - if (switchContext->dataFlowState == OvsSwitchPaused) { - /* If a filter module is in the Paused state, the filter driver must not - * originate any send requests for that filter module. If NDIS calls - * FilterSendNetBufferLists, the driver must not call - * NdisFSendNetBufferLists to pass on the data until the driver is - * restarted. 
The driver should call NdisFSendNetBufferListsComplete - * immediately to complete the send operation. It should set the - * complete status in each NET_BUFFER_LIST structure to - * NDIS_STATUS_PAUSED. - * - * http://msdn.microsoft.com/en-us/library/windows/hardware/ - * ff549966(v=vs.85).aspx */ - NDIS_STRING filterReason; - ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); - - RtlInitUnicodeString(&filterReason, - L"Switch state PAUSED, drop before FSendNBL."); - OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, - NDIS_STATUS_PAUSED); - OvsCompleteNBLIngress(switchContext, netBufferLists, - sendCompleteFlags); - return; - } - - ASSERT(switchContext->dataFlowState == OvsSwitchRunning); - - NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, - NDIS_DEFAULT_PORT_NUMBER, sendFlags); -} - -static __inline VOID -OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST nblList, - ULONG sendCompleteFlags, - PNDIS_STRING filterReason, - NDIS_STATUS error) -{ - ASSERT(error); - OvsReportNBLIngressError(switchContext, nblList, filterReason, error); - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, - sendCompleteFlags); -} - -static VOID -OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, - ULONG SendFlags) -{ - NDIS_SWITCH_PORT_ID sourcePort = 0; - NDIS_SWITCH_NIC_INDEX sourceIndex = 0; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; - ULONG sendCompleteFlags; - UCHAR dispatch; - LOCK_STATE_EX lockState, dpLockState; - NDIS_STATUS status; - NDIS_STRING filterReason; - LIST_ENTRY missedPackets; - UINT32 num = 0; - OvsCompletionList completionList; - - dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)? 
- NDIS_RWL_AT_DISPATCH_LEVEL : 0; - sendCompleteFlags = OvsGetSendCompleteFlags(SendFlags); - SendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; - - InitializeListHead(&missedPackets); - OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags); - - for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { - POVS_VPORT_ENTRY vport; - UINT32 portNo; - OVS_DATAPATH *datapath = &switchContext->datapath; - OVS_PACKET_HDR_INFO layers; - OvsFlowKey key; - UINT64 hash; - PNET_BUFFER curNb; - - nextNbl = curNbl->Next; - curNbl->Next = NULL; - - /* Ethernet Header is a guaranteed safe access. */ - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - if (curNb->Next != NULL) { - /* XXX: This case is not handled yet. */ - ASSERT(FALSE); - } else { - POVS_BUFFER_CONTEXT ctx; - OvsFlow *flow; - - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); - sourcePort = fwdDetail->SourcePortId; - sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; - - /* Take the DispatchLock so none of the VPORTs disconnect while - * we are setting destination ports. - * - * XXX: acquire/release the dispatch lock for a "batch" of packets - * rather than for each packet. 
*/ - NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, - dispatch); - - ctx = OvsInitExternalNBLContext(switchContext, curNbl, - sourcePort == switchContext->externalPortId); - if (ctx == NULL) { - RtlInitUnicodeString(&filterReason, - L"Cannot allocate external NBL context."); - - OvsStartNBLIngressError(switchContext, curNbl, - sendCompleteFlags, &filterReason, - NDIS_STATUS_RESOURCES); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - continue; - } - - vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, - sourceIndex); - if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { - RtlInitUnicodeString(&filterReason, - L"OVS-Cannot forward packet from unknown source port"); - goto dropit; - } else { - portNo = vport->portNo; - } - - vport->stats.rxPackets++; - vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); - - status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL); - if (status != NDIS_STATUS_SUCCESS) { - RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); - goto dropit; - } - - ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); - OvsAcquireDatapathRead(datapath, &dpLockState, dispatch); - - flow = OvsLookupFlow(datapath, &key, &hash, FALSE); - if (flow) { - OvsFlowUsed(flow, curNbl, &layers); - datapath->hits++; - /* If successful, OvsActionsExecute() consumes the NBL. - * Otherwise, it adds it to the completionList. No need to - * check the return value. */ - OvsActionsExecute(switchContext, &completionList, curNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); - OvsReleaseDatapath(datapath, &dpLockState); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - continue; - } else { - OvsReleaseDatapath(datapath, &dpLockState); - - datapath->misses++; - status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE, - NULL, 0, OVS_PACKET_CMD_MISS, - portNo, - key.tunKey.dst != 0 ? 
- (OvsIPv4TunnelKey *)&key.tunKey : - NULL, curNbl, - sourcePort == - switchContext->externalPortId, - &layers, switchContext, - &missedPackets, &num); - if (status == NDIS_STATUS_SUCCESS) { - /* Complete the packet since it was copied to user - * buffer. */ - RtlInitUnicodeString(&filterReason, - L"OVS-Dropped since packet was copied to userspace"); - } else { - RtlInitUnicodeString(&filterReason, - L"OVS-Dropped due to failure to queue to userspace"); - } - goto dropit; - } - -dropit: - OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0, - &filterReason); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - } - } - - /* Queue the missed packets. */ - OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num); - OvsFinalizeCompletionList(&completionList); -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterSendNetBufferLists Function. - * -------------------------------------------------------------------------- - */ -VOID -OvsExtSendNBL(NDIS_HANDLE filterModuleContext, - PNET_BUFFER_LIST netBufferLists, - NDIS_PORT_NUMBER portNumber, - ULONG sendFlags) -{ - UNREFERENCED_PARAMETER(portNumber); - - /* 'filterModuleContext' is the switch context that gets created in the - * AttachHandler. 
*/ - POVS_SWITCH_CONTEXT switchContext; - switchContext = (POVS_SWITCH_CONTEXT) filterModuleContext; - - if (switchContext->dataFlowState == OvsSwitchPaused) { - NDIS_STRING filterReason; - ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); - - RtlInitUnicodeString(&filterReason, - L"Switch state PAUSED, drop on ingress."); - OvsStartNBLIngressError(switchContext, netBufferLists, - sendCompleteFlags, &filterReason, - NDIS_STATUS_PAUSED); - return; - } - - ASSERT(switchContext->dataFlowState == OvsSwitchRunning); - - OvsStartNBLIngress(switchContext, netBufferLists, sendFlags); -} - -static VOID -OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, - ULONG sendCompleteFlags) -{ - PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; - OvsCompletionList newList; - - newList.dropNbl = NULL; - newList.dropNblNext = &newList.dropNbl; - - for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { - nextNbl = curNbl->Next; - curNbl->Next = NULL; - - curNbl = OvsCompleteNBL(switchContext, curNbl, TRUE); - if (curNbl != NULL) { - /* NBL originated from the upper layer. */ - *newList.dropNblNext = curNbl; - newList.dropNblNext = &curNbl->Next; - } - } - - /* Complete the NBL's that were sent by the upper layer. */ - if (newList.dropNbl != NULL) { - NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, - sendCompleteFlags); - } -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterSendNetBufferListsComplete function. 
- * -------------------------------------------------------------------------- - */ -VOID -OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, - PNET_BUFFER_LIST netBufferLists, - ULONG sendCompleteFlags) -{ - OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, - netBufferLists, sendCompleteFlags); -} - - -VOID -OvsFinalizeCompletionList(OvsCompletionList *completionList) -{ - if (completionList->dropNbl != NULL) { - OvsCompleteNBLIngress(completionList->switchContext, - completionList->dropNbl, - completionList->sendCompleteFlags); - - completionList->dropNbl = NULL; - completionList->dropNblNext = &completionList->dropNbl; - } -} - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterCancelSendNetBufferLists function. - * - * "If a filter driver specifies a FilterSendNetBufferLists function and it - * queues send requests, it must also specify a - * FilterCancelSendNetBufferLists function." - * - * http://msdn.microsoft.com/en-us/library/windows/hardware/ - * ff549966(v=vs.85).aspx - * -------------------------------------------------------------------------- - */ -VOID -OvsExtCancelSendNBL(NDIS_HANDLE filterModuleContext, - PVOID CancelId) -{ - UNREFERENCED_PARAMETER(filterModuleContext); - UNREFERENCED_PARAMETER(CancelId); - - /* All send requests get completed synchronously, so there is no need to - * implement this callback. */ -} diff --git a/datapath-windows/ovsext/OvsPacketIO.h b/datapath-windows/ovsext/OvsPacketIO.h deleted file mode 100644 index 11709dc3f..000000000 --- a/datapath-windows/ovsext/OvsPacketIO.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_PACKETIO_H_ -#define __OVS_PACKETIO_H_ 1 - -typedef union _OVS_PACKET_HDR_INFO OVS_PACKET_HDR_INFO; - -/* - * Data structures and utility functions to help manage a list of packets to be - * completed (dropped). - */ -typedef struct OvsCompletionList { - PNET_BUFFER_LIST dropNbl; - PNET_BUFFER_LIST *dropNblNext; - POVS_SWITCH_CONTEXT switchContext; - ULONG sendCompleteFlags; -} OvsCompletionList; - -VOID OvsInitCompletionList(OvsCompletionList *completionList, - POVS_SWITCH_CONTEXT switchContext, - ULONG sendCompleteFlags); -VOID OvsAddPktCompletionList(OvsCompletionList *completionList, - BOOLEAN incoming, - NDIS_SWITCH_PORT_ID sourcePort, - PNET_BUFFER_LIST netBufferList, - UINT32 netBufferListCount, - PNDIS_STRING filterReason); - - -/* - * Functions related to packet processing. 
- */ -VOID OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST netBufferLists, - ULONG sendFlags); - -NDIS_STATUS OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, - OvsCompletionList *completionList, - PNET_BUFFER_LIST curNbl, UINT32 srcVportNo, - ULONG sendFlags, OvsFlowKey *key, UINT64 *hash, - OVS_PACKET_HDR_INFO *layers, - const PNL_ATTR actions, int actionsLen); - -VOID OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext, - VOID *compList, PNET_BUFFER_LIST curNbl); - -#endif /* __OVS_PACKETIO_H_ */ diff --git a/datapath-windows/ovsext/OvsPacketParser.c b/datapath-windows/ovsext/OvsPacketParser.c deleted file mode 100644 index 0a9343551..000000000 --- a/datapath-windows/ovsext/OvsPacketParser.c +++ /dev/null @@ -1,303 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "OvsPacketParser.h" - -//XXX consider moving to NdisGetDataBuffer. 
-const VOID * -OvsGetPacketBytes(const NET_BUFFER_LIST *nbl, - UINT32 len, - UINT32 srcOffset, - VOID *storage) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl); - PMDL currentMdl; - BOOLEAN firstMDL = TRUE; - ULONG destOffset = 0; - VOID *dest = storage; - const UINT32 copyLen = len; - ULONG packetLen; - - packetLen = NET_BUFFER_DATA_LENGTH(netBuffer); - // Start copy from current MDL - currentMdl = NET_BUFFER_CURRENT_MDL(netBuffer); - - // Data on current MDL may be offset from start of MDL - while (destOffset < copyLen && currentMdl) { - PUCHAR srcMemory = MmGetSystemAddressForMdlSafe(currentMdl, - LowPagePriority); - ULONG length = MmGetMdlByteCount(currentMdl); - if (!srcMemory) { - status = NDIS_STATUS_RESOURCES; - break; - } - - if (firstMDL) { - ULONG mdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(netBuffer); - srcMemory += mdlOffset; - length -= mdlOffset; - firstMDL = FALSE; - } - length = MIN(length, packetLen); - packetLen -= length; - ASSERT((INT)packetLen >= 0); - - if (srcOffset >= length) { - currentMdl = NDIS_MDL_LINKAGE(currentMdl); - srcOffset -= length; - continue; - } else { - srcMemory += srcOffset; - length -= srcOffset; - srcOffset = 0; - } - - length = min(length, copyLen-destOffset); - - NdisMoveMemory((PUCHAR)dest+destOffset, srcMemory, length); - destOffset += length; - - currentMdl = NDIS_MDL_LINKAGE(currentMdl); - } - - if (destOffset == copyLen) { - ASSERT(status == NDIS_STATUS_SUCCESS); - return storage; - } - - return NULL; -} - -NDIS_STATUS -OvsParseIPv6(const NET_BUFFER_LIST *packet, - OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers) -{ - UINT16 ofs = layers->l3Offset; - IPv6Hdr ipv6HdrStorage; - const IPv6Hdr *nh; - UINT32 nextHdr; - Ipv6Key *flow= &key->ipv6Key; - - ofs = layers->l3Offset; - nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage); - if (!nh) { - return NDIS_STATUS_FAILURE; - } - - nextHdr = nh->nexthdr; - memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16); - 
memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16); - - flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4); - flow->ipv6Label = - ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2]; - flow->nwTtl = nh->hop_limit; - flow->nwProto = SOCKET_IPPROTO_NONE; - flow->nwFrag = 0; - - // Parse extended headers and compute L4 offset - ofs += sizeof(IPv6Hdr); - for (;;) { - if ((nextHdr != SOCKET_IPPROTO_HOPOPTS) - && (nextHdr != SOCKET_IPPROTO_ROUTING) - && (nextHdr != SOCKET_IPPROTO_DSTOPTS) - && (nextHdr != SOCKET_IPPROTO_AH) - && (nextHdr != SOCKET_IPPROTO_FRAGMENT)) { - /* - * It's either a terminal header (e.g., TCP, UDP) or one we - * don't understand. In either case, we're done with the - * packet, so use it to fill in 'nw_proto'. - */ - break; - } - - if (nextHdr == SOCKET_IPPROTO_HOPOPTS - || nextHdr == SOCKET_IPPROTO_ROUTING - || nextHdr == SOCKET_IPPROTO_DSTOPTS - || nextHdr == SOCKET_IPPROTO_AH) { - IPv6ExtHdr extHdrStorage; - const IPv6ExtHdr *extHdr; - UINT8 len; - - extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage); - if (!extHdr) { - return NDIS_STATUS_FAILURE; - } - - len = extHdr->hdrExtLen; - ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8; - nextHdr = extHdr->nextHeader; - if (OvsPacketLenNBL(packet) < ofs) { - return NDIS_STATUS_FAILURE; - } - } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) { - IPv6FragHdr fragHdrStorage; - const IPv6FragHdr *fragHdr; - - fragHdr = OvsGetPacketBytes(packet, sizeof *fragHdr, ofs, - &fragHdrStorage); - if (!fragHdr) { - return NDIS_STATUS_FAILURE; - } - - nextHdr = fragHdr->nextHeader; - ofs += sizeof *fragHdr; - - /* We only process the first fragment. 
*/ - if (fragHdr->offlg != htons(0)) { - if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) { - flow->nwFrag = OVSWIN_NW_FRAG_ANY; - } else { - flow->nwFrag |= OVSWIN_NW_FRAG_LATER; - nextHdr = SOCKET_IPPROTO_FRAGMENT; - break; - } - } - } - } - - flow->nwProto = (UINT8)nextHdr; - layers->l4Offset = ofs; - return NDIS_STATUS_SUCCESS; -} - -VOID -OvsParseTcp(const NET_BUFFER_LIST *packet, - L4Key *flow, - POVS_PACKET_HDR_INFO layers) -{ - TCPHdr tcpStorage; - const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage); - if (tcp) { - flow->tpSrc = tcp->source; - flow->tpDst = tcp->dest; - layers->isTcp = 1; - layers->l7Offset = layers->l4Offset + 4 * tcp->doff; - } -} - -VOID -OvsParseUdp(const NET_BUFFER_LIST *packet, - L4Key *flow, - POVS_PACKET_HDR_INFO layers) -{ - UDPHdr udpStorage; - const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage); - if (udp) { - flow->tpSrc = udp->source; - flow->tpDst = udp->dest; - layers->isUdp = 1; - if (udp->check == 0) { - layers->udpCsumZero = 1; - } - layers->l7Offset = layers->l4Offset + sizeof *udp; - } -} - -NDIS_STATUS -OvsParseIcmpV6(const NET_BUFFER_LIST *packet, - OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers) -{ - UINT16 ofs = layers->l4Offset; - ICMPHdr icmpStorage; - const ICMPHdr *icmp; - Icmp6Key *flow = &key->icmp6Key; - - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); - memset(flow->arpSha, 0, sizeof(flow->arpSha)); - memset(flow->arpTha, 0, sizeof(flow->arpTha)); - - icmp = OvsGetIcmp(packet, ofs, &icmpStorage); - if (!icmp) { - return NDIS_STATUS_FAILURE; - } - ofs += sizeof *icmp; - - /* - * The ICMPv6 type and code fields use the 16-bit transport port - * fields, so we need to store them in 16-bit network byte order. 
- */ - key->ipv6Key.l4.tpSrc = htons(icmp->type); - key->ipv6Key.l4.tpDst = htons(icmp->code); - - if (icmp->code == 0 && - (icmp->type == ND_NEIGHBOR_SOLICIT || - icmp->type == ND_NEIGHBOR_ADVERT)) { - struct in6_addr ndTargetStorage; - const struct in6_addr *ndTarget; - - ndTarget = OvsGetPacketBytes(packet, sizeof *ndTarget, ofs, - &ndTargetStorage); - if (!ndTarget) { - return NDIS_STATUS_FAILURE; - } - flow->ndTarget = *ndTarget; - - while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) { - /* - * The minimum size of an option is 8 bytes, which also is - * the size of Ethernet link-layer options. - */ - IPv6NdOptHdr ndOptStorage; - const IPv6NdOptHdr *ndOpt; - UINT16 optLen; - - ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage); - if (!ndOpt) { - return NDIS_STATUS_FAILURE; - } - - optLen = ndOpt->len * 8; - if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) { - goto invalid; - } - - /* - * Store the link layer address if the appropriate option is - * provided. It is considered an error if the same link - * layer option is specified twice. 
- */ - if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) { - if (Eth_IsNullAddr(flow->arpSha)) { - memcpy(flow->arpSha, ndOpt + 1, ETH_ADDR_LENGTH); - } else { - goto invalid; - } - } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) { - if (Eth_IsNullAddr(flow->arpTha)) { - memcpy(flow->arpTha, ndOpt + 1, ETH_ADDR_LENGTH); - } else { - goto invalid; - } - } - - ofs += optLen; - } - } - - layers->l7Offset = ofs; - return NDIS_STATUS_SUCCESS; - -invalid: - memset(&flow->ndTarget, 0, sizeof(flow->ndTarget)); - memset(flow->arpSha, 0, sizeof(flow->arpSha)); - memset(flow->arpTha, 0, sizeof(flow->arpTha)); - - return NDIS_STATUS_FAILURE; -} diff --git a/datapath-windows/ovsext/OvsPacketParser.h b/datapath-windows/ovsext/OvsPacketParser.h deleted file mode 100644 index ab3c613a7..000000000 --- a/datapath-windows/ovsext/OvsPacketParser.h +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_PACKET_PARSER_H_ -#define __OVS_PACKET_PARSER_H_ 1 - -#include "precomp.h" -#include "OvsNetProto.h" - -const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len, - UINT32 SrcOffset, VOID *storage); -NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers); -VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow, - POVS_PACKET_HDR_INFO layers); -VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow, - POVS_PACKET_HDR_INFO layers); -NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key, - POVS_PACKET_HDR_INFO layers); - -static __inline ULONG -OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB) -{ - INT length = 0; - NET_BUFFER *nb; - - nb = NET_BUFFER_LIST_FIRST_NB(_pNB); - ASSERT(nb); - while(nb) { - length += NET_BUFFER_DATA_LENGTH(nb); - nb = NET_BUFFER_NEXT_NB(nb); - } - - return length; -} - -/* - * Returns the ctl field from the TCP header in 'packet', or 0 if the field - * can't be read. The caller must have ensured that 'packet' contains a TCP - * header. - * - * We can't just use TCPHdr, from netProto.h, for this because that - * breaks the flags down into individual bit-fields. We can't even use - * offsetof because that will try to take the address of a bit-field, - * which C does not allow. - */ -static UINT16 -OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN - const POVS_PACKET_HDR_INFO layers) // IN -{ -#define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header. -#define TCP_FLAGS(CTL) ((CTL) & 0x3f) // Obtain TCP flags from CTL. - - const UINT16 *ctl; - UINT16 storage; - - ctl = OvsGetPacketBytes(packet, sizeof *ctl, layers->l4Offset + TCP_CTL_OFS, - &storage); - return ctl ? 
*ctl : 0; -} - - -static UINT8 -OvsGetTcpFlags(const NET_BUFFER_LIST *packet, // IN - const OvsFlowKey *key, // IN - const POVS_PACKET_HDR_INFO layers) // IN -{ - UNREFERENCED_PARAMETER(key); // should be removed later - - if (layers->isTcp) { - return TCP_FLAGS(OvsGetTcpCtl(packet, layers)); - } else { - return 0; - } -} - -static const EtherArp * -OvsGetArp(const NET_BUFFER_LIST *packet, - UINT32 ofs, - EtherArp *storage) -{ - return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); -} - -static const IPHdr * -OvsGetIp(const NET_BUFFER_LIST *packet, - UINT32 ofs, - IPHdr *storage) -{ - const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage); - if (ip) { - int ipLen = ip->ihl * 4; - if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) { - return ip; - } - } - return NULL; -} - -static const TCPHdr * -OvsGetTcp(const NET_BUFFER_LIST *packet, - UINT32 ofs, - TCPHdr *storage) -{ - const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage); - if (tcp) { - int tcpLen = tcp->doff * 4; - if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) { - return tcp; - } - } - return NULL; -} - -static const UDPHdr * -OvsGetUdp(const NET_BUFFER_LIST *packet, - UINT32 ofs, - UDPHdr *storage) -{ - return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); -} - -static const ICMPHdr * -OvsGetIcmp(const NET_BUFFER_LIST *packet, - UINT32 ofs, - ICMPHdr *storage) -{ - return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage); -} - -#endif /* __OVS_PACKET_PARSER_H_ */ diff --git a/datapath-windows/ovsext/OvsSwitch.c b/datapath-windows/ovsext/OvsSwitch.c deleted file mode 100644 index f548599d3..000000000 --- a/datapath-windows/ovsext/OvsSwitch.c +++ /dev/null @@ -1,530 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains the implementation of the management functionality of the - * OVS. - */ - -#include "precomp.h" - -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsFlow.h" -#include "OvsIpHelper.h" -#include "OvsTunnelIntf.h" -#include "OvsOid.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_SWITCH -#include "OvsDebug.h" - -POVS_SWITCH_CONTEXT gOvsSwitchContext; -BOOLEAN gOvsInAttach; -UINT64 ovsTimeIncrementPerTick; - -extern PNDIS_SPIN_LOCK gOvsCtrlLock; -extern NDIS_HANDLE gOvsExtDriverHandle; -extern NDIS_HANDLE gOvsExtDriverObject; - -static NDIS_STATUS OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle, - POVS_SWITCH_CONTEXT *switchContextOut); -static NDIS_STATUS OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext); -static VOID OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext); -static VOID OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext); -static NDIS_STATUS OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext); - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterAttach function. - * - * This function allocates the switch context, and initializes its necessary - * members. 
- * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsExtAttach(NDIS_HANDLE ndisFilterHandle, - NDIS_HANDLE filterDriverContext, - PNDIS_FILTER_ATTACH_PARAMETERS attachParameters) -{ - NDIS_STATUS status = NDIS_STATUS_FAILURE; - NDIS_FILTER_ATTRIBUTES ovsExtAttributes; - POVS_SWITCH_CONTEXT switchContext = NULL; - - UNREFERENCED_PARAMETER(filterDriverContext); - - OVS_LOG_TRACE("Enter: ndisFilterHandle %p", ndisFilterHandle); - - ASSERT(filterDriverContext == (NDIS_HANDLE)gOvsExtDriverObject); - if (attachParameters->MiniportMediaType != NdisMedium802_3) { - status = NDIS_STATUS_INVALID_PARAMETER; - goto cleanup; - } - - if (gOvsExtDriverHandle == NULL) { - OVS_LOG_TRACE("Exit: OVSEXT driver is not loaded."); - ASSERT(FALSE); - goto cleanup; - } - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext) { - NdisReleaseSpinLock(gOvsCtrlLock); - OVS_LOG_TRACE("Exit: Failed to create OVS Switch, only one datapath is" - "supported, %p.", gOvsSwitchContext); - goto cleanup; - } - if (gOvsInAttach) { - NdisReleaseSpinLock(gOvsCtrlLock); - /* Just fail the request. */ - OVS_LOG_TRACE("Exit: Failed to create OVS Switch, since another attach" - "instance is in attach process."); - goto cleanup; - } - gOvsInAttach = TRUE; - NdisReleaseSpinLock(gOvsCtrlLock); - - status = OvsInitIpHelper(ndisFilterHandle); - if (status != STATUS_SUCCESS) { - OVS_LOG_ERROR("Exit: Failed to initialize IP helper."); - goto cleanup; - } - - status = OvsCreateSwitch(ndisFilterHandle, &switchContext); - if (status != NDIS_STATUS_SUCCESS) { - OvsCleanupIpHelper(); - goto cleanup; - } - ASSERT(switchContext); - - /* - * Register the switch context with NDIS so NDIS can pass it back to the - * Filterxxx callback functions as the 'FilterModuleContext' parameter. 
- */ - RtlZeroMemory(&ovsExtAttributes, sizeof(NDIS_FILTER_ATTRIBUTES)); - ovsExtAttributes.Header.Revision = NDIS_FILTER_ATTRIBUTES_REVISION_1; - ovsExtAttributes.Header.Size = sizeof(NDIS_FILTER_ATTRIBUTES); - ovsExtAttributes.Header.Type = NDIS_OBJECT_TYPE_FILTER_ATTRIBUTES; - ovsExtAttributes.Flags = 0; - - NDIS_DECLARE_FILTER_MODULE_CONTEXT(OVS_SWITCH_CONTEXT); - status = NdisFSetAttributes(ndisFilterHandle, switchContext, &ovsExtAttributes); - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_ERROR("Failed to set attributes."); - OvsCleanupIpHelper(); - goto cleanup; - } - - /* Setup the state machine. */ - switchContext->controlFlowState = OvsSwitchAttached; - switchContext->dataFlowState = OvsSwitchPaused; - - gOvsSwitchContext = switchContext; - KeMemoryBarrier(); - -cleanup: - gOvsInAttach = FALSE; - if (status != NDIS_STATUS_SUCCESS) { - if (switchContext != NULL) { - OvsDeleteSwitch(switchContext); - } - } - OVS_LOG_TRACE("Exit: status %x", status); - - return status; -} - - -/* - * -------------------------------------------------------------------------- - * This function allocated the switch context, and initializes its necessary - * members. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle, - POVS_SWITCH_CONTEXT *switchContextOut) -{ - NDIS_STATUS status; - POVS_SWITCH_CONTEXT switchContext; - NDIS_SWITCH_CONTEXT hostSwitchContext; - NDIS_SWITCH_OPTIONAL_HANDLERS hostSwitchHandler; - - OVS_LOG_TRACE("Enter: Create switch object"); - - switchContext = - (POVS_SWITCH_CONTEXT) OvsAllocateMemory(sizeof(OVS_SWITCH_CONTEXT)); - if (switchContext == NULL) { - status = NDIS_STATUS_RESOURCES; - goto create_switch_done; - } - RtlZeroMemory(switchContext, sizeof(OVS_SWITCH_CONTEXT)); - - /* Initialize the switch. 
*/ - hostSwitchHandler.Header.Type = NDIS_OBJECT_TYPE_SWITCH_OPTIONAL_HANDLERS; - hostSwitchHandler.Header.Size = NDIS_SIZEOF_SWITCH_OPTIONAL_HANDLERS_REVISION_1; - hostSwitchHandler.Header.Revision = NDIS_SWITCH_OPTIONAL_HANDLERS_REVISION_1; - - status = NdisFGetOptionalSwitchHandlers(ndisFilterHandle, - &hostSwitchContext, - &hostSwitchHandler); - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_ERROR("OvsExtAttach: Extension is running in " - "non-switch environment."); - OvsFreeMemory(switchContext); - goto create_switch_done; - } - - switchContext->NdisFilterHandle = ndisFilterHandle; - switchContext->NdisSwitchContext = hostSwitchContext; - RtlCopyMemory(&switchContext->NdisSwitchHandlers, &hostSwitchHandler, - sizeof(NDIS_SWITCH_OPTIONAL_HANDLERS)); - - status = OvsInitSwitchContext(switchContext); - if (status != NDIS_STATUS_SUCCESS) { - OvsFreeMemory(switchContext); - goto create_switch_done; - } - - status = OvsTunnelFilterInitialize(gOvsExtDriverObject); - if (status != NDIS_STATUS_SUCCESS) { - OvsFreeMemory(switchContext); - goto create_switch_done; - } - *switchContextOut = switchContext; - -create_switch_done: - OVS_LOG_TRACE("Exit: switchContext: %p status: %#lx", - switchContext, status); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterDetach function. 
- * -------------------------------------------------------------------------- - */ -_Use_decl_annotations_ -VOID -OvsExtDetach(NDIS_HANDLE filterModuleContext) -{ - POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; - - OVS_LOG_TRACE("Enter: filterModuleContext %p", filterModuleContext); - - ASSERT(switchContext->dataFlowState == OvsSwitchPaused); - switchContext->controlFlowState = OvsSwitchDetached; - KeMemoryBarrier(); - while(switchContext->pendingOidCount > 0) { - NdisMSleep(1000); - } - OvsDeleteSwitch(switchContext); - OvsCleanupIpHelper(); - gOvsSwitchContext = NULL; - /* This completes the cleanup, and a new attach can be handled now. */ - - OVS_LOG_TRACE("Exit: OvsDetach Successfully"); -} - - -/* - * -------------------------------------------------------------------------- - * This function deletes the switch by freeing all memory previously allocated. - * XXX need synchronization with other path. - * -------------------------------------------------------------------------- - */ -VOID -OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext) -{ - UINT32 dpNo = switchContext->dpNo; - - OVS_LOG_TRACE("Enter: switchContext:%p", switchContext); - - OvsTunnelFilterUninitialize(gOvsExtDriverObject); - OvsClearAllSwitchVports(switchContext); - OvsCleanupSwitchContext(switchContext); - OvsFreeMemory(switchContext); - OVS_LOG_TRACE("Exit: deleted switch %p dpNo: %d", switchContext, dpNo); -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterRestart function. 
- * -------------------------------------------------------------------------- - */ -_Use_decl_annotations_ -NDIS_STATUS -OvsExtRestart(NDIS_HANDLE filterModuleContext, - PNDIS_FILTER_RESTART_PARAMETERS filterRestartParameters) -{ - POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - BOOLEAN switchActive; - - UNREFERENCED_PARAMETER(filterRestartParameters); - - OVS_LOG_TRACE("Enter: filterModuleContext %p", - filterModuleContext); - - /* Activate the switch if this is the first restart. */ - if (!switchContext->isActivated && !switchContext->isActivateFailed) { - status = OvsQuerySwitchActivationComplete(switchContext, - &switchActive); - if (status != NDIS_STATUS_SUCCESS) { - switchContext->isActivateFailed = TRUE; - status = NDIS_STATUS_RESOURCES; - goto cleanup; - } - - if (switchActive) { - status = OvsActivateSwitch(switchContext); - - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_WARN("Failed to activate switch, dpNo:%d", - switchContext->dpNo); - status = NDIS_STATUS_RESOURCES; - goto cleanup; - } - } - } - - ASSERT(switchContext->dataFlowState == OvsSwitchPaused); - switchContext->dataFlowState = OvsSwitchRunning; - -cleanup: - OVS_LOG_TRACE("Exit: Restart switch:%p, dpNo: %d, status: %#x", - switchContext, switchContext->dpNo, status); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterPause function - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsExtPause(NDIS_HANDLE filterModuleContext, - PNDIS_FILTER_PAUSE_PARAMETERS pauseParameters) -{ - POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; - - UNREFERENCED_PARAMETER(pauseParameters); - OVS_LOG_TRACE("Enter: filterModuleContext %p", - filterModuleContext); - - ASSERT(switchContext->dataFlowState == OvsSwitchRunning); - switchContext->dataFlowState = 
OvsSwitchPaused; - KeMemoryBarrier(); - while(switchContext->pendingOidCount > 0) { - NdisMSleep(1000); - } - - OVS_LOG_TRACE("Exit: OvsDetach Successfully"); - return NDIS_STATUS_SUCCESS; -} - -static NDIS_STATUS -OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext) -{ - int i; - NTSTATUS status; - - OVS_LOG_TRACE("Enter: switchContext: %p", switchContext); - - switchContext->dispatchLock = - NdisAllocateRWLock(switchContext->NdisFilterHandle); - - switchContext->vportArray = - (PVOID *)OvsAllocateMemory(sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE); - switchContext->nameHashArray = (PLIST_ENTRY) - OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE); - switchContext->portHashArray = (PLIST_ENTRY) - OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE); - status = OvsAllocateFlowTable(&switchContext->datapath, switchContext); - - if (status == NDIS_STATUS_SUCCESS) { - status = OvsInitBufferPool(switchContext); - } - if (status != NDIS_STATUS_SUCCESS || - switchContext->dispatchLock == NULL || - switchContext->vportArray == NULL || - switchContext->nameHashArray == NULL || - switchContext->portHashArray == NULL) { - if (switchContext->dispatchLock) { - NdisFreeRWLock(switchContext->dispatchLock); - } - if (switchContext->vportArray) { - OvsFreeMemory(switchContext->vportArray); - } - if (switchContext->nameHashArray) { - OvsFreeMemory(switchContext->nameHashArray); - } - if (switchContext->portHashArray) { - OvsFreeMemory(switchContext->portHashArray); - } - OvsDeleteFlowTable(&switchContext->datapath); - OvsCleanupBufferPool(switchContext); - - OVS_LOG_TRACE("Exit: Failed to init switchContext"); - return NDIS_STATUS_RESOURCES; - } - - for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { - InitializeListHead(&switchContext->nameHashArray[i]); - } - for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { - InitializeListHead(&switchContext->portHashArray[i]); - } - RtlZeroMemory(switchContext->vportArray, - sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE); 
- - switchContext->isActivated = FALSE; - switchContext->isActivateFailed = FALSE; - switchContext->dpNo = OVS_DP_NUMBER; - switchContext->lastPortIndex = OVS_MAX_VPORT_ARRAY_SIZE -1; - ovsTimeIncrementPerTick = KeQueryTimeIncrement() / 10000; - OVS_LOG_TRACE("Exit: Succesfully initialized switchContext: %p", - switchContext); - return NDIS_STATUS_SUCCESS; -} - -static VOID -OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext) -{ - OVS_LOG_TRACE("Enter: Delete switchContext:%p", switchContext); - - /* We need to do cleanup for tunnel port here. */ - ASSERT(switchContext->numVports == 0); - - NdisFreeRWLock(switchContext->dispatchLock); - OvsFreeMemory(switchContext->nameHashArray); - OvsFreeMemory(switchContext->portHashArray); - OvsFreeMemory(switchContext->vportArray); - OvsDeleteFlowTable(&switchContext->datapath); - OvsCleanupBufferPool(switchContext); - OVS_LOG_TRACE("Exit: Delete switchContext: %p", switchContext); -} - -/* - * -------------------------------------------------------------------------- - * This function activates the switch by initializing it with all the runtime - * state. First it queries all of the MAC addresses set as custom switch policy - * to allow sends from, and adds tme to the property list. Then it queries the - * NIC list and verifies it can support all of the NICs currently connected to - * the switch, and adds the NICs to the NIC list. 
- * -------------------------------------------------------------------------- - */ -static NDIS_STATUS -OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext) -{ - NDIS_STATUS status; - - ASSERT(!switchContext->isActivated); - - OVS_LOG_TRACE("Enter: activate switch %p, dpNo: %ld", - switchContext, switchContext->dpNo); - - status = OvsAddConfiguredSwitchPorts(switchContext); - - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_WARN("Failed to add configured switch ports"); - goto cleanup; - - } - status = OvsInitConfiguredSwitchNics(switchContext); - - if (status != NDIS_STATUS_SUCCESS) { - OVS_LOG_WARN("Failed to add configured vports"); - OvsClearAllSwitchVports(switchContext); - goto cleanup; - } - switchContext->isActivated = TRUE; - OvsPostEvent(OVS_DEFAULT_PORT_NO, OVS_DEFAULT_EVENT_STATUS); - -cleanup: - OVS_LOG_TRACE("Exit: activate switch:%p, isActivated: %s, status = %lx", - switchContext, - (switchContext->isActivated ? "TRUE" : "FALSE"), status); - return status; -} - -PVOID -OvsGetVportFromIndex(UINT16 index) -{ - if (index < OVS_MAX_VPORT_ARRAY_SIZE && - !OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) { - return gOvsSwitchContext->vportArray[index]; - } - return NULL; -} - -PVOID -OvsGetExternalVport() -{ - return gOvsSwitchContext->externalVport; -} - - -/* - * -------------------------------------------------------------------------- - * Implements filter driver's FilterNetPnPEvent function. - * -------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsExtNetPnPEvent(NDIS_HANDLE filterModuleContext, - PNET_PNP_EVENT_NOTIFICATION netPnPEvent) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext; - BOOLEAN switchActive; - - OVS_LOG_TRACE("Enter: filterModuleContext: %p, NetEvent: %d", - filterModuleContext, (netPnPEvent->NetPnPEvent).NetEvent); - /* - * The only interesting event is the NetEventSwitchActivate. 
It provides - * an asynchronous notification of the switch completing activation. - */ - if (netPnPEvent->NetPnPEvent.NetEvent == NetEventSwitchActivate) { - status = OvsQuerySwitchActivationComplete(switchContext, &switchActive); - if (status != NDIS_STATUS_SUCCESS) { - switchContext->isActivateFailed = TRUE; - } else { - ASSERT(switchContext->isActivated == FALSE); - ASSERT(switchActive == TRUE); - if (switchContext->isActivated == FALSE && switchActive == TRUE) { - status = OvsActivateSwitch(switchContext); - OVS_LOG_TRACE("OvsExtNetPnPEvent: activated switch: %p " - "status: %s", switchContext, - status ? "TRUE" : "FALSE"); - } - } - } - - if (status == NDIS_STATUS_SUCCESS) { - status = NdisFNetPnPEvent(switchContext->NdisFilterHandle, - netPnPEvent); - } - OVS_LOG_TRACE("Exit: OvsExtNetPnPEvent"); - - return status; -} diff --git a/datapath-windows/ovsext/OvsSwitch.h b/datapath-windows/ovsext/OvsSwitch.h deleted file mode 100644 index d49fe9b70..000000000 --- a/datapath-windows/ovsext/OvsSwitch.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * This file contains the definition of the switch object for the OVS. 
- */ - -#ifndef __OVS_SWITCH_H_ -#define __OVS_SWITCH_H_ 1 - -#include "OvsNetProto.h" -#include "OvsBufferMgmt.h" -#define OVS_MAX_VPORT_ARRAY_SIZE 1024 - -#define OVS_VPORT_MASK (OVS_MAX_VPORT_ARRAY_SIZE - 1) - -#define OVS_INTERNAL_VPORT_DEFAULT_INDEX 0 - -//Tunnel port indicies -#define RESERVED_START_INDEX1 1 -#define OVS_TUNNEL_INDEX_START RESERVED_START_INDEX1 -#define OVS_VXLAN_VPORT_INDEX 2 -#define OVS_GRE_VPORT_INDEX 3 -#define OVS_GRE64_VPORT_INDEX 4 -#define OVS_TUNNEL_INDEX_END OVS_GRE64_VPORT_INDEX - -#define OVS_EXTERNAL_VPORT_START 8 -#define OVS_EXTERNAL_VPORT_END 40 -#define OVS_INTERNAL_VPORT_START 40 -#define OVS_INTERNAL_VPOR_END 72 -#define OVS_VM_VPORT_START 72 -#define OVS_VM_VPORT_MAX 0xffff -#define OVS_VPORT_INDEX(_portNo) ((_portNo) & 0xffffff) -#define OVS_VPORT_PORT_NO(_index, _gen) \ - (((_index) & 0xffffff) | ((UINT32)(_gen) << 24)) -#define OVS_VPORT_GEN(portNo) (portNo >> 24) - -#define OVS_MAX_PHYS_ADAPTERS 32 -#define OVS_MAX_IP_VPOR 32 - -#define OVS_HASH_BASIS 0x13578642 - -typedef struct _OVS_DATAPATH -{ - PLIST_ENTRY flowTable; // Contains OvsFlows. - UINT32 nFlows; // Number of entries in flowTable. - - // List_Links queues[64]; // Hash table of queue IDs. - - /* Statistics. */ - UINT64 hits; // Number of flow table hits. - UINT64 misses; // Number of flow table misses. - UINT64 lost; // Number of dropped misses. - - /* Used to protect the flows in the flowtable. */ - PNDIS_RW_LOCK_EX lock; -} OVS_DATAPATH, *POVS_DATAPATH; - -/* - * OVS_SWITCH_CONTEXT - * - * The context allocated per switch., For OVS, we only - * support one switch which corresponding to one datapath. - * Each datapath can have multiple logical bridges configured - * which is maintained by vswitchd. 
- */ - -typedef enum OVS_SWITCH_DATAFLOW_STATE -{ - OvsSwitchPaused, - OvsSwitchRunning -} OVS_SWITCH_DATAFLOW_STATE, *POVS_SWITCH_DATAFLOW_STATE; - -typedef enum OVS_SWITCH_CONTROFLOW_STATE -{ - OvsSwitchUnknown, - OvsSwitchAttached, - OvsSwitchDetached -} OVS_SWITCH_CONTROLFLOW_STATE, *POVS_SWITCH_CONTROLFLOW_STATE; - -// XXX: Take care of alignment and grouping members by cacheline -typedef struct _OVS_SWITCH_CONTEXT -{ - /* Coarse and fine-grained switch states. */ - OVS_SWITCH_DATAFLOW_STATE dataFlowState; - OVS_SWITCH_CONTROLFLOW_STATE controlFlowState; - BOOLEAN isActivated; - BOOLEAN isActivateFailed; - - UINT32 dpNo; - - NDIS_SWITCH_PORT_ID externalPortId; - NDIS_SWITCH_PORT_ID internalPortId; - PVOID externalVport; // the virtual adapter vport - PVOID internalVport; - - PVOID *vportArray; - PLIST_ENTRY nameHashArray; // based on ovsName - PLIST_ENTRY portHashArray; // based on portId - - UINT32 numPhysicalNics; - UINT32 numVports; // include validation port - UINT32 lastPortIndex; - - /* Lock taken over the switch. This protects the ports on the switch. */ - PNDIS_RW_LOCK_EX dispatchLock; - - /* The flowtable. */ - OVS_DATAPATH datapath; - - /* Handle to the OVSExt filter driver. Same as 'gOvsExtDriverHandle'. */ - NDIS_HANDLE NdisFilterHandle; - - /* Handle and callbacks exposed by the underlying hyper-v switch. 
*/ - NDIS_SWITCH_CONTEXT NdisSwitchContext; - NDIS_SWITCH_OPTIONAL_HANDLERS NdisSwitchHandlers; - - volatile LONG pendingInjectedNblCount; - volatile LONG pendingOidCount; - - OVS_NBL_POOL ovsPool; -} OVS_SWITCH_CONTEXT, *POVS_SWITCH_CONTEXT; - - -static __inline VOID -OvsAcquireDatapathRead(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState, - BOOLEAN dispatch) -{ - ASSERT(datapath); - NdisAcquireRWLockRead(datapath->lock, lockState, dispatch); -} - -static __inline VOID -OvsAcquireDatapathWrite(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState, - BOOLEAN dispatch) -{ - ASSERT(datapath); - NdisAcquireRWLockWrite(datapath->lock, lockState, dispatch); -} - - -static __inline VOID -OvsReleaseDatapath(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState) -{ - ASSERT(datapath); - NdisReleaseRWLock(datapath->lock, lockState); -} - - -PVOID OvsGetVportFromIndex(UINT16 index); -PVOID OvsGetExternalVport(); - -#endif /* __OVS_SWITCH_H_ */ diff --git a/datapath-windows/ovsext/OvsTunnel.c b/datapath-windows/ovsext/OvsTunnel.c deleted file mode 100644 index d0288cc7f..000000000 --- a/datapath-windows/ovsext/OvsTunnel.c +++ /dev/null @@ -1,345 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* - * OvsTunnel.c - * WFP Classified callback function and Action code for injecting a packet to the vswitch - */ - -#include "precomp.h" - -#pragma warning(push) -#pragma warning(disable:4201) // unnamed struct/union -#include -#pragma warning(pop) - -#pragma warning( push ) -#pragma warning( disable:4127 ) - -#include -#include "OvsTunnel.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsVxlan.h" -#include "OvsPacketIO.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" - -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; - -static NTSTATUS -OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, - OVS_TUNNEL_PENDED_PACKET *packet); - -VOID OvsAcquireDatapathRead(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState, - BOOLEAN dispatch); -VOID OvsAcquireDatapathWrite(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState, - BOOLEAN dispatch); -VOID OvsReleaseDatapath(OVS_DATAPATH *datapath, - LOCK_STATE_EX *lockState); - - -NTSTATUS -OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType, - const GUID *filterKey, - const FWPS_FILTER *filter) -{ - UNREFERENCED_PARAMETER(notifyType); - UNREFERENCED_PARAMETER(filterKey); - UNREFERENCED_PARAMETER(filter); - - return STATUS_SUCCESS; -} - -static NTSTATUS -OvsTunnelAnalyzePacket(OVS_TUNNEL_PENDED_PACKET *packet) -{ - NTSTATUS status = STATUS_SUCCESS; - UINT32 packetLength = 0; - ULONG bytesCopied = 0; - NET_BUFFER_LIST *copiedNBL = NULL; - NET_BUFFER *netBuffer; - NDIS_STATUS ndisStatus; - - /* - * For inbound net buffer list, we can assume it contains only one - * net buffer (unless it was an re-assembeled fragments). 
in both cases - * the first net buffer should include all headers, we assert if the retreat fails - */ - netBuffer = NET_BUFFER_LIST_FIRST_NB(packet->netBufferList); - - /* Drop the packet from the host stack */ - packet->classifyOut->actionType = FWP_ACTION_BLOCK; - packet->classifyOut->rights &= ~FWPS_RIGHT_ACTION_WRITE; - - /* Adjust the net buffer list offset to the start of the IP header */ - ndisStatus = NdisRetreatNetBufferDataStart(netBuffer, - packet->ipHeaderSize + - packet->transportHeaderSize, - 0, NULL); - ASSERT(ndisStatus == NDIS_STATUS_SUCCESS); - - /* Single NBL element for WFP */ - ASSERT(packet->netBufferList->Next == NULL); - - /* Note that the copy will inherit the original net buffer list's offset */ - packetLength = NET_BUFFER_DATA_LENGTH(netBuffer); - copiedNBL = OvsAllocateVariableSizeNBL(gOvsSwitchContext, packetLength, - OVS_DEFAULT_HEADROOM_SIZE); - - if (copiedNBL == NULL) { - goto analyzeDone; - } - - status = NdisCopyFromNetBufferToNetBuffer(NET_BUFFER_LIST_FIRST_NB(copiedNBL), - 0, packetLength, - netBuffer, 0, &bytesCopied); - if (status != NDIS_STATUS_SUCCESS || packetLength != bytesCopied) { - goto analyzeFreeNBL; - } - - status = OvsInjectPacketThroughActions(copiedNBL, - packet); - goto analyzeDone; - - /* Undo the adjustment on the original net buffer list */ -analyzeFreeNBL: - OvsCompleteNBL(gOvsSwitchContext, copiedNBL, TRUE); -analyzeDone: - NdisAdvanceNetBufferDataStart(netBuffer, - packet->transportHeaderSize + packet->ipHeaderSize, - FALSE, - NULL); - return status; -} - - -/* - * -------------------------------------------------------------------------- - * This is the classifyFn function of the datagram-data callout. It - * allocates a packet structure to store the classify and meta data and - * it references the net buffer list for out-of-band modification and - * re-injection. The packet structure will be queued to the global packet - * queue. The worker thread will then be signaled, if idle, to process - * the queue. 
- * -------------------------------------------------------------------------- - */ -VOID -OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, - const FWPS_INCOMING_METADATA_VALUES *inMetaValues, - VOID *layerData, - const VOID *classifyContext, - const FWPS_FILTER *filter, - UINT64 flowContext, - FWPS_CLASSIFY_OUT *classifyOut) -{ - OVS_TUNNEL_PENDED_PACKET packetStorage; - OVS_TUNNEL_PENDED_PACKET *packet = &packetStorage; - FWP_DIRECTION direction; - - UNREFERENCED_PARAMETER(classifyContext); - UNREFERENCED_PARAMETER(filter); - UNREFERENCED_PARAMETER(flowContext); - - ASSERT(layerData != NULL); - - /* We don't have the necessary right to alter the packet flow */ - if ((classifyOut->rights & FWPS_RIGHT_ACTION_WRITE) == 0) { - /* XXX TBD revisit protect against other filters owning this packet */ - ASSERT(FALSE); - goto Exit; - } - - RtlZeroMemory(packet, sizeof(OVS_TUNNEL_PENDED_PACKET)); - - /* classifyOut cannot be accessed from a different thread context */ - packet->classifyOut = classifyOut; - - if (inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V4) { - direction = - inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_DIRECTION].\ - value.uint32; - } - else { - ASSERT(inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V6); - direction = - inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V6_DIRECTION].\ - value.uint32; - } - - packet->netBufferList = layerData; - - ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(inMetaValues, - FWPS_METADATA_FIELD_COMPARTMENT_ID)); - - ASSERT(direction == FWP_DIRECTION_INBOUND); - - ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( - inMetaValues, - FWPS_METADATA_FIELD_IP_HEADER_SIZE)); - ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( - inMetaValues, - FWPS_METADATA_FIELD_TRANSPORT_HEADER_SIZE)); - - packet->ipHeaderSize = inMetaValues->ipHeaderSize; - packet->transportHeaderSize = inMetaValues->transportHeaderSize; - - ASSERT(inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_IP_PROTOCOL].value.uint8 == IPPROTO_UDP ); - 
OvsTunnelAnalyzePacket(packet); - -Exit: - ; -} - - -static NTSTATUS -OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, - OVS_TUNNEL_PENDED_PACKET *packet) -{ - NTSTATUS status = STATUS_SUCCESS; - OvsIPv4TunnelKey tunnelKey; - NET_BUFFER *pNb; - ULONG sendCompleteFlags = 0; - BOOLEAN dispatch; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - LOCK_STATE_EX lockState, dpLockState; - LIST_ENTRY missedPackets; - OvsCompletionList completionList; - KIRQL irql; - ULONG SendFlags = NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; - OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; - - ASSERT(gOvsSwitchContext); - - /* Fill the tunnel key */ - status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); - - if(!NT_SUCCESS(status)) { - goto dropit; - } - - pNb = NET_BUFFER_LIST_FIRST_NB(pNbl); - - NdisAdvanceNetBufferDataStart(pNb, - packet->transportHeaderSize + packet->ipHeaderSize + - sizeof(VXLANHdr), - FALSE, - NULL); - - /* Most likely (always) dispatch irql */ - irql = KeGetCurrentIrql(); - - /* dispatch is used for datapath lock as well */ - dispatch = (irql == DISPATCH_LEVEL) ? NDIS_RWL_AT_DISPATCH_LEVEL : 0; - if (dispatch) { - sendCompleteFlags |= NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL; - } - - InitializeListHead(&missedPackets); - OvsInitCompletionList(&completionList, gOvsSwitchContext, - sendCompleteFlags); - - { - POVS_VPORT_ENTRY vport; - UINT32 portNo; - OVS_PACKET_HDR_INFO layers; - OvsFlowKey key; - UINT64 hash; - PNET_BUFFER curNb; - OvsFlow *flow; - - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); - - /* - * XXX WFP packets contain a single NBL structure. 
- * Reassembeled packet "may" have multiple NBs, however, a simple test shows - * that the packet still has a single NB (after reassemble) - * We still need to check if the Ethernet header of the innet packet is in a single MD - */ - - curNb = NET_BUFFER_LIST_FIRST_NB(pNbl); - ASSERT(curNb->Next == NULL); - - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, dispatch); - - /* Lock the flowtable for the duration of accessing the flow */ - OvsAcquireDatapathRead(datapath, &dpLockState, NDIS_RWL_AT_DISPATCH_LEVEL); - - SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; - - vport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN); - - if (vport == NULL){ - status = STATUS_UNSUCCESSFUL; - goto unlockAndDrop; - } - - ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); - - portNo = vport->portNo; - - status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); - if (status != NDIS_STATUS_SUCCESS) { - goto unlockAndDrop; - } - - flow = OvsLookupFlow(datapath, &key, &hash, FALSE); - if (flow) { - OvsFlowUsed(flow, pNbl, &layers); - datapath->hits++; - - OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, - portNo, SendFlags, &key, &hash, &layers, - flow->actions, flow->actionsLen); - - OvsReleaseDatapath(datapath, &dpLockState); - } else { - POVS_PACKET_QUEUE_ELEM elem; - - datapath->misses++; - elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS, - portNo, &key.tunKey, pNbl, curNb, - TRUE, &layers); - if (elem) { - /* Complete the packet since it was copied to user buffer. 
*/ - InsertTailList(&missedPackets, &elem->link); - OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); - } else { - status = STATUS_INSUFFICIENT_RESOURCES; - } - goto unlockAndDrop; - } - - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - - } - - return status; - -unlockAndDrop: - OvsReleaseDatapath(datapath, &dpLockState); - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); -dropit: - pNbl = OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); - ASSERT(pNbl == NULL); - return status; -} - -#pragma warning(pop) diff --git a/datapath-windows/ovsext/OvsTunnel.h b/datapath-windows/ovsext/OvsTunnel.h deleted file mode 100644 index 110ff747b..000000000 --- a/datapath-windows/ovsext/OvsTunnel.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_TUNNEL_H_ -#define __OVS_TUNNEL_H_ 1 - -// -// OVS_TUNNEL_PENDED_PACKET is the object type we used to store all information -// needed for out-of-band packet modification and re-injection. This type -// also points back to the flow context the packet belongs to. - -typedef struct OVS_TUNNEL_PENDED_PACKET_ -{ - /* Common fields for inbound and outbound traffic */ - NET_BUFFER_LIST *netBufferList; - - UINT32 ipHeaderSize; - UINT32 transportHeaderSize; - FWPS_CLASSIFY_OUT *classifyOut; -} OVS_TUNNEL_PENDED_PACKET; - -/* Shared global data. 
*/ - -extern UINT16 configNewDestPort; - -extern UINT32 gCalloutIdV4; - -// -// Shared function prototypes -// -VOID OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, - const FWPS_INCOMING_METADATA_VALUES *inMetaValues, - VOID *layerData, - const VOID *classifyContext, - const FWPS_FILTER *filter, - UINT64 flowContext, - FWPS_CLASSIFY_OUT *classifyOut); - - -NTSTATUS OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType, - const GUID *filterKey, - const FWPS_FILTER *filter); - -#endif /* __OVS_TUNNEL_H_ */ diff --git a/datapath-windows/ovsext/OvsTunnelFilter.c b/datapath-windows/ovsext/OvsTunnelFilter.c deleted file mode 100644 index a1de00734..000000000 --- a/datapath-windows/ovsext/OvsTunnelFilter.c +++ /dev/null @@ -1,343 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" - -#pragma warning(push) -#pragma warning(disable:4201) // unnamed struct/union - - -#include - -#pragma warning(pop) - -#include -#include -#include -#include - -#include "OvsTunnel.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsVxlan.h" - - -#define INITGUID -#include - - -/* Configurable parameters (addresses and ports are in host order) */ -UINT16 configNewDestPort = VXLAN_UDP_PORT; - -/* - * Callout and sublayer GUIDs - */ -// b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 -DEFINE_GUID( - OVS_TUNNEL_CALLOUT_V4, - 0xb16b0a6e, - 0x2b2a, - 0x41a3, - 0x8b, 0x39, 0xbd, 0x3f, 0xfc, 0x85, 0x5f, 0xf8 - ); - -/* 0104fd7e-c825-414e-94c9-f0d525bbc169 */ -DEFINE_GUID( - OVS_TUNNEL_SUBLAYER, - 0x0104fd7e, - 0xc825, - 0x414e, - 0x94, 0xc9, 0xf0, 0xd5, 0x25, 0xbb, 0xc1, 0x69 - ); - -/* - * Callout driver global variables - */ -PDEVICE_OBJECT gDeviceObject; - -HANDLE gEngineHandle; -UINT32 gCalloutIdV4; - - -/* Callout driver implementation */ - -NTSTATUS -OvsTunnelAddFilter(PWSTR filterName, - const PWSTR filterDesc, - USHORT remotePort, - FWP_DIRECTION direction, - UINT64 context, - const GUID *layerKey, - const GUID *calloutKey) -{ - NTSTATUS status = STATUS_SUCCESS; - FWPM_FILTER filter = {0}; - FWPM_FILTER_CONDITION filterConditions[3] = {0}; - UINT conditionIndex; - - UNREFERENCED_PARAMETER(remotePort); - UNREFERENCED_PARAMETER(direction); - - filter.layerKey = *layerKey; - filter.displayData.name = (wchar_t*)filterName; - filter.displayData.description = (wchar_t*)filterDesc; - - filter.action.type = FWP_ACTION_CALLOUT_TERMINATING; - filter.action.calloutKey = *calloutKey; - filter.filterCondition = filterConditions; - filter.subLayerKey = OVS_TUNNEL_SUBLAYER; - filter.weight.type = FWP_EMPTY; // auto-weight. 
- filter.rawContext = context; - - conditionIndex = 0; - - filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_DIRECTION; - filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; - filterConditions[conditionIndex].conditionValue.type = FWP_UINT32; - filterConditions[conditionIndex].conditionValue.uint32 = direction; - - conditionIndex++; - - filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_IP_LOCAL_PORT; - filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; - filterConditions[conditionIndex].conditionValue.type = FWP_UINT16; - filterConditions[conditionIndex].conditionValue.uint16 = remotePort; - - conditionIndex++; - - filter.numFilterConditions = conditionIndex; - - status = FwpmFilterAdd(gEngineHandle, - &filter, - NULL, - NULL); - - return status; -} - -/* - * -------------------------------------------------------------------------- - * This function registers callouts and filters that intercept UDP traffic at - * WFP FWPM_LAYER_DATAGRAM_DATA_V4 - * -------------------------------------------------------------------------- - */ -NTSTATUS -OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey, - const GUID *calloutKey, - VOID *deviceObject, - UINT32 *calloutId) -{ - NTSTATUS status = STATUS_SUCCESS; - - FWPS_CALLOUT sCallout = {0}; - FWPM_CALLOUT mCallout = {0}; - - FWPM_DISPLAY_DATA displayData = {0}; - - BOOLEAN calloutRegistered = FALSE; - - sCallout.calloutKey = *calloutKey; - sCallout.classifyFn = OvsTunnelClassify; - sCallout.notifyFn = OvsTunnelNotify; -#if FLOW_CONTEXT - /* Currnetly we don't associate a context with the flow */ - sCallout.flowDeleteFn = OvsTunnelFlowDelete; - sCallout.flags = FWP_CALLOUT_FLAG_CONDITIONAL_ON_FLOW; -#endif - - status = FwpsCalloutRegister(deviceObject, - &sCallout, - calloutId); - - if (!NT_SUCCESS(status)) { - goto Exit; - } - calloutRegistered = TRUE; - - displayData.name = L"Datagram-Data OVS Callout"; - displayData.description = L"Proxies destination address/port for UDP"; - - 
mCallout.calloutKey = *calloutKey; - mCallout.displayData = displayData; - mCallout.applicableLayer = *layerKey; - - status = FwpmCalloutAdd(gEngineHandle, - &mCallout, - NULL, - NULL); - - if (!NT_SUCCESS(status)) { - goto Exit; - } - - status = OvsTunnelAddFilter(L"Datagram-Data OVS Filter (Inbound)", - L"address/port for UDP", - configNewDestPort, - FWP_DIRECTION_INBOUND, - 0, - layerKey, - calloutKey); - -Exit: - - if (!NT_SUCCESS(status)){ - if (calloutRegistered) { - FwpsCalloutUnregisterById(*calloutId); - *calloutId = 0; - } - } - - return status; -} - -/* - * -------------------------------------------------------------------------- - * This function registers dynamic callouts and filters that intercept UDP - * Callouts and filters will be removed during De-Initialize. - * -------------------------------------------------------------------------- - */ -NTSTATUS -OvsTunnelRegisterCallouts(VOID *deviceObject) -{ - NTSTATUS status = STATUS_SUCCESS; - FWPM_SUBLAYER OvsTunnelSubLayer; - - BOOLEAN engineOpened = FALSE; - BOOLEAN inTransaction = FALSE; - - FWPM_SESSION session = {0}; - - session.flags = FWPM_SESSION_FLAG_DYNAMIC; - - status = FwpmEngineOpen(NULL, - RPC_C_AUTHN_WINNT, - NULL, - &session, - &gEngineHandle); - - if (!NT_SUCCESS(status)) { - goto Exit; - } - engineOpened = TRUE; - - status = FwpmTransactionBegin(gEngineHandle, 0); - if (!NT_SUCCESS(status)) { - goto Exit; - } - inTransaction = TRUE; - - RtlZeroMemory(&OvsTunnelSubLayer, sizeof(FWPM_SUBLAYER)); - - OvsTunnelSubLayer.subLayerKey = OVS_TUNNEL_SUBLAYER; - OvsTunnelSubLayer.displayData.name = L"Datagram-Data OVS Sub-Layer"; - OvsTunnelSubLayer.displayData.description = - L"Sub-Layer for use by Datagram-Data OVS callouts"; - OvsTunnelSubLayer.flags = 0; - OvsTunnelSubLayer.weight = FWP_EMPTY; /* auto-weight */ - - status = FwpmSubLayerAdd(gEngineHandle, &OvsTunnelSubLayer, NULL); - if (!NT_SUCCESS(status)) { - goto Exit; - } - - // In order to use this callout a socket must be opened - 
status = OvsTunnelRegisterDatagramDataCallouts(&FWPM_LAYER_DATAGRAM_DATA_V4, - &OVS_TUNNEL_CALLOUT_V4, - deviceObject, - &gCalloutIdV4); - if (!NT_SUCCESS(status)) { - goto Exit; - } - - status = FwpmTransactionCommit(gEngineHandle); - if (!NT_SUCCESS(status)){ - goto Exit; - } - inTransaction = FALSE; - -Exit: - - if (!NT_SUCCESS(status)) { - if (inTransaction) { - FwpmTransactionAbort(gEngineHandle); - } - if (engineOpened) { - FwpmEngineClose(gEngineHandle); - gEngineHandle = NULL; - } - } - - return status; -} - -VOID -OvsTunnelUnregisterCallouts(VOID) -{ - FwpmEngineClose(gEngineHandle); - gEngineHandle = NULL; - FwpsCalloutUnregisterById(gCalloutIdV4); -} - - -VOID -OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject) -{ - UNREFERENCED_PARAMETER(driverObject); - - OvsTunnelUnregisterCallouts(); - IoDeleteDevice(gDeviceObject); -} - - -NTSTATUS -OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject) -{ - NTSTATUS status = STATUS_SUCCESS; - UNICODE_STRING deviceName; - - RtlInitUnicodeString(&deviceName, - L"\\Device\\OvsTunnelFilter"); - - status = IoCreateDevice(driverObject, - 0, - &deviceName, - FILE_DEVICE_NETWORK, - 0, - FALSE, - &gDeviceObject); - - if (!NT_SUCCESS(status)){ - goto Exit; - } - - status = OvsTunnelRegisterCallouts(gDeviceObject); - -Exit: - - if (!NT_SUCCESS(status)){ - if (gEngineHandle != NULL) { - OvsTunnelUnregisterCallouts(); - } - - if (gDeviceObject) { - IoDeleteDevice(gDeviceObject); - } - } - - return status; -} diff --git a/datapath-windows/ovsext/OvsTunnelIntf.h b/datapath-windows/ovsext/OvsTunnelIntf.h deleted file mode 100644 index 3543c8a7e..000000000 --- a/datapath-windows/ovsext/OvsTunnelIntf.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_TUNNEL_INTF_H_ -#define __OVS_TUNNEL_INTF_H_ 1 - -/* Tunnel callout driver load/unload functions */ -NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject); - -VOID OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject); - -#endif /* __OVS_TUNNEL_INTF_H_ */ diff --git a/datapath-windows/ovsext/OvsTypes.h b/datapath-windows/ovsext/OvsTypes.h deleted file mode 100644 index 57b27e77d..000000000 --- a/datapath-windows/ovsext/OvsTypes.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_TYPES_H_ -#define __OVS_TYPES_H_ 1 - -/* Defines the userspace specific data types - * for files included from user space. 
*/ -typedef unsigned long long uint64, uint64_t, ovs_be64, u64; -typedef long long int64, int64_t; -typedef unsigned int uint32, uint32_t, ovs_be32, u32; -typedef unsigned short uint16, uint16_t, ovs_be16, u16; -typedef unsigned char uint8, uint8_t, u8; -typedef uint64 __u64, __be64; -typedef uint32 __u32, __be32; -typedef uint16 __u16, __be16; -typedef uint8 __u8; - -/* Defines the userspace specific data types for file - * included within kernel only. */ -typedef UINT32 BE32; -typedef UINT64 BE64; - -#define ETH_ALEN 6 - -#define SIZE_MAX MAXUINT32 - -#endif /* __OVS_TYPES_H_ */ diff --git a/datapath-windows/ovsext/OvsUser.c b/datapath-windows/ovsext/OvsUser.c deleted file mode 100644 index 088074997..000000000 --- a/datapath-windows/ovsext/OvsUser.c +++ /dev/null @@ -1,867 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * OvsUser.c - * Manage packet queue for packet miss for userAction. 
- */ - - -#include "precomp.h" - -#include "Datapath.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsPacketIO.h" -#include "OvsChecksum.h" -#include "OvsNetProto.h" -#include "OvsFlow.h" -#include "OvsTunnelIntf.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_USER -#include "OvsDebug.h" - -OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES]; - -POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance); -extern PNDIS_SPIN_LOCK gOvsCtrlLock; -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -OVS_USER_STATS ovsUserStats; - - -NTSTATUS -OvsUserInit() -{ - UINT32 i; - POVS_USER_PACKET_QUEUE queue; - for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { - queue = &ovsPacketQueues[i]; - RtlZeroMemory(queue, sizeof (*queue)); - InitializeListHead(&queue->packetList); - NdisAllocateSpinLock(&queue->queueLock); - } - return STATUS_SUCCESS; -} - -VOID -OvsUserCleanup() -{ - UINT32 i; - POVS_USER_PACKET_QUEUE queue; - for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { - queue = &ovsPacketQueues[i]; - ASSERT(IsListEmpty(&queue->packetList)); - ASSERT(queue->instance == NULL); - ASSERT(queue->pendingIrp == NULL); - NdisFreeSpinLock(&queue->queueLock); - } -} - -static VOID -OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue, - POVS_OPEN_INSTANCE instance) -{ - PLIST_ENTRY link, next; - LIST_ENTRY tmp; - POVS_PACKET_QUEUE_ELEM elem; - - InitializeListHead(&tmp); - NdisAcquireSpinLock(&queue->queueLock); - if (queue->instance != instance) { - NdisReleaseSpinLock(&queue->queueLock); - return; - } - - if (queue->numPackets) { - OvsAppendList(&tmp, &queue->packetList); - queue->numPackets = 0; - } - NdisReleaseSpinLock(&queue->queueLock); - LIST_FORALL_SAFE(&tmp, link, next) { - RemoveEntryList(link); - elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); - OvsFreeMemory(elem); - } -} - - -VOID -OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance) -{ - 
POVS_USER_PACKET_QUEUE queue; - POVS_PACKET_QUEUE_ELEM elem; - PLIST_ENTRY link, next; - LIST_ENTRY tmp; - PIRP irp = NULL; - - InitializeListHead(&tmp); - queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; - if (queue) { - PDRIVER_CANCEL cancelRoutine; - NdisAcquireSpinLock(&queue->queueLock); - if (queue->instance != instance) { - NdisReleaseSpinLock(&queue->queueLock); - return; - } - - if (queue->numPackets) { - OvsAppendList(&tmp, &queue->packetList); - queue->numPackets = 0; - } - queue->instance = NULL; - queue->queueId = OVS_MAX_NUM_PACKET_QUEUES; - instance->packetQueue = NULL; - irp = queue->pendingIrp; - queue->pendingIrp = NULL; - if (irp) { - cancelRoutine = IoSetCancelRoutine(irp, NULL); - if (cancelRoutine == NULL) { - irp = NULL; - } - } - NdisReleaseSpinLock(&queue->queueLock); - } - LIST_FORALL_SAFE(&tmp, link, next) { - RemoveEntryList(link); - elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); - OvsFreeMemory(elem); - } - if (irp) { - OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); - } -} - -NTSTATUS -OvsSubscribeDpIoctl(PFILE_OBJECT fileObject, - PVOID inputBuffer, - UINT32 inputLength) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - UINT32 queueId; - POVS_USER_PACKET_QUEUE queue; - if (inputLength < sizeof (UINT32)) { - return STATUS_INVALID_PARAMETER; - } - queueId = *(UINT32 *)inputBuffer; - if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) { - /* - * unsubscribe - */ - OvsCleanupPacketQueue(instance); - } else if (instance->packetQueue == NULL && - queueId < OVS_MAX_NUM_PACKET_QUEUES) { - queue = &ovsPacketQueues[queueId]; - NdisAcquireSpinLock(&queue->queueLock); - if (ovsPacketQueues[queueId].instance) { - if (ovsPacketQueues[queueId].instance != instance) { - NdisReleaseSpinLock(&queue->queueLock); - return STATUS_INSUFFICIENT_RESOURCES; - } else { - NdisReleaseSpinLock(&queue->queueLock); - return STATUS_SUCCESS; - } - } - queue->queueId = queueId; - queue->instance = 
instance; - instance->packetQueue = queue; - ASSERT(IsListEmpty(&queue->packetList)); - NdisReleaseSpinLock(&queue->queueLock); - } else { - return STATUS_INVALID_PARAMETER; - } - return STATUS_SUCCESS; -} - - -NTSTATUS -OvsReadDpIoctl(PFILE_OBJECT fileObject, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - POVS_PACKET_QUEUE_ELEM elem; - UINT32 len; - -#define TCP_CSUM_OFFSET 16 -#define UDP_CSUM_OFFSET 6 - ASSERT(instance); - - if (instance->packetQueue == NULL) { - return STATUS_INVALID_PARAMETER; - } - if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) { - return STATUS_BUFFER_TOO_SMALL; - } - - elem = OvsGetNextPacket(instance); - if (elem) { - /* - * XXX revisit this later - */ - len = elem->packet.totalLen > outputLength ? outputLength : - elem->packet.totalLen; - - if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) && - len == elem->packet.totalLen) { - UINT16 sum, *ptr; - UINT16 size = (UINT16)(elem->packet.userDataLen + - elem->hdrInfo.l4Offset + - (UINT16)sizeof (OVS_PACKET_INFO)); - RtlCopyMemory(outputBuffer, &elem->packet, size); - ASSERT(len - size >= elem->hdrInfo.l4PayLoad); - sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size, - (UINT8 *)&elem->packet + size, - elem->hdrInfo.l4PayLoad, 0); - ptr =(UINT16 *)((UINT8 *)outputBuffer + size + - (elem->hdrInfo.tcpCsumNeeded ? - TCP_CSUM_OFFSET : UDP_CSUM_OFFSET)); - *ptr = sum; - ovsUserStats.l4Csum++; - } else { - RtlCopyMemory(outputBuffer, &elem->packet, len); - } - - *replyLen = len; - OvsFreeMemory(elem); - } - return STATUS_SUCCESS; -} - -/* Helper function to allocate a Forwarding Context for an NBL */ -NTSTATUS -OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST nbl) -{ - return switchContext->NdisSwitchHandlers. 
- AllocateNetBufferListForwardingContext( - switchContext->NdisSwitchContext, nbl); -} - -/* - * -------------------------------------------------------------------------- - * This function allocates all the stuff necessary for creating an NBL from the - * input buffer of specified length, namely, a nonpaged data buffer of size - * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL - * context yet. It also copies data from the specified buffer to the NBL. - * -------------------------------------------------------------------------- - */ -PNET_BUFFER_LIST -OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext, - PVOID userBuffer, - ULONG length) -{ - UINT8 *data = NULL; - PNET_BUFFER_LIST nbl = NULL; - PNET_BUFFER nb; - PMDL mdl; - - if (length > OVS_DEFAULT_DATA_SIZE) { - nbl = OvsAllocateVariableSizeNBL(switchContext, length, - OVS_DEFAULT_HEADROOM_SIZE); - - } else { - nbl = OvsAllocateFixSizeNBL(switchContext, length, - OVS_DEFAULT_HEADROOM_SIZE); - } - if (nbl == NULL) { - return NULL; - } - - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - mdl = NET_BUFFER_CURRENT_MDL(nb); - data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) + - NET_BUFFER_CURRENT_MDL_OFFSET(nb); - if (!data) { - OvsCompleteNBL(switchContext, nbl, TRUE); - return NULL; - } - - NdisMoveMemory(data, userBuffer, length); - - return nbl; -} - -NTSTATUS -OvsExecuteDpIoctl(PVOID inputBuffer, - UINT32 inputLength, - UINT32 outputLength) -{ - NTSTATUS status = STATUS_SUCCESS; - NTSTATUS ndisStatus; - OvsPacketExecute *execute; - LOCK_STATE_EX lockState; - PNET_BUFFER_LIST pNbl; - PNL_ATTR actions; - PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; - OvsFlowKey key; - OVS_PACKET_HDR_INFO layers; - POVS_VPORT_ENTRY vport; - - if (inputLength < sizeof(*execute) || outputLength != 0) { - return STATUS_INFO_LENGTH_MISMATCH; - } - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL) { - status = STATUS_INVALID_PARAMETER; - goto unlock; 
- } - - execute = (struct OvsPacketExecute *) inputBuffer; - - if (execute->packetLen == 0) { - status = STATUS_INVALID_PARAMETER; - goto unlock; - } - - if (inputLength != sizeof (*execute) + - execute->actionsLen + execute->packetLen) { - status = STATUS_INFO_LENGTH_MISMATCH; - goto unlock; - } - actions = (PNL_ATTR)((PCHAR)&execute->actions + execute->packetLen); - - /* - * Allocate the NBL, copy the data from the userspace buffer. Allocate - * also, the forwarding context for the packet. - */ - pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, &execute->packetBuf, - execute->packetLen); - if (pNbl == NULL) { - status = STATUS_NO_MEMORY; - goto unlock; - } - - fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); - vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort); - if (vport) { - fwdDetail->SourcePortId = vport->portId; - fwdDetail->SourceNicIndex = vport->nicIndex; - } else { - fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; - fwdDetail->SourceNicIndex = 0; - } - // XXX: Figure out if any of the other members of fwdDetail need to be set. - - ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers, - NULL); - if (ndisStatus == NDIS_STATUS_SUCCESS) { - ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, - vport ? 
vport->portNo : - OVS_DEFAULT_PORT_NO, - NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, - &key, NULL, &layers, actions, - execute->actionsLen); - pNbl = NULL; - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - } - if (ndisStatus != NDIS_STATUS_SUCCESS) { - status = STATUS_UNSUCCESSFUL; - } - - if (pNbl) { - OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); - } -unlock: - NdisReleaseSpinLock(gOvsCtrlLock); - return status; -} - - -NTSTATUS -OvsPurgeDpIoctl(PFILE_OBJECT fileObject) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; - - if (queue == NULL) { - return STATUS_INVALID_PARAMETER; - } - OvsPurgePacketQueue(queue, instance); - return STATUS_SUCCESS; -} - -VOID -OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject, - PIRP irp) -{ - PIO_STACK_LOCATION irpSp; - PFILE_OBJECT fileObject; - POVS_OPEN_INSTANCE instance; - POVS_USER_PACKET_QUEUE queue = NULL; - - UNREFERENCED_PARAMETER(deviceObject); - - IoReleaseCancelSpinLock(irp->CancelIrql); - irpSp = IoGetCurrentIrpStackLocation(irp); - fileObject = irpSp->FileObject; - - if (fileObject == NULL) { - goto done; - } - NdisAcquireSpinLock(gOvsCtrlLock); - instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - if (instance) { - queue = instance->packetQueue; - } - if (instance == NULL || queue == NULL) { - NdisReleaseSpinLock(gOvsCtrlLock); - goto done; - } - NdisReleaseSpinLock(gOvsCtrlLock); - NdisAcquireSpinLock(&queue->queueLock); - if (queue->pendingIrp == irp) { - queue->pendingIrp = NULL; - } - NdisReleaseSpinLock(&queue->queueLock); -done: - OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); -} - - -NTSTATUS -OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject) -{ - POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; - POVS_USER_PACKET_QUEUE queue = - (POVS_USER_PACKET_QUEUE)instance->packetQueue; - NTSTATUS status = STATUS_SUCCESS; - BOOLEAN cancelled = FALSE; - - if (queue == 
NULL) { - return STATUS_INVALID_PARAMETER; - } - NdisAcquireSpinLock(&queue->queueLock); - if (queue->instance != instance) { - NdisReleaseSpinLock(&queue->queueLock); - return STATUS_INVALID_PARAMETER; - } - if (queue->pendingIrp) { - NdisReleaseSpinLock(&queue->queueLock); - return STATUS_DEVICE_BUSY; - } - if (queue->numPackets == 0) { - PDRIVER_CANCEL cancelRoutine; - IoMarkIrpPending(irp); - IoSetCancelRoutine(irp, OvsCancelIrpDatapath); - if (irp->Cancel) { - cancelRoutine = IoSetCancelRoutine(irp, NULL); - if (cancelRoutine) { - cancelled = TRUE; - } - } else { - queue->pendingIrp = irp; - } - status = STATUS_PENDING; - } - NdisReleaseSpinLock(&queue->queueLock); - if (cancelled) { - OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); - OVS_LOG_INFO("Datapath IRP cancelled: %p", irp); - } - return status; -} - - -POVS_PACKET_QUEUE_ELEM -OvsGetNextPacket(POVS_OPEN_INSTANCE instance) -{ - POVS_USER_PACKET_QUEUE queue; - PLIST_ENTRY link; - queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; - if (queue == NULL) { - return NULL; - } - NdisAcquireSpinLock(&queue->queueLock); - if (queue->instance != instance || queue->numPackets == 0) { - NdisReleaseSpinLock(&queue->queueLock); - return NULL; - } - link = RemoveHeadList(&queue->packetList); - queue->numPackets--; - NdisReleaseSpinLock(&queue->queueLock); - return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); -} - - -POVS_USER_PACKET_QUEUE -OvsGetQueue(UINT32 queueId) -{ - POVS_USER_PACKET_QUEUE queue; - if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) { - return NULL; - } - queue = &ovsPacketQueues[queueId]; - return queue->instance != NULL ? queue : NULL; -} - -/* - *---------------------------------------------------------------------------- - * OvsCreateQueuePacket -- - * - * Create a packet which will be forwarded to user space. 
- * - * InputParameter: - * queueId Identify the queue the packet to be inserted - * This will be used when multiple queues is supported - * in userspace - * userData: when cmd is user action, this field contain - * user action data. - * userDataLen: as name indicated - * cmd: either miss or user action - * inPort: datapath port id from which the packet is received. - * tunnelKey: tunnelKey for tunneled packet - * nbl: the NET_BUFFER_LIST which contain the packet - * nb: the packet - * isRecv: This is used to decide how to interprete the csum info - * hdrInfo: include hdr info initialized during flow extraction. - * - * Results: - * NULL if fail to create the packet - * The packet element otherwise - *---------------------------------------------------------------------------- - */ -POVS_PACKET_QUEUE_ELEM -OvsCreateQueuePacket(UINT32 queueId, - PVOID userData, - UINT32 userDataLen, - UINT32 cmd, - UINT32 inPort, - OvsIPv4TunnelKey *tunnelKey, - PNET_BUFFER_LIST nbl, - PNET_BUFFER nb, - BOOLEAN isRecv, - POVS_PACKET_HDR_INFO hdrInfo) -{ -#define VLAN_TAG_SIZE 4 - UINT32 allocLen, dataLen, extraLen = 0; - POVS_PACKET_QUEUE_ELEM elem; - PMDL mdl; - UINT8 *src, *dst; - NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; - NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo; - - csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo); - - if (isRecv && (csumInfo.Receive.TcpChecksumFailed || - (csumInfo.Receive.UdpChecksumFailed && - !hdrInfo->udpCsumZero) || - csumInfo.Receive.IpChecksumFailed)) { - OVS_LOG_INFO("Packet dropped due to checksum failure."); - ovsUserStats.dropDuetoChecksum++; - return NULL; - } - - vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo); - if (vlanInfo.TagHeader.VlanId) { - /* - * We may also need to check priority XXX - */ - extraLen = VLAN_TAG_SIZE; - } - - dataLen = NET_BUFFER_DATA_LENGTH(nb); - allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + userDataLen + dataLen + - extraLen; - - elem = 
(POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen); - if (elem == NULL) { - ovsUserStats.dropDuetoResource++; - return NULL; - } - elem->hdrInfo.value = hdrInfo->value; - elem->packet.totalLen = sizeof (OVS_PACKET_INFO) + userDataLen + dataLen + - extraLen; - elem->packet.queue = queueId; - elem->packet.userDataLen = userDataLen; - elem->packet.inPort = inPort; - elem->packet.cmd = cmd; - if (cmd == (UINT32)OVS_PACKET_CMD_MISS) { - ovsUserStats.miss++; - } else { - ovsUserStats.action++; - } - elem->packet.packetLen = dataLen + extraLen; - if (tunnelKey) { - RtlCopyMemory(&elem->packet.tunnelKey, tunnelKey, - sizeof (*tunnelKey)); - } else { - RtlZeroMemory(&elem->packet.tunnelKey, - sizeof (elem->packet.tunnelKey)); - } - - dst = elem->packet.data; - if (userDataLen) { - RtlCopyMemory(dst, userData, userDataLen); - dst = dst + userDataLen; - } - dst += extraLen; - - mdl = NET_BUFFER_CURRENT_MDL(nb); - src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0); - if (src == NULL) { - OvsFreeMemory(elem); - ovsUserStats.dropDuetoResource++; - return NULL; - } else if (src != dst) { - /* Copy the data from the NDIS buffer to dst. */ - RtlCopyMemory(dst, src, dataLen); - } - - dst = elem->packet.data + userDataLen + extraLen; - /* - * Fix IP hdr if necessary - */ - if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) || - (!isRecv && csumInfo.Transmit.IsIPv4 && - csumInfo.Transmit.IpHeaderChecksum)) { - PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); - ASSERT(elem->hdrInfo.isIPv4); - ASSERT(ipHdr->Version == 4); - ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr, - ipHdr->HeaderLength << 2, - (UINT16)~ipHdr->HeaderChecksum); - ovsUserStats.ipCsum++; - } - ASSERT(elem->hdrInfo.tcpCsumNeeded == 0 && - elem->hdrInfo.udpCsumNeeded == 0); - /* - * Fow now, we will not do verification - * There is no correctness issue here. 
- * XXX - */ - /* - * calculate TCP/UDP pseudo checksum - */ - if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) { - /* - * Only this case, we need to reclaculate pseudo checksum - * all other cases, it is assumed the pseudo checksum is - * filled already. - * - */ - PTCP_HDR tcpHdr = (PTCP_HDR)(dst + hdrInfo->l4Offset); - if (hdrInfo->isIPv4) { - PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); - elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - - (ipHdr->HeaderLength << 2)); - tcpHdr->th_sum = - IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, - (UINT32 *)&ipHdr->DestinationAddress, - IPPROTO_TCP, elem->hdrInfo.l4PayLoad); - } else { - PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + hdrInfo->l3Offset); - elem->hdrInfo.l4PayLoad = - (UINT16)(ntohs(ipv6Hdr->PayloadLength) + - hdrInfo->l3Offset + sizeof(IPV6_HEADER) - - hdrInfo->l4Offset); - ASSERT(hdrInfo->isIPv6); - tcpHdr->th_sum = - IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, - (UINT32 *)&ipv6Hdr->DestinationAddress, - IPPROTO_TCP, elem->hdrInfo.l4PayLoad); - } - elem->hdrInfo.tcpCsumNeeded = 1; - ovsUserStats.recalTcpCsum++; - } else if (!isRecv) { - if (csumInfo.Transmit.TcpChecksum) { - elem->hdrInfo.tcpCsumNeeded = 1; - } else if (csumInfo.Transmit.UdpChecksum) { - elem->hdrInfo.udpCsumNeeded = 1; - } - if (elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) { -#ifdef DBG - UINT16 sum, *ptr; - UINT8 proto = - elem->hdrInfo.tcpCsumNeeded ? 
IPPROTO_TCP : IPPROTO_UDP; -#endif - if (hdrInfo->isIPv4) { - PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); - elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - - (ipHdr->HeaderLength << 2)); -#ifdef DBG - sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, - (UINT32 *)&ipHdr->DestinationAddress, - proto, elem->hdrInfo.l4PayLoad); -#endif - } else { - PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + - hdrInfo->l3Offset); - elem->hdrInfo.l4PayLoad = - (UINT16)(ntohs(ipv6Hdr->PayloadLength) + - hdrInfo->l3Offset + sizeof(IPV6_HEADER) - - hdrInfo->l4Offset); - ASSERT(hdrInfo->isIPv6); -#ifdef DBG - sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, - (UINT32 *)&ipv6Hdr->DestinationAddress, - proto, elem->hdrInfo.l4PayLoad); -#endif - } -#ifdef DBG - ptr = (UINT16 *)(dst + hdrInfo->l4Offset + - (elem->hdrInfo.tcpCsumNeeded ? - TCP_CSUM_OFFSET : UDP_CSUM_OFFSET)); - ASSERT(*ptr == sum); -#endif - } - } - /* - * Finally insert VLAN tag - */ - if (extraLen) { - dst = elem->packet.data + userDataLen; - src = dst + extraLen; - ((UINT32 *)dst)[0] = ((UINT32 *)src)[0]; - ((UINT32 *)dst)[1] = ((UINT32 *)src)[1]; - ((UINT32 *)dst)[2] = ((UINT32 *)src)[2]; - dst += 12; - ((UINT16 *)dst)[0] = htons(0x8100); - ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId | - (vlanInfo.TagHeader.UserPriority << 13)); - elem->hdrInfo.l3Offset += VLAN_TAG_SIZE; - elem->hdrInfo.l4Offset += VLAN_TAG_SIZE; - ovsUserStats.vlanInsert++; - } - - return elem; -} - - -VOID -OvsQueuePackets(UINT32 queueId, - PLIST_ENTRY packetList, - UINT32 numElems) -{ - POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId); - POVS_PACKET_QUEUE_ELEM elem; - PIRP irp = NULL; - PLIST_ENTRY link; - UINT32 num = 0; - - OVS_LOG_LOUD("Enter: queueId %u, numELems: %u", - queueId, numElems); - if (queue == NULL) { - goto cleanup; - } - - NdisAcquireSpinLock(&queue->queueLock); - if (queue->instance == NULL) { - NdisReleaseSpinLock(&queue->queueLock); - goto cleanup; - } else { - 
OvsAppendList(&queue->packetList, packetList); - queue->numPackets += numElems; - } - if (queue->pendingIrp) { - PDRIVER_CANCEL cancelRoutine; - irp = queue->pendingIrp; - queue->pendingIrp = NULL; - cancelRoutine = IoSetCancelRoutine(irp, NULL); - if (cancelRoutine == NULL) { - irp = NULL; - } - } - NdisReleaseSpinLock(&queue->queueLock); - if (irp) { - OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); - } - -cleanup: - while (!IsListEmpty(packetList)) { - link = RemoveHeadList(packetList); - elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); - OvsFreeMemory(elem); - num++; - } - OVS_LOG_LOUD("Exit: drop %u packets", num); -} - - -/* - *---------------------------------------------------------------------------- - * OvsCreateAndAddPackets -- - * - * Create a packet and forwarded to user space. - * - * This function would fragment packet if needed, and queue - * each segment to user space. - *---------------------------------------------------------------------------- - */ -NTSTATUS -OvsCreateAndAddPackets(UINT32 queueId, - PVOID userData, - UINT32 userDataLen, - UINT32 cmd, - UINT32 inPort, - OvsIPv4TunnelKey *tunnelKey, - PNET_BUFFER_LIST nbl, - BOOLEAN isRecv, - POVS_PACKET_HDR_INFO hdrInfo, - POVS_SWITCH_CONTEXT switchContext, - LIST_ENTRY *list, - UINT32 *num) -{ - POVS_PACKET_QUEUE_ELEM elem; - PNET_BUFFER_LIST newNbl = NULL; - PNET_BUFFER nb; - - if (hdrInfo->isTcp) { - NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; - UINT32 packetLength; - - tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo); - nb = NET_BUFFER_LIST_FIRST_NB(nbl); - packetLength = NET_BUFFER_DATA_LENGTH(nb); - - OVS_LOG_TRACE("MSS %u packet len %u", - tsoInfo.LsoV1Transmit.MSS, packetLength); - if (tsoInfo.LsoV1Transmit.MSS) { - OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset); - newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo, - tsoInfo.LsoV1Transmit.MSS , 0); - if (newNbl == NULL) { - return NDIS_STATUS_FAILURE; - } - nbl = newNbl; - } - } - - 
nb = NET_BUFFER_LIST_FIRST_NB(nbl); - while (nb) { - elem = OvsCreateQueuePacket(queueId, userData, userDataLen, - cmd, inPort, tunnelKey, nbl, nb, - isRecv, hdrInfo); - if (elem) { - InsertTailList(list, &elem->link); - (*num)++; - } - nb = NET_BUFFER_NEXT_NB(nb); - } - if (newNbl) { - OvsCompleteNBL(switchContext, newNbl, TRUE); - } - return NDIS_STATUS_SUCCESS; -} diff --git a/datapath-windows/ovsext/OvsUser.h b/datapath-windows/ovsext/OvsUser.h deleted file mode 100644 index b1e6e1ef0..000000000 --- a/datapath-windows/ovsext/OvsUser.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* This file contains structures and function definitions necessary for - * forwarding packet to user space. 
- */ - -#ifndef __OVS_USER_H_ -#define __OVS_USER_H_ 1 - -/* - * Even we have more cores, I don't think we need - * more than 32 queues for processing packets to - * userspace - */ -#define OVS_MAX_NUM_PACKET_QUEUES 32 -#define OVS_DEFAULT_PACKET_QUEUE 1 -#define OVS_MAX_PACKET_QUEUE_LEN 4096 - -/* - * Only when OVS_PER_VPORT_QUEUE_CTRL is defined - * we will apply this constraint - */ -#define OVS_MAX_PACKETS_PER_VPORT 128 -#define OVS_MAX_PACKETS_PER_TUNNEL 1024 - -typedef struct _OVS_USER_PACKET_QUEUE { - UINT32 queueId; - UINT32 numPackets; - LIST_ENTRY packetList; - PVOID instance; - PIRP pendingIrp; - NDIS_SPIN_LOCK queueLock; -} OVS_USER_PACKET_QUEUE, *POVS_USER_PACKET_QUEUE; - -typedef struct _OVS_PACKET_QUEUE_ELEM { - LIST_ENTRY link; - OVS_PACKET_HDR_INFO hdrInfo; - OVS_PACKET_INFO packet; -} OVS_PACKET_QUEUE_ELEM, *POVS_PACKET_QUEUE_ELEM; - -struct _OVS_OPEN_INSTANCE; - -typedef struct _OVS_USER_STATS { - UINT64 miss; - UINT64 action; - UINT32 dropDuetoResource; - UINT32 dropDuetoChecksum; - UINT32 ipCsum; - UINT32 recalTcpCsum; - UINT32 vlanInsert; - UINT32 l4Csum; -} OVS_USER_STATS, *POVS_USER_STATS; - - -NTSTATUS OvsUserInit(); -VOID OvsUserCleanup(); - -VOID OvsCleanupPacketQueue(struct _OVS_OPEN_INSTANCE *instance); - -POVS_PACKET_QUEUE_ELEM OvsCreateQueuePacket(UINT32 queueId, - PVOID userData, - UINT32 userDataLen, - UINT32 cmd, UINT32 inPort, - OvsIPv4TunnelKey *tunnelKey, - PNET_BUFFER_LIST nbl, - PNET_BUFFER nb, - BOOLEAN isRecv, - POVS_PACKET_HDR_INFO hdrInfo); - -VOID OvsQueuePackets(UINT32 queueId, PLIST_ENTRY packetList, - UINT32 numElems); -NTSTATUS OvsCreateAndAddPackets(UINT32 queueId, - PVOID userData, - UINT32 userDataLen, - UINT32 cmd, - UINT32 inPort, - OvsIPv4TunnelKey *tunnelKey, - PNET_BUFFER_LIST nbl, - BOOLEAN isRecv, - POVS_PACKET_HDR_INFO hdrInfo, - POVS_SWITCH_CONTEXT switchContext, - LIST_ENTRY *list, - UINT32 *num); - -NTSTATUS OvsSubscribeDpIoctl(PFILE_OBJECT fileObject, - PVOID inputBuffer, - UINT32 inputLength); - 
-NTSTATUS OvsReadDpIoctl(PFILE_OBJECT fileObject, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsExecuteDpIoctl(PVOID inputBuffer, - UINT32 inputLength, - UINT32 outputLength); -NTSTATUS OvsPurgeDpIoctl(PFILE_OBJECT fileObject); - -NTSTATUS OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject); - -#endif /* __OVS_USER_H_ */ diff --git a/datapath-windows/ovsext/OvsUtil.c b/datapath-windows/ovsext/OvsUtil.c deleted file mode 100644 index e70f9a1fc..000000000 --- a/datapath-windows/ovsext/OvsUtil.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_OTHERS - -#include "OvsDebug.h" - -extern NDIS_HANDLE gOvsExtDriverHandle; - -VOID * -OvsAllocateMemory(size_t size) -{ - OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); - return NdisAllocateMemoryWithTagPriority(gOvsExtDriverHandle, - (UINT32)size, OVS_MEMORY_TAG, NormalPoolPriority); -} - -VOID * -OvsAllocateAlignedMemory(size_t size, UINT16 align) -{ - OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); - - ASSERT((align == 8) || (align == 16)); - - if ((align == 8) || (align == 16)) { - /* - * XXX: NdisAllocateMemory*() functions don't talk anything about - * alignment. 
Hence using ExAllocatePool*(); - */ - return (VOID *)ExAllocatePoolWithTagPriority(NonPagedPool, size, - OVS_MEMORY_TAG, - NormalPoolPriority); - } - - /* Invalid user input. */ - return NULL; -} - -VOID -OvsFreeMemory(VOID *ptr) -{ - ASSERT(ptr); - NdisFreeMemoryWithTagPriority(gOvsExtDriverHandle, ptr, OVS_MEMORY_TAG); -} - -VOID -OvsFreeAlignedMemory(VOID *ptr) -{ - ASSERT(ptr); - ExFreePoolWithTag(ptr, OVS_MEMORY_TAG); -} - -VOID -OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src) -{ - PLIST_ENTRY srcFirst, srcLast, dstLast; - if (IsListEmpty(src)) { - return; - } - srcFirst = src->Flink; - srcLast = src->Blink; - dstLast = dst->Blink; - - dstLast->Flink = srcFirst; - srcFirst->Blink = dstLast; - - srcLast->Flink = dst; - dst->Blink = srcLast; - - src->Flink = src; - src->Blink = src; -} diff --git a/datapath-windows/ovsext/OvsUtil.h b/datapath-windows/ovsext/OvsUtil.h deleted file mode 100644 index bff06b80d..000000000 --- a/datapath-windows/ovsext/OvsUtil.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_UTIL_H_ -#define __OVS_UTIL_H_ 1 - -#define OVS_MEMORY_TAG 'TSVO' -#define OVS_FIX_SIZE_NBL_POOL_TAG 'FSVO' -#define OVS_VARIABLE_SIZE_NBL_POOL_TAG 'VSVO' -#define OVS_NBL_ONLY_POOL_TAG 'OSVO' -#define OVS_NET_BUFFER_POOL_TAG 'NSVO' -#define OVS_OTHER_POOL_TAG 'MSVO' - -VOID *OvsAllocateMemory(size_t size); -VOID *OvsAllocateAlignedMemory(size_t size, UINT16 align); -VOID OvsFreeMemory(VOID *ptr); -VOID OvsFreeAlignedMemory(VOID *ptr); - -#define LIST_FORALL(_headPtr, _itemPtr) \ - for (_itemPtr = (_headPtr)->Flink; \ - _itemPtr != _headPtr; \ - _itemPtr = (_itemPtr)->Flink) - -#define LIST_FORALL_SAFE(_headPtr, _itemPtr, _nextPtr) \ - for (_itemPtr = (_headPtr)->Flink, _nextPtr = (_itemPtr)->Flink; \ - _itemPtr != _headPtr; \ - _itemPtr = _nextPtr, _nextPtr = (_itemPtr)->Flink) - -#define LIST_FORALL_REVERSE(_headPtr, _itemPtr) \ - for (_itemPtr = (_headPtr)->Blink; \ - _itemPtr != _headPtr; \ - _itemPtr = (_itemPtr)->Blink) - -#define LIST_FORALL_REVERSE_SAFE(_headPtr, _itemPtr, _nextPtr) \ - for (_itemPtr = (_headPtr)->Blink, _nextPtr = (_itemPtr)->Blink; \ - _itemPtr != _headPtr; \ - _itemPtr = _nextPtr, _nextPtr = (_itemPtr)->Blink) - -VOID OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src); - - -#define MIN(_a, _b) (_a) > (_b) ? 
(_b) : (_a) -#define ARRAY_SIZE(_x) ((sizeof(_x))/sizeof (_x)[0]) -#define OVS_SWITCH_PORT_ID_INVALID (NDIS_SWITCH_PORT_ID)(-1) - -#ifndef htons -#define htons(_x) _byteswap_ushort((USHORT)(_x)) -#define ntohs(_x) _byteswap_ushort((USHORT)(_x)) -#define htonl(_x) _byteswap_ulong((ULONG)(_x)) -#define ntohl(_x) _byteswap_ulong((ULONG)(_x)) -#endif - -#define OVS_INIT_OBJECT_HEADER(_obj, _type, _revision, _size) \ - { \ - PNDIS_OBJECT_HEADER hdrp = _obj; \ - hdrp->Type = _type; \ - hdrp->Revision = _revision; \ - hdrp->Size = _size; \ - } - - -#define BIT16(_x) ((UINT16)0x1 << (_x)) -#define BIT32(_x) ((UINT32)0x1 << (_x)) - -#endif /* __OVS_UTIL_H_ */ diff --git a/datapath-windows/ovsext/OvsVport.c b/datapath-windows/ovsext/OvsVport.c deleted file mode 100644 index 0c00e5268..000000000 --- a/datapath-windows/ovsext/OvsVport.c +++ /dev/null @@ -1,1415 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "precomp.h" -#include "OvsJhash.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsEvent.h" -#include "OvsUser.h" -#include "OvsVxlan.h" -#include "OvsIpHelper.h" -#include "OvsOid.h" - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_VPORT -#include "OvsDebug.h" - -#define VPORT_NIC_ENTER(_nic) \ - OVS_LOG_TRACE("Enter: PortId: %x, NicIndex: %d", _nic->PortId, \ - _nic->NicIndex) - -#define VPORT_NIC_EXIT(_nic) \ - OVS_LOG_TRACE("Exit: PortId: %x, NicIndex: %d", _nic->PortId, \ - _nic->NicIndex) - -#define VPORT_PORT_ENTER(_port) \ - OVS_LOG_TRACE("Enter: PortId: %x", _port->PortId) - -#define VPORT_PORT_EXIT(_port) \ - OVS_LOG_TRACE("Exit: PortId: %x", _port->PortId) - -#define OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC 100 - -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; -extern PNDIS_SPIN_LOCK gOvsCtrlLock; - -static UINT32 OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, UINT32 nicIndex, - OVS_VPORT_TYPE ovsType); -static POVS_VPORT_ENTRY OvsAllocateVport(VOID); -static VOID OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, - PNDIS_SWITCH_PORT_PARAMETERS portParam); -static VOID OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport, PNDIS_SWITCH_NIC_PARAMETERS nicParam); -static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY - virtVport, UINT32 nicIndex); -static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY - virtVport, UINT32 nicIndex); -static NDIS_STATUS OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport); -static VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport); -static __inline VOID OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, - ULONG sleepMicroSec); - -/* - * Functions implemented in relaton to NDIS port manipulation. 
- */ -NDIS_STATUS -OvsCreatePort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam) -{ - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - - VPORT_PORT_ENTER(portParam); - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - portParam->PortId, 0); - if (vport != NULL) { - status = STATUS_DATA_NOT_ACCEPTED; - goto create_port_done; - } - vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); - if (vport == NULL) { - status = NDIS_STATUS_RESOURCES; - goto create_port_done; - } - OvsInitVportWithPortParam(vport, portParam); - OvsInitVportCommon(switchContext, vport); - -create_port_done: - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - VPORT_PORT_EXIT(portParam); - return status; -} - -VOID -OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam) -{ - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - - VPORT_PORT_ENTER(portParam); - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - portParam->PortId, 0); - if (vport) { - /* add assertion here - */ - vport->portState = NdisSwitchPortStateTeardown; - vport->ovsState = OVS_STATE_PORT_TEAR_DOWN; - } else { - OVS_LOG_WARN("Vport not present."); - } - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - - VPORT_PORT_EXIT(portParam); -} - - - -VOID -OvsDeletePort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam) -{ - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - - VPORT_PORT_ENTER(portParam); - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - portParam->PortId, 0); - if (vport) { - OvsRemoveAndDeleteVport(switchContext, vport); - } else { - OVS_LOG_WARN("Vport not present."); - } - NdisReleaseRWLock(switchContext->dispatchLock, 
&lockState); - - VPORT_PORT_EXIT(portParam); -} - - -/* - * Functions implemented in relaton to NDIS NIC manipulation. - */ -NDIS_STATUS -OvsCreateNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - POVS_VPORT_ENTRY vport; - UINT32 portNo = 0; - UINT32 event = 0; - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - - LOCK_STATE_EX lockState; - - VPORT_NIC_ENTER(nicParam); - - /* Wait for lists to be initialized. */ - OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); - - if (!switchContext->isActivated) { - OVS_LOG_WARN("Switch is not activated yet."); - /* Veto the creation of nic */ - status = NDIS_STATUS_NOT_SUPPORTED; - goto done; - } - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, nicParam->PortId, 0); - if (vport == NULL) { - OVS_LOG_ERROR("Create NIC without Switch Port," - " PortId: %x, NicIndex: %d", - nicParam->PortId, nicParam->NicIndex); - status = NDIS_STATUS_INVALID_PARAMETER; - goto add_nic_done; - } - - if (nicParam->NicType == NdisSwitchNicTypeExternal && - nicParam->NicIndex != 0) { - POVS_VPORT_ENTRY virtVport = - (POVS_VPORT_ENTRY)switchContext->externalVport; - vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); - if (vport == NULL) { - status = NDIS_STATUS_RESOURCES; - goto add_nic_done; - } - OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex); - status = OvsInitVportCommon(switchContext, vport); - if (status != NDIS_STATUS_SUCCESS) { - OvsFreeMemory(vport); - goto add_nic_done; - } - } - OvsInitVportWithNicParam(switchContext, vport, nicParam); - portNo = vport->portNo; - if (vport->ovsState == OVS_STATE_CONNECTED) { - event = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP; - } else if (vport->ovsState == OVS_STATE_NIC_CREATED) { - event = OVS_EVENT_CONNECT; - } - -add_nic_done: - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - if (portNo && event) { - OvsPostEvent(portNo, event); - } - -done: - 
VPORT_NIC_EXIT(nicParam); - OVS_LOG_TRACE("Exit: status %8x.\n", status); - - return status; -} - - -/* Mark already created NIC as connected. */ -VOID -OvsConnectNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - LOCK_STATE_EX lockState; - POVS_VPORT_ENTRY vport; - UINT32 portNo = 0; - - VPORT_NIC_ENTER(nicParam); - - /* Wait for lists to be initialized. */ - OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); - - if (!switchContext->isActivated) { - OVS_LOG_WARN("Switch is not activated yet."); - goto done; - } - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - nicParam->PortId, - nicParam->NicIndex); - - if (!vport) { - OVS_LOG_WARN("Vport not present."); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - ASSERT(0); - goto done; - } - - vport->ovsState = OVS_STATE_CONNECTED; - vport->nicState = NdisSwitchNicStateConnected; - portNo = vport->portNo; - - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - - OvsPostEvent(portNo, OVS_EVENT_LINK_UP); - - if (nicParam->NicType == NdisSwitchNicTypeInternal) { - OvsInternalAdapterUp(portNo, &nicParam->NetCfgInstanceId); - } - -done: - VPORT_NIC_EXIT(nicParam); -} - -VOID -OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - - UINT32 status = 0, portNo = 0; - - VPORT_NIC_ENTER(nicParam); - - /* Wait for lists to be initialized. 
*/ - OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); - - if (!switchContext->isActivated) { - OVS_LOG_WARN("Switch is not activated yet."); - goto update_nic_done; - } - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - nicParam->PortId, - nicParam->NicIndex); - if (vport == NULL) { - OVS_LOG_WARN("Vport search failed."); - goto update_nic_done; - } - switch (nicParam->NicType) { - case NdisSwitchNicTypeExternal: - case NdisSwitchNicTypeInternal: - RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId, - sizeof (GUID)); - break; - case NdisSwitchNicTypeSynthetic: - case NdisSwitchNicTypeEmulated: - if (!RtlEqualMemory(vport->vmMacAddress, nicParam->VMMacAddress, - sizeof (vport->vmMacAddress))) { - status |= OVS_EVENT_MAC_CHANGE; - RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress, - sizeof (vport->vmMacAddress)); - } - break; - default: - ASSERT(0); - } - if (!RtlEqualMemory(vport->permMacAddress, nicParam->PermanentMacAddress, - sizeof (vport->permMacAddress))) { - RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress, - sizeof (vport->permMacAddress)); - status |= OVS_EVENT_MAC_CHANGE; - } - if (!RtlEqualMemory(vport->currMacAddress, nicParam->CurrentMacAddress, - sizeof (vport->currMacAddress))) { - RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress, - sizeof (vport->currMacAddress)); - status |= OVS_EVENT_MAC_CHANGE; - } - - if (vport->mtu != nicParam->MTU) { - vport->mtu = nicParam->MTU; - status |= OVS_EVENT_MTU_CHANGE; - } - vport->numaNodeId = nicParam->NumaNodeId; - portNo = vport->portNo; - - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - if (status && portNo) { - OvsPostEvent(portNo, status); - } -update_nic_done: - VPORT_NIC_EXIT(nicParam); -} - - -VOID -OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - POVS_VPORT_ENTRY vport; - UINT32 portNo = 
0; - LOCK_STATE_EX lockState; - BOOLEAN isInternalPort = FALSE; - - VPORT_NIC_ENTER(nicParam); - - /* Wait for lists to be initialized. */ - OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); - - if (!switchContext->isActivated) { - OVS_LOG_WARN("Switch is not activated yet."); - goto done; - } - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - nicParam->PortId, - nicParam->NicIndex); - - if (!vport) { - OVS_LOG_WARN("Vport not present."); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - goto done; - } - - vport->nicState = NdisSwitchNicStateDisconnected; - vport->ovsState = OVS_STATE_NIC_CREATED; - portNo = vport->portNo; - - if (vport->ovsType == OVSWIN_VPORT_TYPE_INTERNAL) { - isInternalPort = TRUE; - } - - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - - OvsPostEvent(portNo, OVS_EVENT_LINK_DOWN); - - if (isInternalPort) { - OvsInternalAdapterDown(); - } - -done: - VPORT_NIC_EXIT(nicParam); -} - - -VOID -OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - LOCK_STATE_EX lockState; - POVS_VPORT_ENTRY vport; - UINT32 portNo = 0; - - VPORT_NIC_ENTER(nicParam); - /* Wait for lists to be initialized. 
*/ - OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); - - if (!switchContext->isActivated) { - OVS_LOG_WARN("Switch is not activated yet."); - goto done; - } - - NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - nicParam->PortId, - nicParam->NicIndex); - - if (!vport) { - OVS_LOG_WARN("Vport not present."); - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - goto done; - } - - portNo = vport->portNo; - if (vport->portType == NdisSwitchPortTypeExternal && - vport->nicIndex != 0) { - OvsRemoveAndDeleteVport(switchContext, vport); - } - vport->nicState = NdisSwitchNicStateUnknown; - vport->ovsState = OVS_STATE_PORT_CREATED; - - NdisReleaseRWLock(switchContext->dispatchLock, &lockState); - OvsPostEvent(portNo, OVS_EVENT_DISCONNECT); - -done: - VPORT_NIC_EXIT(nicParam); -} - - -/* - * OVS Vport related functionality. - */ -POVS_VPORT_ENTRY -OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, - UINT32 portNo) -{ - if (OVS_VPORT_INDEX(portNo) < OVS_MAX_VPORT_ARRAY_SIZE) { - if (OVS_IS_VPORT_ENTRY_NULL(switchContext, OVS_VPORT_INDEX(portNo))) { - return NULL; - } else { - POVS_VPORT_ENTRY vport; - vport = (POVS_VPORT_ENTRY) - switchContext->vportArray[OVS_VPORT_INDEX(portNo)]; - return vport->portNo == portNo ? 
vport : NULL; - } - } - return NULL; -} - - -POVS_VPORT_ENTRY -OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, - CHAR *name, - UINT32 length) -{ - POVS_VPORT_ENTRY vport; - PLIST_ENTRY head, link; - UINT32 hash = OvsJhashBytes((const VOID *)name, length, OVS_HASH_BASIS); - head = &(switchContext->nameHashArray[hash & OVS_VPORT_MASK]); - LIST_FORALL(head, link) { - vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, nameLink); - if (vport->ovsNameLen == length && - RtlEqualMemory(name, vport->ovsName, length)) { - return vport; - } - } - return NULL; -} - -POVS_VPORT_ENTRY -OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext, - NDIS_SWITCH_PORT_ID portId, - NDIS_SWITCH_NIC_INDEX index) -{ - if (portId == switchContext->externalPortId) { - if (index == 0) { - return (POVS_VPORT_ENTRY)switchContext->externalVport; - } else if (index > OVS_MAX_PHYS_ADAPTERS) { - return NULL; - } - if (OVS_IS_VPORT_ENTRY_NULL(switchContext, - index + OVS_EXTERNAL_VPORT_START)) { - return NULL; - } else { - return (POVS_VPORT_ENTRY)switchContext->vportArray[ - index + OVS_EXTERNAL_VPORT_START]; - } - } else if (switchContext->internalPortId == portId) { - return (POVS_VPORT_ENTRY)switchContext->internalVport; - } else { - PLIST_ENTRY head, link; - POVS_VPORT_ENTRY vport; - UINT32 hash; - hash = OvsJhashWords((UINT32 *)&portId, 1, OVS_HASH_BASIS); - head = &(switchContext->portHashArray[hash & OVS_VPORT_MASK]); - LIST_FORALL(head, link) { - vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, portLink); - if (portId == vport->portId && index == vport->nicIndex) { - return vport; - } - } - return NULL; - } -} - -static UINT32 -OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, - UINT32 nicIndex, - OVS_VPORT_TYPE ovsType) -{ - UINT32 index = 0xffffff, i = 0; - UINT64 gen; - - switch (ovsType) { - case OVSWIN_VPORT_TYPE_EXTERNAL: - if (nicIndex == 0) { - return 0; // not a valid portNo - } else if (nicIndex > OVS_MAX_PHYS_ADAPTERS) { - return 0; - } else { - index = nicIndex + 
OVS_EXTERNAL_VPORT_START; - } - break; - case OVSWIN_VPORT_TYPE_INTERNAL: - index = OVS_INTERNAL_VPORT_DEFAULT_INDEX; - break; - case OVSWIN_VPORT_TYPE_SYNTHETIC: - case OVSWIN_VPORT_TYPE_EMULATED: - index = switchContext->lastPortIndex + 1; - if (index == OVS_MAX_VPORT_ARRAY_SIZE) { - index = OVS_VM_VPORT_START; - } - while (!OVS_IS_VPORT_ENTRY_NULL(switchContext, index) && - i < (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) { - index++; - i++; - if (index == OVS_MAX_VPORT_ARRAY_SIZE) { - index = OVS_VM_VPORT_START; - } - } - if (i == (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) { - return 0; // not available - } - switchContext->lastPortIndex = index; - break; - case OVSWIN_VPORT_TYPE_GRE: - index = OVS_GRE_VPORT_INDEX; - break; - case OVSWIN_VPORT_TYPE_GRE64: - index = OVS_GRE64_VPORT_INDEX; - break; - case OVSWIN_VPORT_TYPE_VXLAN: - index = OVS_VXLAN_VPORT_INDEX; - break; - case OVSWIN_VPORT_TYPE_LOCAL: - default: - ASSERT(0); - } - if (index > OVS_MAX_VPORT_ARRAY_SIZE) { - return 0; - } - gen = (UINT64)switchContext->vportArray[index]; - if (gen > 0xff) { - return 0; - } else if (gen == 0) { - gen++; - } - return OVS_VPORT_PORT_NO(index, (UINT32)gen); -} - - -static POVS_VPORT_ENTRY -OvsAllocateVport(VOID) -{ - POVS_VPORT_ENTRY vport; - vport = (POVS_VPORT_ENTRY)OvsAllocateMemory(sizeof (OVS_VPORT_ENTRY)); - if (vport == NULL) { - return NULL; - } - RtlZeroMemory(vport, sizeof (OVS_VPORT_ENTRY)); - vport->ovsState = OVS_STATE_UNKNOWN; - return vport; -} - -static VOID -OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, - PNDIS_SWITCH_PORT_PARAMETERS portParam) -{ - vport->isValidationPort = portParam->IsValidationPort; - vport->portType = portParam->PortType; - vport->portState = portParam->PortState; - vport->portId = portParam->PortId; - vport->nicState = NdisSwitchNicStateUnknown; - - switch (vport->portType) { - case NdisSwitchPortTypeExternal: - vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL; - break; - case NdisSwitchPortTypeInternal: - vport->ovsType 
= OVSWIN_VPORT_TYPE_INTERNAL; - break; - case NdisSwitchPortTypeSynthetic: - vport->ovsType = OVSWIN_VPORT_TYPE_SYNTHETIC; - break; - case NdisSwitchPortTypeEmulated: - vport->ovsType = OVSWIN_VPORT_TYPE_EMULATED; - break; - } - RtlCopyMemory(&vport->portName, &portParam->PortName, - sizeof (NDIS_SWITCH_PORT_NAME)); - switch (vport->portState) { - case NdisSwitchPortStateCreated: - vport->ovsState = OVS_STATE_PORT_CREATED; - break; - case NdisSwitchPortStateTeardown: - vport->ovsState = OVS_STATE_PORT_TEAR_DOWN; - break; - case NdisSwitchPortStateDeleted: - vport->ovsState = OVS_STATE_PORT_DELETED; - break; - } -} - - -static VOID -OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport, - PNDIS_SWITCH_NIC_PARAMETERS nicParam) -{ - ASSERT(vport->portId == nicParam->PortId); - ASSERT(vport->ovsState == OVS_STATE_PORT_CREATED); - - UNREFERENCED_PARAMETER(switchContext); - - RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress, - sizeof (nicParam->PermanentMacAddress)); - RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress, - sizeof (nicParam->CurrentMacAddress)); - - if (nicParam->NicType == NdisSwitchNicTypeSynthetic || - nicParam->NicType == NdisSwitchNicTypeEmulated) { - RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress, - sizeof (nicParam->VMMacAddress)); - RtlCopyMemory(&vport->vmName, &nicParam->VmName, - sizeof (nicParam->VmName)); - } else { - RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId, - sizeof (nicParam->NetCfgInstanceId)); - } - RtlCopyMemory(&vport->nicName, &nicParam->NicName, - sizeof (nicParam->NicName)); - vport->mtu = nicParam->MTU; - vport->nicState = nicParam->NicState; - vport->nicIndex = nicParam->NicIndex; - vport->numaNodeId = nicParam->NumaNodeId; - - switch (vport->nicState) { - case NdisSwitchNicStateCreated: - vport->ovsState = OVS_STATE_NIC_CREATED; - break; - case NdisSwitchNicStateConnected: - vport->ovsState = OVS_STATE_CONNECTED; - break; - case 
NdisSwitchNicStateDisconnected: - vport->ovsState = OVS_STATE_NIC_CREATED; - break; - case NdisSwitchNicStateDeleted: - vport->ovsState = OVS_STATE_PORT_CREATED; - break; - } -} - -static VOID -OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, - POVS_VPORT_ENTRY virtVport, - UINT32 nicIndex) -{ - vport->isValidationPort = virtVport->isValidationPort; - vport->portType = virtVport->portType; - vport->portState = virtVport->portState; - vport->portId = virtVport->portId; - vport->nicState = NdisSwitchNicStateUnknown; - vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL; - vport->nicIndex = (NDIS_SWITCH_NIC_INDEX)nicIndex; - RtlCopyMemory(&vport->portName, &virtVport->portName, - sizeof (NDIS_SWITCH_PORT_NAME)); - vport->ovsState = OVS_STATE_PORT_CREATED; -} -static NDIS_STATUS -OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, -POVS_VPORT_ENTRY vport) -{ - UINT32 hash; - size_t len; - if (vport->portType != NdisSwitchPortTypeExternal || - vport->nicIndex != 0) { - vport->portNo = OvsGetVportNo(switchContext, vport->nicIndex, - vport->ovsType); - if (vport->portNo == 0) { - return NDIS_STATUS_RESOURCES; - } - ASSERT(OVS_IS_VPORT_ENTRY_NULL(switchContext, - OVS_VPORT_INDEX(vport->portNo))); - - switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = vport; - } - switch (vport->portType) { - case NdisSwitchPortTypeExternal: - if (vport->nicIndex == 0) { - switchContext->externalPortId = vport->portId; - switchContext->externalVport = vport; - RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, - "external.virtualAdapter"); - } - else { - switchContext->numPhysicalNics++; - RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, - "external.%lu", (UINT32)vport->nicIndex); - } - break; - case NdisSwitchPortTypeInternal: - switchContext->internalPortId = vport->portId; - switchContext->internalVport = vport; - RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, - "internal"); - break; - case NdisSwitchPortTypeSynthetic: - 
RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, - "vmNICSyn.%lx", vport->portNo); - break; - case NdisSwitchPortTypeEmulated: - RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, - "vmNICEmu.%lx", vport->portNo); - break; - } - StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); - vport->ovsNameLen = (UINT32)len; - if (vport->portType == NdisSwitchPortTypeExternal && - vport->nicIndex == 0) { - return NDIS_STATUS_SUCCESS; - } - hash = OvsJhashBytes(vport->ovsName, vport->ovsNameLen, OVS_HASH_BASIS); - InsertHeadList(&switchContext->nameHashArray[hash & OVS_VPORT_MASK], - &vport->nameLink); - hash = OvsJhashWords(&vport->portId, 1, OVS_HASH_BASIS); - InsertHeadList(&switchContext->portHashArray[hash & OVS_VPORT_MASK], - &vport->portLink); - switchContext->numVports++; - return NDIS_STATUS_SUCCESS; -} - - -static VOID -OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, - POVS_VPORT_ENTRY vport) -{ - UINT64 gen = vport->portNo >> 24; - switch (vport->ovsType) { - case OVSWIN_VPORT_TYPE_EXTERNAL: - if (vport->nicIndex == 0) { - ASSERT(switchContext->numPhysicalNics == 0); - switchContext->externalPortId = 0; - switchContext->externalVport = NULL; - OvsFreeMemory(vport); - return; - } else { - ASSERT(switchContext->numPhysicalNics); - switchContext->numPhysicalNics--; - } - break; - case OVSWIN_VPORT_TYPE_INTERNAL: - switchContext->internalPortId = 0; - switchContext->internalVport = NULL; - OvsInternalAdapterDown(); - break; - case OVSWIN_VPORT_TYPE_VXLAN: - OvsCleanupVxlanTunnel(vport); - break; - case OVSWIN_VPORT_TYPE_GRE: - case OVSWIN_VPORT_TYPE_GRE64: - break; - case OVSWIN_VPORT_TYPE_EMULATED: - case OVSWIN_VPORT_TYPE_SYNTHETIC: - default: - break; - } - - RemoveEntryList(&vport->nameLink); - RemoveEntryList(&vport->portLink); - gen = (gen + 1) & 0xff; - switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = - (PVOID)(UINT64)gen; - switchContext->numVports--; - OvsFreeMemory(vport); -} - - -NDIS_STATUS 
-OvsAddConfiguredSwitchPorts(POVS_SWITCH_CONTEXT switchContext) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - ULONG arrIndex; - PNDIS_SWITCH_PORT_PARAMETERS portParam; - PNDIS_SWITCH_PORT_ARRAY portArray = NULL; - POVS_VPORT_ENTRY vport; - - OVS_LOG_TRACE("Enter: switchContext:%p", switchContext); - - status = OvsGetPortsOnSwitch(switchContext, &portArray); - if (status != NDIS_STATUS_SUCCESS) { - goto cleanup; - } - - for (arrIndex = 0; arrIndex < portArray->NumElements; arrIndex++) { - portParam = NDIS_SWITCH_PORT_AT_ARRAY_INDEX(portArray, arrIndex); - vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); - if (vport == NULL) { - status = NDIS_STATUS_RESOURCES; - goto cleanup; - } - OvsInitVportWithPortParam(vport, portParam); - status = OvsInitVportCommon(switchContext, vport); - if (status != NDIS_STATUS_SUCCESS) { - OvsFreeMemory(vport); - goto cleanup; - } - } -cleanup: - if (status != NDIS_STATUS_SUCCESS) { - OvsClearAllSwitchVports(switchContext); - } - - if (portArray != NULL) { - OvsFreeMemory(portArray); - } - OVS_LOG_TRACE("Exit: status: %x", status); - return status; -} - - -NDIS_STATUS -OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext) -{ - NDIS_STATUS status = NDIS_STATUS_SUCCESS; - PNDIS_SWITCH_NIC_ARRAY nicArray = NULL; - ULONG arrIndex; - PNDIS_SWITCH_NIC_PARAMETERS nicParam; - POVS_VPORT_ENTRY vport; - - OVS_LOG_TRACE("Enter: switchContext: %p", switchContext); - /* - * Now, get NIC list. - */ - status = OvsGetNicsOnSwitch(switchContext, &nicArray); - if (status != NDIS_STATUS_SUCCESS) { - goto cleanup; - } - for (arrIndex = 0; arrIndex < nicArray->NumElements; ++arrIndex) { - - nicParam = NDIS_SWITCH_NIC_AT_ARRAY_INDEX(nicArray, arrIndex); - - /* - * XXX: Check if the port is configured with a VLAN. Disallow such a - * configuration, since we don't support tag-in-tag. - */ - - /* - * XXX: Check if the port is connected to a VF. Disconnect the VF in - * such a case. 
- */ - - if (nicParam->NicType == NdisSwitchNicTypeExternal && - nicParam->NicIndex != 0) { - POVS_VPORT_ENTRY virtVport = - (POVS_VPORT_ENTRY)switchContext->externalVport; - vport = OvsAllocateVport(); - if (vport) { - OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex); - status = OvsInitVportCommon(switchContext, vport); - if (status != NDIS_STATUS_SUCCESS) { - OvsFreeMemory(vport); - vport = NULL; - } - } - } else { - vport = OvsFindVportByPortIdAndNicIndex(switchContext, - nicParam->PortId, - nicParam->NicIndex); - } - if (vport == NULL) { - OVS_LOG_ERROR("Fail to allocate vport"); - continue; - } - OvsInitVportWithNicParam(switchContext, vport, nicParam); - if (nicParam->NicType == NdisSwitchNicTypeInternal) { - OvsInternalAdapterUp(vport->portNo, &nicParam->NetCfgInstanceId); - } - } -cleanup: - - if (nicArray != NULL) { - OvsFreeMemory(nicArray); - } - OVS_LOG_TRACE("Exit: status: %x", status); - return status; -} - -VOID -OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) -{ - UINT32 i; - - for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { - if (!OVS_IS_VPORT_ENTRY_NULL(switchContext, i)) { - OvsRemoveAndDeleteVport(switchContext, - (POVS_VPORT_ENTRY)switchContext->vportArray[i]); - } - } - if (switchContext->externalVport) { - OvsRemoveAndDeleteVport(switchContext, - (POVS_VPORT_ENTRY)switchContext->externalVport); - } -} - -NTSTATUS -OvsDumpVportIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - UINT32 numVports, count; - UINT32 dpNo, i; - UINT32 *outPtr; - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - - if (inputLength < sizeof (UINT32)) { - return STATUS_INVALID_PARAMETER; - } - dpNo = *(UINT32 *)inputBuffer; - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - NdisReleaseSpinLock(gOvsCtrlLock); - return STATUS_INVALID_PARAMETER; - } - /* - * We should hold SwitchContext RW lock - */ - - 
NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - numVports = outputLength/sizeof (UINT32); - numVports = MIN(gOvsSwitchContext->numVports, numVports); - outPtr = (UINT32 *)outputBuffer; - for (i = 0, count = 0; - i < OVS_MAX_VPORT_ARRAY_SIZE && count < numVports; i++) { - vport = (POVS_VPORT_ENTRY)gOvsSwitchContext->vportArray[i]; - if (OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, i)) { - continue; - } - if (vport->ovsState == OVS_STATE_CONNECTED || - vport->ovsState == OVS_STATE_NIC_CREATED) { - *outPtr = vport->portNo; - outPtr++; - count++; - } - } - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - *replyLen = count * sizeof (UINT32); - return STATUS_SUCCESS; -} - - -NTSTATUS -OvsGetVportIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - UINT32 dpNo; - POVS_VPORT_GET get; - POVS_VPORT_INFO info; - POVS_VPORT_ENTRY vport; - size_t len; - LOCK_STATE_EX lockState; - - if (inputLength < sizeof (OVS_VPORT_GET) || - outputLength < sizeof (OVS_VPORT_INFO)) { - return STATUS_INVALID_PARAMETER; - } - get = (POVS_VPORT_GET)inputBuffer; - dpNo = get->dpNo; - info = (POVS_VPORT_INFO)outputBuffer; - RtlZeroMemory(info, sizeof (POVS_VPORT_INFO)); - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != dpNo) { - NdisReleaseSpinLock(gOvsCtrlLock); - return STATUS_INVALID_PARAMETER; - } - - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - if (get->portNo == 0) { - StringCbLengthA(get->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); - vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, (UINT32)len); - } else { - vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); - } - if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && - vport->ovsState != OVS_STATE_NIC_CREATED)) { - 
NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - /* - * XXX Change to NO DEVICE - */ - return STATUS_DEVICE_DOES_NOT_EXIST; - } - info->dpNo = dpNo; - info->portNo = vport->portNo; - info->type = vport->ovsType; - RtlCopyMemory(info->macAddress, vport->permMacAddress, - sizeof (vport->permMacAddress)); - RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 1); - - info->rxPackets = vport->stats.rxPackets; - info->rxBytes = vport->stats.rxBytes; - info->txPackets = vport->stats.txPackets; - info->txBytes = vport->stats.txBytes; - info->rxErrors = vport->errStats.rxErrors; - info->txErrors = vport->errStats.txErrors; - info->rxDropped = vport->errStats.rxDropped; - info->txDropped = vport->errStats.txDropped; - - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - *replyLen = sizeof (OVS_VPORT_INFO); - return STATUS_SUCCESS; -} - - -NTSTATUS -OvsInitTunnelVport(POVS_VPORT_ENTRY vport, - POVS_VPORT_ADD_REQUEST addReq) -{ - size_t len; - NTSTATUS status = STATUS_SUCCESS; - - vport->isValidationPort = FALSE; - vport->ovsType = addReq->type; - vport->ovsState = OVS_STATE_PORT_CREATED; - RtlCopyMemory(vport->ovsName, addReq->name, OVS_MAX_PORT_NAME_LENGTH); - vport->ovsName[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; - StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); - vport->ovsNameLen = (UINT32)len; - switch (addReq->type) { - case OVSWIN_VPORT_TYPE_GRE: - break; - case OVSWIN_VPORT_TYPE_GRE64: - break; - case OVSWIN_VPORT_TYPE_VXLAN: - status = OvsInitVxlanTunnel(vport, addReq); - break; - default: - ASSERT(0); - } - return status; -} - -NTSTATUS -OvsAddVportIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - NTSTATUS status = STATUS_SUCCESS; - POVS_VPORT_INFO vportInfo; - POVS_VPORT_ADD_REQUEST addReq; - POVS_VPORT_ENTRY vport; - LOCK_STATE_EX lockState; - UINT32 index; - UINT32 
portNo; - - OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", - inputLength, outputLength); - if (inputLength < sizeof (OVS_VPORT_ADD_REQUEST) || - outputLength < sizeof (OVS_VPORT_INFO)) { - status = STATUS_INVALID_PARAMETER; - goto vport_add_done; - } - addReq = (POVS_VPORT_ADD_REQUEST)inputBuffer; - addReq->name[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; - - switch (addReq->type) { - case OVSWIN_VPORT_TYPE_GRE: - index = OVS_GRE_VPORT_INDEX; - break; - case OVSWIN_VPORT_TYPE_GRE64: - index = OVS_GRE64_VPORT_INDEX; - break; - case OVSWIN_VPORT_TYPE_VXLAN: - index = OVS_VXLAN_VPORT_INDEX; - break; - default: - status = STATUS_NOT_SUPPORTED; - goto vport_add_done; - } - - vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); - if (vport == NULL) { - status = STATUS_INSUFFICIENT_RESOURCES; - goto vport_add_done; - } - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != addReq->dpNo) { - NdisReleaseSpinLock(gOvsCtrlLock); - status = STATUS_INVALID_PARAMETER; - OvsFreeMemory(vport); - goto vport_add_done; - } - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - if (!OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) { - status = STATUS_DEVICE_BUSY; - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - OvsFreeMemory(vport); - goto vport_add_done; - } - - status = OvsInitTunnelVport(vport, addReq); - if (status != STATUS_SUCCESS) { - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - OvsFreeMemory(vport); - goto vport_add_done; - } - - status = OvsInitVportCommon(gOvsSwitchContext, vport); - ASSERT(status == NDIS_STATUS_SUCCESS); - - vport->ovsState = OVS_STATE_CONNECTED; - vport->nicState = NdisSwitchNicStateConnected; - - vportInfo = (POVS_VPORT_INFO)outputBuffer; - - RtlZeroMemory(vportInfo, sizeof (POVS_VPORT_INFO)); - vportInfo->dpNo = gOvsSwitchContext->dpNo; - 
vportInfo->portNo = vport->portNo; - vportInfo->type = vport->ovsType; - RtlCopyMemory(vportInfo->name, vport->ovsName, vport->ovsNameLen + 1); - portNo = vport->portNo; - - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - OvsPostEvent(portNo, OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP); - *replyLen = sizeof (OVS_VPORT_INFO); - status = STATUS_SUCCESS; -vport_add_done: - OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", - *replyLen, status); - return status; -} - -NTSTATUS -OvsDelVportIoctl(PVOID inputBuffer, - UINT32 inputLength, - UINT32 *replyLen) -{ - NTSTATUS status = STATUS_SUCCESS; - POVS_VPORT_DELETE_REQUEST delReq; - LOCK_STATE_EX lockState; - POVS_VPORT_ENTRY vport; - size_t len; - UINT32 portNo = 0; - - OVS_LOG_TRACE("Enter: inputLength: %u", inputLength); - - if (inputLength < sizeof (OVS_VPORT_DELETE_REQUEST)) { - status = STATUS_INVALID_PARAMETER; - goto vport_del_done; - } - delReq = (POVS_VPORT_DELETE_REQUEST)inputBuffer; - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != delReq->dpNo) { - NdisReleaseSpinLock(gOvsCtrlLock); - status = STATUS_INVALID_PARAMETER; - goto vport_del_done; - } - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - if (delReq->portNo == 0) { - StringCbLengthA(delReq->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); - vport = OvsFindVportByOvsName(gOvsSwitchContext, delReq->name, - (UINT32)len); - } else { - vport = OvsFindVportByPortNo(gOvsSwitchContext, delReq->portNo); - } - if (vport) { - OVS_LOG_INFO("delete vport: %s, portNo: %x", vport->ovsName, - vport->portNo); - portNo = vport->portNo; - OvsRemoveAndDeleteVport(gOvsSwitchContext, vport); - } else { - status = STATUS_DEVICE_DOES_NOT_EXIST; - } - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - if (vport) { - OvsPostEvent(portNo, OVS_EVENT_DISCONNECT | 
OVS_EVENT_LINK_DOWN); - } -vport_del_done: - OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", - *replyLen, status); - return status; -} - -NTSTATUS -OvsConvertIfCountedStrToAnsiStr(PIF_COUNTED_STRING wStr, - CHAR *str, - UINT16 maxStrLen) -{ - ANSI_STRING astr; - UNICODE_STRING ustr; - NTSTATUS status; - UINT32 size; - - ustr.Buffer = wStr->String; - ustr.Length = wStr->Length; - ustr.MaximumLength = IF_MAX_STRING_SIZE; - - astr.Buffer = str; - astr.MaximumLength = maxStrLen; - astr.Length = 0; - - size = RtlUnicodeStringToAnsiSize(&ustr); - if (size > maxStrLen) { - return STATUS_BUFFER_OVERFLOW; - } - - status = RtlUnicodeStringToAnsiString(&astr, &ustr, FALSE); - - ASSERT(status == STATUS_SUCCESS); - if (status != STATUS_SUCCESS) { - return status; - } - ASSERT(astr.Length <= maxStrLen); - str[astr.Length] = 0; - return STATUS_SUCCESS; -} - - -NTSTATUS -OvsGetExtInfoIoctl(PVOID inputBuffer, - UINT32 inputLength, - PVOID outputBuffer, - UINT32 outputLength, - UINT32 *replyLen) -{ - POVS_VPORT_GET get; - POVS_VPORT_EXT_INFO info; - POVS_VPORT_ENTRY vport; - size_t len; - LOCK_STATE_EX lockState; - NTSTATUS status = STATUS_SUCCESS; - NDIS_SWITCH_NIC_NAME nicName; - NDIS_VM_NAME vmName; - BOOLEAN doConvert = FALSE; - - OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", - inputLength, outputLength); - - if (inputLength < sizeof (OVS_VPORT_GET) || - outputLength < sizeof (OVS_VPORT_EXT_INFO)) { - status = STATUS_INVALID_PARAMETER; - goto ext_info_done; - } - get = (POVS_VPORT_GET)inputBuffer; - info = (POVS_VPORT_EXT_INFO)outputBuffer; - RtlZeroMemory(info, sizeof (POVS_VPORT_EXT_INFO)); - - NdisAcquireSpinLock(gOvsCtrlLock); - if (gOvsSwitchContext == NULL || - gOvsSwitchContext->dpNo != get->dpNo) { - NdisReleaseSpinLock(gOvsCtrlLock); - status = STATUS_INVALID_PARAMETER; - goto ext_info_done; - } - NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, - NDIS_RWL_AT_DISPATCH_LEVEL); - if (get->portNo == 0) { - StringCbLengthA(get->name, 
OVS_MAX_PORT_NAME_LENGTH - 1, &len); - vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, - (UINT32)len); - } else { - vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); - } - if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && - vport->ovsState != OVS_STATE_NIC_CREATED)) { - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - if (get->portNo) { - OVS_LOG_WARN("vport %u does not exist any more", get->portNo); - } else { - OVS_LOG_WARN("vport %s does not exist any more", get->name); - } - status = STATUS_DEVICE_DOES_NOT_EXIST; - goto ext_info_done; - } - info->dpNo = get->dpNo; - info->portNo = vport->portNo; - RtlCopyMemory(info->macAddress, vport->currMacAddress, - sizeof (vport->currMacAddress)); - RtlCopyMemory(info->permMACAddress, vport->permMacAddress, - sizeof (vport->permMacAddress)); - if (vport->ovsType == OVSWIN_VPORT_TYPE_SYNTHETIC || - vport->ovsType == OVSWIN_VPORT_TYPE_EMULATED) { - RtlCopyMemory(info->vmMACAddress, vport->vmMacAddress, - sizeof (vport->vmMacAddress)); - } - info->nicIndex = vport->nicIndex; - info->portId = vport->portId; - info->type = vport->ovsType; - info->mtu = vport->mtu; - /* - * TO be revisit XXX - */ - if (vport->ovsState == OVS_STATE_NIC_CREATED) { - info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_DOWN; - } else if (vport->ovsState == OVS_STATE_CONNECTED) { - info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP; - } else { - info->status = OVS_EVENT_DISCONNECT; - } - if ((info->type == OVSWIN_VPORT_TYPE_SYNTHETIC || - info->type == OVSWIN_VPORT_TYPE_EMULATED) && - (vport->ovsState == OVS_STATE_NIC_CREATED || - vport->ovsState == OVS_STATE_CONNECTED)) { - RtlCopyMemory(&vmName, &vport->vmName, sizeof (NDIS_VM_NAME)); - RtlCopyMemory(&nicName, &vport->nicName, sizeof - (NDIS_SWITCH_NIC_NAME)); - doConvert = TRUE; - } else { - info->vmUUID[0] = 0; - info->vifUUID[0] = 0; - } - - RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 
1); - NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); - NdisReleaseSpinLock(gOvsCtrlLock); - if (doConvert) { - status = OvsConvertIfCountedStrToAnsiStr(&vmName, - info->vmUUID, - OVS_MAX_VM_UUID_LEN); - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to convert VM name."); - info->vmUUID[0] = 0; - } - - status = OvsConvertIfCountedStrToAnsiStr(&nicName, - info->vifUUID, - OVS_MAX_VIF_UUID_LEN); - if (status != STATUS_SUCCESS) { - OVS_LOG_INFO("Fail to convert nic name"); - info->vifUUID[0] = 0; - } - /* - * for now ignore status - */ - status = STATUS_SUCCESS; - } - *replyLen = sizeof (OVS_VPORT_EXT_INFO); - -ext_info_done: - OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", - *replyLen, status); - return status; -} - - -static __inline VOID -OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, ULONG sleepMicroSec) -{ - while ((!switchContext->isActivated) && - (!switchContext->isActivateFailed)) { - /* Wait for the switch to be active and - * the list of ports in OVS to be initialized. */ - NdisMSleep(sleepMicroSec); - } -} diff --git a/datapath-windows/ovsext/OvsVport.h b/datapath-windows/ovsext/OvsVport.h deleted file mode 100644 index 4ab0019fe..000000000 --- a/datapath-windows/ovsext/OvsVport.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __OVS_VPORT_H_ -#define __OVS_VPORT_H_ 1 - -#include "OvsSwitch.h" - -/* - * A Vport, or Virtual Port, is a port on the OVS. It can be one of the - * following types. Some of the Vports are "real" ports on the hyper-v switch, - * and some are not: - * - VIF port (VM's NIC) - * - External Adapters (physical NIC) - * - Internal Adapter (Virtual adapter exposed on the host). - * - Tunnel ports created by OVS userspace. - */ - -typedef enum { - OVS_STATE_UNKNOWN, - OVS_STATE_PORT_CREATED, - OVS_STATE_NIC_CREATED, - OVS_STATE_CONNECTED, - OVS_STATE_PORT_TEAR_DOWN, - OVS_STATE_PORT_DELETED, -} OVS_VPORT_STATE; - -typedef struct _OVS_VPORT_STATS { - UINT64 rxBytes; - UINT64 rxPackets; - UINT64 txBytes; - UINT64 txPackets; -} OVS_VPORT_STATS; - -typedef struct _OVS_VPORT_ERR_STATS { - UINT64 rxErrors; - UINT64 txErrors; - UINT64 rxDropped; - UINT64 txDropped; -} OVS_VPORT_ERR_STATS; -/* - * Each internal, external adapter or vritual adapter has - * one vport entry. In addition, we have one vport for each - * tunnel type, such as vxlan, gre, gre64 - */ -typedef struct _OVS_VPORT_ENTRY { - LIST_ENTRY nameLink; - LIST_ENTRY portLink; - - OVS_VPORT_STATE ovsState; - OVS_VPORT_TYPE ovsType; - OVS_VPORT_STATS stats; - OVS_VPORT_ERR_STATS errStats; - UINT32 portNo; - UINT32 mtu; - CHAR ovsName[OVS_MAX_PORT_NAME_LENGTH]; - UINT32 ovsNameLen; - - PVOID priv; - NDIS_SWITCH_PORT_ID portId; - NDIS_SWITCH_NIC_INDEX nicIndex; - UINT16 numaNodeId; - NDIS_SWITCH_PORT_STATE portState; - NDIS_SWITCH_NIC_STATE nicState; - NDIS_SWITCH_PORT_TYPE portType; - BOOLEAN isValidationPort; - - UINT8 permMacAddress[MAC_ADDRESS_LEN]; - UINT8 currMacAddress[MAC_ADDRESS_LEN]; - UINT8 vmMacAddress[MAC_ADDRESS_LEN]; - - NDIS_SWITCH_PORT_NAME portName; - NDIS_SWITCH_NIC_NAME nicName; - NDIS_VM_NAME vmName; - GUID netCfgInstanceId; -} OVS_VPORT_ENTRY, *POVS_VPORT_ENTRY; - -struct _OVS_SWITCH_CONTEXT; - -#define OVS_IS_VPORT_ENTRY_NULL(_SwitchContext, _i) \ - 
((UINT64)(_SwitchContext)->vportArray[_i] <= 0xff) - -POVS_VPORT_ENTRY -OvsFindVportByPortNo(struct _OVS_SWITCH_CONTEXT *switchContext, - UINT32 portNo); -POVS_VPORT_ENTRY -OvsFindVportByOvsName(struct _OVS_SWITCH_CONTEXT *switchContext, - CHAR *name, UINT32 length); -POVS_VPORT_ENTRY -OvsFindVportByPortIdAndNicIndex(struct _OVS_SWITCH_CONTEXT *switchContext, - NDIS_SWITCH_PORT_ID portId, - NDIS_SWITCH_NIC_INDEX index); - -NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); -NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); - -VOID OvsClearAllSwitchVports(struct _OVS_SWITCH_CONTEXT *switchContext); - -NTSTATUS OvsDumpVportIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsGetVportIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsAddVportIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NTSTATUS OvsDelVportIoctl(PVOID inputBuffer, UINT32 inputLength, - UINT32 *replyLen); -NTSTATUS OvsGetExtInfoIoctl(PVOID inputBuffer, UINT32 inputLength, - PVOID outputBuffer, UINT32 outputLength, - UINT32 *replyLen); -NDIS_STATUS OvsCreateNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam); -NDIS_STATUS OvsCreatePort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam); -VOID OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam); -VOID OvsDeletePort(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_PORT_PARAMETERS portParam); -VOID OvsConnectNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam); -VOID OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam); -VOID OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam); -VOID 
OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext, - PNDIS_SWITCH_NIC_PARAMETERS nicParam); - -static __inline BOOLEAN -OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) -{ - return ovsType == OVSWIN_VPORT_TYPE_VXLAN || - ovsType == OVSWIN_VPORT_TYPE_GRE || - ovsType == OVSWIN_VPORT_TYPE_GRE64; -} - -static __inline BOOLEAN -OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) -{ - return ovsType == OVSWIN_VPORT_TYPE_INTERNAL; -} - -static __inline BOOLEAN -OvsIsTunnelVportNo(UINT32 portNo) -{ - UINT32 idx = OVS_VPORT_INDEX(portNo); - return (idx >= OVS_TUNNEL_INDEX_START && idx <= OVS_TUNNEL_INDEX_END); -} - -static __inline BOOLEAN -OvsIsVifVportNo(UINT32 portNo) -{ - UINT32 idx = OVS_VPORT_INDEX(portNo); - return (idx >= OVS_VM_VPORT_START && idx <= OVS_VM_VPORT_MAX); -} - -static __inline POVS_VPORT_ENTRY -OvsGetTunnelVport(OVS_VPORT_TYPE type) -{ - ASSERT(OvsIsTunnelVportType(type)); - switch(type) { - case OVSWIN_VPORT_TYPE_VXLAN: - return (POVS_VPORT_ENTRY) OvsGetVportFromIndex(OVS_VXLAN_VPORT_INDEX); - default: - ASSERT(! "OvsGetTunnelVport not implemented for this tunnel."); - } - - return NULL; -} - -static __inline PVOID -OvsGetVportPriv(OVS_VPORT_TYPE type) -{ - return OvsGetTunnelVport(type)->priv; -} - -static __inline UINT32 -OvsGetExternalMtu() -{ - return ((POVS_VPORT_ENTRY) OvsGetExternalVport())->mtu; -} - -#endif /* __OVS_VPORT_H_ */ diff --git a/datapath-windows/ovsext/OvsVxlan.c b/datapath-windows/ovsext/OvsVxlan.c deleted file mode 100644 index db1909e15..000000000 --- a/datapath-windows/ovsext/OvsVxlan.c +++ /dev/null @@ -1,506 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "precomp.h" -#include "OvsNetProto.h" -#include "OvsSwitch.h" -#include "OvsVport.h" -#include "OvsFlow.h" -#include "OvsVxlan.h" -#include "OvsIpHelper.h" -#include "OvsChecksum.h" -#include "OvsUser.h" -#include "OvsPacketIO.h" -#include "OvsFlow.h" -#include "OvsPacketParser.h" -#include "OvsChecksum.h" - -#pragma warning( push ) -#pragma warning( disable:4127 ) - - -#ifdef OVS_DBG_MOD -#undef OVS_DBG_MOD -#endif -#define OVS_DBG_MOD OVS_DBG_VXLAN -#include "OvsDebug.h" - -/* Helper macro to check if a VXLAN ID is valid. */ -#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff) -#define VXLAN_TUNNELID_TO_VNI(_tID) (UINT32)(((UINT64)(_tID)) >> 40) -#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40) -#define IP_DF_NBO 0x0040 -#define VXLAN_DEFAULT_TTL 64 -#define VXLAN_MULTICAST_TTL 64 -#define VXLAN_DEFAULT_INSTANCE_ID 1 - -/* Move to a header file */ -extern POVS_SWITCH_CONTEXT gOvsSwitchContext; - -NTSTATUS -OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, - POVS_VPORT_ADD_REQUEST addReq) -{ - POVS_VXLAN_VPORT vxlanPort; - NTSTATUS status = STATUS_SUCCESS; - - ASSERT(addReq->type == OVSWIN_VPORT_TYPE_VXLAN); - - vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort)); - if (vxlanPort == NULL) { - status = STATUS_INSUFFICIENT_RESOURCES; - } else { - RtlZeroMemory(vxlanPort, sizeof (*vxlanPort)); - vxlanPort->dstPort = addReq->dstPort; - /* - * since we are installing the WFP filter before the port is created - * We need to check if it is the same number - * XXX should be removed later - */ - 
ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT); - vport->priv = (PVOID)vxlanPort; - } - return status; -} - - -VOID -OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport) -{ - if (vport->ovsType != OVSWIN_VPORT_TYPE_VXLAN || - vport->priv == NULL) { - return; - } - - OvsFreeMemory(vport->priv); - vport->priv = NULL; -} - - -/* - *---------------------------------------------------------------------------- - * OvsDoEncapVxlan - * Encapsulates the packet. - *---------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - POVS_FWD_INFO fwdInfo, - POVS_PACKET_HDR_INFO layers, - POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST *newNbl) -{ - NDIS_STATUS status; - PNET_BUFFER curNb; - PMDL curMdl; - PUINT8 bufferStart; - EthHdr *ethHdr; - IPHdr *ipHdr; - UDPHdr *udpHdr; - VXLANHdr *vxlanHdr; - UINT32 headRoom = OvsGetVxlanTunHdrSize(); - UINT32 packetLength; - - /* - * XXX: the assumption currently is that the NBL is owned by OVS, and - * headroom has already been allocated as part of allocating the NBL and - * MDL. 
- */ - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - packetLength = NET_BUFFER_DATA_LENGTH(curNb); - if (layers->isTcp) { - NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; - - tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, - TcpLargeSendNetBufferListInfo); - OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength); - if (tsoInfo.LsoV1Transmit.MSS) { - OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); - *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, - tsoInfo.LsoV1Transmit.MSS, headRoom); - if (*newNbl == NULL) { - OVS_LOG_ERROR("Unable to segment NBL"); - return NDIS_STATUS_FAILURE; - } - } - } - /* If we didn't split the packet above, make a copy now */ - if (*newNbl == NULL) { - *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, - FALSE /*NBL info*/); - if (*newNbl == NULL) { - OVS_LOG_ERROR("Unable to copy NBL"); - return NDIS_STATUS_FAILURE; - } - } - - curNbl = *newNbl; - for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; - curNb = curNb->Next) { - status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); - if (status != NDIS_STATUS_SUCCESS) { - goto ret_error; - } - - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); - if (!bufferStart) { - status = NDIS_STATUS_RESOURCES; - goto ret_error; - } - - bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - if (NET_BUFFER_NEXT_NB(curNb)) { - OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb), - NET_BUFFER_DATA_LENGTH(curNb->Next)); - } - - /* L2 header */ - ethHdr = (EthHdr *)bufferStart; - NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, - sizeof ethHdr->Destination + sizeof ethHdr->Source); - ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == - (PCHAR)&fwdInfo->srcMacAddr); - ethHdr->Type = htons(ETH_TYPE_IPV4); - - // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, - // should we use those values instead? 
or will they end up being - // uninitialized; - /* IP header */ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - - ipHdr->ihl = sizeof *ipHdr / 4; - ipHdr->version = IPV4; - ipHdr->tos = 0; - ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); - ipHdr->id = 0; - ipHdr->frag_off = IP_DF_NBO; - ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL; - ipHdr->protocol = IPPROTO_UDP; - ASSERT(tunKey->dst == fwdInfo->dstIpAddr); - ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); - ipHdr->saddr = fwdInfo->srcIpAddr; - ipHdr->daddr = fwdInfo->dstIpAddr; - ipHdr->check = 0; - ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); - - /* UDP header */ - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); - udpHdr->source = htons(tunKey->flow_hash | 32768); - udpHdr->dest = VXLAN_UDP_PORT_NBO; - udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + - sizeof *udpHdr + sizeof *vxlanHdr); - udpHdr->check = 0; - - /* VXLAN header */ - vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); - vxlanHdr->flags1 = 0; - vxlanHdr->locallyReplicate = 0; - vxlanHdr->flags2 = 0; - vxlanHdr->reserved1 = 0; - if (tunKey->flags | OVS_TNL_F_KEY) { - vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId); - vxlanHdr->instanceID = 1; - } - vxlanHdr->reserved2 = 0; - } - return STATUS_SUCCESS; - -ret_error: - OvsCompleteNBL(switchContext, *newNbl, TRUE); - *newNbl = NULL; - return status; -} - - -/* - *---------------------------------------------------------------------------- - * OvsEncapVxlan -- - * Encapsulates the packet if L2/L3 for destination resolves. Otherwise, - * enqueues a callback that does encapsulatation after resolution. 
- *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsEncapVxlan(PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, - POVS_PACKET_HDR_INFO layers, - PNET_BUFFER_LIST *newNbl) -{ - NTSTATUS status; - OVS_FWD_INFO fwdInfo; - UNREFERENCED_PARAMETER(completionList); - - status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); - if (status != STATUS_SUCCESS) { - OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); - // return NDIS_STATUS_PENDING; - /* - * XXX: Don't know if the completionList will make any sense when - * accessed in the callback. Make sure the caveats are known. - * - * XXX: This code will work once we are able to grab locks in the - * callback. - */ - return NDIS_STATUS_FAILURE; - } - - return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers, - switchContext, newNbl); -} - - -/* - *---------------------------------------------------------------------------- - * OvsIpHlprCbVxlan -- - * Callback function for IP helper. 
- * XXX: not used currently - *---------------------------------------------------------------------------- - */ -static VOID -OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, - UINT32 inPort, - OvsIPv4TunnelKey *tunKey, - PVOID cbData1, - PVOID cbData2, - NTSTATUS result, - POVS_FWD_INFO fwdInfo) -{ - OVS_PACKET_HDR_INFO layers; - OvsFlowKey key; - NDIS_STATUS status; - UNREFERENCED_PARAMETER(inPort); - - status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); - if (result == STATUS_SUCCESS) { - status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, - (POVS_SWITCH_CONTEXT)cbData1, NULL); - } else { - status = NDIS_STATUS_FAILURE; - } - - if (status != NDIS_STATUS_SUCCESS) { - // XXX: Free up the NBL; - return; - } - - OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl); -} - -/* - *---------------------------------------------------------------------------- - * OvsCalculateUDPChecksum - * Calculate UDP checksum - *---------------------------------------------------------------------------- - */ -static __inline NDIS_STATUS -OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, - PNET_BUFFER curNb, - IPHdr *ipHdr, - UDPHdr *udpHdr, - UINT32 packetLength) -{ - NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; - UINT16 checkSum; - - csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); - - /* Next check if UDP checksum has been calculated. 
*/ - if (!csumInfo.Receive.UdpChecksumSucceeded) { - UINT32 l4Payload; - - checkSum = udpHdr->check; - - l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4; - udpHdr->check = 0; - udpHdr->check = - IPPseudoChecksum((UINT32 *)&ipHdr->saddr, - (UINT32 *)&ipHdr->daddr, - IPPROTO_UDP, (UINT16)l4Payload); - udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload, - sizeof(EthHdr) + ipHdr->ihl * 4); - if (checkSum != udpHdr->check) { - OVS_LOG_TRACE("UDP checksum incorrect."); - return NDIS_STATUS_INVALID_PACKET; - } - } - - csumInfo.Receive.UdpChecksumSucceeded = 1; - NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; - return NDIS_STATUS_SUCCESS; -} - -/* - *---------------------------------------------------------------------------- - * OvsDoDecapVxlan - * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. - *---------------------------------------------------------------------------- - */ -NDIS_STATUS -OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl) -{ - PNET_BUFFER curNb; - PMDL curMdl; - EthHdr *ethHdr; - IPHdr *ipHdr; - UDPHdr *udpHdr; - VXLANHdr *vxlanHdr; - UINT32 tunnelSize = 0, packetLength = 0; - PUINT8 bufferStart; - NDIS_STATUS status; - - /* Check the the length of the UDP payload */ - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - packetLength = NET_BUFFER_DATA_LENGTH(curNb); - tunnelSize = OvsGetVxlanTunHdrSize(); - if (packetLength <= tunnelSize) { - return NDIS_STATUS_INVALID_LENGTH; - } - - /* - * Create a copy of the NBL so that we have all the headers in one MDL. - */ - *newNbl = OvsPartialCopyNBL(switchContext, curNbl, - tunnelSize + OVS_DEFAULT_COPY_SIZE, 0, - TRUE /*copy NBL info */); - - if (*newNbl == NULL) { - return NDIS_STATUS_RESOURCES; - } - - /* XXX: Handle VLAN header. 
*/ - curNbl = *newNbl; - curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); - curMdl = NET_BUFFER_CURRENT_MDL(curNb); - bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + - NET_BUFFER_CURRENT_MDL_OFFSET(curNb); - if (!bufferStart) { - status = NDIS_STATUS_RESOURCES; - goto dropNbl; - } - - ethHdr = (EthHdr *)bufferStart; - /* XXX: Handle IP options. */ - ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); - tunKey->src = ipHdr->saddr; - tunKey->dst = ipHdr->daddr; - tunKey->tos = ipHdr->tos; - tunKey->ttl = ipHdr->ttl; - tunKey->pad = 0; - udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); - - /* Validate if NIC has indicated checksum failure. */ - status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); - if (status != NDIS_STATUS_SUCCESS) { - goto dropNbl; - } - - /* Calculate and verify UDP checksum if NIC didn't do it. */ - if (udpHdr->check != 0) { - status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength); - if (status != NDIS_STATUS_SUCCESS) { - goto dropNbl; - } - } - - vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); - if (vxlanHdr->instanceID) { - tunKey->flags = OVS_TNL_F_KEY; - tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID); - } else { - tunKey->flags = 0; - tunKey->tunnelId = 0; - } - - /* Clear out the receive flag for the inner packet. 
*/ - NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; - NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL); - return NDIS_STATUS_SUCCESS; - -dropNbl: - OvsCompleteNBL(switchContext, *newNbl, TRUE); - *newNbl = NULL; - return status; -} - - -NDIS_STATUS -OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, - OvsIPv4TunnelKey *tunnelKey) -{ - NDIS_STATUS status = NDIS_STATUS_FAILURE; - UDPHdr udpStorage; - const UDPHdr *udp; - VXLANHdr *VxlanHeader; - VXLANHdr VxlanHeaderBuffer; - struct IPHdr ip_storage; - const struct IPHdr *nh; - OVS_PACKET_HDR_INFO layers; - - layers.value = 0; - - do { - nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); - if (nh) { - layers.l4Offset = layers.l3Offset + nh->ihl * 4; - } else { - break; - } - - /* make sure it's a VXLAN packet */ - udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); - if (udp) { - layers.l7Offset = layers.l4Offset + sizeof *udp; - } else { - break; - } - - /* XXX Should be tested against the dynamic port # in the VXLAN vport */ - ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); - - VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, - sizeof(*VxlanHeader), - layers.l7Offset, - &VxlanHeaderBuffer); - - if (VxlanHeader) { - tunnelKey->src = nh->saddr; - tunnelKey->dst = nh->daddr; - tunnelKey->ttl = nh->ttl; - tunnelKey->tos = nh->tos; - if (VxlanHeader->instanceID) { - tunnelKey->flags = OVS_TNL_F_KEY; - tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID); - } else { - tunnelKey->flags = 0; - tunnelKey->tunnelId = 0; - } - } else { - break; - } - status = NDIS_STATUS_SUCCESS; - - } while(FALSE); - - return status; -} - -#pragma warning( pop ) diff --git a/datapath-windows/ovsext/OvsVxlan.h b/datapath-windows/ovsext/OvsVxlan.h deleted file mode 100644 index 55cfc8203..000000000 --- a/datapath-windows/ovsext/OvsVxlan.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2014 VMware, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __OVS_VXLAN_H_ -#define __OVS_VXLAN_H_ 1 - -#include "OvsNetProto.h" -typedef struct _OVS_VXLAN_VPORT { - UINT32 dstPort; - UINT64 inPkts; - UINT64 outPkts; - UINT64 slowInPkts; - UINT64 slowOutPkts; - /* - * To be filled - */ -} OVS_VXLAN_VPORT, *POVS_VXLAN_VPORT; - -/* VXLAN header. */ -typedef struct VXLANHdr { - /* Flags. */ - UINT32 flags1:2; - /* Packet needs replication to multicast group (used for multicast proxy). */ - UINT32 locallyReplicate:1; - /* Instance ID flag, must be set to 1. */ - UINT32 instanceID:1; - /* Flags. */ - UINT32 flags2:4; - /* Reserved. */ - UINT32 reserved1:24; - /* VXLAN ID. */ - UINT32 vxlanID:24; - /* Reserved. */ - UINT32 reserved2:8; -} VXLANHdr; - -NTSTATUS OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, - POVS_VPORT_ADD_REQUEST addReq); - -VOID OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport); - -NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, - OvsIPv4TunnelKey *tunnelKey); - -NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - POVS_SWITCH_CONTEXT switchContext, - VOID *completionList, - POVS_PACKET_HDR_INFO layers, - PNET_BUFFER_LIST *newNbl); - -NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, - PNET_BUFFER_LIST curNbl, - OvsIPv4TunnelKey *tunKey, - PNET_BUFFER_LIST *newNbl); - -static __inline UINT32 -OvsGetVxlanTunHdrSize(VOID) -{ - /* XXX: Can L2 include VLAN at all? 
*/ - return sizeof (EthHdr) + sizeof (IPHdr) + sizeof (UDPHdr) + - sizeof (VXLANHdr); -} - -#define VXLAN_UDP_PORT 4789 -#define VXLAN_UDP_PORT_NBO 0xB512 - -#endif /* __OVS_VXLAN_H_ */ diff --git a/datapath-windows/ovsext/PacketIO.c b/datapath-windows/ovsext/PacketIO.c new file mode 100644 index 000000000..ac7862d85 --- /dev/null +++ b/datapath-windows/ovsext/PacketIO.c @@ -0,0 +1,440 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the implementation of the datapath/forwarding + * functionality of the OVS. 
+ */ + +#include "precomp.h" +#include "Switch.h" +#include "Vport.h" +#include "NetProto.h" +#include "User.h" +#include "PacketIO.h" +#include "Flow.h" +#include "Event.h" +#include "User.h" + +/* Due to an imported header file */ +#pragma warning( disable:4505 ) + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_DISPATCH +#include "Debug.h" + +extern NDIS_STRING ovsExtGuidUC; +extern NDIS_STRING ovsExtFriendlyNameUC; + +static VOID OvsFinalizeCompletionList(OvsCompletionList *completionList); +static VOID OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, ULONG sendCompleteFlags); + +__inline VOID +OvsInitCompletionList(OvsCompletionList *completionList, + POVS_SWITCH_CONTEXT switchContext, + ULONG sendCompleteFlags) +{ + ASSERT(completionList); + completionList->dropNbl = NULL; + completionList->dropNblNext = &completionList->dropNbl; + completionList->switchContext = switchContext; + completionList->sendCompleteFlags = sendCompleteFlags; +} + +/* Utility function used to complete an NBL. */ +__inline VOID +OvsAddPktCompletionList(OvsCompletionList *completionList, + BOOLEAN incoming, + NDIS_SWITCH_PORT_ID sourcePort, + PNET_BUFFER_LIST netBufferList, + UINT32 netBufferListCount, + PNDIS_STRING filterReason) +{ + POVS_BUFFER_CONTEXT ctx; + + /* XXX: We handle one NBL at a time. */ + ASSERT(netBufferList->Next == NULL); + + /* Make sure it has a context. */ + ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(netBufferList); + ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC); + + completionList->switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( + completionList->switchContext->NdisSwitchContext, &ovsExtGuidUC, + &ovsExtFriendlyNameUC, sourcePort, + incoming ? 
NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING : 0, + netBufferListCount, netBufferList, filterReason); + + *completionList->dropNblNext = netBufferList; + completionList->dropNblNext = &netBufferList->Next; + ASSERT(completionList->dropNbl); +} + +static __inline VOID +OvsReportNBLIngressError(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nblList, + PNDIS_STRING filterReason, + NDIS_STATUS error) +{ + PNET_BUFFER_LIST nbl = nblList; + while (nbl) { + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl); + + nbl->Status = error; + + /* This can be optimized by batching NBL's from the same + * SourcePortId. */ + switchContext->NdisSwitchHandlers.ReportFilteredNetBufferLists( + switchContext->NdisSwitchContext, &ovsExtGuidUC, + &ovsExtFriendlyNameUC, fwdDetail->SourcePortId, + NDIS_SWITCH_REPORT_FILTERED_NBL_FLAGS_IS_INCOMING, + 1 /*Nbl count.*/, nbl, filterReason); + + nbl = NET_BUFFER_LIST_NEXT_NBL(nbl); + } +} + +static __inline ULONG +OvsGetSendCompleteFlags(ULONG sendFlags) +{ + BOOLEAN dispatch, sameSource; + ULONG sendCompleteFlags; + + dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(sendFlags); + sendCompleteFlags = (dispatch ? + NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL : 0); + sameSource = NDIS_TEST_SEND_FLAG(sendFlags, + NDIS_SEND_FLAGS_SWITCH_SINGLE_SOURCE); + sendCompleteFlags |= (sameSource ? + NDIS_SEND_COMPLETE_FLAGS_SWITCH_SINGLE_SOURCE : 0); + + return sendCompleteFlags; +} + +VOID +OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendFlags) +{ + if (switchContext->dataFlowState == OvsSwitchPaused) { + /* If a filter module is in the Paused state, the filter driver must not + * originate any send requests for that filter module. If NDIS calls + * FilterSendNetBufferLists, the driver must not call + * NdisFSendNetBufferLists to pass on the data until the driver is + * restarted. 
The driver should call NdisFSendNetBufferListsComplete + * immediately to complete the send operation. It should set the + * complete status in each NET_BUFFER_LIST structure to + * NDIS_STATUS_PAUSED. + * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ + * ff549966(v=vs.85).aspx */ + NDIS_STRING filterReason; + ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); + + RtlInitUnicodeString(&filterReason, + L"Switch state PAUSED, drop before FSendNBL."); + OvsReportNBLIngressError(switchContext, netBufferLists, &filterReason, + NDIS_STATUS_PAUSED); + OvsCompleteNBLIngress(switchContext, netBufferLists, + sendCompleteFlags); + return; + } + + ASSERT(switchContext->dataFlowState == OvsSwitchRunning); + + NdisFSendNetBufferLists(switchContext->NdisFilterHandle, netBufferLists, + NDIS_DEFAULT_PORT_NUMBER, sendFlags); +} + +static __inline VOID +OvsStartNBLIngressError(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nblList, + ULONG sendCompleteFlags, + PNDIS_STRING filterReason, + NDIS_STATUS error) +{ + ASSERT(error); + OvsReportNBLIngressError(switchContext, nblList, filterReason, error); + NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, nblList, + sendCompleteFlags); +} + +static VOID +OvsStartNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG SendFlags) +{ + NDIS_SWITCH_PORT_ID sourcePort = 0; + NDIS_SWITCH_NIC_INDEX sourceIndex = 0; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; + ULONG sendCompleteFlags; + UCHAR dispatch; + LOCK_STATE_EX lockState, dpLockState; + NDIS_STATUS status; + NDIS_STRING filterReason; + LIST_ENTRY missedPackets; + UINT32 num = 0; + OvsCompletionList completionList; + + dispatch = NDIS_TEST_SEND_AT_DISPATCH_LEVEL(SendFlags)? 
+ NDIS_RWL_AT_DISPATCH_LEVEL : 0; + sendCompleteFlags = OvsGetSendCompleteFlags(SendFlags); + SendFlags |= NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; + + InitializeListHead(&missedPackets); + OvsInitCompletionList(&completionList, switchContext, sendCompleteFlags); + + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + POVS_VPORT_ENTRY vport; + UINT32 portNo; + OVS_DATAPATH *datapath = &switchContext->datapath; + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + UINT64 hash; + PNET_BUFFER curNb; + + nextNbl = curNbl->Next; + curNbl->Next = NULL; + + /* Ethernet Header is a guaranteed safe access. */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + if (curNb->Next != NULL) { + /* XXX: This case is not handled yet. */ + ASSERT(FALSE); + } else { + POVS_BUFFER_CONTEXT ctx; + OvsFlow *flow; + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl); + sourcePort = fwdDetail->SourcePortId; + sourceIndex = (NDIS_SWITCH_NIC_INDEX)fwdDetail->SourceNicIndex; + + /* Take the DispatchLock so none of the VPORTs disconnect while + * we are setting destination ports. + * + * XXX: acquire/release the dispatch lock for a "batch" of packets + * rather than for each packet. 
*/ + NdisAcquireRWLockRead(switchContext->dispatchLock, &lockState, + dispatch); + + ctx = OvsInitExternalNBLContext(switchContext, curNbl, + sourcePort == switchContext->externalPortId); + if (ctx == NULL) { + RtlInitUnicodeString(&filterReason, + L"Cannot allocate external NBL context."); + + OvsStartNBLIngressError(switchContext, curNbl, + sendCompleteFlags, &filterReason, + NDIS_STATUS_RESOURCES); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; + } + + vport = OvsFindVportByPortIdAndNicIndex(switchContext, sourcePort, + sourceIndex); + if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) { + RtlInitUnicodeString(&filterReason, + L"OVS-Cannot forward packet from unknown source port"); + goto dropit; + } else { + portNo = vport->portNo; + } + + vport->stats.rxPackets++; + vport->stats.rxBytes += NET_BUFFER_DATA_LENGTH(curNb); + + status = OvsExtractFlow(curNbl, vport->portNo, &key, &layers, NULL); + if (status != NDIS_STATUS_SUCCESS) { + RtlInitUnicodeString(&filterReason, L"OVS-Flow extract failed"); + goto dropit; + } + + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + OvsAcquireDatapathRead(datapath, &dpLockState, dispatch); + + flow = OvsLookupFlow(datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, curNbl, &layers); + datapath->hits++; + /* If successful, OvsActionsExecute() consumes the NBL. + * Otherwise, it adds it to the completionList. No need to + * check the return value. */ + OvsActionsExecute(switchContext, &completionList, curNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + OvsReleaseDatapath(datapath, &dpLockState); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + continue; + } else { + OvsReleaseDatapath(datapath, &dpLockState); + + datapath->misses++; + status = OvsCreateAndAddPackets(OVS_DEFAULT_PACKET_QUEUE, + NULL, 0, OVS_PACKET_CMD_MISS, + portNo, + key.tunKey.dst != 0 ? 
+ (OvsIPv4TunnelKey *)&key.tunKey : + NULL, curNbl, + sourcePort == + switchContext->externalPortId, + &layers, switchContext, + &missedPackets, &num); + if (status == NDIS_STATUS_SUCCESS) { + /* Complete the packet since it was copied to user + * buffer. */ + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped since packet was copied to userspace"); + } else { + RtlInitUnicodeString(&filterReason, + L"OVS-Dropped due to failure to queue to userspace"); + } + goto dropit; + } + +dropit: + OvsAddPktCompletionList(&completionList, TRUE, sourcePort, curNbl, 0, + &filterReason); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + } + } + + /* Queue the missed packets. */ + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num); + OvsFinalizeCompletionList(&completionList); +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterSendNetBufferLists Function. + * -------------------------------------------------------------------------- + */ +VOID +OvsExtSendNBL(NDIS_HANDLE filterModuleContext, + PNET_BUFFER_LIST netBufferLists, + NDIS_PORT_NUMBER portNumber, + ULONG sendFlags) +{ + UNREFERENCED_PARAMETER(portNumber); + + /* 'filterModuleContext' is the switch context that gets created in the + * AttachHandler. 
*/ + POVS_SWITCH_CONTEXT switchContext; + switchContext = (POVS_SWITCH_CONTEXT) filterModuleContext; + + if (switchContext->dataFlowState == OvsSwitchPaused) { + NDIS_STRING filterReason; + ULONG sendCompleteFlags = OvsGetSendCompleteFlags(sendFlags); + + RtlInitUnicodeString(&filterReason, + L"Switch state PAUSED, drop on ingress."); + OvsStartNBLIngressError(switchContext, netBufferLists, + sendCompleteFlags, &filterReason, + NDIS_STATUS_PAUSED); + return; + } + + ASSERT(switchContext->dataFlowState == OvsSwitchRunning); + + OvsStartNBLIngress(switchContext, netBufferLists, sendFlags); +} + +static VOID +OvsCompleteNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags) +{ + PNET_BUFFER_LIST curNbl = NULL, nextNbl = NULL; + OvsCompletionList newList; + + newList.dropNbl = NULL; + newList.dropNblNext = &newList.dropNbl; + + for (curNbl = netBufferLists; curNbl != NULL; curNbl = nextNbl) { + nextNbl = curNbl->Next; + curNbl->Next = NULL; + + curNbl = OvsCompleteNBL(switchContext, curNbl, TRUE); + if (curNbl != NULL) { + /* NBL originated from the upper layer. */ + *newList.dropNblNext = curNbl; + newList.dropNblNext = &curNbl->Next; + } + } + + /* Complete the NBL's that were sent by the upper layer. */ + if (newList.dropNbl != NULL) { + NdisFSendNetBufferListsComplete(switchContext->NdisFilterHandle, newList.dropNbl, + sendCompleteFlags); + } +} + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterSendNetBufferListsComplete function. 
+ * -------------------------------------------------------------------------- + */ +VOID +OvsExtSendNBLComplete(NDIS_HANDLE filterModuleContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendCompleteFlags) +{ + OvsCompleteNBLIngress((POVS_SWITCH_CONTEXT)filterModuleContext, + netBufferLists, sendCompleteFlags); +} + + +VOID +OvsFinalizeCompletionList(OvsCompletionList *completionList) +{ + if (completionList->dropNbl != NULL) { + OvsCompleteNBLIngress(completionList->switchContext, + completionList->dropNbl, + completionList->sendCompleteFlags); + + completionList->dropNbl = NULL; + completionList->dropNblNext = &completionList->dropNbl; + } +} + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterCancelSendNetBufferLists function. + * + * "If a filter driver specifies a FilterSendNetBufferLists function and it + * queues send requests, it must also specify a + * FilterCancelSendNetBufferLists function." + * + * http://msdn.microsoft.com/en-us/library/windows/hardware/ + * ff549966(v=vs.85).aspx + * -------------------------------------------------------------------------- + */ +VOID +OvsExtCancelSendNBL(NDIS_HANDLE filterModuleContext, + PVOID CancelId) +{ + UNREFERENCED_PARAMETER(filterModuleContext); + UNREFERENCED_PARAMETER(CancelId); + + /* All send requests get completed synchronously, so there is no need to + * implement this callback. */ +} diff --git a/datapath-windows/ovsext/PacketIO.h b/datapath-windows/ovsext/PacketIO.h new file mode 100644 index 000000000..4d29a1894 --- /dev/null +++ b/datapath-windows/ovsext/PacketIO.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __PACKETIO_H_ +#define __PACKETIO_H_ 1 + +typedef union _OVS_PACKET_HDR_INFO OVS_PACKET_HDR_INFO; + +/* + * Data structures and utility functions to help manage a list of packets to be + * completed (dropped). + */ +typedef struct OvsCompletionList { + PNET_BUFFER_LIST dropNbl; + PNET_BUFFER_LIST *dropNblNext; + POVS_SWITCH_CONTEXT switchContext; + ULONG sendCompleteFlags; +} OvsCompletionList; + +VOID OvsInitCompletionList(OvsCompletionList *completionList, + POVS_SWITCH_CONTEXT switchContext, + ULONG sendCompleteFlags); +VOID OvsAddPktCompletionList(OvsCompletionList *completionList, + BOOLEAN incoming, + NDIS_SWITCH_PORT_ID sourcePort, + PNET_BUFFER_LIST netBufferList, + UINT32 netBufferListCount, + PNDIS_STRING filterReason); + + +/* + * Functions related to packet processing. 
+ */ +VOID OvsSendNBLIngress(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST netBufferLists, + ULONG sendFlags); + +NDIS_STATUS OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext, + OvsCompletionList *completionList, + PNET_BUFFER_LIST curNbl, UINT32 srcVportNo, + ULONG sendFlags, OvsFlowKey *key, UINT64 *hash, + OVS_PACKET_HDR_INFO *layers, + const PNL_ATTR actions, int actionsLen); + +VOID OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext, + VOID *compList, PNET_BUFFER_LIST curNbl); + +#endif /* __PACKETIO_H_ */ diff --git a/datapath-windows/ovsext/PacketParser.c b/datapath-windows/ovsext/PacketParser.c new file mode 100644 index 000000000..2c955e1e6 --- /dev/null +++ b/datapath-windows/ovsext/PacketParser.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PacketParser.h" + +//XXX consider moving to NdisGetDataBuffer. 
+/*
+ * Copies 'len' bytes, starting 'srcOffset' bytes into the data of the first
+ * NET_BUFFER of 'nbl', into 'storage', walking the MDL chain as needed.
+ *
+ * Returns 'storage' when all 'len' bytes were copied, or NULL when the data
+ * runs out or an MDL cannot be mapped.  The bytes are always copied, so the
+ * returned pointer is only valid for 'len' bytes.
+ */
+const VOID *
+OvsGetPacketBytes(const NET_BUFFER_LIST *nbl,
+                  UINT32 len,
+                  UINT32 srcOffset,
+                  VOID *storage)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    PNET_BUFFER netBuffer = NET_BUFFER_LIST_FIRST_NB(nbl);
+    PMDL currentMdl;
+    BOOLEAN firstMDL = TRUE;
+    ULONG destOffset = 0;
+    VOID *dest = storage;
+    const UINT32 copyLen = len;
+    ULONG packetLen;
+
+    packetLen = NET_BUFFER_DATA_LENGTH(netBuffer);
+    // Start copy from current MDL
+    currentMdl = NET_BUFFER_CURRENT_MDL(netBuffer);
+
+    // Data on current MDL may be offset from start of MDL
+    while (destOffset < copyLen && currentMdl) {
+        PUCHAR srcMemory = MmGetSystemAddressForMdlSafe(currentMdl,
+                                                        LowPagePriority);
+        ULONG length = MmGetMdlByteCount(currentMdl);
+        if (!srcMemory) {
+            status = NDIS_STATUS_RESOURCES;
+            break;
+        }
+
+        if (firstMDL) {
+            ULONG mdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(netBuffer);
+            srcMemory += mdlOffset;
+            length -= mdlOffset;
+            firstMDL = FALSE;
+        }
+        /* Never read past the NET_BUFFER's data length. */
+        length = MIN(length, packetLen);
+        packetLen -= length;
+        ASSERT((INT)packetLen >= 0);
+
+        if (srcOffset >= length) {
+            /* The requested range starts in a later MDL; skip this one. */
+            currentMdl = NDIS_MDL_LINKAGE(currentMdl);
+            srcOffset -= length;
+            continue;
+        } else {
+            srcMemory += srcOffset;
+            length -= srcOffset;
+            srcOffset = 0;
+        }
+
+        length = min(length, copyLen-destOffset);
+
+        NdisMoveMemory((PUCHAR)dest+destOffset, srcMemory, length);
+        destOffset += length;
+
+        currentMdl = NDIS_MDL_LINKAGE(currentMdl);
+    }
+
+    if (destOffset == copyLen) {
+        ASSERT(status == NDIS_STATUS_SUCCESS);
+        return storage;
+    }
+
+    return NULL;
+}
+
+/*
+ * Fills key->ipv6Key from the IPv6 header at layers->l3Offset, walks the
+ * extension headers and stores the offset of the upper-layer header in
+ * layers->l4Offset.  Returns NDIS_STATUS_FAILURE when a header cannot be
+ * read or an extension header points past the end of the packet.
+ */
+NDIS_STATUS
+OvsParseIPv6(const NET_BUFFER_LIST *packet,
+             OvsFlowKey *key,
+             POVS_PACKET_HDR_INFO layers)
+{
+    UINT16 ofs = layers->l3Offset;
+    IPv6Hdr ipv6HdrStorage;
+    const IPv6Hdr *nh;
+    UINT32 nextHdr;
+    Ipv6Key *flow= &key->ipv6Key;
+
+    nh = OvsGetPacketBytes(packet, sizeof *nh, ofs, &ipv6HdrStorage);
+    if (!nh) {
+        return NDIS_STATUS_FAILURE;
+    }
+
+    nextHdr = nh->nexthdr;
+    memcpy(&flow->ipv6Src, nh->saddr.s6_addr, 16);
+    memcpy(&flow->ipv6Dst, nh->daddr.s6_addr, 16);
+
+    flow->nwTos = ((nh->flow_lbl[0] & 0xF0) >> 4) | (nh->priority << 4);
+    flow->ipv6Label =
+        ((nh->flow_lbl[0] & 0x0F) << 16) | (nh->flow_lbl[1] << 8) | nh->flow_lbl[2];
+    flow->nwTtl = nh->hop_limit;
+    flow->nwProto = SOCKET_IPPROTO_NONE;
+    flow->nwFrag = 0;
+
+    // Parse extended headers and compute L4 offset
+    ofs += sizeof(IPv6Hdr);
+    for (;;) {
+        if ((nextHdr != SOCKET_IPPROTO_HOPOPTS)
+            && (nextHdr != SOCKET_IPPROTO_ROUTING)
+            && (nextHdr != SOCKET_IPPROTO_DSTOPTS)
+            && (nextHdr != SOCKET_IPPROTO_AH)
+            && (nextHdr != SOCKET_IPPROTO_FRAGMENT)) {
+             /*
+              * It's either a terminal header (e.g., TCP, UDP) or one we
+              * don't understand.  In either case, we're done with the
+              * packet, so use it to fill in 'nw_proto'.
+              */
+            break;
+        }
+
+        if (nextHdr == SOCKET_IPPROTO_HOPOPTS
+            || nextHdr == SOCKET_IPPROTO_ROUTING
+            || nextHdr == SOCKET_IPPROTO_DSTOPTS
+            || nextHdr == SOCKET_IPPROTO_AH) {
+            IPv6ExtHdr extHdrStorage;
+            const IPv6ExtHdr *extHdr;
+            UINT8 len;
+
+            extHdr = OvsGetPacketBytes(packet, sizeof *extHdr, ofs, &extHdrStorage);
+            if (!extHdr) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            /* AH counts its length in 4-byte units, the others in 8-byte. */
+            len = extHdr->hdrExtLen;
+            ofs += nextHdr == SOCKET_IPPROTO_AH ? (len + 2) * 4 : (len + 1) * 8;
+            nextHdr = extHdr->nextHeader;
+            if (OvsPacketLenNBL(packet) < ofs) {
+                return NDIS_STATUS_FAILURE;
+            }
+        } else if (nextHdr == SOCKET_IPPROTO_FRAGMENT) {
+            IPv6FragHdr fragHdrStorage;
+            const IPv6FragHdr *fragHdr;
+
+            fragHdr = OvsGetPacketBytes(packet, sizeof *fragHdr, ofs,
+                                        &fragHdrStorage);
+            if (!fragHdr) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            nextHdr = fragHdr->nextHeader;
+            ofs += sizeof *fragHdr;
+
+            /* We only process the first fragment. */
+            if (fragHdr->offlg != htons(0)) {
+                if ((fragHdr->offlg & IP6F_OFF_HOST_ORDER_MASK) == htons(0)) {
+                    flow->nwFrag = OVSWIN_NW_FRAG_ANY;
+                } else {
+                    flow->nwFrag |= OVSWIN_NW_FRAG_LATER;
+                    nextHdr = SOCKET_IPPROTO_FRAGMENT;
+                    break;
+                }
+            }
+        }
+    }
+
+    flow->nwProto = (UINT8)nextHdr;
+    layers->l4Offset = ofs;
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Records the TCP ports in 'flow' and sets layers->isTcp and
+ * layers->l7Offset.  Leaves everything untouched when no valid TCP header
+ * can be read at layers->l4Offset.
+ */
+VOID
+OvsParseTcp(const NET_BUFFER_LIST *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
+{
+    TCPHdr tcpStorage;
+    const TCPHdr *tcp = OvsGetTcp(packet, layers->l4Offset, &tcpStorage);
+    if (tcp) {
+        flow->tpSrc = tcp->source;
+        flow->tpDst = tcp->dest;
+        layers->isTcp = 1;
+        layers->l7Offset = layers->l4Offset + 4 * tcp->doff;
+    }
+}
+
+/*
+ * Records the UDP ports in 'flow' and sets layers->isUdp, layers->l7Offset
+ * and, for a zero checksum field, layers->udpCsumZero.  Leaves everything
+ * untouched when the UDP header cannot be read at layers->l4Offset.
+ */
+VOID
+OvsParseUdp(const NET_BUFFER_LIST *packet,
+            L4Key *flow,
+            POVS_PACKET_HDR_INFO layers)
+{
+    UDPHdr udpStorage;
+    const UDPHdr *udp = OvsGetUdp(packet, layers->l4Offset, &udpStorage);
+    if (udp) {
+        flow->tpSrc = udp->source;
+        flow->tpDst = udp->dest;
+        layers->isUdp = 1;
+        if (udp->check == 0) {
+            layers->udpCsumZero = 1;
+        }
+        layers->l7Offset = layers->l4Offset + sizeof *udp;
+    }
+}
+
+/*
+ * Parses the ICMPv6 header at layers->l4Offset into 'key'.  For neighbor
+ * solicitations and advertisements it also extracts the target address and
+ * the source/target link-layer address options.
+ *
+ * Returns NDIS_STATUS_FAILURE when the packet cannot be read or an option is
+ * malformed or duplicated; on the 'invalid' path the ND fields of the key
+ * are cleared again.
+ */
+NDIS_STATUS
+OvsParseIcmpV6(const NET_BUFFER_LIST *packet,
+               OvsFlowKey *key,
+               POVS_PACKET_HDR_INFO layers)
+{
+    UINT16 ofs = layers->l4Offset;
+    ICMPHdr icmpStorage;
+    const ICMPHdr *icmp;
+    Icmp6Key *flow = &key->icmp6Key;
+
+    memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
+    memset(flow->arpSha, 0, sizeof(flow->arpSha));
+    memset(flow->arpTha, 0, sizeof(flow->arpTha));
+
+    icmp = OvsGetIcmp(packet, ofs, &icmpStorage);
+    if (!icmp) {
+        return NDIS_STATUS_FAILURE;
+    }
+    ofs += sizeof *icmp;
+
+    /*
+     * The ICMPv6 type and code fields use the 16-bit transport port
+     * fields, so we need to store them in 16-bit network byte order.
+     */
+    key->ipv6Key.l4.tpSrc = htons(icmp->type);
+    key->ipv6Key.l4.tpDst = htons(icmp->code);
+
+    if (icmp->code == 0 &&
+        (icmp->type == ND_NEIGHBOR_SOLICIT ||
+        icmp->type == ND_NEIGHBOR_ADVERT)) {
+        struct in6_addr ndTargetStorage;
+        const struct in6_addr *ndTarget;
+
+        ndTarget = OvsGetPacketBytes(packet, sizeof *ndTarget, ofs,
+                                    &ndTargetStorage);
+        if (!ndTarget) {
+            return NDIS_STATUS_FAILURE;
+        }
+        flow->ndTarget = *ndTarget;
+
+        /*
+         * NOTE(review): 'ofs' is not advanced past the target address before
+         * the options are walked -- confirm against sizeof(ICMPHdr) and the
+         * ND message layout.
+         */
+        while ((UINT32)(ofs + 8) <= OvsPacketLenNBL(packet)) {
+            /*
+             * The minimum size of an option is 8 bytes, which also is
+             * the size of Ethernet link-layer options.
+             */
+            IPv6NdOptHdr ndOptStorage;
+            const IPv6NdOptHdr *ndOpt;
+            UINT8 llAddrStorage[ETH_ADDR_LENGTH];
+            const VOID *llAddr;
+            UINT16 optLen;
+
+            ndOpt = OvsGetPacketBytes(packet, sizeof *ndOpt, ofs, &ndOptStorage);
+            if (!ndOpt) {
+                return NDIS_STATUS_FAILURE;
+            }
+
+            optLen = ndOpt->len * 8;
+            if (!optLen || (UINT32)(ofs + optLen) > OvsPacketLenNBL(packet)) {
+                goto invalid;
+            }
+
+            /*
+             * Store the link layer address if the appropriate option is
+             * provided.  It is considered an error if the same link
+             * layer option is specified twice.
+             *
+             * 'ndOpt' points at 'ndOptStorage', which holds only the option
+             * header, so the address that follows the header has to be
+             * fetched from the packet itself rather than from 'ndOpt + 1'.
+             */
+            if (ndOpt->type == ND_OPT_SOURCE_LINKADDR && optLen == 8) {
+                if (!Eth_IsNullAddr(flow->arpSha)) {
+                    goto invalid;
+                }
+                llAddr = OvsGetPacketBytes(packet, ETH_ADDR_LENGTH,
+                                           (UINT32)(ofs + sizeof *ndOpt),
+                                           llAddrStorage);
+                if (!llAddr) {
+                    goto invalid;
+                }
+                memcpy(flow->arpSha, llAddr, ETH_ADDR_LENGTH);
+            } else if (ndOpt->type == ND_OPT_TARGET_LINKADDR && optLen == 8) {
+                if (!Eth_IsNullAddr(flow->arpTha)) {
+                    goto invalid;
+                }
+                llAddr = OvsGetPacketBytes(packet, ETH_ADDR_LENGTH,
+                                           (UINT32)(ofs + sizeof *ndOpt),
+                                           llAddrStorage);
+                if (!llAddr) {
+                    goto invalid;
+                }
+                memcpy(flow->arpTha, llAddr, ETH_ADDR_LENGTH);
+            }
+
+            ofs += optLen;
+        }
+    }
+
+    layers->l7Offset = ofs;
+    return NDIS_STATUS_SUCCESS;
+
+invalid:
+    memset(&flow->ndTarget, 0, sizeof(flow->ndTarget));
+    memset(flow->arpSha, 0, sizeof(flow->arpSha));
+    memset(flow->arpTha, 0, sizeof(flow->arpTha));
+
+    return NDIS_STATUS_FAILURE;
+}
diff --git a/datapath-windows/ovsext/PacketParser.h b/datapath-windows/ovsext/PacketParser.h
new file mode 100644
index 000000000..55d110f11
--- /dev/null
+++ b/datapath-windows/ovsext/PacketParser.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PACKET_PARSER_H_
+#define __PACKET_PARSER_H_ 1
+
+#include "precomp.h"
+#include "NetProto.h"
+
+const VOID* OvsGetPacketBytes(const NET_BUFFER_LIST *_pNB, UINT32 len,
+                              UINT32 SrcOffset, VOID *storage);
+NDIS_STATUS OvsParseIPv6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+                         POVS_PACKET_HDR_INFO layers);
+VOID OvsParseTcp(const NET_BUFFER_LIST *packet, L4Key *flow,
+                 POVS_PACKET_HDR_INFO layers);
+VOID OvsParseUdp(const NET_BUFFER_LIST *packet, L4Key *flow,
+                 POVS_PACKET_HDR_INFO layers);
+NDIS_STATUS OvsParseIcmpV6(const NET_BUFFER_LIST *packet, OvsFlowKey *key,
+                           POVS_PACKET_HDR_INFO layers);
+
+/*
+ * Returns the sum of the data lengths of every NET_BUFFER chained on '_pNB'.
+ */
+static __inline ULONG
+OvsPacketLenNBL(const NET_BUFFER_LIST *_pNB)
+{
+    /* Accumulate in the return type; the lengths are unsigned. */
+    ULONG length = 0;
+    NET_BUFFER *nb;
+
+    nb = NET_BUFFER_LIST_FIRST_NB(_pNB);
+    ASSERT(nb);
+    while(nb) {
+        length += NET_BUFFER_DATA_LENGTH(nb);
+        nb = NET_BUFFER_NEXT_NB(nb);
+    }
+
+    return length;
+}
+
+/*
+ * Returns the ctl field from the TCP header in 'packet', or 0 if the field
+ * can't be read.  The caller must have ensured that 'packet' contains a TCP
+ * header.  The two bytes are returned exactly as they appear in the packet,
+ * i.e. in network byte order.
+ *
+ * We can't just use TCPHdr, from netProto.h, for this because that
+ * breaks the flags down into individual bit-fields.  We can't even use
+ * offsetof because that will try to take the address of a bit-field,
+ * which C does not allow.
+ */
+static UINT16
+OvsGetTcpCtl(const NET_BUFFER_LIST *packet, // IN
+             const POVS_PACKET_HDR_INFO layers) // IN
+{
+#define TCP_CTL_OFS 12 // Offset of "ctl" field in TCP header.
+#define TCP_FLAGS(CTL) ((CTL) & 0x3f) // Obtain TCP flags from CTL.
+
+    const UINT16 *ctl;
+    UINT16 storage;
+
+    ctl = OvsGetPacketBytes(packet, sizeof *ctl, layers->l4Offset + TCP_CTL_OFS,
+                            &storage);
+    return ctl ? *ctl : 0;
+}
+
+
+/*
+ * Returns the six TCP flag bits (FIN..URG) of 'packet', or 0 when the packet
+ * was not parsed as TCP or the flags byte cannot be read.
+ *
+ * The flags live in the second byte of the 16-bit ctl field.  That byte is
+ * read on its own so the result does not depend on host byte order; masking
+ * the raw 16-bit value would pick up the data-offset byte on a little-endian
+ * host.
+ */
+static UINT8
+OvsGetTcpFlags(const NET_BUFFER_LIST *packet,    // IN
+               const OvsFlowKey *key,   // IN
+               const POVS_PACKET_HDR_INFO layers) // IN
+{
+    UNREFERENCED_PARAMETER(key); // should be removed later
+
+    if (layers->isTcp) {
+        const UINT8 *flags;
+        UINT8 storage;
+
+        flags = OvsGetPacketBytes(packet, sizeof *flags,
+                                  layers->l4Offset + TCP_CTL_OFS + 1,
+                                  &storage);
+        return flags ? (UINT8)TCP_FLAGS(*flags) : 0;
+    } else {
+        return 0;
+    }
+}
+
+/* Returns the ARP header at 'ofs', copied into 'storage', or NULL. */
+static const EtherArp *
+OvsGetArp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          EtherArp *storage)
+{
+    return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+}
+
+/*
+ * Returns the IPv4 header at 'ofs', copied into 'storage', or NULL when it
+ * cannot be read, its IHL is smaller than the fixed header, or the packet is
+ * shorter than the header length it announces.
+ */
+static const IPHdr *
+OvsGetIp(const NET_BUFFER_LIST *packet,
+         UINT32 ofs,
+         IPHdr *storage)
+{
+    const IPHdr *ip = OvsGetPacketBytes(packet, sizeof *ip, ofs, storage);
+    if (ip) {
+        UINT32 ipLen = ip->ihl * 4;
+        if (ipLen >= sizeof *ip && OvsPacketLenNBL(packet) >= ofs + ipLen) {
+            return ip;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Returns the TCP header at 'ofs', copied into 'storage', or NULL when it
+ * cannot be read, its data offset is smaller than the fixed header, or the
+ * packet is shorter than the header length it announces.
+ */
+static const TCPHdr *
+OvsGetTcp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          TCPHdr *storage)
+{
+    const TCPHdr *tcp = OvsGetPacketBytes(packet, sizeof *tcp, ofs, storage);
+    if (tcp) {
+        UINT32 tcpLen = tcp->doff * 4;
+        if (tcpLen >= sizeof *tcp && OvsPacketLenNBL(packet) >= ofs + tcpLen) {
+            return tcp;
+        }
+    }
+    return NULL;
+}
+
+/* Returns the UDP header at 'ofs', copied into 'storage', or NULL. */
+static const UDPHdr *
+OvsGetUdp(const NET_BUFFER_LIST *packet,
+          UINT32 ofs,
+          UDPHdr *storage)
+{
+    return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+}
+
+/* Returns the ICMP header at 'ofs', copied into 'storage', or NULL. */
+static const ICMPHdr *
+OvsGetIcmp(const NET_BUFFER_LIST *packet,
+           UINT32 ofs,
+           ICMPHdr *storage)
+{
+    return OvsGetPacketBytes(packet, sizeof *storage, ofs, storage);
+}
+
+#endif /* __PACKET_PARSER_H_ */
diff --git a/datapath-windows/ovsext/Switch.c b/datapath-windows/ovsext/Switch.c
new file mode 100644
index 000000000..9578680e0
--- /dev/null
+++ b/datapath-windows/ovsext/Switch.c
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file contains the implementation of the management functionality of the + * OVS. + */ + +#include "precomp.h" + +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "Flow.h" +#include "IpHelper.h" +#include "TunnelIntf.h" +#include "Oid.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_SWITCH +#include "Debug.h" + +POVS_SWITCH_CONTEXT gOvsSwitchContext; +BOOLEAN gOvsInAttach; +UINT64 ovsTimeIncrementPerTick; + +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern NDIS_HANDLE gOvsExtDriverHandle; +extern NDIS_HANDLE gOvsExtDriverObject; + +static NDIS_STATUS OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle, + POVS_SWITCH_CONTEXT *switchContextOut); +static NDIS_STATUS OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext); +static VOID OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext); +static VOID OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext); +static NDIS_STATUS OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext); + + +/* + * -------------------------------------------------------------------------- + * Implements filter driver's FilterAttach function. + * + * This function allocates the switch context, and initializes its necessary + * members. 
+ *
+ * Only one switch is supported: the attach fails when a switch context
+ * already exists or when another attach is still in progress.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtAttach(NDIS_HANDLE ndisFilterHandle,
+             NDIS_HANDLE filterDriverContext,
+             PNDIS_FILTER_ATTACH_PARAMETERS attachParameters)
+{
+    NDIS_STATUS status = NDIS_STATUS_FAILURE;
+    NDIS_FILTER_ATTRIBUTES ovsExtAttributes;
+    POVS_SWITCH_CONTEXT switchContext = NULL;
+    /* TRUE once this call has set 'gOvsInAttach' and therefore owns it. */
+    BOOLEAN attachClaimed = FALSE;
+
+    UNREFERENCED_PARAMETER(filterDriverContext);
+
+    OVS_LOG_TRACE("Enter: ndisFilterHandle %p", ndisFilterHandle);
+
+    ASSERT(filterDriverContext == (NDIS_HANDLE)gOvsExtDriverObject);
+    if (attachParameters->MiniportMediaType != NdisMedium802_3) {
+        status = NDIS_STATUS_INVALID_PARAMETER;
+        goto cleanup;
+    }
+
+    if (gOvsExtDriverHandle == NULL) {
+        OVS_LOG_TRACE("Exit: OVSEXT driver is not loaded.");
+        ASSERT(FALSE);
+        goto cleanup;
+    }
+
+    NdisAcquireSpinLock(gOvsCtrlLock);
+    if (gOvsSwitchContext) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        OVS_LOG_TRACE("Exit: Failed to create OVS Switch, only one datapath is "
+                      "supported, %p.", gOvsSwitchContext);
+        goto cleanup;
+    }
+    if (gOvsInAttach) {
+        NdisReleaseSpinLock(gOvsCtrlLock);
+        /* Just fail the request. */
+        OVS_LOG_TRACE("Exit: Failed to create OVS Switch, since another attach "
+                      "instance is in attach process.");
+        goto cleanup;
+    }
+    gOvsInAttach = TRUE;
+    attachClaimed = TRUE;
+    NdisReleaseSpinLock(gOvsCtrlLock);
+
+    status = OvsInitIpHelper(ndisFilterHandle);
+    if (status != STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Exit: Failed to initialize IP helper.");
+        goto cleanup;
+    }
+
+    status = OvsCreateSwitch(ndisFilterHandle, &switchContext);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsCleanupIpHelper();
+        goto cleanup;
+    }
+    ASSERT(switchContext);
+
+    /*
+     * Register the switch context with NDIS so NDIS can pass it back to the
+     * Filterxxx callback functions as the 'FilterModuleContext' parameter.
+     */
+    RtlZeroMemory(&ovsExtAttributes, sizeof(NDIS_FILTER_ATTRIBUTES));
+    ovsExtAttributes.Header.Revision = NDIS_FILTER_ATTRIBUTES_REVISION_1;
+    ovsExtAttributes.Header.Size = sizeof(NDIS_FILTER_ATTRIBUTES);
+    ovsExtAttributes.Header.Type = NDIS_OBJECT_TYPE_FILTER_ATTRIBUTES;
+    ovsExtAttributes.Flags = 0;
+
+    NDIS_DECLARE_FILTER_MODULE_CONTEXT(OVS_SWITCH_CONTEXT);
+    status = NdisFSetAttributes(ndisFilterHandle, switchContext, &ovsExtAttributes);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_ERROR("Failed to set attributes.");
+        OvsCleanupIpHelper();
+        goto cleanup;
+    }
+
+    /* Setup the state machine. */
+    switchContext->controlFlowState = OvsSwitchAttached;
+    switchContext->dataFlowState = OvsSwitchPaused;
+
+    gOvsSwitchContext = switchContext;
+    KeMemoryBarrier();
+
+cleanup:
+    /*
+     * Only the call that set the flag may clear it.  The early-exit paths
+     * above never set it, and clearing it there would let a third attach
+     * run concurrently with the one that is still in progress.
+     */
+    if (attachClaimed) {
+        gOvsInAttach = FALSE;
+    }
+    if (status != NDIS_STATUS_SUCCESS) {
+        if (switchContext != NULL) {
+            OvsDeleteSwitch(switchContext);
+        }
+    }
+    OVS_LOG_TRACE("Exit: status %x", status);
+
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  This function allocates the switch context, and initializes its necessary
+ *  members.
+ *
+ *  On success '*switchContextOut' receives the new context.  On failure
+ *  everything allocated here is released again and '*switchContextOut' is
+ *  left untouched.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsCreateSwitch(NDIS_HANDLE ndisFilterHandle,
+                POVS_SWITCH_CONTEXT *switchContextOut)
+{
+    NDIS_STATUS status;
+    POVS_SWITCH_CONTEXT switchContext;
+    NDIS_SWITCH_CONTEXT hostSwitchContext;
+    NDIS_SWITCH_OPTIONAL_HANDLERS hostSwitchHandler;
+
+    OVS_LOG_TRACE("Enter: Create switch object");
+
+    switchContext =
+        (POVS_SWITCH_CONTEXT) OvsAllocateMemory(sizeof(OVS_SWITCH_CONTEXT));
+    if (switchContext == NULL) {
+        status = NDIS_STATUS_RESOURCES;
+        goto create_switch_done;
+    }
+    RtlZeroMemory(switchContext, sizeof(OVS_SWITCH_CONTEXT));
+
+    /* Initialize the switch. */
+    hostSwitchHandler.Header.Type = NDIS_OBJECT_TYPE_SWITCH_OPTIONAL_HANDLERS;
+    hostSwitchHandler.Header.Size = NDIS_SIZEOF_SWITCH_OPTIONAL_HANDLERS_REVISION_1;
+    hostSwitchHandler.Header.Revision = NDIS_SWITCH_OPTIONAL_HANDLERS_REVISION_1;
+
+    status = NdisFGetOptionalSwitchHandlers(ndisFilterHandle,
+                                            &hostSwitchContext,
+                                            &hostSwitchHandler);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_ERROR("OvsExtAttach: Extension is running in "
+                      "non-switch environment.");
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+
+    switchContext->NdisFilterHandle = ndisFilterHandle;
+    switchContext->NdisSwitchContext = hostSwitchContext;
+    RtlCopyMemory(&switchContext->NdisSwitchHandlers, &hostSwitchHandler,
+                  sizeof(NDIS_SWITCH_OPTIONAL_HANDLERS));
+
+    status = OvsInitSwitchContext(switchContext);
+    if (status != NDIS_STATUS_SUCCESS) {
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+
+    status = OvsTunnelFilterInitialize(gOvsExtDriverObject);
+    if (status != NDIS_STATUS_SUCCESS) {
+        /*
+         * OvsInitSwitchContext() succeeded, so the lock, the arrays, the
+         * flow table and the buffer pool it set up must be released before
+         * the context itself is freed.
+         */
+        OvsCleanupSwitchContext(switchContext);
+        OvsFreeMemory(switchContext);
+        goto create_switch_done;
+    }
+    *switchContextOut = switchContext;
+
+create_switch_done:
+    OVS_LOG_TRACE("Exit: switchContext: %p status: %#lx",
+                  switchContext, status);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  Implements filter driver's FilterDetach function.
+ *
+ *  Marks the switch as detached, waits for pending OID requests to drain,
+ *  then deletes the switch, cleans up the IP helper and clears the global
+ *  switch context pointer.
+ * --------------------------------------------------------------------------
+ */
+_Use_decl_annotations_
+VOID
+OvsExtDetach(NDIS_HANDLE filterModuleContext)
+{
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+    OVS_LOG_TRACE("Enter: filterModuleContext %p", filterModuleContext);
+
+    ASSERT(switchContext->dataFlowState == OvsSwitchPaused);
+    switchContext->controlFlowState = OvsSwitchDetached;
+    KeMemoryBarrier();
+    /* Poll until every outstanding OID request has completed. */
+    while(switchContext->pendingOidCount > 0) {
+        NdisMSleep(1000);
+    }
+    OvsDeleteSwitch(switchContext);
+    OvsCleanupIpHelper();
+    /*
+     * NOTE(review): 'gOvsSwitchContext' still points at the freed context
+     * between OvsDeleteSwitch() and this assignment -- confirm no other path
+     * can dereference it in that window.
+     */
+    gOvsSwitchContext = NULL;
+    /* This completes the cleanup, and a new attach can be handled now. */
+
+    OVS_LOG_TRACE("Exit: OvsDetach Successfully");
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  This function deletes the switch by freeing all memory previously allocated.
+ *  XXX need synchronization with other path.
+ *
+ *  'switchContext' is freed here and must not be used by the caller
+ *  afterwards.
+ * --------------------------------------------------------------------------
+ */
+VOID
+OvsDeleteSwitch(POVS_SWITCH_CONTEXT switchContext)
+{
+    /* Saved up front because the context is freed before the exit trace. */
+    UINT32 dpNo = switchContext->dpNo;
+
+    OVS_LOG_TRACE("Enter: switchContext:%p", switchContext);
+
+    OvsTunnelFilterUninitialize(gOvsExtDriverObject);
+    OvsClearAllSwitchVports(switchContext);
+    OvsCleanupSwitchContext(switchContext);
+    OvsFreeMemory(switchContext);
+    OVS_LOG_TRACE("Exit: deleted switch %p  dpNo: %d", switchContext, dpNo);
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  Implements filter driver's FilterRestart function.
+ *
+ *  On the first restart the switch is activated if the host reports that
+ *  switch activation is complete; the data flow state then moves from
+ *  paused to running.
+ * --------------------------------------------------------------------------
+ */
+_Use_decl_annotations_
+NDIS_STATUS
+OvsExtRestart(NDIS_HANDLE filterModuleContext,
+              PNDIS_FILTER_RESTART_PARAMETERS filterRestartParameters)
+{
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    BOOLEAN switchActive;
+
+    UNREFERENCED_PARAMETER(filterRestartParameters);
+
+    OVS_LOG_TRACE("Enter: filterModuleContext %p",
+                  filterModuleContext);
+
+    /* Activate the switch if this is the first restart. */
+    if (!switchContext->isActivated && !switchContext->isActivateFailed) {
+        status = OvsQuerySwitchActivationComplete(switchContext,
+                                                  &switchActive);
+        if (status != NDIS_STATUS_SUCCESS) {
+            switchContext->isActivateFailed = TRUE;
+            status = NDIS_STATUS_RESOURCES;
+            goto cleanup;
+        }
+
+        if (switchActive) {
+            status = OvsActivateSwitch(switchContext);
+
+            if (status != NDIS_STATUS_SUCCESS) {
+                OVS_LOG_WARN("Failed to activate switch, dpNo:%d",
+                             switchContext->dpNo);
+                status = NDIS_STATUS_RESOURCES;
+                goto cleanup;
+            }
+        }
+    }
+
+    ASSERT(switchContext->dataFlowState == OvsSwitchPaused);
+    switchContext->dataFlowState = OvsSwitchRunning;
+
+cleanup:
+    OVS_LOG_TRACE("Exit: Restart switch:%p, dpNo: %d, status: %#x",
+                  switchContext, switchContext->dpNo, status);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  Implements filter driver's FilterPause function
+ *
+ *  Moves the data flow state to paused and waits for pending OID requests
+ *  to drain.  Always returns NDIS_STATUS_SUCCESS.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtPause(NDIS_HANDLE filterModuleContext,
+            PNDIS_FILTER_PAUSE_PARAMETERS pauseParameters)
+{
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+
+    UNREFERENCED_PARAMETER(pauseParameters);
+    OVS_LOG_TRACE("Enter: filterModuleContext %p",
+                  filterModuleContext);
+
+    ASSERT(switchContext->dataFlowState == OvsSwitchRunning);
+    switchContext->dataFlowState = OvsSwitchPaused;
+    KeMemoryBarrier();
+    /* Poll until every outstanding OID request has completed. */
+    while(switchContext->pendingOidCount > 0) {
+        NdisMSleep(1000);
+    }
+
+    OVS_LOG_TRACE("Exit: OvsExtPause Successfully");
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Allocates the dispatch lock, the vport and hash arrays, the flow table and
+ * the buffer pool of 'switchContext' and sets its initial field values.
+ * Returns NDIS_STATUS_RESOURCES, after releasing whatever was obtained, when
+ * any of these cannot be set up.
+ */
+static NDIS_STATUS
+OvsInitSwitchContext(POVS_SWITCH_CONTEXT switchContext)
+{
+    int i;
+    NTSTATUS status;
+
+    OVS_LOG_TRACE("Enter: switchContext: %p", switchContext);
+
+    switchContext->dispatchLock =
+        NdisAllocateRWLock(switchContext->NdisFilterHandle);
+
+    switchContext->vportArray =
+        (PVOID *)OvsAllocateMemory(sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE);
+    switchContext->nameHashArray = (PLIST_ENTRY)
+        OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
+    switchContext->portHashArray = (PLIST_ENTRY)
+        OvsAllocateMemory(sizeof (LIST_ENTRY) * OVS_MAX_VPORT_ARRAY_SIZE);
+    status = OvsAllocateFlowTable(&switchContext->datapath, switchContext);
+
+    if (status == NDIS_STATUS_SUCCESS) {
+        status = OvsInitBufferPool(switchContext);
+    }
+    /* All allocations are attempted first; any single failure undoes them. */
+    if (status != NDIS_STATUS_SUCCESS ||
+        switchContext->dispatchLock == NULL ||
+        switchContext->vportArray == NULL ||
+        switchContext->nameHashArray == NULL ||
+        switchContext->portHashArray == NULL) {
+        if (switchContext->dispatchLock) {
+            NdisFreeRWLock(switchContext->dispatchLock);
+        }
+        if (switchContext->vportArray) {
+            OvsFreeMemory(switchContext->vportArray);
+        }
+        if (switchContext->nameHashArray) {
+            OvsFreeMemory(switchContext->nameHashArray);
+        }
+        if (switchContext->portHashArray) {
+            OvsFreeMemory(switchContext->portHashArray);
+        }
+        /*
+         * NOTE(review): these two run even when the flow table or the
+         * buffer pool was never set up -- confirm both tolerate that.
+         */
+        OvsDeleteFlowTable(&switchContext->datapath);
+        OvsCleanupBufferPool(switchContext);
+
+        OVS_LOG_TRACE("Exit: Failed to init switchContext");
+        return NDIS_STATUS_RESOURCES;
+    }
+
+    for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) {
+        InitializeListHead(&switchContext->nameHashArray[i]);
+    }
+    for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) {
+        InitializeListHead(&switchContext->portHashArray[i]);
+    }
+    RtlZeroMemory(switchContext->vportArray,
+                  sizeof (PVOID) * OVS_MAX_VPORT_ARRAY_SIZE);
+
+    switchContext->isActivated = FALSE;
+    switchContext->isActivateFailed = FALSE;
+    switchContext->dpNo = OVS_DP_NUMBER;
+    switchContext->lastPortIndex = OVS_MAX_VPORT_ARRAY_SIZE -1;
+    ovsTimeIncrementPerTick = KeQueryTimeIncrement() / 10000;
+    OVS_LOG_TRACE("Exit: Succesfully initialized switchContext: %p",
+                  switchContext);
+    return NDIS_STATUS_SUCCESS;
+}
+
+/*
+ * Releases everything OvsInitSwitchContext() set up.  The context structure
+ * itself is not freed here.
+ */
+static VOID
+OvsCleanupSwitchContext(POVS_SWITCH_CONTEXT switchContext)
+{
+    OVS_LOG_TRACE("Enter: Delete switchContext:%p", switchContext);
+
+    /* We need to do cleanup for tunnel port here. */
+    ASSERT(switchContext->numVports == 0);
+
+    NdisFreeRWLock(switchContext->dispatchLock);
+    OvsFreeMemory(switchContext->nameHashArray);
+    OvsFreeMemory(switchContext->portHashArray);
+    OvsFreeMemory(switchContext->vportArray);
+    OvsDeleteFlowTable(&switchContext->datapath);
+    OvsCleanupBufferPool(switchContext);
+    OVS_LOG_TRACE("Exit: Delete switchContext: %p", switchContext);
+}
+
+/*
+ * --------------------------------------------------------------------------
+ *  This function activates the switch by initializing it with all the runtime
+ *  state. First it queries all of the MAC addresses set as custom switch policy
+ *  to allow sends from, and adds them to the property list. Then it queries the
+ *  NIC list and verifies it can support all of the NICs currently connected to
+ *  the switch, and adds the NICs to the NIC list.
+ *
+ *  On success 'isActivated' is set and a default event is posted.  When the
+ *  NIC initialization fails, the vports added so far are cleared again.
+ * --------------------------------------------------------------------------
+ */
+static NDIS_STATUS
+OvsActivateSwitch(POVS_SWITCH_CONTEXT switchContext)
+{
+    NDIS_STATUS status;
+
+    ASSERT(!switchContext->isActivated);
+
+    OVS_LOG_TRACE("Enter: activate switch %p, dpNo: %ld",
+                  switchContext, switchContext->dpNo);
+
+    status = OvsAddConfiguredSwitchPorts(switchContext);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_WARN("Failed to add configured switch ports");
+        goto cleanup;
+
+    }
+    status = OvsInitConfiguredSwitchNics(switchContext);
+
+    if (status != NDIS_STATUS_SUCCESS) {
+        OVS_LOG_WARN("Failed to add configured vports");
+        OvsClearAllSwitchVports(switchContext);
+        goto cleanup;
+    }
+    switchContext->isActivated = TRUE;
+    OvsPostEvent(OVS_DEFAULT_PORT_NO, OVS_DEFAULT_EVENT_STATUS);
+
+cleanup:
+    OVS_LOG_TRACE("Exit: activate switch:%p, isActivated: %s, status = %lx",
+                  switchContext,
+                  (switchContext->isActivated ? "TRUE" : "FALSE"), status);
+    return status;
+}
+
+/*
+ * Returns the vport stored at 'index' in the global switch context's vport
+ * array, or NULL when 'index' is out of range or the slot is empty.
+ */
+PVOID
+OvsGetVportFromIndex(UINT16 index)
+{
+    if (index < OVS_MAX_VPORT_ARRAY_SIZE &&
+            !OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) {
+        return gOvsSwitchContext->vportArray[index];
+    }
+    return NULL;
+}
+
+/* Returns the 'externalVport' field of the global switch context. */
+PVOID
+OvsGetExternalVport()
+{
+    return gOvsSwitchContext->externalVport;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ *  Implements filter driver's FilterNetPnPEvent function.
+ *
+ *  The event is forwarded with NdisFNetPnPEvent() only when the local
+ *  handling succeeded.
+ * --------------------------------------------------------------------------
+ */
+NDIS_STATUS
+OvsExtNetPnPEvent(NDIS_HANDLE filterModuleContext,
+                  PNET_PNP_EVENT_NOTIFICATION netPnPEvent)
+{
+    NDIS_STATUS status = NDIS_STATUS_SUCCESS;
+    POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)filterModuleContext;
+    BOOLEAN switchActive;
+
+    OVS_LOG_TRACE("Enter: filterModuleContext: %p, NetEvent: %d",
+                  filterModuleContext, (netPnPEvent->NetPnPEvent).NetEvent);
+    /*
+     * The only interesting event is the NetEventSwitchActivate.  It provides
+     * an asynchronous notification of the switch completing activation.
+     */
+    if (netPnPEvent->NetPnPEvent.NetEvent == NetEventSwitchActivate) {
+        status = OvsQuerySwitchActivationComplete(switchContext, &switchActive);
+        if (status != NDIS_STATUS_SUCCESS) {
+            switchContext->isActivateFailed = TRUE;
+        } else {
+            ASSERT(switchContext->isActivated == FALSE);
+            ASSERT(switchActive == TRUE);
+            if (switchContext->isActivated == FALSE && switchActive == TRUE) {
+                status = OvsActivateSwitch(switchContext);
+                /* NDIS_STATUS_SUCCESS is 0, so compare explicitly. */
+                OVS_LOG_TRACE("OvsExtNetPnPEvent: activated switch: %p "
+                              "status: %s", switchContext,
+                              status == NDIS_STATUS_SUCCESS ? "TRUE" : "FALSE");
+            }
+        }
+    }
+
+    if (status == NDIS_STATUS_SUCCESS) {
+        status = NdisFNetPnPEvent(switchContext->NdisFilterHandle,
+                                  netPnPEvent);
+    }
+    OVS_LOG_TRACE("Exit: OvsExtNetPnPEvent");
+
+    return status;
+}
diff --git a/datapath-windows/ovsext/Switch.h b/datapath-windows/ovsext/Switch.h
new file mode 100644
index 000000000..51992686c
--- /dev/null
+++ b/datapath-windows/ovsext/Switch.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains the definition of the switch object for the OVS.
+ */
+
+#ifndef __SWITCH_H_
+#define __SWITCH_H_ 1
+
+#include "NetProto.h"
+#include "BufferMgmt.h"
+#define OVS_MAX_VPORT_ARRAY_SIZE 1024
+
+#define OVS_VPORT_MASK (OVS_MAX_VPORT_ARRAY_SIZE - 1)
+
+#define OVS_INTERNAL_VPORT_DEFAULT_INDEX 0
+
+//Tunnel port indices
+#define RESERVED_START_INDEX1    1
+#define OVS_TUNNEL_INDEX_START RESERVED_START_INDEX1
+#define OVS_VXLAN_VPORT_INDEX    2
+#define OVS_GRE_VPORT_INDEX      3
+#define OVS_GRE64_VPORT_INDEX    4
+#define OVS_TUNNEL_INDEX_END OVS_GRE64_VPORT_INDEX
+
+#define OVS_EXTERNAL_VPORT_START 8
+#define OVS_EXTERNAL_VPORT_END   40
+#define OVS_INTERNAL_VPORT_START 40
+#define OVS_INTERNAL_VPOR_END    72
+#define OVS_VM_VPORT_START       72
+#define OVS_VM_VPORT_MAX         0xffff
+/* A port number packs the vport index in its low 24 bits and a generation
+ * count in the bits above. */
+#define OVS_VPORT_INDEX(_portNo)    ((_portNo) & 0xffffff)
+#define OVS_VPORT_PORT_NO(_index, _gen) \
+          (((_index) & 0xffffff) | ((UINT32)(_gen) << 24))
+/* The argument is parenthesized so that expressions can be passed safely. */
+#define OVS_VPORT_GEN(portNo) ((portNo) >> 24)
+
+#define OVS_MAX_PHYS_ADAPTERS    32
+#define OVS_MAX_IP_VPOR          32
+
+#define OVS_HASH_BASIS   0x13578642
+
+typedef struct _OVS_DATAPATH
+{
+   PLIST_ENTRY             flowTable;      // Contains OvsFlows.
+   UINT32                  nFlows;         // Number of entries in flowTable.
+
+   // List_Links              queues[64];      // Hash table of queue IDs.
+
+   /* Statistics. */
+   UINT64                  hits;            // Number of flow table hits.
+   UINT64                  misses;          // Number of flow table misses.
+   UINT64                  lost;            // Number of dropped misses.
+
+   /* Used to protect the flows in the flowtable. */
+   PNDIS_RW_LOCK_EX        lock;
+} OVS_DATAPATH, *POVS_DATAPATH;
+
+/*
+ * OVS_SWITCH_CONTEXT
+ *
+ * The context allocated per switch.  For OVS, we only
+ * support one switch, which corresponds to one datapath.
+ * Each datapath can have multiple logical bridges configured,
+ * which are maintained by vswitchd.
+ */
+
+/* Fine-grained state, stored in OVS_SWITCH_CONTEXT.dataFlowState. */
+typedef enum OVS_SWITCH_DATAFLOW_STATE
+{
+    OvsSwitchPaused,
+    OvsSwitchRunning
+} OVS_SWITCH_DATAFLOW_STATE, *POVS_SWITCH_DATAFLOW_STATE;
+
+/* Coarse state, stored in OVS_SWITCH_CONTEXT.controlFlowState. */
+typedef enum OVS_SWITCH_CONTROFLOW_STATE
+{
+    OvsSwitchUnknown,
+    OvsSwitchAttached,
+    OvsSwitchDetached
+} OVS_SWITCH_CONTROLFLOW_STATE, *POVS_SWITCH_CONTROLFLOW_STATE;
+
+// XXX: Take care of alignment and grouping members by cacheline
+typedef struct _OVS_SWITCH_CONTEXT
+{
+    /* Coarse and fine-grained switch states. */
+    OVS_SWITCH_DATAFLOW_STATE dataFlowState;
+    OVS_SWITCH_CONTROLFLOW_STATE controlFlowState;
+    BOOLEAN                 isActivated;
+    BOOLEAN                 isActivateFailed;
+
+    UINT32                  dpNo;
+
+    NDIS_SWITCH_PORT_ID     externalPortId;
+    NDIS_SWITCH_PORT_ID     internalPortId;
+    PVOID                   externalVport;  // the virtual adapter vport
+    PVOID                   internalVport;
+
+    /* Each of these three arrays has OVS_MAX_VPORT_ARRAY_SIZE entries. */
+    PVOID                  *vportArray;
+    PLIST_ENTRY             nameHashArray;  // based on ovsName
+    PLIST_ENTRY             portHashArray;  // based on portId
+
+    UINT32                  numPhysicalNics;
+    UINT32                  numVports;      // include validation port
+    UINT32                  lastPortIndex;
+
+    /* Lock taken over the switch. This protects the ports on the switch. */
+    PNDIS_RW_LOCK_EX        dispatchLock;
+
+    /* The flowtable. */
+    OVS_DATAPATH            datapath;
+
+    /* Handle to the OVSExt filter driver. Same as 'gOvsExtDriverHandle'. */
+    NDIS_HANDLE             NdisFilterHandle;
+
+    /* Handle and callbacks exposed by the underlying hyper-v switch. */
+    NDIS_SWITCH_CONTEXT     NdisSwitchContext;
+    NDIS_SWITCH_OPTIONAL_HANDLERS NdisSwitchHandlers;
+
+    volatile LONG           pendingInjectedNblCount;
+    /* Polled by the detach and pause handlers until it drops to zero. */
+    volatile LONG           pendingOidCount;
+
+    OVS_NBL_POOL            ovsPool;
+} OVS_SWITCH_CONTEXT, *POVS_SWITCH_CONTEXT;
+
+
+/*
+ * Acquires datapath->lock for reading.  'dispatch' is handed to
+ * NdisAcquireRWLockRead() as its flags argument.
+ */
+static __inline VOID
+OvsAcquireDatapathRead(OVS_DATAPATH *datapath,
+                       LOCK_STATE_EX *lockState,
+                       BOOLEAN dispatch)
+{
+    ASSERT(datapath);
+    NdisAcquireRWLockRead(datapath->lock, lockState, dispatch);
+}
+
+/*
+ * Acquires datapath->lock for writing.  'dispatch' is handed to
+ * NdisAcquireRWLockWrite() as its flags argument.
+ */
+static __inline VOID
+OvsAcquireDatapathWrite(OVS_DATAPATH *datapath,
+                        LOCK_STATE_EX *lockState,
+                        BOOLEAN dispatch)
+{
+    ASSERT(datapath);
+    NdisAcquireRWLockWrite(datapath->lock, lockState, dispatch);
+}
+
+
+/* Releases datapath->lock, using the state saved by the matching acquire. */
+static __inline VOID
+OvsReleaseDatapath(OVS_DATAPATH *datapath,
+                   LOCK_STATE_EX *lockState)
+{
+    ASSERT(datapath);
+    NdisReleaseRWLock(datapath->lock, lockState);
+}
+
+
+PVOID OvsGetVportFromIndex(UINT16 index);
+PVOID OvsGetExternalVport();
+
+#endif /* __SWITCH_H_ */
diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c
new file mode 100644
index 000000000..2e7da10de
--- /dev/null
+++ b/datapath-windows/ovsext/Tunnel.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Tunnel.c
+ *  WFP classify callback function and action code for injecting a packet
+ *  into the vswitch.
+ */
+
+#include "precomp.h"
+
+#pragma warning(push)
+#pragma warning(disable:4201)       // unnamed struct/union
+#include <fwpsk.h>
+#pragma warning(pop)
+
+#pragma warning( push )
+#pragma warning( disable:4127 )
+
+#include <fwpmk.h>
+#include "Tunnel.h"
+#include "Switch.h"
+#include "Vport.h"
+#include "Event.h"
+#include "User.h"
+#include "Vxlan.h"
+#include "PacketIO.h"
+#include "NetProto.h"
+#include "Flow.h"
+
+extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
+
+static NTSTATUS
+OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
+                              OVS_TUNNEL_PENDED_PACKET *packet);
+
+VOID OvsAcquireDatapathRead(OVS_DATAPATH *datapath,
+                            LOCK_STATE_EX *lockState,
+                            BOOLEAN dispatch);
+VOID OvsAcquireDatapathWrite(OVS_DATAPATH *datapath,
+                             LOCK_STATE_EX *lockState,
+                             BOOLEAN dispatch);
+VOID OvsReleaseDatapath(OVS_DATAPATH *datapath,
+                        LOCK_STATE_EX *lockState);
+
+
+/* WFP notify callback; nothing to do, always returns STATUS_SUCCESS. */
+NTSTATUS
+OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType,
+                const GUID *filterKey,
+                const FWPS_FILTER *filter)
+{
+    UNREFERENCED_PARAMETER(notifyType);
+    UNREFERENCED_PARAMETER(filterKey);
+    UNREFERENCED_PARAMETER(filter);
+
+    return STATUS_SUCCESS;
+}
+
+/*
+ * Blocks 'packet' from the host stack, copies it (including its IP and
+ * transport headers) into a newly allocated NBL and hands the copy to
+ * OvsInjectPacketThroughActions().
+ *
+ * Returns the status of the injection, or a failure status when the copy
+ * cannot be allocated or filled.  The data start of the original net buffer
+ * is restored before returning on every path.
+ */
+static NTSTATUS
+OvsTunnelAnalyzePacket(OVS_TUNNEL_PENDED_PACKET *packet)
+{
+    NTSTATUS status = STATUS_SUCCESS;
+    UINT32 packetLength = 0;
+    ULONG bytesCopied = 0;
+    NET_BUFFER_LIST *copiedNBL = NULL;
+    NET_BUFFER *netBuffer;
+    NDIS_STATUS ndisStatus;
+
+    /*
+     * For an inbound net buffer list, we can assume it contains only one
+     * net buffer (unless it holds reassembled fragments).  In both cases
+     * the first net buffer should include all headers; we assert if the
+     * retreat fails.
+     */
+    netBuffer = NET_BUFFER_LIST_FIRST_NB(packet->netBufferList);
+
+    /* Drop the packet from the host stack */
+    packet->classifyOut->actionType = FWP_ACTION_BLOCK;
+    packet->classifyOut->rights &= ~FWPS_RIGHT_ACTION_WRITE;
+
+    /* Adjust the net buffer list offset to the start of the IP header */
+    ndisStatus = NdisRetreatNetBufferDataStart(netBuffer,
+                                               packet->ipHeaderSize +
+                                               packet->transportHeaderSize,
+                                               0, NULL);
+    ASSERT(ndisStatus == NDIS_STATUS_SUCCESS);
+
+    /* Single NBL element for WFP */
+    ASSERT(packet->netBufferList->Next == NULL);
+
+    /* Note that the copy will inherit the original net buffer list's offset */
+    packetLength = NET_BUFFER_DATA_LENGTH(netBuffer);
+    copiedNBL = OvsAllocateVariableSizeNBL(gOvsSwitchContext, packetLength,
+                                           OVS_DEFAULT_HEADROOM_SIZE);
+
+    if (copiedNBL == NULL) {
+        /* Report the failure instead of returning the initial success. */
+        status = STATUS_INSUFFICIENT_RESOURCES;
+        goto analyzeDone;
+    }
+
+    status = NdisCopyFromNetBufferToNetBuffer(NET_BUFFER_LIST_FIRST_NB(copiedNBL),
+                                              0, packetLength,
+                                              netBuffer, 0, &bytesCopied);
+    if (status != NDIS_STATUS_SUCCESS || packetLength != bytesCopied) {
+        if (status == NDIS_STATUS_SUCCESS) {
+            /* A short copy is a failure even if the call itself succeeded. */
+            status = STATUS_UNSUCCESSFUL;
+        }
+        goto analyzeFreeNBL;
+    }
+
+    status = OvsInjectPacketThroughActions(copiedNBL,
+                                           packet);
+    goto analyzeDone;
+
+analyzeFreeNBL:
+    OvsCompleteNBL(gOvsSwitchContext, copiedNBL, TRUE);
+analyzeDone:
+    /* Undo the adjustment on the original net buffer list */
+    NdisAdvanceNetBufferDataStart(netBuffer,
+                                  packet->transportHeaderSize + packet->ipHeaderSize,
+                                  FALSE,
+                                  NULL);
+    return status;
+}
+
+
+/*
+ * --------------------------------------------------------------------------
+ * This is the classifyFn function of the datagram-data callout. It
+ * allocates a packet structure to store the classify and meta data and
+ * it references the net buffer list for out-of-band modification and
+ * re-injection. The packet structure will be queued to the global packet
+ * queue. The worker thread will then be signaled, if idle, to process
+ * the queue.
+ * -------------------------------------------------------------------------- + */ +VOID +OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, + const FWPS_INCOMING_METADATA_VALUES *inMetaValues, + VOID *layerData, + const VOID *classifyContext, + const FWPS_FILTER *filter, + UINT64 flowContext, + FWPS_CLASSIFY_OUT *classifyOut) +{ + OVS_TUNNEL_PENDED_PACKET packetStorage; + OVS_TUNNEL_PENDED_PACKET *packet = &packetStorage; + FWP_DIRECTION direction; + + UNREFERENCED_PARAMETER(classifyContext); + UNREFERENCED_PARAMETER(filter); + UNREFERENCED_PARAMETER(flowContext); + + ASSERT(layerData != NULL); + + /* We don't have the necessary right to alter the packet flow */ + if ((classifyOut->rights & FWPS_RIGHT_ACTION_WRITE) == 0) { + /* XXX TBD revisit protect against other filters owning this packet */ + ASSERT(FALSE); + goto Exit; + } + + RtlZeroMemory(packet, sizeof(OVS_TUNNEL_PENDED_PACKET)); + + /* classifyOut cannot be accessed from a different thread context */ + packet->classifyOut = classifyOut; + + if (inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V4) { + direction = + inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_DIRECTION].\ + value.uint32; + } + else { + ASSERT(inFixedValues->layerId == FWPS_LAYER_DATAGRAM_DATA_V6); + direction = + inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V6_DIRECTION].\ + value.uint32; + } + + packet->netBufferList = layerData; + + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT(inMetaValues, + FWPS_METADATA_FIELD_COMPARTMENT_ID)); + + ASSERT(direction == FWP_DIRECTION_INBOUND); + + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( + inMetaValues, + FWPS_METADATA_FIELD_IP_HEADER_SIZE)); + ASSERT(FWPS_IS_METADATA_FIELD_PRESENT( + inMetaValues, + FWPS_METADATA_FIELD_TRANSPORT_HEADER_SIZE)); + + packet->ipHeaderSize = inMetaValues->ipHeaderSize; + packet->transportHeaderSize = inMetaValues->transportHeaderSize; + + ASSERT(inFixedValues->incomingValue[FWPS_FIELD_DATAGRAM_DATA_V4_IP_PROTOCOL].value.uint8 == IPPROTO_UDP ); + 
OvsTunnelAnalyzePacket(packet); + +Exit: + ; +} + + +static NTSTATUS +OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, + OVS_TUNNEL_PENDED_PACKET *packet) +{ + NTSTATUS status = STATUS_SUCCESS; + OvsIPv4TunnelKey tunnelKey; + NET_BUFFER *pNb; + ULONG sendCompleteFlags = 0; + BOOLEAN dispatch; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + LOCK_STATE_EX lockState, dpLockState; + LIST_ENTRY missedPackets; + OvsCompletionList completionList; + KIRQL irql; + ULONG SendFlags = NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP; + OVS_DATAPATH *datapath = &gOvsSwitchContext->datapath; + + ASSERT(gOvsSwitchContext); + + /* Fill the tunnel key */ + status = OvsSlowPathDecapVxlan(pNbl, &tunnelKey); + + if(!NT_SUCCESS(status)) { + goto dropit; + } + + pNb = NET_BUFFER_LIST_FIRST_NB(pNbl); + + NdisAdvanceNetBufferDataStart(pNb, + packet->transportHeaderSize + packet->ipHeaderSize + + sizeof(VXLANHdr), + FALSE, + NULL); + + /* Most likely (always) dispatch irql */ + irql = KeGetCurrentIrql(); + + /* dispatch is used for datapath lock as well */ + dispatch = (irql == DISPATCH_LEVEL) ? NDIS_RWL_AT_DISPATCH_LEVEL : 0; + if (dispatch) { + sendCompleteFlags |= NDIS_SEND_COMPLETE_FLAGS_DISPATCH_LEVEL; + } + + InitializeListHead(&missedPackets); + OvsInitCompletionList(&completionList, gOvsSwitchContext, + sendCompleteFlags); + + { + POVS_VPORT_ENTRY vport; + UINT32 portNo; + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + UINT64 hash; + PNET_BUFFER curNb; + OvsFlow *flow; + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); + + /* + * XXX WFP packets contain a single NBL structure. 
+ * Reassembeled packet "may" have multiple NBs, however, a simple test shows + * that the packet still has a single NB (after reassemble) + * We still need to check if the Ethernet header of the innet packet is in a single MD + */ + + curNb = NET_BUFFER_LIST_FIRST_NB(pNbl); + ASSERT(curNb->Next == NULL); + + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, dispatch); + + /* Lock the flowtable for the duration of accessing the flow */ + OvsAcquireDatapathRead(datapath, &dpLockState, NDIS_RWL_AT_DISPATCH_LEVEL); + + SendFlags |= NDIS_SEND_FLAGS_DISPATCH_LEVEL; + + vport = OvsGetTunnelVport(OVSWIN_VPORT_TYPE_VXLAN); + + if (vport == NULL){ + status = STATUS_UNSUCCESSFUL; + goto unlockAndDrop; + } + + ASSERT(vport->ovsType == OVSWIN_VPORT_TYPE_VXLAN); + + portNo = vport->portNo; + + status = OvsExtractFlow(pNbl, portNo, &key, &layers, &tunnelKey); + if (status != NDIS_STATUS_SUCCESS) { + goto unlockAndDrop; + } + + flow = OvsLookupFlow(datapath, &key, &hash, FALSE); + if (flow) { + OvsFlowUsed(flow, pNbl, &layers); + datapath->hits++; + + OvsActionsExecute(gOvsSwitchContext, &completionList, pNbl, + portNo, SendFlags, &key, &hash, &layers, + flow->actions, flow->actionsLen); + + OvsReleaseDatapath(datapath, &dpLockState); + } else { + POVS_PACKET_QUEUE_ELEM elem; + + datapath->misses++; + elem = OvsCreateQueuePacket(1, NULL, 0, OVS_PACKET_CMD_MISS, + portNo, &key.tunKey, pNbl, curNb, + TRUE, &layers); + if (elem) { + /* Complete the packet since it was copied to user buffer. 
*/ + InsertTailList(&missedPackets, &elem->link); + OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1); + } else { + status = STATUS_INSUFFICIENT_RESOURCES; + } + goto unlockAndDrop; + } + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + + } + + return status; + +unlockAndDrop: + OvsReleaseDatapath(datapath, &dpLockState); + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); +dropit: + pNbl = OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); + ASSERT(pNbl == NULL); + return status; +} + +#pragma warning(pop) diff --git a/datapath-windows/ovsext/Tunnel.h b/datapath-windows/ovsext/Tunnel.h new file mode 100644 index 000000000..2978bb395 --- /dev/null +++ b/datapath-windows/ovsext/Tunnel.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TUNNEL_H_ +#define __TUNNEL_H_ 1 + +// +// OVS_TUNNEL_PENDED_PACKET is the object type we used to store all information +// needed for out-of-band packet modification and re-injection. This type +// also points back to the flow context the packet belongs to. + +typedef struct OVS_TUNNEL_PENDED_PACKET_ +{ + /* Common fields for inbound and outbound traffic */ + NET_BUFFER_LIST *netBufferList; + + UINT32 ipHeaderSize; + UINT32 transportHeaderSize; + FWPS_CLASSIFY_OUT *classifyOut; +} OVS_TUNNEL_PENDED_PACKET; + +/* Shared global data. 
*/ + +extern UINT16 configNewDestPort; + +extern UINT32 gCalloutIdV4; + +// +// Shared function prototypes +// +VOID OvsTunnelClassify(const FWPS_INCOMING_VALUES *inFixedValues, + const FWPS_INCOMING_METADATA_VALUES *inMetaValues, + VOID *layerData, + const VOID *classifyContext, + const FWPS_FILTER *filter, + UINT64 flowContext, + FWPS_CLASSIFY_OUT *classifyOut); + + +NTSTATUS OvsTunnelNotify(FWPS_CALLOUT_NOTIFY_TYPE notifyType, + const GUID *filterKey, + const FWPS_FILTER *filter); + +#endif /* __TUNNEL_H_ */ diff --git a/datapath-windows/ovsext/TunnelFilter.c b/datapath-windows/ovsext/TunnelFilter.c new file mode 100644 index 000000000..b191dd1f5 --- /dev/null +++ b/datapath-windows/ovsext/TunnelFilter.c @@ -0,0 +1,343 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" + +#pragma warning(push) +#pragma warning(disable:4201) // unnamed struct/union + + +#include + +#pragma warning(pop) + +#include +#include +#include +#include + +#include "Tunnel.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "Vxlan.h" + + +#define INITGUID +#include + + +/* Configurable parameters (addresses and ports are in host order) */ +UINT16 configNewDestPort = VXLAN_UDP_PORT; + +/* + * Callout and sublayer GUIDs + */ +// b16b0a6e-2b2a-41a3-8b39-bd3ffc855ff8 +DEFINE_GUID( + OVS_TUNNEL_CALLOUT_V4, + 0xb16b0a6e, + 0x2b2a, + 0x41a3, + 0x8b, 0x39, 0xbd, 0x3f, 0xfc, 0x85, 0x5f, 0xf8 + ); + +/* 0104fd7e-c825-414e-94c9-f0d525bbc169 */ +DEFINE_GUID( + OVS_TUNNEL_SUBLAYER, + 0x0104fd7e, + 0xc825, + 0x414e, + 0x94, 0xc9, 0xf0, 0xd5, 0x25, 0xbb, 0xc1, 0x69 + ); + +/* + * Callout driver global variables + */ +PDEVICE_OBJECT gDeviceObject; + +HANDLE gEngineHandle; +UINT32 gCalloutIdV4; + + +/* Callout driver implementation */ + +NTSTATUS +OvsTunnelAddFilter(PWSTR filterName, + const PWSTR filterDesc, + USHORT remotePort, + FWP_DIRECTION direction, + UINT64 context, + const GUID *layerKey, + const GUID *calloutKey) +{ + NTSTATUS status = STATUS_SUCCESS; + FWPM_FILTER filter = {0}; + FWPM_FILTER_CONDITION filterConditions[3] = {0}; + UINT conditionIndex; + + UNREFERENCED_PARAMETER(remotePort); + UNREFERENCED_PARAMETER(direction); + + filter.layerKey = *layerKey; + filter.displayData.name = (wchar_t*)filterName; + filter.displayData.description = (wchar_t*)filterDesc; + + filter.action.type = FWP_ACTION_CALLOUT_TERMINATING; + filter.action.calloutKey = *calloutKey; + filter.filterCondition = filterConditions; + filter.subLayerKey = OVS_TUNNEL_SUBLAYER; + filter.weight.type = FWP_EMPTY; // auto-weight. 
+ filter.rawContext = context; + + conditionIndex = 0; + + filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_DIRECTION; + filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; + filterConditions[conditionIndex].conditionValue.type = FWP_UINT32; + filterConditions[conditionIndex].conditionValue.uint32 = direction; + + conditionIndex++; + + filterConditions[conditionIndex].fieldKey = FWPM_CONDITION_IP_LOCAL_PORT; + filterConditions[conditionIndex].matchType = FWP_MATCH_EQUAL; + filterConditions[conditionIndex].conditionValue.type = FWP_UINT16; + filterConditions[conditionIndex].conditionValue.uint16 = remotePort; + + conditionIndex++; + + filter.numFilterConditions = conditionIndex; + + status = FwpmFilterAdd(gEngineHandle, + &filter, + NULL, + NULL); + + return status; +} + +/* + * -------------------------------------------------------------------------- + * This function registers callouts and filters that intercept UDP traffic at + * WFP FWPM_LAYER_DATAGRAM_DATA_V4 + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunnelRegisterDatagramDataCallouts(const GUID *layerKey, + const GUID *calloutKey, + VOID *deviceObject, + UINT32 *calloutId) +{ + NTSTATUS status = STATUS_SUCCESS; + + FWPS_CALLOUT sCallout = {0}; + FWPM_CALLOUT mCallout = {0}; + + FWPM_DISPLAY_DATA displayData = {0}; + + BOOLEAN calloutRegistered = FALSE; + + sCallout.calloutKey = *calloutKey; + sCallout.classifyFn = OvsTunnelClassify; + sCallout.notifyFn = OvsTunnelNotify; +#if FLOW_CONTEXT + /* Currnetly we don't associate a context with the flow */ + sCallout.flowDeleteFn = OvsTunnelFlowDelete; + sCallout.flags = FWP_CALLOUT_FLAG_CONDITIONAL_ON_FLOW; +#endif + + status = FwpsCalloutRegister(deviceObject, + &sCallout, + calloutId); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + calloutRegistered = TRUE; + + displayData.name = L"Datagram-Data OVS Callout"; + displayData.description = L"Proxies destination address/port for UDP"; + + 
mCallout.calloutKey = *calloutKey; + mCallout.displayData = displayData; + mCallout.applicableLayer = *layerKey; + + status = FwpmCalloutAdd(gEngineHandle, + &mCallout, + NULL, + NULL); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + + status = OvsTunnelAddFilter(L"Datagram-Data OVS Filter (Inbound)", + L"address/port for UDP", + configNewDestPort, + FWP_DIRECTION_INBOUND, + 0, + layerKey, + calloutKey); + +Exit: + + if (!NT_SUCCESS(status)){ + if (calloutRegistered) { + FwpsCalloutUnregisterById(*calloutId); + *calloutId = 0; + } + } + + return status; +} + +/* + * -------------------------------------------------------------------------- + * This function registers dynamic callouts and filters that intercept UDP + * Callouts and filters will be removed during De-Initialize. + * -------------------------------------------------------------------------- + */ +NTSTATUS +OvsTunnelRegisterCallouts(VOID *deviceObject) +{ + NTSTATUS status = STATUS_SUCCESS; + FWPM_SUBLAYER OvsTunnelSubLayer; + + BOOLEAN engineOpened = FALSE; + BOOLEAN inTransaction = FALSE; + + FWPM_SESSION session = {0}; + + session.flags = FWPM_SESSION_FLAG_DYNAMIC; + + status = FwpmEngineOpen(NULL, + RPC_C_AUTHN_WINNT, + NULL, + &session, + &gEngineHandle); + + if (!NT_SUCCESS(status)) { + goto Exit; + } + engineOpened = TRUE; + + status = FwpmTransactionBegin(gEngineHandle, 0); + if (!NT_SUCCESS(status)) { + goto Exit; + } + inTransaction = TRUE; + + RtlZeroMemory(&OvsTunnelSubLayer, sizeof(FWPM_SUBLAYER)); + + OvsTunnelSubLayer.subLayerKey = OVS_TUNNEL_SUBLAYER; + OvsTunnelSubLayer.displayData.name = L"Datagram-Data OVS Sub-Layer"; + OvsTunnelSubLayer.displayData.description = + L"Sub-Layer for use by Datagram-Data OVS callouts"; + OvsTunnelSubLayer.flags = 0; + OvsTunnelSubLayer.weight = FWP_EMPTY; /* auto-weight */ + + status = FwpmSubLayerAdd(gEngineHandle, &OvsTunnelSubLayer, NULL); + if (!NT_SUCCESS(status)) { + goto Exit; + } + + // In order to use this callout a socket must be opened + 
status = OvsTunnelRegisterDatagramDataCallouts(&FWPM_LAYER_DATAGRAM_DATA_V4, + &OVS_TUNNEL_CALLOUT_V4, + deviceObject, + &gCalloutIdV4); + if (!NT_SUCCESS(status)) { + goto Exit; + } + + status = FwpmTransactionCommit(gEngineHandle); + if (!NT_SUCCESS(status)){ + goto Exit; + } + inTransaction = FALSE; + +Exit: + + if (!NT_SUCCESS(status)) { + if (inTransaction) { + FwpmTransactionAbort(gEngineHandle); + } + if (engineOpened) { + FwpmEngineClose(gEngineHandle); + gEngineHandle = NULL; + } + } + + return status; +} + +VOID +OvsTunnelUnregisterCallouts(VOID) +{ + FwpmEngineClose(gEngineHandle); + gEngineHandle = NULL; + FwpsCalloutUnregisterById(gCalloutIdV4); +} + + +VOID +OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject) +{ + UNREFERENCED_PARAMETER(driverObject); + + OvsTunnelUnregisterCallouts(); + IoDeleteDevice(gDeviceObject); +} + + +NTSTATUS +OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject) +{ + NTSTATUS status = STATUS_SUCCESS; + UNICODE_STRING deviceName; + + RtlInitUnicodeString(&deviceName, + L"\\Device\\OvsTunnelFilter"); + + status = IoCreateDevice(driverObject, + 0, + &deviceName, + FILE_DEVICE_NETWORK, + 0, + FALSE, + &gDeviceObject); + + if (!NT_SUCCESS(status)){ + goto Exit; + } + + status = OvsTunnelRegisterCallouts(gDeviceObject); + +Exit: + + if (!NT_SUCCESS(status)){ + if (gEngineHandle != NULL) { + OvsTunnelUnregisterCallouts(); + } + + if (gDeviceObject) { + IoDeleteDevice(gDeviceObject); + } + } + + return status; +} diff --git a/datapath-windows/ovsext/TunnelIntf.h b/datapath-windows/ovsext/TunnelIntf.h new file mode 100644 index 000000000..c62272088 --- /dev/null +++ b/datapath-windows/ovsext/TunnelIntf.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TUNNEL_INTF_H_ +#define __TUNNEL_INTF_H_ 1 + +/* Tunnel callout driver load/unload functions */ +NTSTATUS OvsTunnelFilterInitialize(PDRIVER_OBJECT driverObject); + +VOID OvsTunnelFilterUninitialize(PDRIVER_OBJECT driverObject); + +#endif /* __TUNNEL_INTF_H_ */ diff --git a/datapath-windows/ovsext/Types.h b/datapath-windows/ovsext/Types.h new file mode 100644 index 000000000..e48df7aef --- /dev/null +++ b/datapath-windows/ovsext/Types.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __TYPES_H_ +#define __TYPES_H_ 1 + +/* Defines the userspace specific data types + * for files included from user space. 
*/ +typedef unsigned long long uint64, uint64_t, ovs_be64, u64; +typedef long long int64, int64_t; +typedef unsigned int uint32, uint32_t, ovs_be32, u32; +typedef unsigned short uint16, uint16_t, ovs_be16, u16; +typedef unsigned char uint8, uint8_t, u8; +typedef uint64 __u64, __be64; +typedef uint32 __u32, __be32; +typedef uint16 __u16, __be16; +typedef uint8 __u8; + +/* Defines the userspace specific data types for file + * included within kernel only. */ +typedef UINT32 BE32; +typedef UINT64 BE64; + +#define ETH_ALEN 6 + +#define SIZE_MAX MAXUINT32 + +#endif /* __TYPES_H_ */ diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c new file mode 100644 index 000000000..612a4bd57 --- /dev/null +++ b/datapath-windows/ovsext/User.c @@ -0,0 +1,867 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * OvsUser.c + * Manage packet queue for packet miss for userAction. 
+ */ + + +#include "precomp.h" + +#include "Datapath.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "PacketIO.h" +#include "Checksum.h" +#include "NetProto.h" +#include "Flow.h" +#include "TunnelIntf.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_USER +#include "Debug.h" + +OVS_USER_PACKET_QUEUE ovsPacketQueues[OVS_MAX_NUM_PACKET_QUEUES]; + +POVS_PACKET_QUEUE_ELEM OvsGetNextPacket(POVS_OPEN_INSTANCE instance); +extern PNDIS_SPIN_LOCK gOvsCtrlLock; +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; +OVS_USER_STATS ovsUserStats; + + +NTSTATUS +OvsUserInit() +{ + UINT32 i; + POVS_USER_PACKET_QUEUE queue; + for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { + queue = &ovsPacketQueues[i]; + RtlZeroMemory(queue, sizeof (*queue)); + InitializeListHead(&queue->packetList); + NdisAllocateSpinLock(&queue->queueLock); + } + return STATUS_SUCCESS; +} + +VOID +OvsUserCleanup() +{ + UINT32 i; + POVS_USER_PACKET_QUEUE queue; + for (i = 0; i < OVS_MAX_NUM_PACKET_QUEUES; i++) { + queue = &ovsPacketQueues[i]; + ASSERT(IsListEmpty(&queue->packetList)); + ASSERT(queue->instance == NULL); + ASSERT(queue->pendingIrp == NULL); + NdisFreeSpinLock(&queue->queueLock); + } +} + +static VOID +OvsPurgePacketQueue(POVS_USER_PACKET_QUEUE queue, + POVS_OPEN_INSTANCE instance) +{ + PLIST_ENTRY link, next; + LIST_ENTRY tmp; + POVS_PACKET_QUEUE_ELEM elem; + + InitializeListHead(&tmp); + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return; + } + + if (queue->numPackets) { + OvsAppendList(&tmp, &queue->packetList); + queue->numPackets = 0; + } + NdisReleaseSpinLock(&queue->queueLock); + LIST_FORALL_SAFE(&tmp, link, next) { + RemoveEntryList(link); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } +} + + +VOID +OvsCleanupPacketQueue(POVS_OPEN_INSTANCE instance) +{ + POVS_USER_PACKET_QUEUE queue; + 
POVS_PACKET_QUEUE_ELEM elem; + PLIST_ENTRY link, next; + LIST_ENTRY tmp; + PIRP irp = NULL; + + InitializeListHead(&tmp); + queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + if (queue) { + PDRIVER_CANCEL cancelRoutine; + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return; + } + + if (queue->numPackets) { + OvsAppendList(&tmp, &queue->packetList); + queue->numPackets = 0; + } + queue->instance = NULL; + queue->queueId = OVS_MAX_NUM_PACKET_QUEUES; + instance->packetQueue = NULL; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + if (irp) { + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine == NULL) { + irp = NULL; + } + } + NdisReleaseSpinLock(&queue->queueLock); + } + LIST_FORALL_SAFE(&tmp, link, next) { + RemoveEntryList(link); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + } + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } +} + +NTSTATUS +OvsSubscribeDpIoctl(PFILE_OBJECT fileObject, + PVOID inputBuffer, + UINT32 inputLength) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + UINT32 queueId; + POVS_USER_PACKET_QUEUE queue; + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + queueId = *(UINT32 *)inputBuffer; + if (instance->packetQueue && queueId >= OVS_MAX_NUM_PACKET_QUEUES) { + /* + * unsubscribe + */ + OvsCleanupPacketQueue(instance); + } else if (instance->packetQueue == NULL && + queueId < OVS_MAX_NUM_PACKET_QUEUES) { + queue = &ovsPacketQueues[queueId]; + NdisAcquireSpinLock(&queue->queueLock); + if (ovsPacketQueues[queueId].instance) { + if (ovsPacketQueues[queueId].instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_INSUFFICIENT_RESOURCES; + } else { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_SUCCESS; + } + } + queue->queueId = queueId; + queue->instance = instance; + instance->packetQueue = 
queue; + ASSERT(IsListEmpty(&queue->packetList)); + NdisReleaseSpinLock(&queue->queueLock); + } else { + return STATUS_INVALID_PARAMETER; + } + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsReadDpIoctl(PFILE_OBJECT fileObject, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_PACKET_QUEUE_ELEM elem; + UINT32 len; + +#define TCP_CSUM_OFFSET 16 +#define UDP_CSUM_OFFSET 6 + ASSERT(instance); + + if (instance->packetQueue == NULL) { + return STATUS_INVALID_PARAMETER; + } + if (outputLength < (sizeof (OVS_PACKET_INFO) + OVS_MIN_PACKET_SIZE)) { + return STATUS_BUFFER_TOO_SMALL; + } + + elem = OvsGetNextPacket(instance); + if (elem) { + /* + * XXX revisit this later + */ + len = elem->packet.totalLen > outputLength ? outputLength : + elem->packet.totalLen; + + if ((elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) && + len == elem->packet.totalLen) { + UINT16 sum, *ptr; + UINT16 size = (UINT16)(elem->packet.userDataLen + + elem->hdrInfo.l4Offset + + (UINT16)sizeof (OVS_PACKET_INFO)); + RtlCopyMemory(outputBuffer, &elem->packet, size); + ASSERT(len - size >= elem->hdrInfo.l4PayLoad); + sum = CopyAndCalculateChecksum((UINT8 *)outputBuffer + size, + (UINT8 *)&elem->packet + size, + elem->hdrInfo.l4PayLoad, 0); + ptr =(UINT16 *)((UINT8 *)outputBuffer + size + + (elem->hdrInfo.tcpCsumNeeded ? + TCP_CSUM_OFFSET : UDP_CSUM_OFFSET)); + *ptr = sum; + ovsUserStats.l4Csum++; + } else { + RtlCopyMemory(outputBuffer, &elem->packet, len); + } + + *replyLen = len; + OvsFreeMemory(elem); + } + return STATUS_SUCCESS; +} + +/* Helper function to allocate a Forwarding Context for an NBL */ +NTSTATUS +OvsAllocateForwardingContextForNBL(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST nbl) +{ + return switchContext->NdisSwitchHandlers. 
+ AllocateNetBufferListForwardingContext( + switchContext->NdisSwitchContext, nbl); +} + +/* + * -------------------------------------------------------------------------- + * This function allocates all the stuff necessary for creating an NBL from the + * input buffer of specified length, namely, a nonpaged data buffer of size + * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL + * context yet. It also copies data from the specified buffer to the NBL. + * -------------------------------------------------------------------------- + */ +PNET_BUFFER_LIST +OvsAllocateNBLForUserBuffer(POVS_SWITCH_CONTEXT switchContext, + PVOID userBuffer, + ULONG length) +{ + UINT8 *data = NULL; + PNET_BUFFER_LIST nbl = NULL; + PNET_BUFFER nb; + PMDL mdl; + + if (length > OVS_DEFAULT_DATA_SIZE) { + nbl = OvsAllocateVariableSizeNBL(switchContext, length, + OVS_DEFAULT_HEADROOM_SIZE); + + } else { + nbl = OvsAllocateFixSizeNBL(switchContext, length, + OVS_DEFAULT_HEADROOM_SIZE); + } + if (nbl == NULL) { + return NULL; + } + + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + mdl = NET_BUFFER_CURRENT_MDL(nb); + data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(nb); + if (!data) { + OvsCompleteNBL(switchContext, nbl, TRUE); + return NULL; + } + + NdisMoveMemory(data, userBuffer, length); + + return nbl; +} + +NTSTATUS +OvsExecuteDpIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 outputLength) +{ + NTSTATUS status = STATUS_SUCCESS; + NTSTATUS ndisStatus; + OvsPacketExecute *execute; + LOCK_STATE_EX lockState; + PNET_BUFFER_LIST pNbl; + PNL_ATTR actions; + PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail; + OvsFlowKey key; + OVS_PACKET_HDR_INFO layers; + POVS_VPORT_ENTRY vport; + + if (inputLength < sizeof(*execute) || outputLength != 0) { + return STATUS_INFO_LENGTH_MISMATCH; + } + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL) { + status = STATUS_INVALID_PARAMETER; + goto unlock; 
+ } + + execute = (struct OvsPacketExecute *) inputBuffer; + + if (execute->packetLen == 0) { + status = STATUS_INVALID_PARAMETER; + goto unlock; + } + + if (inputLength != sizeof (*execute) + + execute->actionsLen + execute->packetLen) { + status = STATUS_INFO_LENGTH_MISMATCH; + goto unlock; + } + actions = (PNL_ATTR)((PCHAR)&execute->actions + execute->packetLen); + + /* + * Allocate the NBL, copy the data from the userspace buffer. Allocate + * also, the forwarding context for the packet. + */ + pNbl = OvsAllocateNBLForUserBuffer(gOvsSwitchContext, &execute->packetBuf, + execute->packetLen); + if (pNbl == NULL) { + status = STATUS_NO_MEMORY; + goto unlock; + } + + fwdDetail = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(pNbl); + vport = OvsFindVportByPortNo(gOvsSwitchContext, execute->inPort); + if (vport) { + fwdDetail->SourcePortId = vport->portId; + fwdDetail->SourceNicIndex = vport->nicIndex; + } else { + fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID; + fwdDetail->SourceNicIndex = 0; + } + // XXX: Figure out if any of the other members of fwdDetail need to be set. + + ndisStatus = OvsExtractFlow(pNbl, fwdDetail->SourcePortId, &key, &layers, + NULL); + if (ndisStatus == NDIS_STATUS_SUCCESS) { + ASSERT(KeGetCurrentIrql() == DISPATCH_LEVEL); + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + ndisStatus = OvsActionsExecute(gOvsSwitchContext, NULL, pNbl, + vport ? 
vport->portNo : + OVS_DEFAULT_PORT_NO, + NDIS_SEND_FLAGS_SWITCH_DESTINATION_GROUP, + &key, NULL, &layers, actions, + execute->actionsLen); + pNbl = NULL; + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + } + if (ndisStatus != NDIS_STATUS_SUCCESS) { + status = STATUS_UNSUCCESSFUL; + } + + if (pNbl) { + OvsCompleteNBL(gOvsSwitchContext, pNbl, TRUE); + } +unlock: + NdisReleaseSpinLock(gOvsCtrlLock); + return status; +} + + +NTSTATUS +OvsPurgeDpIoctl(PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_USER_PACKET_QUEUE queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + + if (queue == NULL) { + return STATUS_INVALID_PARAMETER; + } + OvsPurgePacketQueue(queue, instance); + return STATUS_SUCCESS; +} + +VOID +OvsCancelIrpDatapath(PDEVICE_OBJECT deviceObject, + PIRP irp) +{ + PIO_STACK_LOCATION irpSp; + PFILE_OBJECT fileObject; + POVS_OPEN_INSTANCE instance; + POVS_USER_PACKET_QUEUE queue = NULL; + + UNREFERENCED_PARAMETER(deviceObject); + + IoReleaseCancelSpinLock(irp->CancelIrql); + irpSp = IoGetCurrentIrpStackLocation(irp); + fileObject = irpSp->FileObject; + + if (fileObject == NULL) { + goto done; + } + NdisAcquireSpinLock(gOvsCtrlLock); + instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + if (instance) { + queue = instance->packetQueue; + } + if (instance == NULL || queue == NULL) { + NdisReleaseSpinLock(gOvsCtrlLock); + goto done; + } + NdisReleaseSpinLock(gOvsCtrlLock); + NdisAcquireSpinLock(&queue->queueLock); + if (queue->pendingIrp == irp) { + queue->pendingIrp = NULL; + } + NdisReleaseSpinLock(&queue->queueLock); +done: + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); +} + + +NTSTATUS +OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject) +{ + POVS_OPEN_INSTANCE instance = (POVS_OPEN_INSTANCE)fileObject->FsContext; + POVS_USER_PACKET_QUEUE queue = + (POVS_USER_PACKET_QUEUE)instance->packetQueue; + NTSTATUS status = STATUS_SUCCESS; + BOOLEAN cancelled = FALSE; + + if (queue == 
NULL) { + return STATUS_INVALID_PARAMETER; + } + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_INVALID_PARAMETER; + } + if (queue->pendingIrp) { + NdisReleaseSpinLock(&queue->queueLock); + return STATUS_DEVICE_BUSY; + } + if (queue->numPackets == 0) { + PDRIVER_CANCEL cancelRoutine; + IoMarkIrpPending(irp); + IoSetCancelRoutine(irp, OvsCancelIrpDatapath); + if (irp->Cancel) { + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine) { + cancelled = TRUE; + } + } else { + queue->pendingIrp = irp; + } + status = STATUS_PENDING; + } + NdisReleaseSpinLock(&queue->queueLock); + if (cancelled) { + OvsCompleteIrpRequest(irp, 0, STATUS_CANCELLED); + OVS_LOG_INFO("Datapath IRP cancelled: %p", irp); + } + return status; +} + + +POVS_PACKET_QUEUE_ELEM +OvsGetNextPacket(POVS_OPEN_INSTANCE instance) +{ + POVS_USER_PACKET_QUEUE queue; + PLIST_ENTRY link; + queue = (POVS_USER_PACKET_QUEUE)instance->packetQueue; + if (queue == NULL) { + return NULL; + } + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance != instance || queue->numPackets == 0) { + NdisReleaseSpinLock(&queue->queueLock); + return NULL; + } + link = RemoveHeadList(&queue->packetList); + queue->numPackets--; + NdisReleaseSpinLock(&queue->queueLock); + return CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); +} + + +POVS_USER_PACKET_QUEUE +OvsGetQueue(UINT32 queueId) +{ + POVS_USER_PACKET_QUEUE queue; + if (queueId >= OVS_MAX_NUM_PACKET_QUEUES) { + return NULL; + } + queue = &ovsPacketQueues[queueId]; + return queue->instance != NULL ? queue : NULL; +} + +/* + *---------------------------------------------------------------------------- + * OvsCreateQueuePacket -- + * + * Create a packet which will be forwarded to user space. 
+ * + * InputParameter: + * queueId Identify the queue the packet to be inserted + * This will be used when multiple queues is supported + * in userspace + * userData: when cmd is user action, this field contain + * user action data. + * userDataLen: as name indicated + * cmd: either miss or user action + * inPort: datapath port id from which the packet is received. + * tunnelKey: tunnelKey for tunneled packet + * nbl: the NET_BUFFER_LIST which contain the packet + * nb: the packet + * isRecv: This is used to decide how to interprete the csum info + * hdrInfo: include hdr info initialized during flow extraction. + * + * Results: + * NULL if fail to create the packet + * The packet element otherwise + *---------------------------------------------------------------------------- + */ +POVS_PACKET_QUEUE_ELEM +OvsCreateQueuePacket(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, + UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + PNET_BUFFER nb, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo) +{ +#define VLAN_TAG_SIZE 4 + UINT32 allocLen, dataLen, extraLen = 0; + POVS_PACKET_QUEUE_ELEM elem; + PMDL mdl; + UINT8 *src, *dst; + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + NDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo; + + csumInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpIpChecksumNetBufferListInfo); + + if (isRecv && (csumInfo.Receive.TcpChecksumFailed || + (csumInfo.Receive.UdpChecksumFailed && + !hdrInfo->udpCsumZero) || + csumInfo.Receive.IpChecksumFailed)) { + OVS_LOG_INFO("Packet dropped due to checksum failure."); + ovsUserStats.dropDuetoChecksum++; + return NULL; + } + + vlanInfo.Value = NET_BUFFER_LIST_INFO(nbl, Ieee8021QNetBufferListInfo); + if (vlanInfo.TagHeader.VlanId) { + /* + * We may also need to check priority XXX + */ + extraLen = VLAN_TAG_SIZE; + } + + dataLen = NET_BUFFER_DATA_LENGTH(nb); + allocLen = sizeof (OVS_PACKET_QUEUE_ELEM) + userDataLen + dataLen + + extraLen; + + elem = 
(POVS_PACKET_QUEUE_ELEM)OvsAllocateMemory(allocLen); + if (elem == NULL) { + ovsUserStats.dropDuetoResource++; + return NULL; + } + elem->hdrInfo.value = hdrInfo->value; + elem->packet.totalLen = sizeof (OVS_PACKET_INFO) + userDataLen + dataLen + + extraLen; + elem->packet.queue = queueId; + elem->packet.userDataLen = userDataLen; + elem->packet.inPort = inPort; + elem->packet.cmd = cmd; + if (cmd == (UINT32)OVS_PACKET_CMD_MISS) { + ovsUserStats.miss++; + } else { + ovsUserStats.action++; + } + elem->packet.packetLen = dataLen + extraLen; + if (tunnelKey) { + RtlCopyMemory(&elem->packet.tunnelKey, tunnelKey, + sizeof (*tunnelKey)); + } else { + RtlZeroMemory(&elem->packet.tunnelKey, + sizeof (elem->packet.tunnelKey)); + } + + dst = elem->packet.data; + if (userDataLen) { + RtlCopyMemory(dst, userData, userDataLen); + dst = dst + userDataLen; + } + dst += extraLen; + + mdl = NET_BUFFER_CURRENT_MDL(nb); + src = NdisGetDataBuffer(nb, dataLen, dst, 1, 0); + if (src == NULL) { + OvsFreeMemory(elem); + ovsUserStats.dropDuetoResource++; + return NULL; + } else if (src != dst) { + /* Copy the data from the NDIS buffer to dst. */ + RtlCopyMemory(dst, src, dataLen); + } + + dst = elem->packet.data + userDataLen + extraLen; + /* + * Fix IP hdr if necessary + */ + if ((isRecv && csumInfo.Receive.IpChecksumValueInvalid) || + (!isRecv && csumInfo.Transmit.IsIPv4 && + csumInfo.Transmit.IpHeaderChecksum)) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + ASSERT(elem->hdrInfo.isIPv4); + ASSERT(ipHdr->Version == 4); + ipHdr->HeaderChecksum = IPChecksum((UINT8 *)ipHdr, + ipHdr->HeaderLength << 2, + (UINT16)~ipHdr->HeaderChecksum); + ovsUserStats.ipCsum++; + } + ASSERT(elem->hdrInfo.tcpCsumNeeded == 0 && + elem->hdrInfo.udpCsumNeeded == 0); + /* + * Fow now, we will not do verification + * There is no correctness issue here. 
+ * XXX + */ + /* + * calculate TCP/UDP pseudo checksum + */ + if (isRecv && csumInfo.Receive.TcpChecksumValueInvalid) { + /* + * Only this case, we need to reclaculate pseudo checksum + * all other cases, it is assumed the pseudo checksum is + * filled already. + * + */ + PTCP_HDR tcpHdr = (PTCP_HDR)(dst + hdrInfo->l4Offset); + if (hdrInfo->isIPv4) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - + (ipHdr->HeaderLength << 2)); + tcpHdr->th_sum = + IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, + (UINT32 *)&ipHdr->DestinationAddress, + IPPROTO_TCP, elem->hdrInfo.l4PayLoad); + } else { + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = + (UINT16)(ntohs(ipv6Hdr->PayloadLength) + + hdrInfo->l3Offset + sizeof(IPV6_HEADER) - + hdrInfo->l4Offset); + ASSERT(hdrInfo->isIPv6); + tcpHdr->th_sum = + IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, + (UINT32 *)&ipv6Hdr->DestinationAddress, + IPPROTO_TCP, elem->hdrInfo.l4PayLoad); + } + elem->hdrInfo.tcpCsumNeeded = 1; + ovsUserStats.recalTcpCsum++; + } else if (!isRecv) { + if (csumInfo.Transmit.TcpChecksum) { + elem->hdrInfo.tcpCsumNeeded = 1; + } else if (csumInfo.Transmit.UdpChecksum) { + elem->hdrInfo.udpCsumNeeded = 1; + } + if (elem->hdrInfo.tcpCsumNeeded || elem->hdrInfo.udpCsumNeeded) { +#ifdef DBG + UINT16 sum, *ptr; + UINT8 proto = + elem->hdrInfo.tcpCsumNeeded ? 
IPPROTO_TCP : IPPROTO_UDP; +#endif + if (hdrInfo->isIPv4) { + PIPV4_HEADER ipHdr = (PIPV4_HEADER)(dst + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = (UINT16)(ntohs(ipHdr->TotalLength) - + (ipHdr->HeaderLength << 2)); +#ifdef DBG + sum = IPPseudoChecksum((UINT32 *)&ipHdr->SourceAddress, + (UINT32 *)&ipHdr->DestinationAddress, + proto, elem->hdrInfo.l4PayLoad); +#endif + } else { + PIPV6_HEADER ipv6Hdr = (PIPV6_HEADER)(dst + + hdrInfo->l3Offset); + elem->hdrInfo.l4PayLoad = + (UINT16)(ntohs(ipv6Hdr->PayloadLength) + + hdrInfo->l3Offset + sizeof(IPV6_HEADER) - + hdrInfo->l4Offset); + ASSERT(hdrInfo->isIPv6); +#ifdef DBG + sum = IPv6PseudoChecksum((UINT32 *)&ipv6Hdr->SourceAddress, + (UINT32 *)&ipv6Hdr->DestinationAddress, + proto, elem->hdrInfo.l4PayLoad); +#endif + } +#ifdef DBG + ptr = (UINT16 *)(dst + hdrInfo->l4Offset + + (elem->hdrInfo.tcpCsumNeeded ? + TCP_CSUM_OFFSET : UDP_CSUM_OFFSET)); + ASSERT(*ptr == sum); +#endif + } + } + /* + * Finally insert VLAN tag + */ + if (extraLen) { + dst = elem->packet.data + userDataLen; + src = dst + extraLen; + ((UINT32 *)dst)[0] = ((UINT32 *)src)[0]; + ((UINT32 *)dst)[1] = ((UINT32 *)src)[1]; + ((UINT32 *)dst)[2] = ((UINT32 *)src)[2]; + dst += 12; + ((UINT16 *)dst)[0] = htons(0x8100); + ((UINT16 *)dst)[1] = htons(vlanInfo.TagHeader.VlanId | + (vlanInfo.TagHeader.UserPriority << 13)); + elem->hdrInfo.l3Offset += VLAN_TAG_SIZE; + elem->hdrInfo.l4Offset += VLAN_TAG_SIZE; + ovsUserStats.vlanInsert++; + } + + return elem; +} + + +VOID +OvsQueuePackets(UINT32 queueId, + PLIST_ENTRY packetList, + UINT32 numElems) +{ + POVS_USER_PACKET_QUEUE queue = OvsGetQueue(queueId); + POVS_PACKET_QUEUE_ELEM elem; + PIRP irp = NULL; + PLIST_ENTRY link; + UINT32 num = 0; + + OVS_LOG_LOUD("Enter: queueId %u, numELems: %u", + queueId, numElems); + if (queue == NULL) { + goto cleanup; + } + + NdisAcquireSpinLock(&queue->queueLock); + if (queue->instance == NULL) { + NdisReleaseSpinLock(&queue->queueLock); + goto cleanup; + } else { + 
OvsAppendList(&queue->packetList, packetList); + queue->numPackets += numElems; + } + if (queue->pendingIrp) { + PDRIVER_CANCEL cancelRoutine; + irp = queue->pendingIrp; + queue->pendingIrp = NULL; + cancelRoutine = IoSetCancelRoutine(irp, NULL); + if (cancelRoutine == NULL) { + irp = NULL; + } + } + NdisReleaseSpinLock(&queue->queueLock); + if (irp) { + OvsCompleteIrpRequest(irp, 0, STATUS_SUCCESS); + } + +cleanup: + while (!IsListEmpty(packetList)) { + link = RemoveHeadList(packetList); + elem = CONTAINING_RECORD(link, OVS_PACKET_QUEUE_ELEM, link); + OvsFreeMemory(elem); + num++; + } + OVS_LOG_LOUD("Exit: drop %u packets", num); +} + + +/* + *---------------------------------------------------------------------------- + * OvsCreateAndAddPackets -- + * + * Create a packet and forwarded to user space. + * + * This function would fragment packet if needed, and queue + * each segment to user space. + *---------------------------------------------------------------------------- + */ +NTSTATUS +OvsCreateAndAddPackets(UINT32 queueId, + PVOID userData, + UINT32 userDataLen, + UINT32 cmd, + UINT32 inPort, + OvsIPv4TunnelKey *tunnelKey, + PNET_BUFFER_LIST nbl, + BOOLEAN isRecv, + POVS_PACKET_HDR_INFO hdrInfo, + POVS_SWITCH_CONTEXT switchContext, + LIST_ENTRY *list, + UINT32 *num) +{ + POVS_PACKET_QUEUE_ELEM elem; + PNET_BUFFER_LIST newNbl = NULL; + PNET_BUFFER nb; + + if (hdrInfo->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + UINT32 packetLength; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(nbl, TcpLargeSendNetBufferListInfo); + nb = NET_BUFFER_LIST_FIRST_NB(nbl); + packetLength = NET_BUFFER_DATA_LENGTH(nb); + + OVS_LOG_TRACE("MSS %u packet len %u", + tsoInfo.LsoV1Transmit.MSS, packetLength); + if (tsoInfo.LsoV1Transmit.MSS) { + OVS_LOG_TRACE("l4Offset %d", hdrInfo->l4Offset); + newNbl = OvsTcpSegmentNBL(switchContext, nbl, hdrInfo, + tsoInfo.LsoV1Transmit.MSS , 0); + if (newNbl == NULL) { + return NDIS_STATUS_FAILURE; + } + nbl = newNbl; + } + } + + 
nb = NET_BUFFER_LIST_FIRST_NB(nbl); + while (nb) { + elem = OvsCreateQueuePacket(queueId, userData, userDataLen, + cmd, inPort, tunnelKey, nbl, nb, + isRecv, hdrInfo); + if (elem) { + InsertTailList(list, &elem->link); + (*num)++; + } + nb = NET_BUFFER_NEXT_NB(nb); + } + if (newNbl) { + OvsCompleteNBL(switchContext, newNbl, TRUE); + } + return NDIS_STATUS_SUCCESS; +} diff --git a/datapath-windows/ovsext/User.h b/datapath-windows/ovsext/User.h new file mode 100644 index 000000000..ed41f14df --- /dev/null +++ b/datapath-windows/ovsext/User.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file contains structures and function definitions necessary for + * forwarding packet to user space. 
/*
 * One queue of packets waiting to be read by user space.
 *
 * In User.c, numPackets, packetList, instance and pendingIrp are read and
 * modified with queueLock held.
 */
typedef struct _OVS_USER_PACKET_QUEUE {
    UINT32 queueId;           /* presumably the index of this queue in the
                               * queue array -- TODO confirm against init */
    UINT32 numPackets;        /* number of elements linked on packetList */
    LIST_ENTRY packetList;    /* OVS_PACKET_QUEUE_ELEMs, chained via 'link' */
    PVOID instance;           /* open instance the queue is bound to; NULL
                               * means the queue is not in use */
    PIRP pendingIrp;          /* IRP parked while the queue is empty; it is
                               * completed when packets are queued */
    NDIS_SPIN_LOCK queueLock; /* guards the fields above */
} OVS_USER_PACKET_QUEUE, *POVS_USER_PACKET_QUEUE;
OvsReadDpIoctl(PFILE_OBJECT fileObject, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsExecuteDpIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 outputLength); +NTSTATUS OvsPurgeDpIoctl(PFILE_OBJECT fileObject); + +NTSTATUS OvsWaitDpIoctl(PIRP irp, PFILE_OBJECT fileObject); + +#endif /* __USER_H_ */ diff --git a/datapath-windows/ovsext/Util.c b/datapath-windows/ovsext/Util.c new file mode 100644 index 000000000..51360a8f9 --- /dev/null +++ b/datapath-windows/ovsext/Util.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "precomp.h" +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_OTHERS + +#include "Debug.h" + +extern NDIS_HANDLE gOvsExtDriverHandle; + +VOID * +OvsAllocateMemory(size_t size) +{ + OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); + return NdisAllocateMemoryWithTagPriority(gOvsExtDriverHandle, + (UINT32)size, OVS_MEMORY_TAG, NormalPoolPriority); +} + +VOID * +OvsAllocateAlignedMemory(size_t size, UINT16 align) +{ + OVS_VERIFY_IRQL_LE(DISPATCH_LEVEL); + + ASSERT((align == 8) || (align == 16)); + + if ((align == 8) || (align == 16)) { + /* + * XXX: NdisAllocateMemory*() functions don't talk anything about + * alignment. Hence using ExAllocatePool*(); + */ + return (VOID *)ExAllocatePoolWithTagPriority(NonPagedPool, size, + OVS_MEMORY_TAG, + NormalPoolPriority); + } + + /* Invalid user input. 
*/ + return NULL; +} + +VOID +OvsFreeMemory(VOID *ptr) +{ + ASSERT(ptr); + NdisFreeMemoryWithTagPriority(gOvsExtDriverHandle, ptr, OVS_MEMORY_TAG); +} + +VOID +OvsFreeAlignedMemory(VOID *ptr) +{ + ASSERT(ptr); + ExFreePoolWithTag(ptr, OVS_MEMORY_TAG); +} + +VOID +OvsAppendList(PLIST_ENTRY dst, PLIST_ENTRY src) +{ + PLIST_ENTRY srcFirst, srcLast, dstLast; + if (IsListEmpty(src)) { + return; + } + srcFirst = src->Flink; + srcLast = src->Blink; + dstLast = dst->Blink; + + dstLast->Flink = srcFirst; + srcFirst->Blink = dstLast; + + srcLast->Flink = dst; + dst->Blink = srcLast; + + src->Flink = src; + src->Blink = src; +} diff --git a/datapath-windows/ovsext/Util.h b/datapath-windows/ovsext/Util.h new file mode 100644 index 000000000..c45d48881 --- /dev/null +++ b/datapath-windows/ovsext/Util.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Small helper macros.
 *
 * Each expansion is fully parenthesized so it can be embedded in a larger
 * expression (e.g. 2 * MIN(a, b)) without changing its meaning.
 * Note that MIN evaluates one of its arguments twice.
 */
#define MIN(_a, _b) ((_a) > (_b) ? (_b) : (_a))
/* Element count of a true array (not of a pointer). */
#define ARRAY_SIZE(_x)  (sizeof(_x) / sizeof((_x)[0]))
#define OVS_SWITCH_PORT_ID_INVALID  ((NDIS_SWITCH_PORT_ID)(-1))
+ */ + +#include "precomp.h" +#include "Jhash.h" +#include "Switch.h" +#include "Vport.h" +#include "Event.h" +#include "User.h" +#include "Vxlan.h" +#include "IpHelper.h" +#include "Oid.h" + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_VPORT +#include "Debug.h" + +#define VPORT_NIC_ENTER(_nic) \ + OVS_LOG_TRACE("Enter: PortId: %x, NicIndex: %d", _nic->PortId, \ + _nic->NicIndex) + +#define VPORT_NIC_EXIT(_nic) \ + OVS_LOG_TRACE("Exit: PortId: %x, NicIndex: %d", _nic->PortId, \ + _nic->NicIndex) + +#define VPORT_PORT_ENTER(_port) \ + OVS_LOG_TRACE("Enter: PortId: %x", _port->PortId) + +#define VPORT_PORT_EXIT(_port) \ + OVS_LOG_TRACE("Exit: PortId: %x", _port->PortId) + +#define OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC 100 + +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; +extern PNDIS_SPIN_LOCK gOvsCtrlLock; + +static UINT32 OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, UINT32 nicIndex, + OVS_VPORT_TYPE ovsType); +static POVS_VPORT_ENTRY OvsAllocateVport(VOID); +static VOID OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +static VOID OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, PNDIS_SWITCH_NIC_PARAMETERS nicParam); +static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY + virtVport, UINT32 nicIndex); +static VOID OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, POVS_VPORT_ENTRY + virtVport, UINT32 nicIndex); +static NDIS_STATUS OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport); +static VOID OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport); +static __inline VOID OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, + ULONG sleepMicroSec); + +/* + * Functions implemented in relaton to NDIS port manipulation. 
+ */ +NDIS_STATUS +OvsCreatePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam) +{ + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + VPORT_PORT_ENTER(portParam); + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + portParam->PortId, 0); + if (vport != NULL) { + status = STATUS_DATA_NOT_ACCEPTED; + goto create_port_done; + } + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = NDIS_STATUS_RESOURCES; + goto create_port_done; + } + OvsInitVportWithPortParam(vport, portParam); + OvsInitVportCommon(switchContext, vport); + +create_port_done: + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + VPORT_PORT_EXIT(portParam); + return status; +} + +VOID +OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam) +{ + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + + VPORT_PORT_ENTER(portParam); + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + portParam->PortId, 0); + if (vport) { + /* add assertion here + */ + vport->portState = NdisSwitchPortStateTeardown; + vport->ovsState = OVS_STATE_PORT_TEAR_DOWN; + } else { + OVS_LOG_WARN("Vport not present."); + } + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + + VPORT_PORT_EXIT(portParam); +} + + + +VOID +OvsDeletePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam) +{ + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + + VPORT_PORT_ENTER(portParam); + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + portParam->PortId, 0); + if (vport) { + OvsRemoveAndDeleteVport(switchContext, vport); + } else { + OVS_LOG_WARN("Vport not present."); + } + NdisReleaseRWLock(switchContext->dispatchLock, 
&lockState); + + VPORT_PORT_EXIT(portParam); +} + + +/* + * Functions implemented in relaton to NDIS NIC manipulation. + */ +NDIS_STATUS +OvsCreateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + POVS_VPORT_ENTRY vport; + UINT32 portNo = 0; + UINT32 event = 0; + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + + LOCK_STATE_EX lockState; + + VPORT_NIC_ENTER(nicParam); + + /* Wait for lists to be initialized. */ + OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); + + if (!switchContext->isActivated) { + OVS_LOG_WARN("Switch is not activated yet."); + /* Veto the creation of nic */ + status = NDIS_STATUS_NOT_SUPPORTED; + goto done; + } + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, nicParam->PortId, 0); + if (vport == NULL) { + OVS_LOG_ERROR("Create NIC without Switch Port," + " PortId: %x, NicIndex: %d", + nicParam->PortId, nicParam->NicIndex); + status = NDIS_STATUS_INVALID_PARAMETER; + goto add_nic_done; + } + + if (nicParam->NicType == NdisSwitchNicTypeExternal && + nicParam->NicIndex != 0) { + POVS_VPORT_ENTRY virtVport = + (POVS_VPORT_ENTRY)switchContext->externalVport; + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = NDIS_STATUS_RESOURCES; + goto add_nic_done; + } + OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex); + status = OvsInitVportCommon(switchContext, vport); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(vport); + goto add_nic_done; + } + } + OvsInitVportWithNicParam(switchContext, vport, nicParam); + portNo = vport->portNo; + if (vport->ovsState == OVS_STATE_CONNECTED) { + event = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP; + } else if (vport->ovsState == OVS_STATE_NIC_CREATED) { + event = OVS_EVENT_CONNECT; + } + +add_nic_done: + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + if (portNo && event) { + OvsPostEvent(portNo, event); + } + +done: + 
VPORT_NIC_EXIT(nicParam); + OVS_LOG_TRACE("Exit: status %8x.\n", status); + + return status; +} + + +/* Mark already created NIC as connected. */ +VOID +OvsConnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + LOCK_STATE_EX lockState; + POVS_VPORT_ENTRY vport; + UINT32 portNo = 0; + + VPORT_NIC_ENTER(nicParam); + + /* Wait for lists to be initialized. */ + OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); + + if (!switchContext->isActivated) { + OVS_LOG_WARN("Switch is not activated yet."); + goto done; + } + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + + if (!vport) { + OVS_LOG_WARN("Vport not present."); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + ASSERT(0); + goto done; + } + + vport->ovsState = OVS_STATE_CONNECTED; + vport->nicState = NdisSwitchNicStateConnected; + portNo = vport->portNo; + + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + + OvsPostEvent(portNo, OVS_EVENT_LINK_UP); + + if (nicParam->NicType == NdisSwitchNicTypeInternal) { + OvsInternalAdapterUp(portNo, &nicParam->NetCfgInstanceId); + } + +done: + VPORT_NIC_EXIT(nicParam); +} + +VOID +OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + + UINT32 status = 0, portNo = 0; + + VPORT_NIC_ENTER(nicParam); + + /* Wait for lists to be initialized. 
*/ + OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); + + if (!switchContext->isActivated) { + OVS_LOG_WARN("Switch is not activated yet."); + goto update_nic_done; + } + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + if (vport == NULL) { + OVS_LOG_WARN("Vport search failed."); + goto update_nic_done; + } + switch (nicParam->NicType) { + case NdisSwitchNicTypeExternal: + case NdisSwitchNicTypeInternal: + RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId, + sizeof (GUID)); + break; + case NdisSwitchNicTypeSynthetic: + case NdisSwitchNicTypeEmulated: + if (!RtlEqualMemory(vport->vmMacAddress, nicParam->VMMacAddress, + sizeof (vport->vmMacAddress))) { + status |= OVS_EVENT_MAC_CHANGE; + RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress, + sizeof (vport->vmMacAddress)); + } + break; + default: + ASSERT(0); + } + if (!RtlEqualMemory(vport->permMacAddress, nicParam->PermanentMacAddress, + sizeof (vport->permMacAddress))) { + RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress, + sizeof (vport->permMacAddress)); + status |= OVS_EVENT_MAC_CHANGE; + } + if (!RtlEqualMemory(vport->currMacAddress, nicParam->CurrentMacAddress, + sizeof (vport->currMacAddress))) { + RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress, + sizeof (vport->currMacAddress)); + status |= OVS_EVENT_MAC_CHANGE; + } + + if (vport->mtu != nicParam->MTU) { + vport->mtu = nicParam->MTU; + status |= OVS_EVENT_MTU_CHANGE; + } + vport->numaNodeId = nicParam->NumaNodeId; + portNo = vport->portNo; + + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + if (status && portNo) { + OvsPostEvent(portNo, status); + } +update_nic_done: + VPORT_NIC_EXIT(nicParam); +} + + +VOID +OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + POVS_VPORT_ENTRY vport; + UINT32 portNo = 
0; + LOCK_STATE_EX lockState; + BOOLEAN isInternalPort = FALSE; + + VPORT_NIC_ENTER(nicParam); + + /* Wait for lists to be initialized. */ + OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); + + if (!switchContext->isActivated) { + OVS_LOG_WARN("Switch is not activated yet."); + goto done; + } + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + + if (!vport) { + OVS_LOG_WARN("Vport not present."); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + goto done; + } + + vport->nicState = NdisSwitchNicStateDisconnected; + vport->ovsState = OVS_STATE_NIC_CREATED; + portNo = vport->portNo; + + if (vport->ovsType == OVSWIN_VPORT_TYPE_INTERNAL) { + isInternalPort = TRUE; + } + + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + + OvsPostEvent(portNo, OVS_EVENT_LINK_DOWN); + + if (isInternalPort) { + OvsInternalAdapterDown(); + } + +done: + VPORT_NIC_EXIT(nicParam); +} + + +VOID +OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + LOCK_STATE_EX lockState; + POVS_VPORT_ENTRY vport; + UINT32 portNo = 0; + + VPORT_NIC_ENTER(nicParam); + /* Wait for lists to be initialized. 
*/ + OvsWaitActivate(switchContext, OVS_VPORT_DEFAULT_WAIT_TIME_MICROSEC); + + if (!switchContext->isActivated) { + OVS_LOG_WARN("Switch is not activated yet."); + goto done; + } + + NdisAcquireRWLockWrite(switchContext->dispatchLock, &lockState, 0); + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + + if (!vport) { + OVS_LOG_WARN("Vport not present."); + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + goto done; + } + + portNo = vport->portNo; + if (vport->portType == NdisSwitchPortTypeExternal && + vport->nicIndex != 0) { + OvsRemoveAndDeleteVport(switchContext, vport); + } + vport->nicState = NdisSwitchNicStateUnknown; + vport->ovsState = OVS_STATE_PORT_CREATED; + + NdisReleaseRWLock(switchContext->dispatchLock, &lockState); + OvsPostEvent(portNo, OVS_EVENT_DISCONNECT); + +done: + VPORT_NIC_EXIT(nicParam); +} + + +/* + * OVS Vport related functionality. + */ +POVS_VPORT_ENTRY +OvsFindVportByPortNo(POVS_SWITCH_CONTEXT switchContext, + UINT32 portNo) +{ + if (OVS_VPORT_INDEX(portNo) < OVS_MAX_VPORT_ARRAY_SIZE) { + if (OVS_IS_VPORT_ENTRY_NULL(switchContext, OVS_VPORT_INDEX(portNo))) { + return NULL; + } else { + POVS_VPORT_ENTRY vport; + vport = (POVS_VPORT_ENTRY) + switchContext->vportArray[OVS_VPORT_INDEX(portNo)]; + return vport->portNo == portNo ? 
vport : NULL; + } + } + return NULL; +} + + +POVS_VPORT_ENTRY +OvsFindVportByOvsName(POVS_SWITCH_CONTEXT switchContext, + CHAR *name, + UINT32 length) +{ + POVS_VPORT_ENTRY vport; + PLIST_ENTRY head, link; + UINT32 hash = OvsJhashBytes((const VOID *)name, length, OVS_HASH_BASIS); + head = &(switchContext->nameHashArray[hash & OVS_VPORT_MASK]); + LIST_FORALL(head, link) { + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, nameLink); + if (vport->ovsNameLen == length && + RtlEqualMemory(name, vport->ovsName, length)) { + return vport; + } + } + return NULL; +} + +POVS_VPORT_ENTRY +OvsFindVportByPortIdAndNicIndex(POVS_SWITCH_CONTEXT switchContext, + NDIS_SWITCH_PORT_ID portId, + NDIS_SWITCH_NIC_INDEX index) +{ + if (portId == switchContext->externalPortId) { + if (index == 0) { + return (POVS_VPORT_ENTRY)switchContext->externalVport; + } else if (index > OVS_MAX_PHYS_ADAPTERS) { + return NULL; + } + if (OVS_IS_VPORT_ENTRY_NULL(switchContext, + index + OVS_EXTERNAL_VPORT_START)) { + return NULL; + } else { + return (POVS_VPORT_ENTRY)switchContext->vportArray[ + index + OVS_EXTERNAL_VPORT_START]; + } + } else if (switchContext->internalPortId == portId) { + return (POVS_VPORT_ENTRY)switchContext->internalVport; + } else { + PLIST_ENTRY head, link; + POVS_VPORT_ENTRY vport; + UINT32 hash; + hash = OvsJhashWords((UINT32 *)&portId, 1, OVS_HASH_BASIS); + head = &(switchContext->portHashArray[hash & OVS_VPORT_MASK]); + LIST_FORALL(head, link) { + vport = CONTAINING_RECORD(link, OVS_VPORT_ENTRY, portLink); + if (portId == vport->portId && index == vport->nicIndex) { + return vport; + } + } + return NULL; + } +} + +static UINT32 +OvsGetVportNo(POVS_SWITCH_CONTEXT switchContext, + UINT32 nicIndex, + OVS_VPORT_TYPE ovsType) +{ + UINT32 index = 0xffffff, i = 0; + UINT64 gen; + + switch (ovsType) { + case OVSWIN_VPORT_TYPE_EXTERNAL: + if (nicIndex == 0) { + return 0; // not a valid portNo + } else if (nicIndex > OVS_MAX_PHYS_ADAPTERS) { + return 0; + } else { + index = nicIndex + 
OVS_EXTERNAL_VPORT_START; + } + break; + case OVSWIN_VPORT_TYPE_INTERNAL: + index = OVS_INTERNAL_VPORT_DEFAULT_INDEX; + break; + case OVSWIN_VPORT_TYPE_SYNTHETIC: + case OVSWIN_VPORT_TYPE_EMULATED: + index = switchContext->lastPortIndex + 1; + if (index == OVS_MAX_VPORT_ARRAY_SIZE) { + index = OVS_VM_VPORT_START; + } + while (!OVS_IS_VPORT_ENTRY_NULL(switchContext, index) && + i < (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) { + index++; + i++; + if (index == OVS_MAX_VPORT_ARRAY_SIZE) { + index = OVS_VM_VPORT_START; + } + } + if (i == (OVS_MAX_VPORT_ARRAY_SIZE - OVS_VM_VPORT_START)) { + return 0; // not available + } + switchContext->lastPortIndex = index; + break; + case OVSWIN_VPORT_TYPE_GRE: + index = OVS_GRE_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_GRE64: + index = OVS_GRE64_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_VXLAN: + index = OVS_VXLAN_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_LOCAL: + default: + ASSERT(0); + } + if (index > OVS_MAX_VPORT_ARRAY_SIZE) { + return 0; + } + gen = (UINT64)switchContext->vportArray[index]; + if (gen > 0xff) { + return 0; + } else if (gen == 0) { + gen++; + } + return OVS_VPORT_PORT_NO(index, (UINT32)gen); +} + + +static POVS_VPORT_ENTRY +OvsAllocateVport(VOID) +{ + POVS_VPORT_ENTRY vport; + vport = (POVS_VPORT_ENTRY)OvsAllocateMemory(sizeof (OVS_VPORT_ENTRY)); + if (vport == NULL) { + return NULL; + } + RtlZeroMemory(vport, sizeof (OVS_VPORT_ENTRY)); + vport->ovsState = OVS_STATE_UNKNOWN; + return vport; +} + +static VOID +OvsInitVportWithPortParam(POVS_VPORT_ENTRY vport, + PNDIS_SWITCH_PORT_PARAMETERS portParam) +{ + vport->isValidationPort = portParam->IsValidationPort; + vport->portType = portParam->PortType; + vport->portState = portParam->PortState; + vport->portId = portParam->PortId; + vport->nicState = NdisSwitchNicStateUnknown; + + switch (vport->portType) { + case NdisSwitchPortTypeExternal: + vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL; + break; + case NdisSwitchPortTypeInternal: + vport->ovsType 
= OVSWIN_VPORT_TYPE_INTERNAL; + break; + case NdisSwitchPortTypeSynthetic: + vport->ovsType = OVSWIN_VPORT_TYPE_SYNTHETIC; + break; + case NdisSwitchPortTypeEmulated: + vport->ovsType = OVSWIN_VPORT_TYPE_EMULATED; + break; + } + RtlCopyMemory(&vport->portName, &portParam->PortName, + sizeof (NDIS_SWITCH_PORT_NAME)); + switch (vport->portState) { + case NdisSwitchPortStateCreated: + vport->ovsState = OVS_STATE_PORT_CREATED; + break; + case NdisSwitchPortStateTeardown: + vport->ovsState = OVS_STATE_PORT_TEAR_DOWN; + break; + case NdisSwitchPortStateDeleted: + vport->ovsState = OVS_STATE_PORT_DELETED; + break; + } +} + + +static VOID +OvsInitVportWithNicParam(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport, + PNDIS_SWITCH_NIC_PARAMETERS nicParam) +{ + ASSERT(vport->portId == nicParam->PortId); + ASSERT(vport->ovsState == OVS_STATE_PORT_CREATED); + + UNREFERENCED_PARAMETER(switchContext); + + RtlCopyMemory(vport->permMacAddress, nicParam->PermanentMacAddress, + sizeof (nicParam->PermanentMacAddress)); + RtlCopyMemory(vport->currMacAddress, nicParam->CurrentMacAddress, + sizeof (nicParam->CurrentMacAddress)); + + if (nicParam->NicType == NdisSwitchNicTypeSynthetic || + nicParam->NicType == NdisSwitchNicTypeEmulated) { + RtlCopyMemory(vport->vmMacAddress, nicParam->VMMacAddress, + sizeof (nicParam->VMMacAddress)); + RtlCopyMemory(&vport->vmName, &nicParam->VmName, + sizeof (nicParam->VmName)); + } else { + RtlCopyMemory(&vport->netCfgInstanceId, &nicParam->NetCfgInstanceId, + sizeof (nicParam->NetCfgInstanceId)); + } + RtlCopyMemory(&vport->nicName, &nicParam->NicName, + sizeof (nicParam->NicName)); + vport->mtu = nicParam->MTU; + vport->nicState = nicParam->NicState; + vport->nicIndex = nicParam->NicIndex; + vport->numaNodeId = nicParam->NumaNodeId; + + switch (vport->nicState) { + case NdisSwitchNicStateCreated: + vport->ovsState = OVS_STATE_NIC_CREATED; + break; + case NdisSwitchNicStateConnected: + vport->ovsState = OVS_STATE_CONNECTED; + break; + case 
NdisSwitchNicStateDisconnected: + vport->ovsState = OVS_STATE_NIC_CREATED; + break; + case NdisSwitchNicStateDeleted: + vport->ovsState = OVS_STATE_PORT_CREATED; + break; + } +} + +static VOID +OvsInitPhysNicVport(POVS_VPORT_ENTRY vport, + POVS_VPORT_ENTRY virtVport, + UINT32 nicIndex) +{ + vport->isValidationPort = virtVport->isValidationPort; + vport->portType = virtVport->portType; + vport->portState = virtVport->portState; + vport->portId = virtVport->portId; + vport->nicState = NdisSwitchNicStateUnknown; + vport->ovsType = OVSWIN_VPORT_TYPE_EXTERNAL; + vport->nicIndex = (NDIS_SWITCH_NIC_INDEX)nicIndex; + RtlCopyMemory(&vport->portName, &virtVport->portName, + sizeof (NDIS_SWITCH_PORT_NAME)); + vport->ovsState = OVS_STATE_PORT_CREATED; +} +static NDIS_STATUS +OvsInitVportCommon(POVS_SWITCH_CONTEXT switchContext, +POVS_VPORT_ENTRY vport) +{ + UINT32 hash; + size_t len; + if (vport->portType != NdisSwitchPortTypeExternal || + vport->nicIndex != 0) { + vport->portNo = OvsGetVportNo(switchContext, vport->nicIndex, + vport->ovsType); + if (vport->portNo == 0) { + return NDIS_STATUS_RESOURCES; + } + ASSERT(OVS_IS_VPORT_ENTRY_NULL(switchContext, + OVS_VPORT_INDEX(vport->portNo))); + + switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = vport; + } + switch (vport->portType) { + case NdisSwitchPortTypeExternal: + if (vport->nicIndex == 0) { + switchContext->externalPortId = vport->portId; + switchContext->externalVport = vport; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "external.virtualAdapter"); + } + else { + switchContext->numPhysicalNics++; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "external.%lu", (UINT32)vport->nicIndex); + } + break; + case NdisSwitchPortTypeInternal: + switchContext->internalPortId = vport->portId; + switchContext->internalVport = vport; + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "internal"); + break; + case NdisSwitchPortTypeSynthetic: + 
RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "vmNICSyn.%lx", vport->portNo); + break; + case NdisSwitchPortTypeEmulated: + RtlStringCbPrintfA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, + "vmNICEmu.%lx", vport->portNo); + break; + } + StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport->ovsNameLen = (UINT32)len; + if (vport->portType == NdisSwitchPortTypeExternal && + vport->nicIndex == 0) { + return NDIS_STATUS_SUCCESS; + } + hash = OvsJhashBytes(vport->ovsName, vport->ovsNameLen, OVS_HASH_BASIS); + InsertHeadList(&switchContext->nameHashArray[hash & OVS_VPORT_MASK], + &vport->nameLink); + hash = OvsJhashWords(&vport->portId, 1, OVS_HASH_BASIS); + InsertHeadList(&switchContext->portHashArray[hash & OVS_VPORT_MASK], + &vport->portLink); + switchContext->numVports++; + return NDIS_STATUS_SUCCESS; +} + + +static VOID +OvsRemoveAndDeleteVport(POVS_SWITCH_CONTEXT switchContext, + POVS_VPORT_ENTRY vport) +{ + UINT64 gen = vport->portNo >> 24; + switch (vport->ovsType) { + case OVSWIN_VPORT_TYPE_EXTERNAL: + if (vport->nicIndex == 0) { + ASSERT(switchContext->numPhysicalNics == 0); + switchContext->externalPortId = 0; + switchContext->externalVport = NULL; + OvsFreeMemory(vport); + return; + } else { + ASSERT(switchContext->numPhysicalNics); + switchContext->numPhysicalNics--; + } + break; + case OVSWIN_VPORT_TYPE_INTERNAL: + switchContext->internalPortId = 0; + switchContext->internalVport = NULL; + OvsInternalAdapterDown(); + break; + case OVSWIN_VPORT_TYPE_VXLAN: + OvsCleanupVxlanTunnel(vport); + break; + case OVSWIN_VPORT_TYPE_GRE: + case OVSWIN_VPORT_TYPE_GRE64: + break; + case OVSWIN_VPORT_TYPE_EMULATED: + case OVSWIN_VPORT_TYPE_SYNTHETIC: + default: + break; + } + + RemoveEntryList(&vport->nameLink); + RemoveEntryList(&vport->portLink); + gen = (gen + 1) & 0xff; + switchContext->vportArray[OVS_VPORT_INDEX(vport->portNo)] = + (PVOID)(UINT64)gen; + switchContext->numVports--; + OvsFreeMemory(vport); +} + + +NDIS_STATUS 
+OvsAddConfiguredSwitchPorts(POVS_SWITCH_CONTEXT switchContext) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + ULONG arrIndex; + PNDIS_SWITCH_PORT_PARAMETERS portParam; + PNDIS_SWITCH_PORT_ARRAY portArray = NULL; + POVS_VPORT_ENTRY vport; + + OVS_LOG_TRACE("Enter: switchContext:%p", switchContext); + + status = OvsGetPortsOnSwitch(switchContext, &portArray); + if (status != NDIS_STATUS_SUCCESS) { + goto cleanup; + } + + for (arrIndex = 0; arrIndex < portArray->NumElements; arrIndex++) { + portParam = NDIS_SWITCH_PORT_AT_ARRAY_INDEX(portArray, arrIndex); + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = NDIS_STATUS_RESOURCES; + goto cleanup; + } + OvsInitVportWithPortParam(vport, portParam); + status = OvsInitVportCommon(switchContext, vport); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(vport); + goto cleanup; + } + } +cleanup: + if (status != NDIS_STATUS_SUCCESS) { + OvsClearAllSwitchVports(switchContext); + } + + if (portArray != NULL) { + OvsFreeMemory(portArray); + } + OVS_LOG_TRACE("Exit: status: %x", status); + return status; +} + + +NDIS_STATUS +OvsInitConfiguredSwitchNics(POVS_SWITCH_CONTEXT switchContext) +{ + NDIS_STATUS status = NDIS_STATUS_SUCCESS; + PNDIS_SWITCH_NIC_ARRAY nicArray = NULL; + ULONG arrIndex; + PNDIS_SWITCH_NIC_PARAMETERS nicParam; + POVS_VPORT_ENTRY vport; + + OVS_LOG_TRACE("Enter: switchContext: %p", switchContext); + /* + * Now, get NIC list. + */ + status = OvsGetNicsOnSwitch(switchContext, &nicArray); + if (status != NDIS_STATUS_SUCCESS) { + goto cleanup; + } + for (arrIndex = 0; arrIndex < nicArray->NumElements; ++arrIndex) { + + nicParam = NDIS_SWITCH_NIC_AT_ARRAY_INDEX(nicArray, arrIndex); + + /* + * XXX: Check if the port is configured with a VLAN. Disallow such a + * configuration, since we don't support tag-in-tag. + */ + + /* + * XXX: Check if the port is connected to a VF. Disconnect the VF in + * such a case. 
+ */ + + if (nicParam->NicType == NdisSwitchNicTypeExternal && + nicParam->NicIndex != 0) { + POVS_VPORT_ENTRY virtVport = + (POVS_VPORT_ENTRY)switchContext->externalVport; + vport = OvsAllocateVport(); + if (vport) { + OvsInitPhysNicVport(vport, virtVport, nicParam->NicIndex); + status = OvsInitVportCommon(switchContext, vport); + if (status != NDIS_STATUS_SUCCESS) { + OvsFreeMemory(vport); + vport = NULL; + } + } + } else { + vport = OvsFindVportByPortIdAndNicIndex(switchContext, + nicParam->PortId, + nicParam->NicIndex); + } + if (vport == NULL) { + OVS_LOG_ERROR("Fail to allocate vport"); + continue; + } + OvsInitVportWithNicParam(switchContext, vport, nicParam); + if (nicParam->NicType == NdisSwitchNicTypeInternal) { + OvsInternalAdapterUp(vport->portNo, &nicParam->NetCfgInstanceId); + } + } +cleanup: + + if (nicArray != NULL) { + OvsFreeMemory(nicArray); + } + OVS_LOG_TRACE("Exit: status: %x", status); + return status; +} + +VOID +OvsClearAllSwitchVports(POVS_SWITCH_CONTEXT switchContext) +{ + UINT32 i; + + for (i = 0; i < OVS_MAX_VPORT_ARRAY_SIZE; i++) { + if (!OVS_IS_VPORT_ENTRY_NULL(switchContext, i)) { + OvsRemoveAndDeleteVport(switchContext, + (POVS_VPORT_ENTRY)switchContext->vportArray[i]); + } + } + if (switchContext->externalVport) { + OvsRemoveAndDeleteVport(switchContext, + (POVS_VPORT_ENTRY)switchContext->externalVport); + } +} + +NTSTATUS +OvsDumpVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 numVports, count; + UINT32 dpNo, i; + UINT32 *outPtr; + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + + if (inputLength < sizeof (UINT32)) { + return STATUS_INVALID_PARAMETER; + } + dpNo = *(UINT32 *)inputBuffer; + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + return STATUS_INVALID_PARAMETER; + } + /* + * We should hold SwitchContext RW lock + */ + + 
NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + numVports = outputLength/sizeof (UINT32); + numVports = MIN(gOvsSwitchContext->numVports, numVports); + outPtr = (UINT32 *)outputBuffer; + for (i = 0, count = 0; + i < OVS_MAX_VPORT_ARRAY_SIZE && count < numVports; i++) { + vport = (POVS_VPORT_ENTRY)gOvsSwitchContext->vportArray[i]; + if (OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, i)) { + continue; + } + if (vport->ovsState == OVS_STATE_CONNECTED || + vport->ovsState == OVS_STATE_NIC_CREATED) { + *outPtr = vport->portNo; + outPtr++; + count++; + } + } + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + *replyLen = count * sizeof (UINT32); + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsGetVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + UINT32 dpNo; + POVS_VPORT_GET get; + POVS_VPORT_INFO info; + POVS_VPORT_ENTRY vport; + size_t len; + LOCK_STATE_EX lockState; + + if (inputLength < sizeof (OVS_VPORT_GET) || + outputLength < sizeof (OVS_VPORT_INFO)) { + return STATUS_INVALID_PARAMETER; + } + get = (POVS_VPORT_GET)inputBuffer; + dpNo = get->dpNo; + info = (POVS_VPORT_INFO)outputBuffer; + RtlZeroMemory(info, sizeof (POVS_VPORT_INFO)); + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + return STATUS_INVALID_PARAMETER; + } + + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (get->portNo == 0) { + StringCbLengthA(get->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); + } + if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && + vport->ovsState != OVS_STATE_NIC_CREATED)) { + 
NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + /* + * XXX Change to NO DEVICE + */ + return STATUS_DEVICE_DOES_NOT_EXIST; + } + info->dpNo = dpNo; + info->portNo = vport->portNo; + info->type = vport->ovsType; + RtlCopyMemory(info->macAddress, vport->permMacAddress, + sizeof (vport->permMacAddress)); + RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 1); + + info->rxPackets = vport->stats.rxPackets; + info->rxBytes = vport->stats.rxBytes; + info->txPackets = vport->stats.txPackets; + info->txBytes = vport->stats.txBytes; + info->rxErrors = vport->errStats.rxErrors; + info->txErrors = vport->errStats.txErrors; + info->rxDropped = vport->errStats.rxDropped; + info->txDropped = vport->errStats.txDropped; + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + *replyLen = sizeof (OVS_VPORT_INFO); + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsInitTunnelVport(POVS_VPORT_ENTRY vport, + POVS_VPORT_ADD_REQUEST addReq) +{ + size_t len; + NTSTATUS status = STATUS_SUCCESS; + + vport->isValidationPort = FALSE; + vport->ovsType = addReq->type; + vport->ovsState = OVS_STATE_PORT_CREATED; + RtlCopyMemory(vport->ovsName, addReq->name, OVS_MAX_PORT_NAME_LENGTH); + vport->ovsName[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; + StringCbLengthA(vport->ovsName, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport->ovsNameLen = (UINT32)len; + switch (addReq->type) { + case OVSWIN_VPORT_TYPE_GRE: + break; + case OVSWIN_VPORT_TYPE_GRE64: + break; + case OVSWIN_VPORT_TYPE_VXLAN: + status = OvsInitVxlanTunnel(vport, addReq); + break; + default: + ASSERT(0); + } + return status; +} + +NTSTATUS +OvsAddVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_VPORT_INFO vportInfo; + POVS_VPORT_ADD_REQUEST addReq; + POVS_VPORT_ENTRY vport; + LOCK_STATE_EX lockState; + UINT32 index; + UINT32 
portNo; + + OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", + inputLength, outputLength); + if (inputLength < sizeof (OVS_VPORT_ADD_REQUEST) || + outputLength < sizeof (OVS_VPORT_INFO)) { + status = STATUS_INVALID_PARAMETER; + goto vport_add_done; + } + addReq = (POVS_VPORT_ADD_REQUEST)inputBuffer; + addReq->name[OVS_MAX_PORT_NAME_LENGTH - 1] = 0; + + switch (addReq->type) { + case OVSWIN_VPORT_TYPE_GRE: + index = OVS_GRE_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_GRE64: + index = OVS_GRE64_VPORT_INDEX; + break; + case OVSWIN_VPORT_TYPE_VXLAN: + index = OVS_VXLAN_VPORT_INDEX; + break; + default: + status = STATUS_NOT_SUPPORTED; + goto vport_add_done; + } + + vport = (POVS_VPORT_ENTRY)OvsAllocateVport(); + if (vport == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + goto vport_add_done; + } + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != addReq->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + OvsFreeMemory(vport); + goto vport_add_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (!OVS_IS_VPORT_ENTRY_NULL(gOvsSwitchContext, index)) { + status = STATUS_DEVICE_BUSY; + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsFreeMemory(vport); + goto vport_add_done; + } + + status = OvsInitTunnelVport(vport, addReq); + if (status != STATUS_SUCCESS) { + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsFreeMemory(vport); + goto vport_add_done; + } + + status = OvsInitVportCommon(gOvsSwitchContext, vport); + ASSERT(status == NDIS_STATUS_SUCCESS); + + vport->ovsState = OVS_STATE_CONNECTED; + vport->nicState = NdisSwitchNicStateConnected; + + vportInfo = (POVS_VPORT_INFO)outputBuffer; + + RtlZeroMemory(vportInfo, sizeof (POVS_VPORT_INFO)); + vportInfo->dpNo = gOvsSwitchContext->dpNo; + 
vportInfo->portNo = vport->portNo; + vportInfo->type = vport->ovsType; + RtlCopyMemory(vportInfo->name, vport->ovsName, vport->ovsNameLen + 1); + portNo = vport->portNo; + + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + OvsPostEvent(portNo, OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP); + *replyLen = sizeof (OVS_VPORT_INFO); + status = STATUS_SUCCESS; +vport_add_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + +NTSTATUS +OvsDelVportIoctl(PVOID inputBuffer, + UINT32 inputLength, + UINT32 *replyLen) +{ + NTSTATUS status = STATUS_SUCCESS; + POVS_VPORT_DELETE_REQUEST delReq; + LOCK_STATE_EX lockState; + POVS_VPORT_ENTRY vport; + size_t len; + UINT32 portNo = 0; + + OVS_LOG_TRACE("Enter: inputLength: %u", inputLength); + + if (inputLength < sizeof (OVS_VPORT_DELETE_REQUEST)) { + status = STATUS_INVALID_PARAMETER; + goto vport_del_done; + } + delReq = (POVS_VPORT_DELETE_REQUEST)inputBuffer; + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != delReq->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + goto vport_del_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (delReq->portNo == 0) { + StringCbLengthA(delReq->name, OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, delReq->name, + (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, delReq->portNo); + } + if (vport) { + OVS_LOG_INFO("delete vport: %s, portNo: %x", vport->ovsName, + vport->portNo); + portNo = vport->portNo; + OvsRemoveAndDeleteVport(gOvsSwitchContext, vport); + } else { + status = STATUS_DEVICE_DOES_NOT_EXIST; + } + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (vport) { + OvsPostEvent(portNo, OVS_EVENT_DISCONNECT | 
OVS_EVENT_LINK_DOWN); + } +vport_del_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + +NTSTATUS +OvsConvertIfCountedStrToAnsiStr(PIF_COUNTED_STRING wStr, + CHAR *str, + UINT16 maxStrLen) +{ + ANSI_STRING astr; + UNICODE_STRING ustr; + NTSTATUS status; + UINT32 size; + + ustr.Buffer = wStr->String; + ustr.Length = wStr->Length; + ustr.MaximumLength = IF_MAX_STRING_SIZE; + + astr.Buffer = str; + astr.MaximumLength = maxStrLen; + astr.Length = 0; + + size = RtlUnicodeStringToAnsiSize(&ustr); + if (size > maxStrLen) { + return STATUS_BUFFER_OVERFLOW; + } + + status = RtlUnicodeStringToAnsiString(&astr, &ustr, FALSE); + + ASSERT(status == STATUS_SUCCESS); + if (status != STATUS_SUCCESS) { + return status; + } + ASSERT(astr.Length <= maxStrLen); + str[astr.Length] = 0; + return STATUS_SUCCESS; +} + + +NTSTATUS +OvsGetExtInfoIoctl(PVOID inputBuffer, + UINT32 inputLength, + PVOID outputBuffer, + UINT32 outputLength, + UINT32 *replyLen) +{ + POVS_VPORT_GET get; + POVS_VPORT_EXT_INFO info; + POVS_VPORT_ENTRY vport; + size_t len; + LOCK_STATE_EX lockState; + NTSTATUS status = STATUS_SUCCESS; + NDIS_SWITCH_NIC_NAME nicName; + NDIS_VM_NAME vmName; + BOOLEAN doConvert = FALSE; + + OVS_LOG_TRACE("Enter: inputLength: %u, outputLength: %u", + inputLength, outputLength); + + if (inputLength < sizeof (OVS_VPORT_GET) || + outputLength < sizeof (OVS_VPORT_EXT_INFO)) { + status = STATUS_INVALID_PARAMETER; + goto ext_info_done; + } + get = (POVS_VPORT_GET)inputBuffer; + info = (POVS_VPORT_EXT_INFO)outputBuffer; + RtlZeroMemory(info, sizeof (POVS_VPORT_EXT_INFO)); + + NdisAcquireSpinLock(gOvsCtrlLock); + if (gOvsSwitchContext == NULL || + gOvsSwitchContext->dpNo != get->dpNo) { + NdisReleaseSpinLock(gOvsCtrlLock); + status = STATUS_INVALID_PARAMETER; + goto ext_info_done; + } + NdisAcquireRWLockRead(gOvsSwitchContext->dispatchLock, &lockState, + NDIS_RWL_AT_DISPATCH_LEVEL); + if (get->portNo == 0) { + StringCbLengthA(get->name, 
OVS_MAX_PORT_NAME_LENGTH - 1, &len); + vport = OvsFindVportByOvsName(gOvsSwitchContext, get->name, + (UINT32)len); + } else { + vport = OvsFindVportByPortNo(gOvsSwitchContext, get->portNo); + } + if (vport == NULL || (vport->ovsState != OVS_STATE_CONNECTED && + vport->ovsState != OVS_STATE_NIC_CREATED)) { + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (get->portNo) { + OVS_LOG_WARN("vport %u does not exist any more", get->portNo); + } else { + OVS_LOG_WARN("vport %s does not exist any more", get->name); + } + status = STATUS_DEVICE_DOES_NOT_EXIST; + goto ext_info_done; + } + info->dpNo = get->dpNo; + info->portNo = vport->portNo; + RtlCopyMemory(info->macAddress, vport->currMacAddress, + sizeof (vport->currMacAddress)); + RtlCopyMemory(info->permMACAddress, vport->permMacAddress, + sizeof (vport->permMacAddress)); + if (vport->ovsType == OVSWIN_VPORT_TYPE_SYNTHETIC || + vport->ovsType == OVSWIN_VPORT_TYPE_EMULATED) { + RtlCopyMemory(info->vmMACAddress, vport->vmMacAddress, + sizeof (vport->vmMacAddress)); + } + info->nicIndex = vport->nicIndex; + info->portId = vport->portId; + info->type = vport->ovsType; + info->mtu = vport->mtu; + /* + * TO be revisit XXX + */ + if (vport->ovsState == OVS_STATE_NIC_CREATED) { + info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_DOWN; + } else if (vport->ovsState == OVS_STATE_CONNECTED) { + info->status = OVS_EVENT_CONNECT | OVS_EVENT_LINK_UP; + } else { + info->status = OVS_EVENT_DISCONNECT; + } + if ((info->type == OVSWIN_VPORT_TYPE_SYNTHETIC || + info->type == OVSWIN_VPORT_TYPE_EMULATED) && + (vport->ovsState == OVS_STATE_NIC_CREATED || + vport->ovsState == OVS_STATE_CONNECTED)) { + RtlCopyMemory(&vmName, &vport->vmName, sizeof (NDIS_VM_NAME)); + RtlCopyMemory(&nicName, &vport->nicName, sizeof + (NDIS_SWITCH_NIC_NAME)); + doConvert = TRUE; + } else { + info->vmUUID[0] = 0; + info->vifUUID[0] = 0; + } + + RtlCopyMemory(info->name, vport->ovsName, vport->ovsNameLen + 
1); + NdisReleaseRWLock(gOvsSwitchContext->dispatchLock, &lockState); + NdisReleaseSpinLock(gOvsCtrlLock); + if (doConvert) { + status = OvsConvertIfCountedStrToAnsiStr(&vmName, + info->vmUUID, + OVS_MAX_VM_UUID_LEN); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to convert VM name."); + info->vmUUID[0] = 0; + } + + status = OvsConvertIfCountedStrToAnsiStr(&nicName, + info->vifUUID, + OVS_MAX_VIF_UUID_LEN); + if (status != STATUS_SUCCESS) { + OVS_LOG_INFO("Fail to convert nic name"); + info->vifUUID[0] = 0; + } + /* + * for now ignore status + */ + status = STATUS_SUCCESS; + } + *replyLen = sizeof (OVS_VPORT_EXT_INFO); + +ext_info_done: + OVS_LOG_TRACE("Exit: byteReturned: %u, status: %x", + *replyLen, status); + return status; +} + + +static __inline VOID +OvsWaitActivate(POVS_SWITCH_CONTEXT switchContext, ULONG sleepMicroSec) +{ + while ((!switchContext->isActivated) && + (!switchContext->isActivateFailed)) { + /* Wait for the switch to be active and + * the list of ports in OVS to be initialized. */ + NdisMSleep(sleepMicroSec); + } +} diff --git a/datapath-windows/ovsext/Vport.h b/datapath-windows/ovsext/Vport.h new file mode 100644 index 000000000..80bdea8ea --- /dev/null +++ b/datapath-windows/ovsext/Vport.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __VPORT_H_ +#define __VPORT_H_ 1 + +#include "Switch.h" + +/* + * A Vport, or Virtual Port, is a port on the OVS. 
It can be one of the + * following types. Some of the Vports are "real" ports on the hyper-v switch, + * and some are not: + * - VIF port (VM's NIC) + * - External Adapters (physical NIC) + * - Internal Adapter (Virtual adapter exposed on the host). + * - Tunnel ports created by OVS userspace. + */ + +typedef enum { + OVS_STATE_UNKNOWN, + OVS_STATE_PORT_CREATED, + OVS_STATE_NIC_CREATED, + OVS_STATE_CONNECTED, + OVS_STATE_PORT_TEAR_DOWN, + OVS_STATE_PORT_DELETED, +} OVS_VPORT_STATE; + +typedef struct _OVS_VPORT_STATS { + UINT64 rxBytes; + UINT64 rxPackets; + UINT64 txBytes; + UINT64 txPackets; +} OVS_VPORT_STATS; + +typedef struct _OVS_VPORT_ERR_STATS { + UINT64 rxErrors; + UINT64 txErrors; + UINT64 rxDropped; + UINT64 txDropped; +} OVS_VPORT_ERR_STATS; +/* + * Each internal, external adapter or vritual adapter has + * one vport entry. In addition, we have one vport for each + * tunnel type, such as vxlan, gre, gre64 + */ +typedef struct _OVS_VPORT_ENTRY { + LIST_ENTRY nameLink; + LIST_ENTRY portLink; + + OVS_VPORT_STATE ovsState; + OVS_VPORT_TYPE ovsType; + OVS_VPORT_STATS stats; + OVS_VPORT_ERR_STATS errStats; + UINT32 portNo; + UINT32 mtu; + CHAR ovsName[OVS_MAX_PORT_NAME_LENGTH]; + UINT32 ovsNameLen; + + PVOID priv; + NDIS_SWITCH_PORT_ID portId; + NDIS_SWITCH_NIC_INDEX nicIndex; + UINT16 numaNodeId; + NDIS_SWITCH_PORT_STATE portState; + NDIS_SWITCH_NIC_STATE nicState; + NDIS_SWITCH_PORT_TYPE portType; + BOOLEAN isValidationPort; + + UINT8 permMacAddress[MAC_ADDRESS_LEN]; + UINT8 currMacAddress[MAC_ADDRESS_LEN]; + UINT8 vmMacAddress[MAC_ADDRESS_LEN]; + + NDIS_SWITCH_PORT_NAME portName; + NDIS_SWITCH_NIC_NAME nicName; + NDIS_VM_NAME vmName; + GUID netCfgInstanceId; +} OVS_VPORT_ENTRY, *POVS_VPORT_ENTRY; + +struct _OVS_SWITCH_CONTEXT; + +#define OVS_IS_VPORT_ENTRY_NULL(_SwitchContext, _i) \ + ((UINT64)(_SwitchContext)->vportArray[_i] <= 0xff) + +POVS_VPORT_ENTRY +OvsFindVportByPortNo(struct _OVS_SWITCH_CONTEXT *switchContext, + UINT32 portNo); +POVS_VPORT_ENTRY 
+OvsFindVportByOvsName(struct _OVS_SWITCH_CONTEXT *switchContext, + CHAR *name, UINT32 length); +POVS_VPORT_ENTRY +OvsFindVportByPortIdAndNicIndex(struct _OVS_SWITCH_CONTEXT *switchContext, + NDIS_SWITCH_PORT_ID portId, + NDIS_SWITCH_NIC_INDEX index); + +NDIS_STATUS OvsAddConfiguredSwitchPorts(struct _OVS_SWITCH_CONTEXT *switchContext); +NDIS_STATUS OvsInitConfiguredSwitchNics(struct _OVS_SWITCH_CONTEXT *switchContext); + +VOID OvsClearAllSwitchVports(struct _OVS_SWITCH_CONTEXT *switchContext); + +NTSTATUS OvsDumpVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsGetVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsAddVportIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NTSTATUS OvsDelVportIoctl(PVOID inputBuffer, UINT32 inputLength, + UINT32 *replyLen); +NTSTATUS OvsGetExtInfoIoctl(PVOID inputBuffer, UINT32 inputLength, + PVOID outputBuffer, UINT32 outputLength, + UINT32 *replyLen); +NDIS_STATUS OvsCreateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +NDIS_STATUS OvsCreatePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsTeardownPort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsDeletePort(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_PORT_PARAMETERS portParam); +VOID OvsConnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID OvsUpdateNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID OvsDeleteNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); +VOID OvsDisconnectNic(POVS_SWITCH_CONTEXT switchContext, + PNDIS_SWITCH_NIC_PARAMETERS nicParam); + +static __inline BOOLEAN +OvsIsTunnelVportType(OVS_VPORT_TYPE ovsType) +{ + return ovsType == 
OVSWIN_VPORT_TYPE_VXLAN || + ovsType == OVSWIN_VPORT_TYPE_GRE || + ovsType == OVSWIN_VPORT_TYPE_GRE64; +} + +static __inline BOOLEAN +OvsIsInternalVportType(OVS_VPORT_TYPE ovsType) +{ + return ovsType == OVSWIN_VPORT_TYPE_INTERNAL; +} + +static __inline BOOLEAN +OvsIsTunnelVportNo(UINT32 portNo) +{ + UINT32 idx = OVS_VPORT_INDEX(portNo); + return (idx >= OVS_TUNNEL_INDEX_START && idx <= OVS_TUNNEL_INDEX_END); +} + +static __inline BOOLEAN +OvsIsVifVportNo(UINT32 portNo) +{ + UINT32 idx = OVS_VPORT_INDEX(portNo); + return (idx >= OVS_VM_VPORT_START && idx <= OVS_VM_VPORT_MAX); +} + +static __inline POVS_VPORT_ENTRY +OvsGetTunnelVport(OVS_VPORT_TYPE type) +{ + ASSERT(OvsIsTunnelVportType(type)); + switch(type) { + case OVSWIN_VPORT_TYPE_VXLAN: + return (POVS_VPORT_ENTRY) OvsGetVportFromIndex(OVS_VXLAN_VPORT_INDEX); + default: + ASSERT(! "OvsGetTunnelVport not implemented for this tunnel."); + } + + return NULL; +} + +static __inline PVOID +OvsGetVportPriv(OVS_VPORT_TYPE type) +{ + return OvsGetTunnelVport(type)->priv; +} + +static __inline UINT32 +OvsGetExternalMtu() +{ + return ((POVS_VPORT_ENTRY) OvsGetExternalVport())->mtu; +} + +#endif /* __VPORT_H_ */ diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c new file mode 100644 index 000000000..3a1291ca3 --- /dev/null +++ b/datapath-windows/ovsext/Vxlan.c @@ -0,0 +1,506 @@ +/* + * Copyright (c) 2014 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "precomp.h" +#include "NetProto.h" +#include "Switch.h" +#include "Vport.h" +#include "Flow.h" +#include "Vxlan.h" +#include "IpHelper.h" +#include "Checksum.h" +#include "User.h" +#include "PacketIO.h" +#include "Flow.h" +#include "PacketParser.h" +#include "Checksum.h" + +#pragma warning( push ) +#pragma warning( disable:4127 ) + + +#ifdef OVS_DBG_MOD +#undef OVS_DBG_MOD +#endif +#define OVS_DBG_MOD OVS_DBG_VXLAN +#include "Debug.h" + +/* Helper macro to check if a VXLAN ID is valid. */ +#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff) +#define VXLAN_TUNNELID_TO_VNI(_tID) (UINT32)(((UINT64)(_tID)) >> 40) +#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40) +#define IP_DF_NBO 0x0040 +#define VXLAN_DEFAULT_TTL 64 +#define VXLAN_MULTICAST_TTL 64 +#define VXLAN_DEFAULT_INSTANCE_ID 1 + +/* Move to a header file */ +extern POVS_SWITCH_CONTEXT gOvsSwitchContext; + +NTSTATUS +OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport, + POVS_VPORT_ADD_REQUEST addReq) +{ + POVS_VXLAN_VPORT vxlanPort; + NTSTATUS status = STATUS_SUCCESS; + + ASSERT(addReq->type == OVSWIN_VPORT_TYPE_VXLAN); + + vxlanPort = OvsAllocateMemory(sizeof (*vxlanPort)); + if (vxlanPort == NULL) { + status = STATUS_INSUFFICIENT_RESOURCES; + } else { + RtlZeroMemory(vxlanPort, sizeof (*vxlanPort)); + vxlanPort->dstPort = addReq->dstPort; + /* + * since we are installing the WFP filter before the port is created + * We need to check if it is the same number + * XXX should be removed later + */ + ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT); + vport->priv = (PVOID)vxlanPort; + } + return status; +} + + +VOID +OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport) +{ + if (vport->ovsType != OVSWIN_VPORT_TYPE_VXLAN || + vport->priv == NULL) { + return; + } + + OvsFreeMemory(vport->priv); + vport->priv = NULL; +} + + +/* + *---------------------------------------------------------------------------- + * OvsDoEncapVxlan + * Encapsulates the packet. 
+ *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_FWD_INFO fwdInfo, + POVS_PACKET_HDR_INFO layers, + POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST *newNbl) +{ + NDIS_STATUS status; + PNET_BUFFER curNb; + PMDL curMdl; + PUINT8 bufferStart; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + VXLANHdr *vxlanHdr; + UINT32 headRoom = OvsGetVxlanTunHdrSize(); + UINT32 packetLength; + + /* + * XXX: the assumption currently is that the NBL is owned by OVS, and + * headroom has already been allocated as part of allocating the NBL and + * MDL. + */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + if (layers->isTcp) { + NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo; + + tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl, + TcpLargeSendNetBufferListInfo); + OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength); + if (tsoInfo.LsoV1Transmit.MSS) { + OVS_LOG_TRACE("l4Offset %d", layers->l4Offset); + *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers, + tsoInfo.LsoV1Transmit.MSS, headRoom); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to segment NBL"); + return NDIS_STATUS_FAILURE; + } + } + } + /* If we didn't split the packet above, make a copy now */ + if (*newNbl == NULL) { + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom, + FALSE /*NBL info*/); + if (*newNbl == NULL) { + OVS_LOG_ERROR("Unable to copy NBL"); + return NDIS_STATUS_FAILURE; + } + } + + curNbl = *newNbl; + for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL; + curNb = curNb->Next) { + status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL); + if (status != NDIS_STATUS_SUCCESS) { + goto ret_error; + } + + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority); + if (!bufferStart) { + 
status = NDIS_STATUS_RESOURCES; + goto ret_error; + } + + bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (NET_BUFFER_NEXT_NB(curNb)) { + OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb), + NET_BUFFER_DATA_LENGTH(curNb->Next)); + } + + /* L2 header */ + ethHdr = (EthHdr *)bufferStart; + NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr, + sizeof ethHdr->Destination + sizeof ethHdr->Source); + ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) == + (PCHAR)&fwdInfo->srcMacAddr); + ethHdr->Type = htons(ETH_TYPE_IPV4); + + // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such, + // should we use those values instead? or will they end up being + // uninitialized; + /* IP header */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + + ipHdr->ihl = sizeof *ipHdr / 4; + ipHdr->version = IPV4; + ipHdr->tos = 0; + ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr); + ipHdr->id = 0; + ipHdr->frag_off = IP_DF_NBO; + ipHdr->ttl = tunKey->ttl ? 
tunKey->ttl : VXLAN_DEFAULT_TTL; + ipHdr->protocol = IPPROTO_UDP; + ASSERT(tunKey->dst == fwdInfo->dstIpAddr); + ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0); + ipHdr->saddr = fwdInfo->srcIpAddr; + ipHdr->daddr = fwdInfo->dstIpAddr; + ipHdr->check = 0; + ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0); + + /* UDP header */ + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + udpHdr->source = htons(tunKey->flow_hash | 32768); + udpHdr->dest = VXLAN_UDP_PORT_NBO; + udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom + + sizeof *udpHdr + sizeof *vxlanHdr); + udpHdr->check = 0; + + /* VXLAN header */ + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + vxlanHdr->flags1 = 0; + vxlanHdr->locallyReplicate = 0; + vxlanHdr->flags2 = 0; + vxlanHdr->reserved1 = 0; + if (tunKey->flags | OVS_TNL_F_KEY) { + vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId); + vxlanHdr->instanceID = 1; + } + vxlanHdr->reserved2 = 0; + } + return STATUS_SUCCESS; + +ret_error: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + + +/* + *---------------------------------------------------------------------------- + * OvsEncapVxlan -- + * Encapsulates the packet if L2/L3 for destination resolves. Otherwise, + * enqueues a callback that does encapsulatation after resolution. 
+ *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsEncapVxlan(PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + POVS_SWITCH_CONTEXT switchContext, + VOID *completionList, + POVS_PACKET_HDR_INFO layers, + PNET_BUFFER_LIST *newNbl) +{ + NTSTATUS status; + OVS_FWD_INFO fwdInfo; + UNREFERENCED_PARAMETER(completionList); + + status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo); + if (status != STATUS_SUCCESS) { + OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL); + // return NDIS_STATUS_PENDING; + /* + * XXX: Don't know if the completionList will make any sense when + * accessed in the callback. Make sure the caveats are known. + * + * XXX: This code will work once we are able to grab locks in the + * callback. + */ + return NDIS_STATUS_FAILURE; + } + + return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers, + switchContext, newNbl); +} + + +/* + *---------------------------------------------------------------------------- + * OvsIpHlprCbVxlan -- + * Callback function for IP helper. 
+ * XXX: not used currently + *---------------------------------------------------------------------------- + */ +static VOID +OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl, + UINT32 inPort, + OvsIPv4TunnelKey *tunKey, + PVOID cbData1, + PVOID cbData2, + NTSTATUS result, + POVS_FWD_INFO fwdInfo) +{ + OVS_PACKET_HDR_INFO layers; + OvsFlowKey key; + NDIS_STATUS status; + UNREFERENCED_PARAMETER(inPort); + + status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL); + if (result == STATUS_SUCCESS) { + status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers, + (POVS_SWITCH_CONTEXT)cbData1, NULL); + } else { + status = NDIS_STATUS_FAILURE; + } + + if (status != NDIS_STATUS_SUCCESS) { + // XXX: Free up the NBL; + return; + } + + OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl); +} + +/* + *---------------------------------------------------------------------------- + * OvsCalculateUDPChecksum + * Calculate UDP checksum + *---------------------------------------------------------------------------- + */ +static __inline NDIS_STATUS +OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl, + PNET_BUFFER curNb, + IPHdr *ipHdr, + UDPHdr *udpHdr, + UINT32 packetLength) +{ + NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; + UINT16 checkSum; + + csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo); + + /* Next check if UDP checksum has been calculated. 
*/ + if (!csumInfo.Receive.UdpChecksumSucceeded) { + UINT32 l4Payload; + + checkSum = udpHdr->check; + + l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4; + udpHdr->check = 0; + udpHdr->check = + IPPseudoChecksum((UINT32 *)&ipHdr->saddr, + (UINT32 *)&ipHdr->daddr, + IPPROTO_UDP, (UINT16)l4Payload); + udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload, + sizeof(EthHdr) + ipHdr->ihl * 4); + if (checkSum != udpHdr->check) { + OVS_LOG_TRACE("UDP checksum incorrect."); + return NDIS_STATUS_INVALID_PACKET; + } + } + + csumInfo.Receive.UdpChecksumSucceeded = 1; + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value; + return NDIS_STATUS_SUCCESS; +} + +/* + *---------------------------------------------------------------------------- + * OvsDoDecapVxlan + * Decapsulates to tunnel header in 'curNbl' and puts into 'tunKey'. + *---------------------------------------------------------------------------- + */ +NDIS_STATUS +OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext, + PNET_BUFFER_LIST curNbl, + OvsIPv4TunnelKey *tunKey, + PNET_BUFFER_LIST *newNbl) +{ + PNET_BUFFER curNb; + PMDL curMdl; + EthHdr *ethHdr; + IPHdr *ipHdr; + UDPHdr *udpHdr; + VXLANHdr *vxlanHdr; + UINT32 tunnelSize = 0, packetLength = 0; + PUINT8 bufferStart; + NDIS_STATUS status; + + /* Check the the length of the UDP payload */ + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + packetLength = NET_BUFFER_DATA_LENGTH(curNb); + tunnelSize = OvsGetVxlanTunHdrSize(); + if (packetLength <= tunnelSize) { + return NDIS_STATUS_INVALID_LENGTH; + } + + /* + * Create a copy of the NBL so that we have all the headers in one MDL. + */ + *newNbl = OvsPartialCopyNBL(switchContext, curNbl, + tunnelSize + OVS_DEFAULT_COPY_SIZE, 0, + TRUE /*copy NBL info */); + + if (*newNbl == NULL) { + return NDIS_STATUS_RESOURCES; + } + + /* XXX: Handle VLAN header. 
*/ + curNbl = *newNbl; + curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); + curMdl = NET_BUFFER_CURRENT_MDL(curNb); + bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) + + NET_BUFFER_CURRENT_MDL_OFFSET(curNb); + if (!bufferStart) { + status = NDIS_STATUS_RESOURCES; + goto dropNbl; + } + + ethHdr = (EthHdr *)bufferStart; + /* XXX: Handle IP options. */ + ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr); + tunKey->src = ipHdr->saddr; + tunKey->dst = ipHdr->daddr; + tunKey->tos = ipHdr->tos; + tunKey->ttl = ipHdr->ttl; + tunKey->pad = 0; + udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr); + + /* Validate if NIC has indicated checksum failure. */ + status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0); + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + + /* Calculate and verify UDP checksum if NIC didn't do it. */ + if (udpHdr->check != 0) { + status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength); + if (status != NDIS_STATUS_SUCCESS) { + goto dropNbl; + } + } + + vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr); + if (vxlanHdr->instanceID) { + tunKey->flags = OVS_TNL_F_KEY; + tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID); + } else { + tunKey->flags = 0; + tunKey->tunnelId = 0; + } + + /* Clear out the receive flag for the inner packet. 
*/ + NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0; + NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL); + return NDIS_STATUS_SUCCESS; + +dropNbl: + OvsCompleteNBL(switchContext, *newNbl, TRUE); + *newNbl = NULL; + return status; +} + + +NDIS_STATUS +OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet, + OvsIPv4TunnelKey *tunnelKey) +{ + NDIS_STATUS status = NDIS_STATUS_FAILURE; + UDPHdr udpStorage; + const UDPHdr *udp; + VXLANHdr *VxlanHeader; + VXLANHdr VxlanHeaderBuffer; + struct IPHdr ip_storage; + const struct IPHdr *nh; + OVS_PACKET_HDR_INFO layers; + + layers.value = 0; + + do { + nh = OvsGetIp(packet, layers.l3Offset, &ip_storage); + if (nh) { + layers.l4Offset = layers.l3Offset + nh->ihl * 4; + } else { + break; + } + + /* make sure it's a VXLAN packet */ + udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage); + if (udp) { + layers.l7Offset = layers.l4Offset + sizeof *udp; + } else { + break; + } + + /* XXX Should be tested against the dynamic port # in the VXLAN vport */ + ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT)); + + VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet, + sizeof(*VxlanHeader), + layers.l7Offset, + &VxlanHeaderBuffer); + + if (VxlanHeader) { + tunnelKey->src = nh->saddr; + tunnelKey->dst = nh->daddr; + tunnelKey->ttl = nh->ttl; + tunnelKey->tos = nh->tos; + if (VxlanHeader->instanceID) { + tunnelKey->flags = OVS_TNL_F_KEY; + tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID); + } else { + tunnelKey->flags = 0; + tunnelKey->tunnelId = 0; + } + } else { + break; + } + status = NDIS_STATUS_SUCCESS; + + } while(FALSE); + + return status; +} + +#pragma warning( pop ) diff --git a/datapath-windows/ovsext/Vxlan.h b/datapath-windows/ovsext/Vxlan.h new file mode 100644 index 000000000..e77793328 --- /dev/null +++ b/datapath-windows/ovsext/Vxlan.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2014 VMware, Inc. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __VXLAN_H_
+#define __VXLAN_H_ 1
+
+#include "NetProto.h"
+typedef struct _OVS_VXLAN_VPORT {
+    UINT32 dstPort;
+    UINT64 inPkts;
+    UINT64 outPkts;
+    UINT64 slowInPkts;
+    UINT64 slowOutPkts;
+    /*
+     * To be filled
+     *
+     * 'dstPort' is copied from the add request by OvsInitVxlanTunnel(), which
+     * zeroes the rest of the structure.
+     */
+} OVS_VXLAN_VPORT, *POVS_VXLAN_VPORT;
+
+/*
+ * VXLAN header, declared as bit-fields: 2 + 1 + 1 + 4 flag bits and 24
+ * reserved bits in the first UINT32, then a 24-bit VXLAN ID and 8 reserved
+ * bits in the second (64 bits in total).
+ * NOTE(review): bit-field allocation order is implementation-defined, so the
+ * layout presumably relies on the compiler/target this driver is built
+ * for -- confirm.
+ */
+typedef struct VXLANHdr {
+    /* Flags. */
+    UINT32   flags1:2;
+    /* Packet needs replication to multicast group (used for multicast proxy). */
+    UINT32   locallyReplicate:1;
+    /* Instance ID flag, must be set to 1. */
+    UINT32   instanceID:1;
+    /* Flags. */
+    UINT32   flags2:4;
+    /* Reserved. */
+    UINT32  reserved1:24;
+    /* VXLAN ID. */
+    UINT32  vxlanID:24;
+    /* Reserved. */
+    UINT32   reserved2:8;
+} VXLANHdr;
+
+NTSTATUS OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
+                            POVS_VPORT_ADD_REQUEST addReq);
+
+VOID OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport);
+
+NDIS_STATUS OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
+                                  OvsIPv4TunnelKey *tunnelKey);
+
+NDIS_STATUS OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
+                          OvsIPv4TunnelKey *tunKey,
+                          POVS_SWITCH_CONTEXT switchContext,
+                          VOID *completionList,
+                          POVS_PACKET_HDR_INFO layers,
+                          PNET_BUFFER_LIST *newNbl);
+
+NDIS_STATUS OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
+                            PNET_BUFFER_LIST curNbl,
+                            OvsIPv4TunnelKey *tunKey,
+                            PNET_BUFFER_LIST *newNbl);
+
+static __inline UINT32
+OvsGetVxlanTunHdrSize(VOID)
+{
+    /* XXX: Can L2 include VLAN at all?
*/ + return sizeof (EthHdr) + sizeof (IPHdr) + sizeof (UDPHdr) + + sizeof (VXLANHdr); +} + +#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT_NBO 0xB512 + +#endif /* __VXLAN_H_ */ diff --git a/datapath-windows/ovsext/ovsext.vcxproj b/datapath-windows/ovsext/ovsext.vcxproj index 1a618dec7..82ab908ee 100644 --- a/datapath-windows/ovsext/ovsext.vcxproj +++ b/datapath-windows/ovsext/ovsext.vcxproj @@ -71,31 +71,32 @@ - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + OVSExt @@ -127,34 +128,34 @@ + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + ;%(AdditionalIncludeDirectories) precomp.h Create $(IntDir)\precomp.h.pch + + + + + + + @@ -167,4 +168,4 @@ - + \ No newline at end of file diff --git a/datapath-windows/ovsext/precomp.h b/datapath-windows/ovsext/precomp.h index 5b6c2a971..41e321807 100644 --- a/datapath-windows/ovsext/precomp.h +++ b/datapath-windows/ovsext/precomp.h @@ -21,9 +21,9 @@ #include #include -#include "OvsTypes.h" +#include "Types.h" #include "..\include\OvsPub.h" -#include "OvsUtil.h" +#include "Util.h" #include "Netlink.h" #include "NetlinkProto.h" /*