2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
28 #include "PacketParser.h"
30 #pragma warning( push )
31 #pragma warning( disable:4127 )
#define OVS_DBG_MOD OVS_DBG_VXLAN

/*
 * Helper macro to check if a VXLAN ID is valid: non-zero and no wider than
 * 24 bits.  The argument is evaluated twice, so do not pass an expression
 * with side effects.
 */
#define VXLAN_ID_IS_VALID(vxlanID) (0 < (vxlanID) && (vxlanID) <= 0xffffff)

/*
 * Conversions between the 64-bit tunnel ID and the VNI, which occupies the
 * bits above bit 39 of the tunnel ID.  Both expansions are fully
 * parenthesized so they can be used safely inside larger expressions.
 */
#define VXLAN_TUNNELID_TO_VNI(_tID) ((UINT32)(((UINT64)(_tID)) >> 40))
#define VXLAN_VNI_TO_TUNNELID(_vni) (((UINT64)(_vni)) << 40)

/*
 * Value written to the outer IPv4 frag_off field; presumably the "don't
 * fragment" flag already in network byte order -- confirm.
 */
#define IP_DF_NBO 0x0040

/* TTL used for the outer IPv4 header when the tunnel key does not supply one. */
#define VXLAN_DEFAULT_TTL 64
#define VXLAN_MULTICAST_TTL 64
#define VXLAN_DEFAULT_INSTANCE_ID 1
49 /* TODO: move this declaration to a shared header file. */
50 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
53 * udpDestPort: VXLAN is carried as the payload of a UDP frame. If the
54 * destination port of a UDP frame is udpDestPort, we treat it as VXLAN.
/*
 * Sets up the VXLAN-specific state of 'vport': allocates an OVS_VXLAN_VPORT,
 * zero-fills it, records 'udpDestPort' as its dstPort and stores the
 * structure in vport->priv.  Returns STATUS_INSUFFICIENT_RESOURCES when the
 * allocation returns NULL and STATUS_SUCCESS otherwise.
 */
57 OvsInitVxlanTunnel(POVS_VPORT_ENTRY vport,
60 POVS_VXLAN_VPORT vxlanPort;
62 vxlanPort = OvsAllocateMemoryWithTag(sizeof (*vxlanPort),
64 if (vxlanPort == NULL) {
65 return STATUS_INSUFFICIENT_RESOURCES;
/* Start from an all-zero structure so every field not set below is 0. */
68 RtlZeroMemory(vxlanPort, sizeof(*vxlanPort));
69 vxlanPort->dstPort = udpDestPort;
71 * since we are installing the WFP filter before the port is created
72 * We need to check if it is the same number
73 * XXX should be removed later
75 ASSERT(vxlanPort->dstPort == VXLAN_UDP_PORT);
/* The vport keeps the allocation; OvsCleanupVxlanTunnel() frees vport->priv. */
76 vport->priv = (PVOID)vxlanPort;
78 return STATUS_SUCCESS;
/*
 * Releases the VXLAN state attached to 'vport' by freeing vport->priv with
 * OVS_VXLAN_POOL_TAG.
 */
83 OvsCleanupVxlanTunnel(POVS_VPORT_ENTRY vport)
/*
 * Guard: the port is not of type VXLAN, or it carries no private data.
 * NOTE(review): the guarded statement is not visible in this excerpt --
 * presumably an early return; confirm.
 */
85 if (vport->ovsType != OVS_VPORT_TYPE_VXLAN ||
86 vport->priv == NULL) {
90 OvsFreeMemoryWithTag(vport->priv, OVS_VXLAN_POOL_TAG);
/*
 * NOTE(review): confirm vport->priv is reset after the free so the vport is
 * not left holding a dangling pointer.
 */
96 *----------------------------------------------------------------------------
98 * Encapsulates the packet.
99 *----------------------------------------------------------------------------
/*
 * Prepends the outer Ethernet, IPv4, UDP and VXLAN headers to a packet.
 *
 *   curNbl        - packet to encapsulate.
 *   tunKey        - tunnel key; supplies ttl, flow_hash, flags and tunnelId.
 *   fwdInfo       - MAC and IPv4 addresses used for the outer headers.
 *   layers        - header offsets of the inner packet; handed to
 *                   OvsTcpSegmentNBL() when segmentation is needed.
 *   switchContext - passed through to the NBL segment/copy/complete helpers.
 *   newNbl        - out: NBL produced by OvsTcpSegmentNBL() or
 *                   OvsPartialCopyNBL().  *newNbl is tested against NULL
 *                   before it is necessarily written here, so the caller is
 *                   expected to initialise it to NULL.
 *
 * Returns NDIS_STATUS_FAILURE when segmentation or copying yields NULL, and
 * STATUS_SUCCESS once the headers have been written.
 */
101 static __inline NDIS_STATUS
102 OvsDoEncapVxlan(PNET_BUFFER_LIST curNbl,
103 OvsIPv4TunnelKey *tunKey,
104 POVS_FWD_INFO fwdInfo,
105 POVS_PACKET_HDR_INFO layers,
106 POVS_SWITCH_CONTEXT switchContext,
107 PNET_BUFFER_LIST *newNbl)
/* Number of bytes exposed in front of the packet for the outer headers. */
117 UINT32 headRoom = OvsGetVxlanTunHdrSize();
121 * XXX: the assumption currently is that the NBL is owned by OVS, and
122 * headroom has already been allocated as part of allocating the NBL and
125 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
126 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
/*
 * Large-send handling: a non-zero MSS in the NBL's large-send info means the
 * packet is segmented (with 'headRoom' reserved) before encapsulation.
 */
128 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
130 tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
131 TcpLargeSendNetBufferListInfo);
132 OVS_LOG_TRACE("MSS %u packet len %u", tsoInfo.LsoV1Transmit.MSS, packetLength);
133 if (tsoInfo.LsoV1Transmit.MSS) {
134 OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
135 *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
136 tsoInfo.LsoV1Transmit.MSS, headRoom);
137 if (*newNbl == NULL) {
138 OVS_LOG_ERROR("Unable to segment NBL");
139 return NDIS_STATUS_FAILURE;
143 /* If we didn't split the packet above, make a copy now */
144 if (*newNbl == NULL) {
145 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
147 if (*newNbl == NULL) {
148 OVS_LOG_ERROR("Unable to copy NBL");
149 return NDIS_STATUS_FAILURE;
/*
 * Write the outer headers into every NET_BUFFER of the list.
 * NOTE(review): the loop walks 'curNbl'; confirm it has been re-pointed at
 * *newNbl before this point (not visible in this excerpt), otherwise the
 * headers are written into the original packet instead of the copy.
 */
154 for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
155 curNb = curNb->Next) {
/* Move the data start back by 'headRoom' bytes to make room for the headers. */
156 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
157 if (status != NDIS_STATUS_SUCCESS) {
161 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
162 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
164 status = NDIS_STATUS_RESOURCES;
168 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
169 if (NET_BUFFER_NEXT_NB(curNb)) {
170 OVS_LOG_TRACE("nb length %u next %u", NET_BUFFER_DATA_LENGTH(curNb),
171 NET_BUFFER_DATA_LENGTH(curNb->Next));
/* Outer Ethernet header. */
175 ethHdr = (EthHdr *)bufferStart;
/*
 * A single copy fills both Destination and Source; this relies on
 * fwdInfo->srcMacAddr being laid out immediately after fwdInfo->dstMacAddr,
 * which the ASSERT below checks.
 */
176 NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
177 sizeof ethHdr->Destination + sizeof ethHdr->Source);
178 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
179 (PCHAR)&fwdInfo->srcMacAddr);
180 ethHdr->Type = htons(ETH_TYPE_IPV4);
182 // XXX: question: there are fields in the OvsIPv4TunnelKey for ttl and such,
183 // should we use those values instead? or will they end up being
/* Outer IPv4 header, placed directly after the Ethernet header. */
186 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
188 ipHdr->ihl = sizeof *ipHdr / 4;
189 ipHdr->version = IPV4;
/* Total length covers everything in this NET_BUFFER after the Ethernet header. */
191 ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
193 ipHdr->frag_off = IP_DF_NBO;
/* Use the TTL from the tunnel key when it is non-zero, else the default. */
194 ipHdr->ttl = tunKey->ttl ? tunKey->ttl : VXLAN_DEFAULT_TTL;
195 ipHdr->protocol = IPPROTO_UDP;
196 ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
197 ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
198 ipHdr->saddr = fwdInfo->srcIpAddr;
199 ipHdr->daddr = fwdInfo->dstIpAddr;
201 ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
/*
 * Outer UDP header.  The source port is derived from the flow hash with bit
 * 15 forced on, so it is always >= 32768.
 */
204 udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
205 udpHdr->source = htons(tunKey->flow_hash | 32768);
/*
 * NOTE(review): the destination port is the compile-time constant rather
 * than the dstPort stored on the VXLAN vport -- confirm this is intended.
 */
206 udpHdr->dest = VXLAN_UDP_PORT_NBO;
/* UDP length = inner packet (data length minus headRoom) + UDP + VXLAN headers. */
207 udpHdr->len = htons(NET_BUFFER_DATA_LENGTH(curNb) - headRoom +
208 sizeof *udpHdr + sizeof *vxlanHdr);
/* VXLAN header, placed directly after the UDP header. */
212 vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
213 vxlanHdr->flags1 = 0;
214 vxlanHdr->locallyReplicate = 0;
215 vxlanHdr->flags2 = 0;
216 vxlanHdr->reserved1 = 0;
/*
 * NOTE(review): '|' makes this condition true whenever OVS_TNL_F_KEY is
 * non-zero, regardless of tunKey->flags; a bitwise '&' test was most likely
 * intended.  Also, the literal 1 below duplicates VXLAN_DEFAULT_INSTANCE_ID.
 */
217 if (tunKey->flags | OVS_TNL_F_KEY) {
218 vxlanHdr->vxlanID = VXLAN_TUNNELID_TO_VNI(tunKey->tunnelId);
219 vxlanHdr->instanceID = 1;
221 vxlanHdr->reserved2 = 0;
223 return STATUS_SUCCESS;
/* Failure path: hand the copied/segmented NBL back for completion. */
226 OvsCompleteNBL(switchContext, *newNbl, TRUE);
233 *----------------------------------------------------------------------------
235 * Encapsulates the packet if L2/L3 for destination resolves. Otherwise,
236 * enqueues a callback that does encapsulation after resolution.
237 *----------------------------------------------------------------------------
/*
 * Looks up the forwarding information for tunKey->dst and, when it is
 * available, encapsulates 'curNbl' through OvsDoEncapVxlan().  When the
 * lookup fails, a request is sent to the IP helper and NDIS_STATUS_FAILURE is
 * returned.  'completionList' is currently unused.
 */
240 OvsEncapVxlan(PNET_BUFFER_LIST curNbl,
241 OvsIPv4TunnelKey *tunKey,
242 POVS_SWITCH_CONTEXT switchContext,
243 VOID *completionList,
244 POVS_PACKET_HDR_INFO layers,
245 PNET_BUFFER_LIST *newNbl)
248 OVS_FWD_INFO fwdInfo;
249 UNREFERENCED_PARAMETER(completionList);
251 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
252 if (status != STATUS_SUCCESS) {
/*
 * Only the tunnel key is passed; every other argument is NULL/0, so
 * presumably this just triggers resolution and the packet is not queued for
 * a later callback -- confirm.
 */
253 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
254 // return NDIS_STATUS_PENDING;
256 * XXX: Don't know if the completionList will make any sense when
257 * accessed in the callback. Make sure the caveats are known.
259 * XXX: This code will work once we are able to grab locks in the
262 return NDIS_STATUS_FAILURE;
/* Forwarding info resolved: build the outer headers now. */
265 return OvsDoEncapVxlan(curNbl, tunKey, &fwdInfo, layers,
266 switchContext, newNbl);
271 *----------------------------------------------------------------------------
272 * OvsIpHlprCbVxlan --
273 * Callback function for IP helper.
274 * XXX: not used currently
275 *----------------------------------------------------------------------------
/*
 * IP helper callback: when 'result' is STATUS_SUCCESS the packet is
 * encapsulated with OvsDoEncapVxlan() and then passed to
 * OvsLookupFlowOutput(); otherwise the status is set to NDIS_STATUS_FAILURE.
 * 'cbData1' is used as the switch context and 'cbData2' is forwarded
 * unchanged to OvsLookupFlowOutput().
 */
278 OvsIpHlprCbVxlan(PNET_BUFFER_LIST curNbl,
280 OvsIPv4TunnelKey *tunKey,
284 POVS_FWD_INFO fwdInfo)
286 OVS_PACKET_HDR_INFO layers;
/*
 * NOTE(review): inPort is marked unreferenced here but is passed to
 * OvsExtractFlow() on the next visible line.
 */
289 UNREFERENCED_PARAMETER(inPort);
/*
 * NOTE(review): the status returned by OvsExtractFlow() is overwritten below
 * before it is ever checked, so 'layers' may be used after a failed extract.
 */
291 status = OvsExtractFlow(curNbl, inPort, &key, &layers, NULL);
292 if (result == STATUS_SUCCESS) {
/*
 * NOTE(review): NULL is passed as 'newNbl', but OvsDoEncapVxlan()
 * dereferences that pointer; this must be fixed before the callback is used.
 */
293 status = OvsDoEncapVxlan(curNbl, tunKey, fwdInfo, &layers,
294 (POVS_SWITCH_CONTEXT)cbData1, NULL);
296 status = NDIS_STATUS_FAILURE;
299 if (status != NDIS_STATUS_SUCCESS) {
300 // XXX: Free up the NBL;
304 OvsLookupFlowOutput((POVS_SWITCH_CONTEXT)cbData1, cbData2, curNbl);
308 *----------------------------------------------------------------------------
309 * OvsCalculateUDPChecksum
310 * Calculate UDP checksum
311 *----------------------------------------------------------------------------
/*
 * Verifies the UDP checksum in software when the NBL's checksum info does not
 * already report UdpChecksumSucceeded.  Returns NDIS_STATUS_INVALID_PACKET
 * when the recomputed checksum differs from the one carried in the packet.
 * Otherwise it sets UdpChecksumSucceeded, writes the checksum info back to
 * the NBL and returns NDIS_STATUS_SUCCESS.
 */
313 static __inline NDIS_STATUS
314 OvsCalculateUDPChecksum(PNET_BUFFER_LIST curNbl,
320 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
323 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo);
325 /* Next check if UDP checksum has been calculated. */
326 if (!csumInfo.Receive.UdpChecksumSucceeded) {
/* Remember the checksum carried in the packet; the field is rewritten below. */
329 checkSum = udpHdr->check;
/* Length of everything after the Ethernet and IPv4 (ihl * 4 bytes) headers. */
331 l4Payload = packetLength - sizeof(EthHdr) - ipHdr->ihl * 4;
/*
 * NOTE(review): l4Payload is narrowed to UINT16 in the calls below.  The
 * destination of the pseudo-header checksum is not visible in this excerpt --
 * presumably udpHdr->check, used as the seed for CalculateChecksumNB().
 */
334 IPPseudoChecksum((UINT32 *)&ipHdr->saddr,
335 (UINT32 *)&ipHdr->daddr,
336 IPPROTO_UDP, (UINT16)l4Payload);
/* The recomputed checksum is stored into the packet's UDP header itself. */
337 udpHdr->check = CalculateChecksumNB(curNb, (UINT16)l4Payload,
338 sizeof(EthHdr) + ipHdr->ihl * 4);
339 if (checkSum != udpHdr->check) {
340 OVS_LOG_TRACE("UDP checksum incorrect.");
341 return NDIS_STATUS_INVALID_PACKET;
/* Record the successful verification on the NBL. */
345 csumInfo.Receive.UdpChecksumSucceeded = 1;
346 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = csumInfo.Value;
347 return NDIS_STATUS_SUCCESS;
351 *----------------------------------------------------------------------------
353 * Decapsulates the tunnel header in 'curNbl' and stores its fields in 'tunKey'.
354 *----------------------------------------------------------------------------
/*
 * Strips the outer Ethernet/IPv4/UDP/VXLAN headers from a received packet.
 *
 *   switchContext - passed to the NBL copy/complete helpers.
 *   curNbl        - received packet.
 *   tunKey        - out: src, dst, tos, ttl, flags and tunnelId taken from the
 *                   outer IPv4 and VXLAN headers.
 *   newNbl        - out: copy of the packet created by OvsPartialCopyNBL().
 *
 * Returns NDIS_STATUS_INVALID_LENGTH when the packet is not longer than the
 * tunnel header, NDIS_STATUS_RESOURCES when the copy fails, and
 * NDIS_STATUS_SUCCESS after the data start has been advanced past the tunnel
 * header.
 */
357 OvsDoDecapVxlan(POVS_SWITCH_CONTEXT switchContext,
358 PNET_BUFFER_LIST curNbl,
359 OvsIPv4TunnelKey *tunKey,
360 PNET_BUFFER_LIST *newNbl)
368 UINT32 tunnelSize = 0, packetLength = 0;
372 /* Check the length of the UDP payload */
373 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
374 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
375 tunnelSize = OvsGetVxlanTunHdrSize();
/* A packet must carry at least one byte beyond the tunnel header. */
376 if (packetLength <= tunnelSize) {
377 return NDIS_STATUS_INVALID_LENGTH;
381 * Create a copy of the NBL so that we have all the headers in one MDL.
383 *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
384 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
385 TRUE /*copy NBL info */);
387 if (*newNbl == NULL) {
388 return NDIS_STATUS_RESOURCES;
391 /* XXX: Handle VLAN header. */
/*
 * NOTE(review): curNb is fetched from 'curNbl' again; confirm that curNbl has
 * been re-pointed at *newNbl by this point (not visible in this excerpt).
 */
393 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
394 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
/*
 * NOTE(review): the MDL offset is added to the mapped address before any NULL
 * check, so a NULL result from MmGetSystemAddressForMdlSafe() combined with a
 * non-zero offset would not be detected by a later test of bufferStart.
 */
395 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
396 NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
398 status = NDIS_STATUS_RESOURCES;
402 ethHdr = (EthHdr *)bufferStart;
403 /* XXX: Handle IP options. */
/* Outer IPv4 header: copy the addressing, TOS and TTL into the tunnel key. */
404 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
405 tunKey->src = ipHdr->saddr;
406 tunKey->dst = ipHdr->daddr;
407 tunKey->tos = ipHdr->tos;
408 tunKey->ttl = ipHdr->ttl;
/* The UDP header is located assuming a fixed-size IPv4 header (no options). */
410 udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
412 /* Validate if NIC has indicated checksum failure. */
413 status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
414 if (status != NDIS_STATUS_SUCCESS) {
418 /* Calculate and verify UDP checksum if NIC didn't do it. */
419 if (udpHdr->check != 0) {
420 status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr, packetLength);
421 if (status != NDIS_STATUS_SUCCESS) {
/* VXLAN header: a set instanceID bit means the header carries a VNI. */
426 vxlanHdr = (VXLANHdr *)((PCHAR)udpHdr + sizeof *udpHdr);
427 if (vxlanHdr->instanceID) {
428 tunKey->flags = OVS_TNL_F_KEY;
429 tunKey->tunnelId = VXLAN_VNI_TO_TUNNELID(vxlanHdr->vxlanID);
432 tunKey->tunnelId = 0;
435 /* Clear out the receive flag for the inner packet. */
436 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
/* Drop the outer headers by moving the data start past the tunnel header. */
437 NdisAdvanceNetBufferDataStart(curNb, tunnelSize, FALSE, NULL);
438 return NDIS_STATUS_SUCCESS;
/* Failure path: hand the copied NBL back for completion. */
441 OvsCompleteNBL(switchContext, *newNbl, TRUE);
/*
 * Fills 'tunnelKey' from the outer IPv4 and VXLAN headers of 'packet' using
 * the OvsGetIp()/OvsGetUdp()/OvsGetPacketBytes() accessors, each of which is
 * given a local storage buffer.  'status' starts as NDIS_STATUS_FAILURE and
 * is set to NDIS_STATUS_SUCCESS only after the tunnel key has been filled in.
 */
448 OvsSlowPathDecapVxlan(const PNET_BUFFER_LIST packet,
449 OvsIPv4TunnelKey *tunnelKey)
451 NDIS_STATUS status = NDIS_STATUS_FAILURE;
454 VXLANHdr *VxlanHeader;
455 VXLANHdr VxlanHeaderBuffer;
456 struct IPHdr ip_storage;
457 const struct IPHdr *nh;
458 OVS_PACKET_HDR_INFO layers;
/* Locate the outer IPv4 header at layers.l3Offset. */
463 nh = OvsGetIp(packet, layers.l3Offset, &ip_storage);
/* The L4 header follows the IPv4 header, whose length is ihl * 4 bytes. */
465 layers.l4Offset = layers.l3Offset + nh->ihl * 4;
470 /* make sure it's a VXLAN packet */
471 udp = OvsGetUdp(packet, layers.l4Offset, &udpStorage);
/* The VXLAN header starts right after the UDP header. */
473 layers.l7Offset = layers.l4Offset + sizeof *udp;
478 /* XXX Should be tested against the dynamic port # in the VXLAN vport */
479 ASSERT(udp->dest == RtlUshortByteSwap(VXLAN_UDP_PORT));
481 VxlanHeader = (VXLANHdr *)OvsGetPacketBytes(packet,
482 sizeof(*VxlanHeader),
/* Copy the outer addressing, TTL and TOS into the tunnel key. */
487 tunnelKey->src = nh->saddr;
488 tunnelKey->dst = nh->daddr;
489 tunnelKey->ttl = nh->ttl;
490 tunnelKey->tos = nh->tos;
/* A set instanceID bit means the header carries a VNI; otherwise no key. */
491 if (VxlanHeader->instanceID) {
492 tunnelKey->flags = OVS_TNL_F_KEY;
493 tunnelKey->tunnelId = VXLAN_VNI_TO_TUNNELID(VxlanHeader->vxlanID);
495 tunnelKey->flags = 0;
496 tunnelKey->tunnelId = 0;
501 status = NDIS_STATUS_SUCCESS;
508 #pragma warning( pop )