2 * Copyright (c) 2015 Cloudbase Solutions Srl
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
26 #include "PacketParser.h"
35 #define OVS_DBG_MOD OVS_DBG_GRE
/*
 * Forward declaration of the internal GRE encapsulation routine. It is
 * defined further down in this file and is invoked by OvsEncapGre() once the
 * forwarding information for the tunnel destination has been looked up.
 *
 * NOTE(review): fwdInfo is dereferenced with '->' in the definition, so
 * POVS_FWD_INFO is a pointer type. Assuming it is a typedef, 'const' here
 * qualifies the pointer itself rather than the forwarding data it points
 * to -- confirm that this is the intent.
 */
39 OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
40 const OvsIPv4TunnelKey *tunKey,
41 const POVS_FWD_INFO fwdInfo,
42 POVS_PACKET_HDR_INFO layers,
43 POVS_SWITCH_CONTEXT switchContext,
44 PNET_BUFFER_LIST *newNbl);
47 * --------------------------------------------------------------------------
49 * Initialize GRE tunnel module.
50 * --------------------------------------------------------------------------
/*
 * Allocates the per-vport GRE state (an OVS_GRE_VPORT-sized object),
 * zero-fills it and attaches it to the vport through vport->priv.
 *
 * vport: the vport that receives the new private GRE state.
 *
 * Returns STATUS_SUCCESS once vport->priv has been set. The allocation
 * failure path logs an error and returns STATUS_INSUFFICIENT_RESOURCES.
 */
53 OvsInitGreTunnel(POVS_VPORT_ENTRY vport)
55 POVS_GRE_VPORT grePort;
/* Size the allocation from the pointee so it follows the struct type. */
57 grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort),
/* Allocation-failure path: report the problem and bail out. */
60 OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT");
61 return STATUS_INSUFFICIENT_RESOURCES;
/* Start from an all-zero state before publishing it on the vport. */
64 RtlZeroMemory(grePort, sizeof(*grePort));
65 vport->priv = (PVOID)grePort;
66 return STATUS_SUCCESS;
70 * --------------------------------------------------------------------------
71 * OvsCleanupGreTunnel --
72 * Cleanup GRE Tunnel module.
73 * --------------------------------------------------------------------------
/*
 * Releases the private GRE state attached to a vport.
 *
 * vport: the vport whose vport->priv is released.
 *
 * The guard below singles out vports that are not of type
 * OVS_VPORT_TYPE_GRE or that have no private data attached. The private
 * data is released with the OVS_GRE_POOL_TAG pool tag.
 */
76 OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport)
/* Guard: wrong vport type, or no private state attached. */
78 if (vport->ovsType != OVS_VPORT_TYPE_GRE ||
79 vport->priv == NULL) {
83 OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG);
88 * --------------------------------------------------------------------------
90 * Encapsulates a packet with a GRE header.
91 * --------------------------------------------------------------------------
/*
 * Public entry point for GRE encapsulation.
 *
 * Looks up the forwarding information for the tunnel destination
 * (tunKey->dst) and, when the lookup succeeds, hands the packet to
 * OvsDoEncapGre() together with that information.
 *
 * vport:         GRE vport the packet is sent through.
 * curNbl:        NET_BUFFER_LIST holding the packet to encapsulate.
 * tunKey:        tunnel key; tunKey->dst selects the forwarding entry.
 * switchContext: passed through to OvsDoEncapGre().
 * layers:        parsed header information, passed through unchanged.
 * newNbl:        out parameter, passed through to OvsDoEncapGre().
 *
 * Returns NDIS_STATUS_FAILURE when the forwarding lookup does not succeed.
 */
94 OvsEncapGre(POVS_VPORT_ENTRY vport,
95 PNET_BUFFER_LIST curNbl,
96 OvsIPv4TunnelKey *tunKey,
97 POVS_SWITCH_CONTEXT switchContext,
98 POVS_PACKET_HDR_INFO layers,
99 PNET_BUFFER_LIST *newNbl)
101 OVS_FWD_INFO fwdInfo;
104 status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
105 if (status != STATUS_SUCCESS) {
/*
 * No forwarding entry is available for the destination. A request is
 * issued to the IP helper and this packet is failed.
 * NOTE(review): presumably the request resolves the route/neighbour
 * asynchronously so that later packets succeed -- confirm against
 * OvsFwdIPHelperRequest().
 */
106 OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
107 return NDIS_STATUS_FAILURE;
/* Forwarding information is available: perform the actual encapsulation. */
110 status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers,
111 switchContext, newNbl);
116 * --------------------------------------------------------------------------
118 * Internal utility function which actually does the GRE encap.
119 * --------------------------------------------------------------------------
/*
 * Performs the GRE encapsulation proper.
 *
 * Visible stages, in order:
 *   1. LSO handling: the MSS is read from the large-send offload info of
 *      curNbl, the packet is segmented with OvsTcpSegmentNBL() and the LSO
 *      info is cleared on the result.
 *   2. If no new NBL was produced by stage 1, a partial copy of curNbl is
 *      made with headRoom bytes requested for the outer headers.
 *   3. Inner checksums requested through the checksum offload info of
 *      curNbl are computed in software, then the offload info is cleared on
 *      *newNbl.
 *   4. For every NET_BUFFER in the list, the data start is retreated by
 *      headRoom bytes and the outer Ethernet, IPv4 and GRE headers are
 *      written into the space gained.
 *
 * vport:         GRE vport; its private data supplies the IP id counter.
 * curNbl:        packet to encapsulate.
 * tunKey:        tunnel parameters (flags, tos, ttl, tunnelId, src, dst).
 * fwdInfo:       MAC and IP addresses used for the outer headers.
 * layers:        parsed offsets/flags of the inner packet.
 * switchContext: passed to the NBL helper routines.
 * newNbl:        out parameter; must point to NULL on entry (asserted).
 *
 * Returns STATUS_SUCCESS on the success path and NDIS_STATUS_FAILURE when
 * segmentation or copying fails; NDIS_STATUS_RESOURCES is recorded in status
 * on the buffer-mapping failure paths.
 * NOTE(review): the success path returns an NTSTATUS constant
 * (STATUS_SUCCESS) while the failure paths use NDIS_STATUS_* values --
 * confirm which one the return type calls for.
 */
122 OvsDoEncapGre(POVS_VPORT_ENTRY vport,
123 PNET_BUFFER_LIST curNbl,
124 const OvsIPv4TunnelKey *tunKey,
125 const POVS_FWD_INFO fwdInfo,
126 POVS_PACKET_HDR_INFO layers,
127 POVS_SWITCH_CONTEXT switchContext,
128 PNET_BUFFER_LIST *newNbl)
137 POVS_GRE_VPORT vportGre;
/* Number of bytes of outer header needed for the current tunnel flags. */
138 UINT32 headRoom = GreTunHdrSize(tunKey->flags);
/* Running count of header bytes not yet written; checked at loop end. */
140 UINT32 counterHeadRoom;
/* The caller must hand in an empty out parameter. */
144 ASSERT(*newNbl == NULL);
146 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
147 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
/* Stage 1: large-send offload. Fetch the MSS for the LSO type in use. */
150 NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
152 tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
153 TcpLargeSendNetBufferListInfo);
154 switch (tsoInfo.Transmit.Type) {
155 case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
156 mss = tsoInfo.LsoV1Transmit.MSS;
158 case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
159 mss = tsoInfo.LsoV2Transmit.MSS;
162 OVS_LOG_ERROR("Unknown LSO transmit type:%d",
163 tsoInfo.Transmit.Type);
165 OVS_LOG_TRACE("MSS %u packet len %u", mss,
168 OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
/* Split the packet into segments; the result becomes the new NBL. */
169 *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
171 if (*newNbl == NULL) {
172 OVS_LOG_ERROR("Unable to segment NBL");
173 return NDIS_STATUS_FAILURE;
175 /* Clear out LSO flags after this point */
176 NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
/* Private GRE state of the vport; used below for the outer IP id. */
180 vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport);
183 /* If we didn't split the packet above, make a copy now */
184 if (*newNbl == NULL) {
185 *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
187 if (*newNbl == NULL) {
188 OVS_LOG_ERROR("Unable to copy NBL");
189 return NDIS_STATUS_FAILURE;
192 * To this point we do not have GRE hardware offloading.
193 * Apply defined checksums
/* Stage 3: map the first buffer of the copy to patch inner checksums. */
195 curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
196 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
197 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
200 status = NDIS_STATUS_RESOURCES;
/* The checksum requests are read from the original NBL (curNbl). */
204 NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
205 csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
206 TcpIpChecksumNetBufferListInfo);
/* Skip to the first byte of packet data inside the mapped MDL. */
208 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
210 if (layers->isIPv4) {
211 IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
213 if (csumInfo.Transmit.IpHeaderChecksum) {
215 ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
/*
 * L4 checksum: the check field is first seeded with the pseudo-header
 * checksum, then overwritten with the checksum computed over the L4 bytes
 * starting at layers->l4Offset.
 * NOTE(review): csumLength is narrowed to UINT16 -- confirm packetLength
 * cannot exceed that range at this point.
 */
218 if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
219 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
220 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
221 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
222 IPPROTO_TCP, csumLength);
223 tcp->check = CalculateChecksumNB(curNb, csumLength,
224 (UINT32)(layers->l4Offset));
225 } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
226 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
/*
 * NOTE(review): the UDP header is located at ip + sizeof *ip, whereas the
 * TCP branch uses bufferStart + layers->l4Offset. The two differ when the
 * IPv4 header carries options (ihl > 5) -- confirm whether l4Offset should
 * be used here as well.
 */
227 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
228 udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
229 IPPROTO_UDP, csumLength);
230 udp->check = CalculateChecksumNB(curNb, csumLength,
231 (UINT32)(layers->l4Offset));
233 } else if (layers->isIPv6) {
234 IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
236 if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
237 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
238 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
239 tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
240 (UINT32 *) &ip->daddr,
241 IPPROTO_TCP, csumLength);
242 tcp->check = CalculateChecksumNB(curNb, csumLength,
243 (UINT32)(layers->l4Offset));
244 } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
245 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
/*
 * NOTE(review): same concern as the IPv4 branch -- ip + sizeof *ip is only
 * the UDP header when no IPv6 extension headers precede it; the TCP branch
 * uses layers->l4Offset instead. Confirm.
 */
246 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
247 udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
248 (UINT32 *) &ip->daddr,
249 IPPROTO_UDP, csumLength);
250 udp->check = CalculateChecksumNB(curNb, csumLength,
251 (UINT32)(layers->l4Offset));
254 /* Clear out TcpIpChecksumNetBufferListInfo flag */
255 NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
/*
 * Stage 4: prepend the outer headers to every NET_BUFFER of the list.
 * NOTE(review): the loop walks the list reachable from curNbl; the headers
 * are meant for the copy, so confirm curNbl refers to *newNbl here.
 */
259 for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
260 curNb = curNb->Next) {
262 counterHeadRoom = headRoom;
/* Grow the buffer at the front by the size of the outer headers. */
264 status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
265 if (status != NDIS_STATUS_SUCCESS) {
269 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
270 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
273 status = NDIS_STATUS_RESOURCES;
277 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
278 if (NET_BUFFER_NEXT_NB(curNb)) {
279 OVS_LOG_TRACE("nb length %u next %u",
280 NET_BUFFER_DATA_LENGTH(curNb),
281 NET_BUFFER_DATA_LENGTH(curNb->Next));
/*
 * Outer Ethernet header. Destination and source MAC are copied with one
 * NdisMoveMemory call, which relies on dstMacAddr being immediately
 * followed by srcMacAddr inside fwdInfo; the ASSERT checks that layout.
 */
285 ethHdr = (EthHdr *)bufferStart;
286 ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
287 (PCHAR)&fwdInfo->srcMacAddr);
288 NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
289 sizeof ethHdr->Destination + sizeof ethHdr->Source);
290 ethHdr->Type = htons(ETH_TYPE_IPV4);
292 counterHeadRoom -= sizeof *ethHdr;
/* Outer IPv4 header, placed directly after the Ethernet header. */
296 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
/* Header length in 32-bit words; no IP options are emitted. */
298 ipHdr->ihl = sizeof *ipHdr / 4;
/*
 * NOTE(review): a protocol-number constant is used as the IP version
 * value; presumably IPPROTO_IPV4 evaluates to 4 -- confirm.
 */
299 ipHdr->version = IPPROTO_IPV4;
300 ipHdr->tos = tunKey->tos;
/* Total length covers everything in the buffer after the Ethernet header. */
301 ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
/* IP id comes from the per-vport counter, truncated to 16 bits. */
302 ipHdr->id = (uint16)atomic_add64(&vportGre->ipId,
303 NET_BUFFER_DATA_LENGTH(curNb));
304 ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
/* A zero TTL in the tunnel key falls back to 64. */
306 ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64;
307 ipHdr->protocol = IPPROTO_GRE;
308 ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
309 ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
/* Outer addresses are taken from the forwarding entry. */
310 ipHdr->saddr = fwdInfo->srcIpAddr;
311 ipHdr->daddr = fwdInfo->dstIpAddr;
314 ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
316 counterHeadRoom -= sizeof *ipHdr;
/* GRE base header, placed directly after the outer IPv4 header. */
320 greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
321 greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags);
322 greHdr->protocolType = GRE_NET_TEB;
324 counterHeadRoom -= sizeof *greHdr;
/* Optional GRE fields follow the base header. */
327 PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
/*
 * Optional checksum field (4 bytes), zero-filled.
 * NOTE(review): no computation of the GRE checksum is visible after the
 * field is zeroed -- confirm whether the checksum is filled in elsewhere.
 */
329 if (tunKey->flags & OVS_TNL_F_CSUM) {
330 RtlZeroMemory(currentOffset, 4);
333 counterHeadRoom -= 4;
/* Optional key field: the upper 32 bits of tunnelId are written as-is. */
337 if (tunKey->flags & OVS_TNL_F_KEY) {
338 RtlZeroMemory(currentOffset, 4);
339 UINT32 key = (tunKey->tunnelId >> 32);
340 RtlCopyMemory(currentOffset, &key, sizeof key);
343 counterHeadRoom -= 4;
/* Every byte of the reserved headroom must have been accounted for. */
348 ASSERT(counterHeadRoom == 0);
352 return STATUS_SUCCESS;
/* Failure cleanup: the NBL created by this function is completed. */
355 OvsCompleteNBL(switchContext, *newNbl, TRUE);
/*
 * Removes the outer Ethernet, IPv4 and GRE headers from a received packet
 * and records the tunnel parameters found in them in tunKey.
 *
 * switchContext: passed to the NBL helper routines.
 * curNbl:        received, GRE-encapsulated packet.
 * tunKey:        in/out; src, dst, tos and ttl are filled from the outer
 *                IPv4 header, flags and tunnelId from the GRE header.
 * newNbl:        out parameter receiving the copy made of curNbl.
 *
 * Returns NDIS_STATUS_SUCCESS on the success path,
 * NDIS_STATUS_INVALID_LENGTH when the packet is not longer than the tunnel
 * header, and NDIS_STATUS_RESOURCES when the copy cannot be made.
 * STATUS_NDIS_INVALID_PACKET is recorded in status when the GRE protocol
 * type is not GRE_NET_TEB.
 */
361 OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
362 PNET_BUFFER_LIST curNbl,
363 OvsIPv4TunnelKey *tunKey,
364 PNET_BUFFER_LIST *newNbl)
371 UINT32 tunnelSize = 0, packetLength = 0;
376 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
377 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
/*
 * NOTE(review): the expected header size is derived from tunKey->flags as
 * they are on entry, i.e. before the GRE flags of this packet are parsed
 * further down. Confirm the caller sets flags so that this length check
 * and the copy size below cover the optional GRE fields.
 */
378 tunnelSize = GreTunHdrSize(tunKey->flags);
/* The packet must carry payload beyond the tunnel headers. */
379 if (packetLength <= tunnelSize) {
380 return NDIS_STATUS_INVALID_LENGTH;
384 * Create a copy of the NBL so that we have all the headers in one MDL.
386 *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
387 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
388 TRUE /*copy NBL info */);
390 if (*newNbl == NULL) {
391 return NDIS_STATUS_RESOURCES;
395 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
396 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
/*
 * NOTE(review): the MDL offset is added to the mapped address in the same
 * expression. If the mapping call returns NULL and the offset is non-zero,
 * bufferStart ends up non-NULL, so a NULL test on bufferStart after this
 * statement would not detect the failure. Confirm, and consider testing
 * the mapped address before adding the offset (as the encap path does).
 */
397 bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
398 NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
400 status = NDIS_STATUS_RESOURCES;
/* headRoom accumulates the size of each header as it is parsed. */
404 ethHdr = (EthHdr *)bufferStart;
405 headRoom += sizeof *ethHdr;
/* Outer IPv4 header: copy the tunnel endpoints, TOS and TTL into tunKey. */
407 ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
408 tunKey->src = ipHdr->saddr;
409 tunKey->dst = ipHdr->daddr;
410 tunKey->tos = ipHdr->tos;
411 tunKey->ttl = ipHdr->ttl;
413 headRoom += sizeof *ipHdr;
/*
 * NOTE(review): the GRE header is located at ipHdr + sizeof *ipHdr, which
 * assumes the outer IPv4 header carries no options; no check of ipHdr->ihl
 * is visible -- confirm.
 */
415 greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
416 headRoom += sizeof *greHdr;
418 /* Validate the GRE header protocol type. */
419 if (greHdr->protocolType != GRE_NET_TEB) {
420 status = STATUS_NDIS_INVALID_PACKET;
/* Optional GRE fields follow the base header. */
424 PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
/*
 * Checksum present: only the tunnel flag is recorded.
 * NOTE(review): no verification of the received GRE checksum is visible --
 * confirm whether that is intentional.
 */
426 if (greHdr->flags & GRE_CSUM) {
427 tunKey->flags |= OVS_TNL_F_CSUM;
/* Key present: the 4-byte key becomes the upper 32 bits of tunnelId. */
432 if (greHdr->flags & GRE_KEY) {
433 tunKey->flags |= OVS_TNL_F_KEY;
435 RtlCopyMemory(&key, currentOffset, 4);
436 tunKey->tunnelId = (UINT64)key << 32;
441 /* Clear out the receive flag for the inner packet. */
442 NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
/* Strip the tunnel headers, sized from the flags as updated above. */
443 NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE,
/*
 * NOTE(review): flags are only OR-ed into tunKey->flags above, never
 * cleared, so this equality holds only if the flags on entry contain no
 * bits that are absent from the received GRE header -- confirm.
 */
445 ASSERT(headRoom == GreTunHdrSize(tunKey->flags));
446 return NDIS_STATUS_SUCCESS;
/* Failure cleanup: the copy created by this function is completed. */
449 OvsCompleteNBL(switchContext, *newNbl, TRUE);