datapath-windows: Fix small bug in GRE.
[cascardo/ovs.git] / datapath-windows / ovsext / Gre.c
1 /*
2  * Copyright (c) 2015 Cloudbase Solutions Srl
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18
19 #include "Atomic.h"
20 #include "Checksum.h"
21 #include "Flow.h"
22 #include "Gre.h"
23 #include "IpHelper.h"
24 #include "NetProto.h"
25 #include "PacketIO.h"
26 #include "PacketParser.h"
27 #include "Switch.h"
28 #include "User.h"
29 #include "Util.h"
30 #include "Vport.h"
31
32 #ifdef OVS_DBG_MOD
33 #undef OVS_DBG_MOD
34 #endif
35 #define OVS_DBG_MOD OVS_DBG_GRE
36 #include "Debug.h"
37
38 static NDIS_STATUS
39 OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
40               const OvsIPv4TunnelKey *tunKey,
41               const POVS_FWD_INFO fwdInfo,
42               POVS_PACKET_HDR_INFO layers,
43               POVS_SWITCH_CONTEXT switchContext,
44               PNET_BUFFER_LIST *newNbl);
45
46 /*
47  * --------------------------------------------------------------------------
48  * OvsInitGreTunnel --
49  *    Initialize GRE tunnel module.
50  * --------------------------------------------------------------------------
51  */
52 NTSTATUS
53 OvsInitGreTunnel(POVS_VPORT_ENTRY vport)
54 {
55     POVS_GRE_VPORT grePort;
56
57     grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort),
58                                                        OVS_GRE_POOL_TAG);
59     if (!grePort) {
60         OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT");
61         return STATUS_INSUFFICIENT_RESOURCES;
62     }
63
64     RtlZeroMemory(grePort, sizeof(*grePort));
65     vport->priv = (PVOID)grePort;
66     return STATUS_SUCCESS;
67 }
68
69 /*
70  * --------------------------------------------------------------------------
71  * OvsCleanupGreTunnel --
72  *    Cleanup GRE Tunnel module.
73  * --------------------------------------------------------------------------
74  */
75 void
76 OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport)
77 {
78     if (vport->ovsType != OVS_VPORT_TYPE_GRE ||
79         vport->priv == NULL) {
80         return;
81     }
82
83     OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG);
84     vport->priv = NULL;
85 }
86
87 /*
88  * --------------------------------------------------------------------------
89  * OvsEncapGre --
90  *     Encapsulates a packet with an GRE header.
91  * --------------------------------------------------------------------------
92  */
93 NDIS_STATUS
94 OvsEncapGre(POVS_VPORT_ENTRY vport,
95             PNET_BUFFER_LIST curNbl,
96             OvsIPv4TunnelKey *tunKey,
97             POVS_SWITCH_CONTEXT switchContext,
98             POVS_PACKET_HDR_INFO layers,
99             PNET_BUFFER_LIST *newNbl)
100 {
101     OVS_FWD_INFO fwdInfo;
102     NDIS_STATUS status;
103
104     status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
105     if (status != STATUS_SUCCESS) {
106         OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
107         return NDIS_STATUS_FAILURE;
108     }
109
110     status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers,
111                            switchContext, newNbl);
112     return status;
113 }
114
115 /*
116  * --------------------------------------------------------------------------
117  * OvsDoEncapGre --
118  *    Internal utility function which actually does the GRE encap.
119  * --------------------------------------------------------------------------
120  */
121 NDIS_STATUS
122 OvsDoEncapGre(POVS_VPORT_ENTRY vport,
123               PNET_BUFFER_LIST curNbl,
124               const OvsIPv4TunnelKey *tunKey,
125               const POVS_FWD_INFO fwdInfo,
126               POVS_PACKET_HDR_INFO layers,
127               POVS_SWITCH_CONTEXT switchContext,
128               PNET_BUFFER_LIST *newNbl)
129 {
130     NDIS_STATUS status;
131     PNET_BUFFER curNb;
132     PMDL curMdl;
133     PUINT8 bufferStart;
134     EthHdr *ethHdr;
135     IPHdr *ipHdr;
136     PGREHdr greHdr;
137     POVS_GRE_VPORT vportGre;
138     UINT32 headRoom = GreTunHdrSize(tunKey->flags);
139 #if DBG
140     UINT32 counterHeadRoom;
141 #endif
142     UINT32 packetLength;
143     ULONG mss = 0;
144     ASSERT(*newNbl == NULL);
145
146     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
147     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
148
149     if (layers->isTcp) {
150         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
151
152         tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
153                                              TcpLargeSendNetBufferListInfo);
154         switch (tsoInfo.Transmit.Type) {
155             case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
156                 mss = tsoInfo.LsoV1Transmit.MSS;
157                 break;
158             case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
159                 mss = tsoInfo.LsoV2Transmit.MSS;
160                 break;
161             default:
162                 OVS_LOG_ERROR("Unknown LSO transmit type:%d",
163                               tsoInfo.Transmit.Type);
164         }
165         OVS_LOG_TRACE("MSS %u packet len %u", mss,
166                       packetLength);
167         if (mss) {
168             OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
169             *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
170                                        mss, headRoom);
171             if (*newNbl == NULL) {
172                 OVS_LOG_ERROR("Unable to segment NBL");
173                 return NDIS_STATUS_FAILURE;
174             }
175             /* Clear out LSO flags after this point */
176             NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
177         }
178     }
179
180     vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport);
181     ASSERT(vportGre);
182
183     /* If we didn't split the packet above, make a copy now */
184     if (*newNbl == NULL) {
185         *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
186                                     FALSE /*NBL info*/);
187         if (*newNbl == NULL) {
188             OVS_LOG_ERROR("Unable to copy NBL");
189             return NDIS_STATUS_FAILURE;
190         }
191         /*
192          * To this point we do not have GRE hardware offloading.
193          * Apply defined checksums
194          */
195         curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
196         curMdl = NET_BUFFER_CURRENT_MDL(curNb);
197         bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
198                                                            LowPagePriority);
199         if (!bufferStart) {
200             status = NDIS_STATUS_RESOURCES;
201             goto ret_error;
202         }
203
204         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
205         csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
206                                               TcpIpChecksumNetBufferListInfo);
207
208         bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
209
210         if (layers->isIPv4) {
211             IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
212
213             if (csumInfo.Transmit.IpHeaderChecksum) {
214                 ip->check = 0;
215                 ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
216             }
217
218             if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
219                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
220                 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
221                 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
222                                               IPPROTO_TCP, csumLength);
223                 tcp->check = CalculateChecksumNB(curNb, csumLength,
224                                                  (UINT32)(layers->l4Offset));
225             } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
226                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
227                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
228                 udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
229                                               IPPROTO_UDP, csumLength);
230                 udp->check = CalculateChecksumNB(curNb, csumLength,
231                                                  (UINT32)(layers->l4Offset));
232             }
233         } else if (layers->isIPv6) {
234             IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
235
236             if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
237                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
238                 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
239                 tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
240                                                 (UINT32 *) &ip->daddr,
241                                                 IPPROTO_TCP, csumLength);
242                 tcp->check = CalculateChecksumNB(curNb, csumLength,
243                                                  (UINT32)(layers->l4Offset));
244             } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
245                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
246                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
247                 udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
248                                                 (UINT32 *) &ip->daddr,
249                                                 IPPROTO_UDP, csumLength);
250                 udp->check = CalculateChecksumNB(curNb, csumLength,
251                                                  (UINT32)(layers->l4Offset));
252             }
253         }
254         /* Clear out TcpIpChecksumNetBufferListInfo flag */
255         NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
256     }
257
258     curNbl = *newNbl;
259     for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
260          curNb = curNb->Next) {
261 #if DBG
262         counterHeadRoom = headRoom;
263 #endif
264         status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
265         if (status != NDIS_STATUS_SUCCESS) {
266             goto ret_error;
267         }
268
269         curMdl = NET_BUFFER_CURRENT_MDL(curNb);
270         bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
271                                                            LowPagePriority);
272         if (!bufferStart) {
273             status = NDIS_STATUS_RESOURCES;
274             goto ret_error;
275         }
276
277         bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
278         if (NET_BUFFER_NEXT_NB(curNb)) {
279             OVS_LOG_TRACE("nb length %u next %u",
280                           NET_BUFFER_DATA_LENGTH(curNb),
281                           NET_BUFFER_DATA_LENGTH(curNb->Next));
282         }
283
284         /* L2 header */
285         ethHdr = (EthHdr *)bufferStart;
286         ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
287                (PCHAR)&fwdInfo->srcMacAddr);
288         NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
289                        sizeof ethHdr->Destination + sizeof ethHdr->Source);
290         ethHdr->Type = htons(ETH_TYPE_IPV4);
291 #if DBG
292         counterHeadRoom -= sizeof *ethHdr;
293 #endif
294
295         /* IP header */
296         ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
297
298         ipHdr->ihl = sizeof *ipHdr / 4;
299         ipHdr->version = IPPROTO_IPV4;
300         ipHdr->tos = tunKey->tos;
301         ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
302         ipHdr->id = (uint16)atomic_add64(&vportGre->ipId,
303                                          NET_BUFFER_DATA_LENGTH(curNb));
304         ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
305                           IP_DF_NBO : 0;
306         ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64;
307         ipHdr->protocol = IPPROTO_GRE;
308         ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
309         ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
310         ipHdr->saddr = fwdInfo->srcIpAddr;
311         ipHdr->daddr = fwdInfo->dstIpAddr;
312
313         ipHdr->check = 0;
314         ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
315 #if DBG
316         counterHeadRoom -= sizeof *ipHdr;
317 #endif
318
319         /* GRE header */
320         greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
321         greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags);
322         greHdr->protocolType = GRE_NET_TEB;
323 #if DBG
324         counterHeadRoom -= sizeof *greHdr;
325 #endif
326
327         PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
328
329         if (tunKey->flags & OVS_TNL_F_CSUM) {
330             RtlZeroMemory(currentOffset, 4);
331             currentOffset += 4;
332 #if DBG
333             counterHeadRoom -= 4;
334 #endif
335         }
336
337         if (tunKey->flags & OVS_TNL_F_KEY) {
338             RtlZeroMemory(currentOffset, 4);
339             UINT32 key = (tunKey->tunnelId >> 32);
340             RtlCopyMemory(currentOffset, &key, sizeof key);
341             currentOffset += 4;
342 #if DBG
343             counterHeadRoom -= 4;
344 #endif
345         }
346
347 #if DBG
348         ASSERT(counterHeadRoom == 0);
349 #endif
350
351     }
352     return STATUS_SUCCESS;
353
354 ret_error:
355     OvsCompleteNBL(switchContext, *newNbl, TRUE);
356     *newNbl = NULL;
357     return status;
358 }
359
360 NDIS_STATUS
361 OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
362             PNET_BUFFER_LIST curNbl,
363             OvsIPv4TunnelKey *tunKey,
364             PNET_BUFFER_LIST *newNbl)
365 {
366     PNET_BUFFER curNb;
367     PMDL curMdl;
368     EthHdr *ethHdr;
369     IPHdr *ipHdr;
370     GREHdr *greHdr;
371     UINT32 tunnelSize = 0, packetLength = 0;
372     UINT32 headRoom = 0;
373     PUINT8 bufferStart;
374     NDIS_STATUS status;
375
376     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
377     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
378     tunnelSize = GreTunHdrSize(tunKey->flags);
379     if (packetLength <= tunnelSize) {
380         return NDIS_STATUS_INVALID_LENGTH;
381     }
382
383     /*
384      * Create a copy of the NBL so that we have all the headers in one MDL.
385      */
386     *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
387                                 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
388                                 TRUE /*copy NBL info */);
389
390     if (*newNbl == NULL) {
391         return NDIS_STATUS_RESOURCES;
392     }
393
394     curNbl = *newNbl;
395     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
396     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
397     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
398                   NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
399     if (!bufferStart) {
400         status = NDIS_STATUS_RESOURCES;
401         goto dropNbl;
402     }
403
404     ethHdr = (EthHdr *)bufferStart;
405     headRoom += sizeof *ethHdr;
406
407     ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
408     tunKey->src = ipHdr->saddr;
409     tunKey->dst = ipHdr->daddr;
410     tunKey->tos = ipHdr->tos;
411     tunKey->ttl = ipHdr->ttl;
412     tunKey->pad = 0;
413     headRoom += sizeof *ipHdr;
414
415     greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
416     headRoom += sizeof *greHdr;
417
418     /* Validate if GRE header protocol type. */
419     if (greHdr->protocolType != GRE_NET_TEB) {
420         status = STATUS_NDIS_INVALID_PACKET;
421         goto dropNbl;
422     }
423
424     PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
425
426     if (greHdr->flags & GRE_CSUM) {
427         tunKey->flags |= OVS_TNL_F_CSUM;
428         currentOffset += 4;
429         headRoom += 4;
430     }
431
432     if (greHdr->flags & GRE_KEY) {
433         tunKey->flags |= OVS_TNL_F_KEY;
434         UINT32 key = 0;
435         RtlCopyMemory(&key, currentOffset, 4);
436         tunKey->tunnelId = (UINT64)key << 32;
437         currentOffset += 4;
438         headRoom += 4;
439     }
440
441     /* Clear out the receive flag for the inner packet. */
442     NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
443     NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE,
444                                   NULL);
445     ASSERT(headRoom == GreTunHdrSize(tunKey->flags));
446     return NDIS_STATUS_SUCCESS;
447
448 dropNbl:
449     OvsCompleteNBL(switchContext, *newNbl, TRUE);
450     *newNbl = NULL;
451     return status;
452 }