3ebfda3f8495a550f40bc5f3df2a46a01ac38678
[cascardo/ovs.git] / datapath-windows / ovsext / Gre.c
1 /*
2  * Copyright (c) 2015 Cloudbase Solutions Srl
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18
19 #include "Atomic.h"
20 #include "Checksum.h"
21 #include "Flow.h"
22 #include "Gre.h"
23 #include "IpHelper.h"
24 #include "NetProto.h"
25 #include "PacketIO.h"
26 #include "PacketParser.h"
27 #include "Switch.h"
28 #include "User.h"
29 #include "Util.h"
30 #include "Vport.h"
31
32 #ifdef OVS_DBG_MOD
33 #undef OVS_DBG_MOD
34 #endif
35 #define OVS_DBG_MOD OVS_DBG_GRE
36 #include "Debug.h"
37
38 static NDIS_STATUS
39 OvsDoEncapGre(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
40               const OvsIPv4TunnelKey *tunKey,
41               const POVS_FWD_INFO fwdInfo,
42               POVS_PACKET_HDR_INFO layers,
43               POVS_SWITCH_CONTEXT switchContext,
44               PNET_BUFFER_LIST *newNbl);
45
46 /*
47  * --------------------------------------------------------------------------
48  * OvsInitGreTunnel --
49  *    Initialize GRE tunnel module.
50  * --------------------------------------------------------------------------
51  */
52 NTSTATUS
53 OvsInitGreTunnel(POVS_VPORT_ENTRY vport)
54 {
55     POVS_GRE_VPORT grePort;
56
57     grePort = (POVS_GRE_VPORT)OvsAllocateMemoryWithTag(sizeof(*grePort),
58                                                        OVS_GRE_POOL_TAG);
59     if (!grePort) {
60         OVS_LOG_ERROR("Insufficient memory, can't allocate OVS_GRE_VPORT");
61         return STATUS_INSUFFICIENT_RESOURCES;
62     }
63
64     RtlZeroMemory(grePort, sizeof(*grePort));
65     vport->priv = (PVOID)grePort;
66     return STATUS_SUCCESS;
67 }
68
69 /*
70  * --------------------------------------------------------------------------
71  * OvsCleanupGreTunnel --
72  *    Cleanup GRE Tunnel module.
73  * --------------------------------------------------------------------------
74  */
75 void
76 OvsCleanupGreTunnel(POVS_VPORT_ENTRY vport)
77 {
78     if (vport->ovsType != OVS_VPORT_TYPE_GRE ||
79         vport->priv == NULL) {
80         return;
81     }
82
83     OvsFreeMemoryWithTag(vport->priv, OVS_GRE_POOL_TAG);
84     vport->priv = NULL;
85 }
86
87 /*
88  * --------------------------------------------------------------------------
89  * OvsEncapGre --
90  *     Encapsulates a packet with an GRE header.
91  * --------------------------------------------------------------------------
92  */
93 NDIS_STATUS
94 OvsEncapGre(POVS_VPORT_ENTRY vport,
95             PNET_BUFFER_LIST curNbl,
96             OvsIPv4TunnelKey *tunKey,
97             POVS_SWITCH_CONTEXT switchContext,
98             POVS_PACKET_HDR_INFO layers,
99             PNET_BUFFER_LIST *newNbl)
100 {
101     OVS_FWD_INFO fwdInfo;
102     NDIS_STATUS status;
103
104     status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
105     if (status != STATUS_SUCCESS) {
106         OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
107         return NDIS_STATUS_FAILURE;
108     }
109
110     status = OvsDoEncapGre(vport, curNbl, tunKey, &fwdInfo, layers,
111                            switchContext, newNbl);
112     return status;
113 }
114
115 /*
116  * --------------------------------------------------------------------------
117  * OvsDoEncapGre --
118  *    Internal utility function which actually does the GRE encap.
119  * --------------------------------------------------------------------------
120  */
121 NDIS_STATUS
122 OvsDoEncapGre(POVS_VPORT_ENTRY vport,
123               PNET_BUFFER_LIST curNbl,
124               const OvsIPv4TunnelKey *tunKey,
125               const POVS_FWD_INFO fwdInfo,
126               POVS_PACKET_HDR_INFO layers,
127               POVS_SWITCH_CONTEXT switchContext,
128               PNET_BUFFER_LIST *newNbl)
129 {
130     NDIS_STATUS status;
131     PNET_BUFFER curNb;
132     PMDL curMdl;
133     PUINT8 bufferStart;
134     EthHdr *ethHdr;
135     IPHdr *ipHdr;
136     PGREHdr greHdr;
137     POVS_GRE_VPORT vportGre;
138     UINT32 headRoom = GreTunHdrSize(tunKey->flags);
139 #if DBG
140     UINT32 counterHeadRoom;
141 #endif
142     UINT32 packetLength;
143     ULONG mss = 0;
144     ASSERT(*newNbl == NULL);
145
146     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
147     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
148
149     if (layers->isTcp) {
150         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO tsoInfo;
151
152         tsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
153                                              TcpLargeSendNetBufferListInfo);
154         switch (tsoInfo.Transmit.Type) {
155             case NDIS_TCP_LARGE_SEND_OFFLOAD_V1_TYPE:
156                 mss = tsoInfo.LsoV1Transmit.MSS;
157                 break;
158             case NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE:
159                 mss = tsoInfo.LsoV2Transmit.MSS;
160                 break;
161             default:
162                 OVS_LOG_ERROR("Unknown LSO transmit type:%d",
163                               tsoInfo.Transmit.Type);
164                 return NDIS_STATUS_FAILURE;
165         }
166         OVS_LOG_TRACE("MSS %u packet len %u", mss,
167                       packetLength);
168         if (mss) {
169             OVS_LOG_TRACE("l4Offset %d", layers->l4Offset);
170             *newNbl = OvsTcpSegmentNBL(switchContext, curNbl, layers,
171                                        mss, headRoom);
172             if (*newNbl == NULL) {
173                 OVS_LOG_ERROR("Unable to segment NBL");
174                 return NDIS_STATUS_FAILURE;
175             }
176             /* Clear out LSO flags after this point */
177             NET_BUFFER_LIST_INFO(*newNbl, TcpLargeSendNetBufferListInfo) = 0;
178         }
179     }
180
181     vportGre = (POVS_GRE_VPORT)GetOvsVportPriv(vport);
182     ASSERT(vportGre);
183
184     /* If we didn't split the packet above, make a copy now */
185     if (*newNbl == NULL) {
186         *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
187                                     FALSE /*NBL info*/);
188         if (*newNbl == NULL) {
189             OVS_LOG_ERROR("Unable to copy NBL");
190             return NDIS_STATUS_FAILURE;
191         }
192         /*
193          * To this point we do not have GRE hardware offloading.
194          * Apply defined checksums
195          */
196         curNb = NET_BUFFER_LIST_FIRST_NB(*newNbl);
197         curMdl = NET_BUFFER_CURRENT_MDL(curNb);
198         bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
199                                                            LowPagePriority);
200         if (!bufferStart) {
201             status = NDIS_STATUS_RESOURCES;
202             goto ret_error;
203         }
204
205         NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
206         csumInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
207                                               TcpIpChecksumNetBufferListInfo);
208
209         bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
210
211         if (layers->isIPv4) {
212             IPHdr *ip = (IPHdr *)(bufferStart + layers->l3Offset);
213
214             if (csumInfo.Transmit.IpHeaderChecksum) {
215                 ip->check = 0;
216                 ip->check = IPChecksum((UINT8 *)ip, 4 * ip->ihl, 0);
217             }
218
219             if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
220                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
221                 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
222                 tcp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
223                                               IPPROTO_TCP, csumLength);
224                 tcp->check = CalculateChecksumNB(curNb, csumLength,
225                                                  (UINT32)(layers->l4Offset));
226             } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
227                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
228                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
229                 udp->check = IPPseudoChecksum(&ip->saddr, &ip->daddr,
230                                               IPPROTO_UDP, csumLength);
231                 udp->check = CalculateChecksumNB(curNb, csumLength,
232                                                  (UINT32)(layers->l4Offset));
233             }
234         } else if (layers->isIPv6) {
235             IPv6Hdr *ip = (IPv6Hdr *)(bufferStart + layers->l3Offset);
236
237             if (layers->isTcp && csumInfo.Transmit.TcpChecksum) {
238                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
239                 TCPHdr *tcp = (TCPHdr *)(bufferStart + layers->l4Offset);
240                 tcp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
241                                                 (UINT32 *) &ip->daddr,
242                                                 IPPROTO_TCP, csumLength);
243                 tcp->check = CalculateChecksumNB(curNb, csumLength,
244                                                  (UINT32)(layers->l4Offset));
245             } else if (layers->isUdp && csumInfo.Transmit.UdpChecksum) {
246                 UINT16 csumLength = (UINT16)(packetLength - layers->l4Offset);
247                 UDPHdr *udp = (UDPHdr *)((PCHAR)ip + sizeof *ip);
248                 udp->check = IPv6PseudoChecksum((UINT32 *) &ip->saddr,
249                                                 (UINT32 *) &ip->daddr,
250                                                 IPPROTO_UDP, csumLength);
251                 udp->check = CalculateChecksumNB(curNb, csumLength,
252                                                  (UINT32)(layers->l4Offset));
253             }
254         }
255         /* Clear out TcpIpChecksumNetBufferListInfo flag */
256         NET_BUFFER_LIST_INFO(*newNbl, TcpIpChecksumNetBufferListInfo) = 0;
257     }
258
259     curNbl = *newNbl;
260     for (curNb = NET_BUFFER_LIST_FIRST_NB(curNbl); curNb != NULL;
261          curNb = curNb->Next) {
262 #if DBG
263         counterHeadRoom = headRoom;
264 #endif
265         status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
266         if (status != NDIS_STATUS_SUCCESS) {
267             goto ret_error;
268         }
269
270         curMdl = NET_BUFFER_CURRENT_MDL(curNb);
271         bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl,
272                                                            LowPagePriority);
273         if (!bufferStart) {
274             status = NDIS_STATUS_RESOURCES;
275             goto ret_error;
276         }
277
278         bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
279         if (NET_BUFFER_NEXT_NB(curNb)) {
280             OVS_LOG_TRACE("nb length %u next %u",
281                           NET_BUFFER_DATA_LENGTH(curNb),
282                           NET_BUFFER_DATA_LENGTH(curNb->Next));
283         }
284
285         /* L2 header */
286         ethHdr = (EthHdr *)bufferStart;
287         ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
288                (PCHAR)&fwdInfo->srcMacAddr);
289         NdisMoveMemory(ethHdr->Destination, fwdInfo->dstMacAddr,
290                        sizeof ethHdr->Destination + sizeof ethHdr->Source);
291         ethHdr->Type = htons(ETH_TYPE_IPV4);
292 #if DBG
293         counterHeadRoom -= sizeof *ethHdr;
294 #endif
295
296         /* IP header */
297         ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
298
299         ipHdr->ihl = sizeof *ipHdr / 4;
300         ipHdr->version = IPPROTO_IPV4;
301         ipHdr->tos = tunKey->tos;
302         ipHdr->tot_len = htons(NET_BUFFER_DATA_LENGTH(curNb) - sizeof *ethHdr);
303         ipHdr->id = (uint16)atomic_add64(&vportGre->ipId,
304                                          NET_BUFFER_DATA_LENGTH(curNb));
305         ipHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
306                           IP_DF_NBO : 0;
307         ipHdr->ttl = tunKey->ttl ? tunKey->ttl : 64;
308         ipHdr->protocol = IPPROTO_GRE;
309         ASSERT(tunKey->dst == fwdInfo->dstIpAddr);
310         ASSERT(tunKey->src == fwdInfo->srcIpAddr || tunKey->src == 0);
311         ipHdr->saddr = fwdInfo->srcIpAddr;
312         ipHdr->daddr = fwdInfo->dstIpAddr;
313
314         ipHdr->check = 0;
315         ipHdr->check = IPChecksum((UINT8 *)ipHdr, sizeof *ipHdr, 0);
316 #if DBG
317         counterHeadRoom -= sizeof *ipHdr;
318 #endif
319
320         /* GRE header */
321         greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
322         greHdr->flags = OvsTunnelFlagsToGreFlags(tunKey->flags);
323         greHdr->protocolType = GRE_NET_TEB;
324 #if DBG
325         counterHeadRoom -= sizeof *greHdr;
326 #endif
327
328         PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
329
330         if (tunKey->flags & OVS_TNL_F_CSUM) {
331             RtlZeroMemory(currentOffset, 4);
332             currentOffset += 4;
333 #if DBG
334             counterHeadRoom -= 4;
335 #endif
336         }
337
338         if (tunKey->flags & OVS_TNL_F_KEY) {
339             RtlZeroMemory(currentOffset, 4);
340             UINT32 key = (tunKey->tunnelId >> 32);
341             RtlCopyMemory(currentOffset, &key, sizeof key);
342             currentOffset += 4;
343 #if DBG
344             counterHeadRoom -= 4;
345 #endif
346         }
347
348 #if DBG
349         ASSERT(counterHeadRoom == 0);
350 #endif
351
352     }
353     return STATUS_SUCCESS;
354
355 ret_error:
356     OvsCompleteNBL(switchContext, *newNbl, TRUE);
357     *newNbl = NULL;
358     return status;
359 }
360
361 NDIS_STATUS
362 OvsDecapGre(POVS_SWITCH_CONTEXT switchContext,
363             PNET_BUFFER_LIST curNbl,
364             OvsIPv4TunnelKey *tunKey,
365             PNET_BUFFER_LIST *newNbl)
366 {
367     PNET_BUFFER curNb;
368     PMDL curMdl;
369     EthHdr *ethHdr;
370     IPHdr *ipHdr;
371     GREHdr *greHdr;
372     UINT32 tunnelSize = 0, packetLength = 0;
373     UINT32 headRoom = 0;
374     PUINT8 bufferStart;
375     NDIS_STATUS status;
376
377     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
378     packetLength = NET_BUFFER_DATA_LENGTH(curNb);
379     tunnelSize = GreTunHdrSize(tunKey->flags);
380     if (packetLength <= tunnelSize) {
381         return NDIS_STATUS_INVALID_LENGTH;
382     }
383
384     /*
385      * Create a copy of the NBL so that we have all the headers in one MDL.
386      */
387     *newNbl = OvsPartialCopyNBL(switchContext, curNbl,
388                                 tunnelSize + OVS_DEFAULT_COPY_SIZE, 0,
389                                 TRUE /*copy NBL info */);
390
391     if (*newNbl == NULL) {
392         return NDIS_STATUS_RESOURCES;
393     }
394
395     curNbl = *newNbl;
396     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
397     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
398     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority) +
399                   NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
400     if (!bufferStart) {
401         status = NDIS_STATUS_RESOURCES;
402         goto dropNbl;
403     }
404
405     ethHdr = (EthHdr *)bufferStart;
406     headRoom += sizeof *ethHdr;
407
408     ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
409     tunKey->src = ipHdr->saddr;
410     tunKey->dst = ipHdr->daddr;
411     tunKey->tos = ipHdr->tos;
412     tunKey->ttl = ipHdr->ttl;
413     tunKey->pad = 0;
414     headRoom += sizeof *ipHdr;
415
416     greHdr = (GREHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
417     headRoom += sizeof *greHdr;
418
419     /* Validate if GRE header protocol type. */
420     if (greHdr->protocolType != GRE_NET_TEB) {
421         status = STATUS_NDIS_INVALID_PACKET;
422         goto dropNbl;
423     }
424
425     PCHAR currentOffset = (PCHAR)greHdr + sizeof *greHdr;
426
427     if (greHdr->flags & GRE_CSUM) {
428         tunKey->flags |= OVS_TNL_F_CSUM;
429         currentOffset += 4;
430         headRoom += 4;
431     }
432
433     if (greHdr->flags & GRE_KEY) {
434         tunKey->flags |= OVS_TNL_F_KEY;
435         UINT32 key = 0;
436         RtlCopyMemory(&key, currentOffset, 4);
437         tunKey->tunnelId = (UINT64)key << 32;
438         currentOffset += 4;
439         headRoom += 4;
440     }
441
442     /* Clear out the receive flag for the inner packet. */
443     NET_BUFFER_LIST_INFO(curNbl, TcpIpChecksumNetBufferListInfo) = 0;
444     NdisAdvanceNetBufferDataStart(curNb, GreTunHdrSize(tunKey->flags), FALSE,
445                                   NULL);
446     ASSERT(headRoom == GreTunHdrSize(tunKey->flags));
447     return NDIS_STATUS_SUCCESS;
448
449 dropNbl:
450     OvsCompleteNBL(switchContext, *newNbl, TRUE);
451     *newNbl = NULL;
452     return status;
453 }