datapath-windows: use correct dst port during Vxlan Tx
[cascardo/ovs.git] / datapath-windows / ovsext / Stt.c
1 /*
2  * Copyright (c) 2015 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Switch.h"
20 #include "Vport.h"
21 #include "Flow.h"
22 #include "Stt.h"
23 #include "IpHelper.h"
24 #include "Checksum.h"
25 #include "User.h"
26 #include "PacketIO.h"
27 #include "Flow.h"
28 #include "PacketParser.h"
29 #include "Atomic.h"
30 #include "Util.h"
31
32 #ifdef OVS_DBG_MOD
33 #undef OVS_DBG_MOD
34 #endif
35 #define OVS_DBG_MOD OVS_DBG_STT
36 #include "Debug.h"
37
38 static NDIS_STATUS
39 OvsDoEncapStt(POVS_VPORT_ENTRY vport, PNET_BUFFER_LIST curNbl,
40               const OvsIPv4TunnelKey *tunKey,
41               const POVS_FWD_INFO fwdInfo,
42               POVS_PACKET_HDR_INFO layers,
43               POVS_SWITCH_CONTEXT switchContext,
44               PNET_BUFFER_LIST *newNbl);
45
46 /*
47  * --------------------------------------------------------------------------
48  * OvsInitSttTunnel --
49  *    Initialize STT tunnel module.
50  * --------------------------------------------------------------------------
51  */
52 NTSTATUS
53 OvsInitSttTunnel(POVS_VPORT_ENTRY vport,
54                  UINT16 tcpDestPort)
55 {
56     POVS_STT_VPORT sttPort;
57
58     sttPort = (POVS_STT_VPORT) OvsAllocateMemoryWithTag(sizeof(*sttPort),
59                                                         OVS_STT_POOL_TAG);
60     if (!sttPort) {
61         OVS_LOG_ERROR("Insufficient memory, can't allocate STT_VPORT");
62         return STATUS_INSUFFICIENT_RESOURCES;
63     }
64
65     RtlZeroMemory(sttPort, sizeof(*sttPort));
66     sttPort->dstPort = tcpDestPort;
67     vport->priv = (PVOID) sttPort;
68     return STATUS_SUCCESS;
69 }
70
71 /*
72  * --------------------------------------------------------------------------
73  * OvsCleanupSttTunnel --
74  *    Cleanup STT Tunnel module.
75  * --------------------------------------------------------------------------
76  */
77 void
78 OvsCleanupSttTunnel(POVS_VPORT_ENTRY vport)
79 {
80     if (vport->ovsType != OVS_VPORT_TYPE_STT ||
81         vport->priv == NULL) {
82         return;
83     }
84
85     OvsFreeMemoryWithTag(vport->priv, OVS_STT_POOL_TAG);
86     vport->priv = NULL;
87 }
88
89 /*
90  * --------------------------------------------------------------------------
91  * OvsEncapStt --
92  *     Encapsulates a packet with an STT header.
93  * --------------------------------------------------------------------------
94  */
95 NDIS_STATUS
96 OvsEncapStt(POVS_VPORT_ENTRY vport,
97             PNET_BUFFER_LIST curNbl,
98             OvsIPv4TunnelKey *tunKey,
99             POVS_SWITCH_CONTEXT switchContext,
100             POVS_PACKET_HDR_INFO layers,
101             PNET_BUFFER_LIST *newNbl)
102 {
103     OVS_FWD_INFO fwdInfo;
104     NDIS_STATUS status;
105
106     UNREFERENCED_PARAMETER(switchContext);
107     status = OvsLookupIPFwdInfo(tunKey->dst, &fwdInfo);
108     if (status != STATUS_SUCCESS) {
109         OvsFwdIPHelperRequest(NULL, 0, tunKey, NULL, NULL, NULL);
110         /*
111          * XXX This case where the ARP table is not populated is
112          * currently not handled
113          */
114         return NDIS_STATUS_FAILURE;
115     }
116
117     status = OvsDoEncapStt(vport, curNbl, tunKey, &fwdInfo, layers,
118                            switchContext, newNbl);
119     return status;
120 }
121
122 /*
123  * --------------------------------------------------------------------------
124  * OvsDoEncapStt --
125  *    Internal utility function which actually does the STT encap.
126  * --------------------------------------------------------------------------
127  */
128 NDIS_STATUS
129 OvsDoEncapStt(POVS_VPORT_ENTRY vport,
130               PNET_BUFFER_LIST curNbl,
131               const OvsIPv4TunnelKey *tunKey,
132               const POVS_FWD_INFO fwdInfo,
133               POVS_PACKET_HDR_INFO layers,
134               POVS_SWITCH_CONTEXT switchContext,
135               PNET_BUFFER_LIST *newNbl)
136 {
137     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
138     PMDL curMdl = NULL;
139     PNET_BUFFER curNb;
140     PUINT8 buf = NULL;
141     EthHdr *outerEthHdr;
142     IPHdr *outerIpHdr;
143     TCPHdr *outerTcpHdr;
144     SttHdr *sttHdr;
145     UINT32 innerFrameLen, ipTotalLen;
146     POVS_STT_VPORT vportStt;
147     UINT32 headRoom = OvsGetSttTunHdrSize();
148     UINT32 tcpChksumLen;
149
150     UNREFERENCED_PARAMETER(layers);
151
152     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
153     if (layers->isTcp) {
154         NDIS_TCP_LARGE_SEND_OFFLOAD_NET_BUFFER_LIST_INFO lsoInfo;
155
156         lsoInfo.Value = NET_BUFFER_LIST_INFO(curNbl,
157                 TcpLargeSendNetBufferListInfo);
158         if (lsoInfo.LsoV1Transmit.MSS) {
159             /* XXX We don't handle LSO yet */
160             OVS_LOG_ERROR("LSO on STT is not supported");
161             return NDIS_STATUS_FAILURE;
162         }
163     }
164
165     vportStt = (POVS_STT_VPORT) GetOvsVportPriv(vport);
166     ASSERT(vportStt);
167
168     *newNbl = OvsPartialCopyNBL(switchContext, curNbl, 0, headRoom,
169                                 FALSE /*copy NblInfo*/);
170     if (*newNbl == NULL) {
171         OVS_LOG_ERROR("Unable to copy NBL");
172         return NDIS_STATUS_FAILURE;
173     }
174
175     curNbl = *newNbl;
176     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
177     /* NB Chain should be split before */
178     ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
179
180     innerFrameLen = NET_BUFFER_DATA_LENGTH(curNb);
181     /*
182      * External port can't be removed as we hold the dispatch lock
183      * We also check if the external port was removed beforecalling
184      * port encapsulation functions
185      */
186     if (innerFrameLen > OvsGetExternalMtu(switchContext) - headRoom) {
187         OVS_LOG_ERROR("Packet too large (size %d, mtu %d). Can't encapsulate",
188                 innerFrameLen, OvsGetExternalMtu(switchContext));
189         status = NDIS_STATUS_FAILURE;
190         goto ret_error;
191     }
192
193     status = NdisRetreatNetBufferDataStart(curNb, headRoom, 0, NULL);
194     if (status != NDIS_STATUS_SUCCESS) {
195         ASSERT(!"Unable to NdisRetreatNetBufferDataStart(headroom)");
196         OVS_LOG_ERROR("Unable to NdisRetreatNetBufferDataStart(headroom)");
197         goto ret_error;
198     }
199
200     /*
201      * Make sure that the headroom for the tunnel header is continguous in
202      * memory.
203      */
204     curMdl = NET_BUFFER_CURRENT_MDL(curNb);
205     ASSERT((int) (MmGetMdlByteCount(curMdl) - NET_BUFFER_CURRENT_MDL_OFFSET(curNb))
206                 >= (int) headRoom);
207
208     buf = (PUINT8) MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
209     if (!buf) {
210         ASSERT(!"MmGetSystemAddressForMdlSafe failed");
211         OVS_LOG_ERROR("MmGetSystemAddressForMdlSafe failed");
212         status = NDIS_STATUS_RESOURCES;
213         goto ret_error;
214     }
215
216     buf += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
217     outerEthHdr = (EthHdr *)buf;
218     outerIpHdr = (IPHdr *) (outerEthHdr + 1);
219     outerTcpHdr = (TCPHdr *) (outerIpHdr + 1);
220     sttHdr = (SttHdr *) (outerTcpHdr + 1);
221
222     /* L2 header */
223     ASSERT(((PCHAR)&fwdInfo->dstMacAddr + sizeof fwdInfo->dstMacAddr) ==
224             (PCHAR)&fwdInfo->srcMacAddr);
225     NdisMoveMemory(outerEthHdr->Destination, fwdInfo->dstMacAddr,
226                     sizeof outerEthHdr->Destination + sizeof outerEthHdr->Source);
227     outerEthHdr->Type = htons(ETH_TYPE_IPV4);
228
229     /* L3 header */
230     outerIpHdr->ihl = sizeof(IPHdr) >> 2;
231     outerIpHdr->version = IPPROTO_IPV4;
232     outerIpHdr->tos = tunKey->tos;
233
234     ipTotalLen = sizeof(IPHdr) + sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
235     outerIpHdr->tot_len = htons(ipTotalLen);
236     ASSERT(ipTotalLen < 65536);
237
238     outerIpHdr->id = (uint16) atomic_add64(&vportStt->ipId, innerFrameLen);
239     outerIpHdr->frag_off = (tunKey->flags & OVS_TNL_F_DONT_FRAGMENT) ?
240                            IP_DF_NBO : 0;
241     outerIpHdr->ttl = tunKey->ttl? tunKey->ttl : 64;
242     outerIpHdr->protocol = IPPROTO_TCP;
243     outerIpHdr->check = 0;
244     outerIpHdr->saddr = fwdInfo->srcIpAddr;
245     outerIpHdr->daddr = tunKey->dst;
246     outerIpHdr->check = IPChecksum((uint8 *)outerIpHdr, sizeof *outerIpHdr, 0);
247
248     /* L4 header */
249     RtlZeroMemory(outerTcpHdr, sizeof *outerTcpHdr);
250     outerTcpHdr->source = htons(tunKey->flow_hash | 32768);
251     outerTcpHdr->dest = htons(vportStt->dstPort);
252     outerTcpHdr->seq = htonl((STT_HDR_LEN + innerFrameLen) <<
253                              STT_SEQ_LEN_SHIFT);
254     outerTcpHdr->ack_seq = htonl(atomic_inc64(&vportStt->ackNo));
255     outerTcpHdr->doff = sizeof(TCPHdr) >> 2;
256     outerTcpHdr->psh = 1;
257     outerTcpHdr->ack = 1;
258     outerTcpHdr->window = (uint16) ~0;
259
260     /* Calculate pseudo header chksum */
261     tcpChksumLen = sizeof(TCPHdr) + STT_HDR_LEN + innerFrameLen;
262     ASSERT(tcpChksumLen < 65535);
263     outerTcpHdr->check = IPPseudoChecksum(&fwdInfo->srcIpAddr,(uint32 *) &tunKey->dst,
264                                           IPPROTO_TCP, (uint16) tcpChksumLen);
265     sttHdr->version = 0;
266
267     /* XXX need to peek into the inner packet, hard code for now */
268     sttHdr->flags = STT_PROTO_IPV4;
269     sttHdr->l4Offset = 0;
270
271     sttHdr->reserved = 0;
272     /* XXX Used for large TCP packets.Not sure how it is used, clarify */
273     sttHdr->mss = 0;
274     sttHdr->vlanTCI = 0;
275     sttHdr->key = tunKey->tunnelId;
276     /* Zero out stt padding */
277     *(uint16 *)(sttHdr + 1) = 0;
278
279     /* Calculate software tcp checksum */
280     outerTcpHdr->check = CalculateChecksumNB(curNb, (uint16) tcpChksumLen,
281                                              sizeof(EthHdr) + sizeof(IPHdr));
282     if (outerTcpHdr->check == 0) {
283         status = NDIS_STATUS_FAILURE;
284         goto ret_error;
285     }
286
287     return STATUS_SUCCESS;
288
289 ret_error:
290     OvsCompleteNBL(switchContext, *newNbl, TRUE);
291     *newNbl = NULL;
292     return status;
293 }
294
295 /*
296  * --------------------------------------------------------------------------
297  * OvsDecapStt --
298  *     Decapsulates an STT packet.
299  * --------------------------------------------------------------------------
300  */
301 NDIS_STATUS
302 OvsDecapStt(POVS_SWITCH_CONTEXT switchContext,
303             PNET_BUFFER_LIST curNbl,
304             OvsIPv4TunnelKey *tunKey,
305             PNET_BUFFER_LIST *newNbl)
306 {
307     NDIS_STATUS status = NDIS_STATUS_FAILURE;
308     PNET_BUFFER curNb;
309     IPHdr *ipHdr;
310     char *ipBuf[sizeof(IPHdr)];
311     SttHdr *sttHdr;
312     char *sttBuf[STT_HDR_LEN];
313     UINT32 advanceCnt, hdrLen;
314
315     curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
316     ASSERT(NET_BUFFER_NEXT_NB(curNb) == NULL);
317
318     if (NET_BUFFER_DATA_LENGTH(curNb) < OvsGetSttTunHdrSize()) {
319         OVS_LOG_ERROR("Packet length received is less than the tunnel header:"
320             " %d<%d\n", NET_BUFFER_DATA_LENGTH(curNb), OvsGetSttTunHdrSize());
321         return NDIS_STATUS_INVALID_LENGTH;
322     }
323
324     /* Skip Eth header */
325     hdrLen = sizeof(EthHdr);
326     NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
327     advanceCnt = hdrLen;
328
329     ipHdr = NdisGetDataBuffer(curNb, sizeof *ipHdr, (PVOID) &ipBuf,
330                                                     1 /*no align*/, 0);
331     ASSERT(ipHdr);
332
333     /* Skip IP & TCP headers */
334     hdrLen = sizeof(IPHdr) + sizeof(TCPHdr),
335     NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
336     advanceCnt += hdrLen;
337
338     /* STT Header */
339     sttHdr = NdisGetDataBuffer(curNb, sizeof *sttHdr, (PVOID) &sttBuf,
340                                                     1 /*no align*/, 0);
341     ASSERT(sttHdr);
342
343     /* Initialize the tunnel key */
344     tunKey->dst = ipHdr->daddr;
345     tunKey->src = ipHdr->saddr;
346     tunKey->tunnelId = sttHdr->key;
347     tunKey->flags = (OVS_TNL_F_CSUM | OVS_TNL_F_KEY);
348     tunKey->tos = ipHdr->tos;
349     tunKey->ttl = ipHdr->ttl;
350     tunKey->pad = 0;
351
352     /* Skip stt header, DataOffset points to inner pkt now. */
353     hdrLen = STT_HDR_LEN;
354     NdisAdvanceNetBufferDataStart(curNb, hdrLen, FALSE, NULL);
355     advanceCnt += hdrLen;
356
357     *newNbl = OvsPartialCopyNBL(switchContext, curNbl, OVS_DEFAULT_COPY_SIZE,
358                                 0, FALSE /*copy NBL info*/);
359
360     ASSERT(advanceCnt == OvsGetSttTunHdrSize());
361     status = NdisRetreatNetBufferDataStart(curNb, advanceCnt, 0, NULL);
362
363     if (*newNbl == NULL) {
364         OVS_LOG_ERROR("OvsDecapStt: Unable to allocate a new cloned NBL");
365         status = NDIS_STATUS_RESOURCES;
366     }
367
368     return status;
369 }