netdev-dpdk: fix mbuf leaks
[cascardo/ovs.git] / datapath-windows / ovsext / BufferMgmt.c
1 /*
2  * Copyright (c) 2014, 2016 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ****************************************************************************
19  *
20  *       Simple Buffer Management framework for OVS
21  *
22  *  It introduces four NDIS buffer pools
23  *     **Fix size net buffer list pool--this is used for small buffer
24  *     One allocation will include NBL + NB + MDL + Data + CONTEXT.
25  *
26  *     **Variable size net buffer list pool--this is used for variable size
27  *     buffer. The allocation of net buffer list will include NBL + NB +
28  *     CONTEXT, a separate allocation of MDL + data buffer is required.
29  *
30  *     **NBL only net buffer list pool-- this is used for partial copy
31  *     (or clone). In this case we can not allocate net buffer list and
32  *     net buffer at the same time.
33  *
34  *     **Net buffer pool-- this is required when net buffer need to be
35  *     allocated separately.
36  *
37  *  A Buffer context is defined to track the buffer specific information
38  *  so that during NBL completion, proper action can be taken. Please see
39  *  code for details.
40  *
41  *  Here is the usage of the management API
42  *  Every external NBL should have its NBL context initialized by calling
43  *     OvsInitExternalNBLContext()
44  *
45  *  After the external NBL context is initialized, it can call the following
46  *  API to allocate, copy or partial copy NBL.
47  *
48  *     OvsAllocateFixSizeNBL()
49  *     OvsAllocateVariableSizeNBL()
50  *
51  *     OvsPartialCopyNBL()
52  *     OvsPartialCopyToMultipleNBLs()
53  *
54  *     OvsFullCopyNBL()
55  *     OvsFullCopyToMultipleNBLs()
56  *
57  *  See code comments for detail description of the functions.
58  *
59  *  All NBLs are completed through
60  *       OvsCompleteNBL()
61  *     If this API returns a non-NULL value, then the returned NBL should be
62  *     returned to upper layer by calling
63  *     NdisFSendNetBufferListsComplete() if the buffer is from upper
64  *     layer. In case of WFP, it can call the corresponding completion routine
65  *     to return the NBL to the framework.
66  *
67  *  NOTE:
68  *     1. Copy or partial copy will not copy destination port array
69  *     2. Copy or partial copy will copy src port id and index
70  *     3. New Allocated NBL will have src port set to default port id
71  *     4. If original packet has direction flag set, the copied or partial
72  *        copied NBL will still be in same direction.
73  *     5. When you advance or retreat the buffer, you may need to update
74  *        relevant meta data to keep it consistent.
75  *
76  * ****************************************************************************
77  */
78
79 #include "precomp.h"
80 #include "Debug.h"
81 #include "Flow.h"
82 #include "Offload.h"
83 #include "NetProto.h"
84 #include "PacketParser.h"
85 #include "Switch.h"
86 #include "Vport.h"
87
88 #ifdef OVS_DBG_MOD
89 #undef OVS_DBG_MOD
90 #endif
91 #define OVS_DBG_MOD OVS_DBG_BUFMGMT
92
93
94 /*
95  * --------------------------------------------------------------------------
96  * OvsInitBufferPool --
97  *
98  *    Allocate NBL and NB pool
99  *
100  * XXX: more optimization may be done for buffer management include local cache
101  * of NBL, NB, data, context, MDL.
102  * --------------------------------------------------------------------------
103  */
104 NDIS_STATUS
105 OvsInitBufferPool(PVOID ovsContext)
106 {
107     POVS_NBL_POOL ovsPool;
108     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
109     NET_BUFFER_LIST_POOL_PARAMETERS  nblParam;
110     NET_BUFFER_POOL_PARAMETERS nbParam;
111
112     C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8);
113
114     OVS_LOG_TRACE("Enter: context: %p", context);
115
116     ovsPool = &context->ovsPool;
117     RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL));
118     ovsPool->ndisHandle = context->NdisFilterHandle;
119     ovsPool->ndisContext = context->NdisSwitchContext;
120     /*
121      * fix size NBL pool includes
122      *    NBL + NB + MDL + DATA + Context
123      *    This is mainly used for Packet execute or slow path when copy is
124      *    required and size is less than OVS_DEFAULT_DATA_SIZE. We expect
125      *    Most of packet from user space will use this Pool. (This is
126      *    true for all bfd and cfm packet.
127      */
128     RtlZeroMemory(&nblParam, sizeof (nblParam));
129     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
130                            NDIS_OBJECT_TYPE_DEFAULT,
131                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
132                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
133     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
134     nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG;
135     nblParam.fAllocateNetBuffer = TRUE;
136     nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE;
137
138     ovsPool->fixSizePool =
139         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
140     if (ovsPool->fixSizePool == NULL) {
141         goto pool_cleanup;
142     }
143
144     /*
145      * Zero Size NBL Pool includes
146      *    NBL + NB + Context
147      *    This is mainly for packet with large data Size, in this case MDL and
148      *    Data will be allocate separately.
149      */
150     RtlZeroMemory(&nblParam, sizeof (nblParam));
151     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
152                            NDIS_OBJECT_TYPE_DEFAULT,
153                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
154                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
155
156     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
157     nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG;
158     nblParam.fAllocateNetBuffer = TRUE;
159     nblParam.DataSize = 0;
160
161     ovsPool->zeroSizePool =
162         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
163     if (ovsPool->zeroSizePool == NULL) {
164         goto pool_cleanup;
165     }
166
167     /*
168      * NBL only pool just includes
169      *    NBL (+ context)
170      *    This is mainly used for clone and partial copy
171      */
172     RtlZeroMemory(&nblParam, sizeof (nblParam));
173     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
174                            NDIS_OBJECT_TYPE_DEFAULT,
175                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
176                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
177
178     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
179     nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG;
180     nblParam.fAllocateNetBuffer = FALSE;
181     nblParam.DataSize = 0;
182
183     ovsPool->nblOnlyPool =
184         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
185     if (ovsPool->nblOnlyPool == NULL) {
186         goto pool_cleanup;
187     }
188
189     /* nb Pool
190      *    NB only pool, used for copy
191      */
192
193     OVS_INIT_OBJECT_HEADER(&nbParam.Header,
194                            NDIS_OBJECT_TYPE_DEFAULT,
195                            NET_BUFFER_POOL_PARAMETERS_REVISION_1,
196                            NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1);
197     nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG;
198     nbParam.DataSize = 0;
199     ovsPool->nbPool =
200         NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam);
201     if (ovsPool->nbPool == NULL) {
202         goto pool_cleanup;
203     }
204     OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p"
205                   "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool,
206                   ovsPool->nblOnlyPool, ovsPool->nbPool);
207     return NDIS_STATUS_SUCCESS;
208
209 pool_cleanup:
210     OvsCleanupBufferPool(context);
211     OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool");
212     return NDIS_STATUS_RESOURCES;
213 }
214
215
216 /*
217  * --------------------------------------------------------------------------
218  * OvsCleanupBufferPool --
219  *  Free Buffer pool for NBL and NB.
220  * --------------------------------------------------------------------------
221  */
222 VOID
223 OvsCleanupBufferPool(PVOID ovsContext)
224 {
225     POVS_NBL_POOL ovsPool;
226     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
227     ovsPool = &context->ovsPool;
228     OVS_LOG_TRACE("Enter: context: %p", context);
229 #ifdef DBG
230     ASSERT(ovsPool->fixNBLCount == 0);
231     ASSERT(ovsPool->zeroNBLCount == 0);
232     ASSERT(ovsPool->nblOnlyCount == 0);
233     ASSERT(ovsPool->nbCount == 0);
234     ASSERT(ovsPool->sysNBLCount == 0);
235     ASSERT(ovsPool->fragNBLCount == 0);
236 #endif
237
238     if (ovsPool->fixSizePool) {
239         NdisFreeNetBufferListPool(ovsPool->fixSizePool);
240         ovsPool->fixSizePool = NULL;
241     }
242     if (ovsPool->zeroSizePool) {
243         NdisFreeNetBufferListPool(ovsPool->zeroSizePool);
244         ovsPool->zeroSizePool = NULL;
245     }
246     if (ovsPool->nblOnlyPool) {
247         NdisFreeNetBufferListPool(ovsPool->nblOnlyPool);
248         ovsPool->nblOnlyPool = NULL;
249     }
250     if (ovsPool->nbPool) {
251         NdisFreeNetBufferPool(ovsPool->nbPool);
252         ovsPool->nbPool = NULL;
253     }
254     OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool");
255 }
256
257
258 static VOID
259 OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
260                   UINT16 flags,
261                   UINT32 origDataLength,
262                   UINT32 srcPortNo)
263 {
264     ctx->magic = OVS_CTX_MAGIC;
265     ctx->refCount = 1;
266     ctx->flags = flags;
267     ctx->srcPortNo = srcPortNo;
268     ctx->origDataLength = origDataLength;
269 }
270
271
272 static VOID
273 OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl)
274 {
275     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
276     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
277     if (info == NULL) {
278         return;
279     }
280     OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d "
281                  "isDataSafe: %s, safeDataSize: %d",
282                  nbl, info->NumAvailableDestinations, info->SourcePortId,
283                  info->SourceNicIndex,
284                  info->IsPacketDataSafe ? "TRUE" : "FALSE",
285                  info->IsPacketDataSafe ? 0 : info->SafePacketDataSize);
286
287 }
288
289 static VOID
290 OvsDumpNBLContext(PNET_BUFFER_LIST nbl)
291 {
292     PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context;
293     if (ctx == NULL) {
294         OVS_LOG_INFO("No Net Buffer List context");
295         return;
296     }
297     while (ctx) {
298         OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d",
299                      nbl, ctx, ctx->Size, ctx->Offset);
300         ctx = ctx->Next;
301     }
302 }
303
304
305 static VOID
306 OvsDumpMDLChain(PMDL mdl)
307 {
308     PMDL tmp;
309     tmp = mdl;
310     while (tmp) {
311         OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p"
312                      " ByteCount: %d, ByteOffset: %d",
313                      tmp, tmp->Size, tmp->MappedSystemVa,
314                      tmp->StartVa, tmp->ByteCount, tmp->ByteOffset);
315         tmp = tmp->Next;
316     }
317 }
318
319
320 static VOID
321 OvsDumpNetBuffer(PNET_BUFFER nb)
322 {
323     OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p "
324                  "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d",
325                  nb,
326                  NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle,
327                  NET_BUFFER_FIRST_MDL(nb),
328                  NET_BUFFER_CURRENT_MDL(nb),
329                  NET_BUFFER_CURRENT_MDL_OFFSET(nb),
330                  NET_BUFFER_DATA_LENGTH(nb),
331                  NET_BUFFER_DATA_OFFSET(nb));
332     OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb));
333 }
334
335
336 static VOID
337 OvsDumpNetBufferList(PNET_BUFFER_LIST nbl)
338 {
339     PNET_BUFFER nb;
340     OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d "
341                  "poolHandle: %p",
342                  nbl, nbl->ParentNetBufferList,
343                  nbl->SourceHandle, nbl->ChildRefCount,
344                  nbl->NdisPoolHandle);
345     OvsDumpNBLContext(nbl);
346     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
347     while (nb) {
348         OvsDumpNetBuffer(nb);
349         nb = NET_BUFFER_NEXT_NB(nb);
350     }
351 }
352
353 /*
354  * --------------------------------------------------------------------------
355  * OvsAllocateFixSizeNBL --
356  *
357  *    Allocate fix size NBL which include
358  *       NBL + NB + MBL + Data + Context
359  *    Please note:
360  *       * Forwarding Context is allocated, but forwarding detail information
361  *       is not initailized.
362  *       * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128
363  *       byte).
364  * --------------------------------------------------------------------------
365  */
366 PNET_BUFFER_LIST
367 OvsAllocateFixSizeNBL(PVOID ovsContext,
368                       UINT32 size,
369                       UINT32 headRoom)
370 {
371     PNET_BUFFER_LIST nbl = NULL;
372     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
373     POVS_BUFFER_CONTEXT ctx;
374     POVS_NBL_POOL ovsPool = &context->ovsPool;
375     NDIS_STATUS status;
376     UINT32 line;
377     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
378
379     if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) {
380         line = __LINE__;
381         goto allocate_done;
382     }
383
384     nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool,
385                                     (UINT16)sizeof (OVS_BUFFER_CONTEXT),
386                                     (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
387
388     if (nbl == NULL) {
389         line = __LINE__;
390         goto allocate_done;
391     }
392
393     nbl->SourceHandle = ovsPool->ndisHandle;
394     status = context->NdisSwitchHandlers.
395              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
396
397     if (status != NDIS_STATUS_SUCCESS) {
398         NdisFreeNetBufferList(nbl);
399         nbl = NULL;
400         line = __LINE__;
401         goto allocate_done;
402     }
403     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
404     ASSERT(info);
405     info->IsPacketDataSafe = TRUE;
406     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
407
408     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
409                                            size, 0, NULL);
410     ASSERT(status == NDIS_STATUS_SUCCESS);
411
412 #ifdef DBG
413     InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount);
414     OvsDumpNetBufferList(nbl);
415     OvsDumpForwardingDetails(nbl);
416 #endif
417
418     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
419     ASSERT(ctx);
420
421     OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL |
422                       OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size,
423                       OVS_DPPORT_NUMBER_INVALID);
424     line = __LINE__;
425 allocate_done:
426     OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line);
427     return nbl;
428 }
429
430
431 static PMDL
432 OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
433                       UINT32 dataSize)
434 {
435     PMDL mdl;
436     PVOID data;
437
438     data = OvsAllocateMemoryWithTag(dataSize, OVS_MDL_POOL_TAG);
439     if (data == NULL) {
440         return NULL;
441     }
442
443     mdl = NdisAllocateMdl(ndisHandle, data, dataSize);
444     if (mdl == NULL) {
445         OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
446     }
447
448     return mdl;
449 }
450
451
452 static VOID
453 OvsFreeMDLAndData(PMDL mdl)
454 {
455     PVOID data;
456
457     data = MmGetMdlVirtualAddress(mdl);
458     NdisFreeMdl(mdl);
459     OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
460 }
461
462
463 /*
464  * --------------------------------------------------------------------------
465  * OvsAllocateVariableSizeNBL --
466  *
467  *    Allocate variable size NBL, the NBL looks like
468  *      NBL + NB + Context
469  *      MDL + Data
470  * --------------------------------------------------------------------------
471  */
472 PNET_BUFFER_LIST
473 OvsAllocateVariableSizeNBL(PVOID ovsContext,
474                            UINT32 size,
475                            UINT32 headRoom)
476 {
477     PNET_BUFFER_LIST nbl = NULL;
478     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
479     POVS_NBL_POOL ovsPool = &context->ovsPool;
480     POVS_BUFFER_CONTEXT ctx;
481     UINT32 realSize;
482     PMDL mdl;
483     NDIS_STATUS status;
484     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
485     if (size == 0) {
486         return NULL;
487     }
488     realSize = MEM_ALIGN_SIZE(size + headRoom);
489
490     mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize);
491     if (mdl == NULL) {
492         return NULL;
493     }
494
495     nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool,
496                                          (UINT16)sizeof (OVS_BUFFER_CONTEXT),
497                                          (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL,
498                                                 mdl, realSize, 0);
499     if (nbl == NULL) {
500         OvsFreeMDLAndData(mdl);
501         return NULL;
502     }
503
504     nbl->SourceHandle = ovsPool->ndisHandle;
505     status = context->NdisSwitchHandlers.
506              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
507
508     if (status != NDIS_STATUS_SUCCESS) {
509        /*
510         * do we need to remove mdl from nbl XXX
511         */
512         OvsFreeMDLAndData(mdl);
513         NdisFreeNetBufferList(nbl);
514         return NULL;
515     }
516
517     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
518     ASSERT(info);
519     info->IsPacketDataSafe = TRUE;
520     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
521     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
522                                            size, 0, NULL);
523     ASSERT(status == NDIS_STATUS_SUCCESS);
524
525 #ifdef DBG
526     InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount);
527     OvsDumpNetBufferList(nbl);
528     OvsDumpForwardingDetails(nbl);
529 #endif
530
531     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
532
533     OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
534                            OVS_BUFFER_PRIVATE_FORWARD_CONTEXT |
535                            OVS_BUFFER_FROM_ZERO_SIZE_POOL,
536                            size, OVS_DPPORT_NUMBER_INVALID);
537
538     OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl);
539     return nbl;
540 }
541
542
543 /*
544  * --------------------------------------------------------------------------
545  * OvsInitExternalNBLContext --
546  *
547  *     For NBL not allocated by OVS, it will allocate and initialize
548  *     the NBL context.
549  * --------------------------------------------------------------------------
550  */
551 POVS_BUFFER_CONTEXT
552 OvsInitExternalNBLContext(PVOID ovsContext,
553                           PNET_BUFFER_LIST nbl,
554                           BOOLEAN isRecv)
555 {
556     NDIS_HANDLE poolHandle;
557     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
558     POVS_BUFFER_CONTEXT ctx;
559     PNET_BUFFER nb;
560     NDIS_STATUS status;
561     UINT16 flags;
562
563     poolHandle = NdisGetPoolFromNetBufferList(nbl);
564
565     if (poolHandle == context->ovsPool.ndisHandle ||
566         nbl->SourceHandle == context->ovsPool.ndisHandle) {
567         return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
568     }
569     status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
570                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
571                                               OVS_OTHER_POOL_TAG);
572     if (status != NDIS_STATUS_SUCCESS) {
573         return NULL;
574     }
575 #ifdef DBG
576     OvsDumpNBLContext(nbl);
577     InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount);
578 #endif
579     flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
580     flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
581     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
582
583     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
584     /*
585      * we use first nb to decide whether we need advance or retreat during
586      * complete.
587      */
588     OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb),
589                       OVS_DPPORT_NUMBER_INVALID);
590     return ctx;
591 }
592
593 /*
594  * --------------------------------------------------------------------------
595  * OvsAllocateNBLContext
596  *
597  *    Create NBL buffer context and forwarding context.
598  * --------------------------------------------------------------------------
599  */
600 NDIS_STATUS
601 OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context,
602                       PNET_BUFFER_LIST nbl)
603 {
604     POVS_NBL_POOL ovsPool = &context->ovsPool;
605     NDIS_STATUS status;
606
607     status = NdisAllocateNetBufferListContext(nbl,
608                                               sizeof (OVS_BUFFER_CONTEXT),
609                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
610                                               OVS_OTHER_POOL_TAG);
611     if (status != NDIS_STATUS_SUCCESS) {
612         return NDIS_STATUS_FAILURE;
613     }
614
615     nbl->SourceHandle = ovsPool->ndisHandle;
616     status = context->NdisSwitchHandlers.
617         AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
618
619     if (status != NDIS_STATUS_SUCCESS) {
620         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
621         return NDIS_STATUS_FAILURE;
622     }
623     return status;
624 }
625
626 /*
627  * --------------------------------------------------------------------------
628  * OvsFreeNBLContext
629  *
630  *    Free the NBL buffer context and forwarding context.
631  * --------------------------------------------------------------------------
632  */
633 NDIS_STATUS
634 OvsFreeNBLContext(POVS_SWITCH_CONTEXT context,
635                   PNET_BUFFER_LIST nbl)
636 {
637     POVS_NBL_POOL ovsPool = &context->ovsPool;
638
639     context->NdisSwitchHandlers.
640          FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
641     NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
642
643     return NDIS_STATUS_SUCCESS;
644 }
645
646 /*
647  * --------------------------------------------------------------------------
648  * OvsCopyNBLInfo
649  *
650  *    Copy NBL info from src to dst
651  * --------------------------------------------------------------------------
652  */
653 NDIS_STATUS
654 OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl,
655                POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize,
656                BOOLEAN copyNblInfo)
657 {
658     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo;
659     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
660
661     srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl);
662     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl);
663     if (srcInfo) {
664 #ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO
665         status = context->NdisSwitchHandlers.
666             CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0);
667
668         if (status != NDIS_STATUS_SUCCESS) {
669             return status;
670         }
671 #else
672         dstInfo->SourcePortId = srcInfo->SourcePortId;
673         dstInfo->SourceNicIndex = srcInfo->SourceNicIndex;
674         if (copyNblInfo) {
675             if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) {
676                 NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl);
677             } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) {
678                 NdisCopySendNetBufferListInfo(dstNbl, srcNbl);
679             }
680         }
681 #endif
682         dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe;
683         if (!srcInfo->IsPacketDataSafe && copySize >
684             srcInfo->SafePacketDataSize) {
685             srcInfo->SafePacketDataSize = copySize;
686         }
687     } else {
688         /*
689          * Assume all data are safe
690          */
691         dstInfo->IsPacketDataSafe = TRUE;
692         dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
693     }
694     return status;
695 }
696
697 /*
698  * --------------------------------------------------------------------------
699  * OvsPartialCopyNBL --
700  *
701  *    Partial copy NBL, if there is multiple NB in NBL, each one will be
702  *    copied. We also reserve headroom for the new NBL.
703  *
704  *    Please note,
705  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
706  *       this function.
707  *       The NBL should already have ref to itself so that during copy
708  *       it will not be freed.
709  * --------------------------------------------------------------------------
710  */
711 PNET_BUFFER_LIST
712 OvsPartialCopyNBL(PVOID ovsContext,
713                   PNET_BUFFER_LIST nbl,
714                   UINT32 copySize,
715                   UINT32 headRoom,
716                   BOOLEAN copyNblInfo)
717 {
718     PNET_BUFFER_LIST newNbl;
719     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
720     NDIS_STATUS status;
721     PNET_BUFFER srcNb, dstNb;
722     ULONG byteCopied;
723     POVS_NBL_POOL ovsPool = &context->ovsPool;
724     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
725     UINT16 flags;
726
727     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
728     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
729         OVS_LOG_INFO("src nbl must have ctx initialized");
730         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
731         return NULL;
732     }
733
734     if (copySize) {
735         NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL);
736     }
737     newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool,
738                                             NULL, 0);
739     if (copySize) {
740         status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0,
741                                                    NULL, NULL);
742         ASSERT(status == NDIS_STATUS_SUCCESS);
743     }
744
745     if (newNbl == NULL) {
746         return NULL;
747     }
748
749     /*
750      * Allocate private memory for copy
751      */
752     if (copySize + headRoom) {
753         status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom,
754                                                    0, NULL, NULL);
755         if (status != NDIS_STATUS_SUCCESS) {
756             goto retreat_error;
757         }
758
759         if (headRoom) {
760             NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL);
761         }
762         if (copySize) {
763             srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
764             dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
765
766             while (srcNb) {
767                 status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize,
768                                                           srcNb, 0,
769                                                           &byteCopied);
770                 if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) {
771                     goto nbl_context_error;
772                 }
773                 srcNb = NET_BUFFER_NEXT_NB(srcNb);
774                 dstNb = NET_BUFFER_NEXT_NB(dstNb);
775             }
776         }
777     }
778
779     status = OvsAllocateNBLContext(context, newNbl);
780     if (status != NDIS_STATUS_SUCCESS) {
781         goto nbl_context_error;
782     }
783
784     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo);
785     if (status != NDIS_STATUS_SUCCESS) {
786         goto copy_list_info_error;
787     }
788
789 #ifdef DBG
790     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
791 #endif
792
793     newNbl->ParentNetBufferList = nbl;
794
795     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
796     ASSERT(dstCtx != NULL);
797
798     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
799
800     flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT |
801              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
802
803     srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
804     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize,
805                       OVS_DPPORT_NUMBER_INVALID);
806
807     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
808
809 #ifdef DBG
810     OvsDumpNetBufferList(nbl);
811     OvsDumpForwardingDetails(nbl);
812
813     OvsDumpNetBufferList(newNbl);
814     OvsDumpForwardingDetails(newNbl);
815 #endif
816
817     OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl);
818     return newNbl;
819
820 copy_list_info_error:
821     OvsFreeNBLContext(context, newNbl);
822 nbl_context_error:
823     if (copySize) {
824         NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL);
825     }
826 retreat_error:
827     NdisFreeCloneNetBufferList(newNbl, 0);
828     return NULL;
829 }
830
831 /*
832  * --------------------------------------------------------------------------
833  * OvsPartialCopyToMultipleNBLs --
834  *
835  *     This is similar to OvsPartialCopyNBL() except that each NB will
836  *     have its own NBL.
837  * --------------------------------------------------------------------------
838  */
839 PNET_BUFFER_LIST
840 OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
841                              PNET_BUFFER_LIST nbl,
842                              UINT32 copySize,
843                              UINT32 headRoom,
844                              BOOLEAN copyNblInfo)
845 {
846     PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb;
847     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
848     PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL;
849
850     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
851     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
852         return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo);
853     }
854
855     firstNb = nb;
856     prevNb = nb;
857
858     while (nb) {
859         nextNb = NET_BUFFER_NEXT_NB(nb);
860         NET_BUFFER_NEXT_NB(nb) = NULL;
861
862         NET_BUFFER_LIST_FIRST_NB(nbl) = nb;
863
864         newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom,
865                                    copyNblInfo);
866         if (newNbl == NULL) {
867             goto cleanup;
868         }
869         if (prevNbl == NULL) {
870             firstNbl = newNbl;
871         } else {
872             NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
873             NET_BUFFER_NEXT_NB(prevNb) = nb;
874         }
875         prevNbl = newNbl;
876         prevNb = nb;
877         nb = nextNb;
878     }
879     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
880     return firstNbl;
881
882 cleanup:
883     NET_BUFFER_NEXT_NB(prevNb) = nb;
884     NET_BUFFER_NEXT_NB(nb) = nextNb;
885     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
886
887     newNbl = firstNbl;
888     while (newNbl) {
889         firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl);
890         NET_BUFFER_LIST_NEXT_NBL(newNbl) = NULL;
891         OvsCompleteNBL(context, newNbl, TRUE);
892         newNbl = firstNbl;
893     }
894     return NULL;
895 }
896
897
898 static PNET_BUFFER_LIST
899 OvsCopySinglePacketNBL(PVOID ovsContext,
900                        PNET_BUFFER_LIST nbl,
901                        PNET_BUFFER nb,
902                        UINT32 headRoom,
903                        BOOLEAN copyNblInfo)
904 {
905     UINT32 size;
906     ULONG copiedSize;
907     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
908     PNET_BUFFER_LIST newNbl;
909     PNET_BUFFER newNb;
910     NDIS_STATUS status;
911     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
912
913     size = NET_BUFFER_DATA_LENGTH(nb);
914     if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) {
915         newNbl = OvsAllocateFixSizeNBL(context, size, headRoom);
916     } else {
917         newNbl = OvsAllocateVariableSizeNBL(context, size, headRoom);
918     }
919     if (newNbl == NULL) {
920         return NULL;
921     }
922     newNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
923     status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
924                                               &copiedSize);
925
926     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
927     if (status == NDIS_STATUS_SUCCESS) {
928         status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo);
929     }
930
931     if (status != NDIS_STATUS_SUCCESS || copiedSize != size) {
932         OvsCompleteNBL(context, newNbl, TRUE);
933         return NULL;
934     }
935
936     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
937     ASSERT(dstCtx && srcCtx);
938     ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC);
939
940     dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER |
941                                       OVS_BUFFER_SEND_BUFFER);
942 #ifdef DBG
943     OvsDumpNetBufferList(newNbl);
944     OvsDumpForwardingDetails(newNbl);
945 #endif
946     OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl);
947     return newNbl;
948 }
949
950 /*
951  * --------------------------------------------------------------------------
952  * OvsFullCopyNBL --
953  *
954  *    Copy the NBL to a new NBL including data.
955  *
956  * Notes:
957  *     The NBL can have multiple NBs, but the final result is one NBL.
958  * --------------------------------------------------------------------------
959  */
960 PNET_BUFFER_LIST
961 OvsFullCopyNBL(PVOID ovsContext,
962                PNET_BUFFER_LIST nbl,
963                UINT32 headRoom,
964                BOOLEAN copyNblInfo)
965 {
966     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
967     POVS_NBL_POOL ovsPool = &context->ovsPool;
968     PNET_BUFFER_LIST newNbl;
969     PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL;
970     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
971     PMDL mdl;
972     NDIS_STATUS status;
973     UINT32 size, totalSize;
974     ULONG copiedSize;
975     UINT16 flags;
976     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo;
977
978     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
979     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
980         OVS_LOG_INFO("src nbl must have ctx initialized");
981         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
982         return NULL;
983     }
984
985     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
986
987     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
988         return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
989     }
990
991     newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool,
992                                        (UINT16)sizeof (OVS_BUFFER_CONTEXT),
993                                        (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
994     if (newNbl == NULL) {
995         return NULL;
996     }
997
998     while (nb) {
999         size = NET_BUFFER_DATA_LENGTH(nb);
1000         totalSize = MEM_ALIGN_SIZE(size + headRoom);
1001         mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize);
1002
1003         if (mdl == NULL) {
1004             goto nblcopy_error;
1005         }
1006         newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0);
1007         if (newNb == NULL) {
1008             OvsFreeMDLAndData(mdl);
1009             goto nblcopy_error;
1010         }
1011         if (firstNb == NULL) {
1012             firstNb = newNb;
1013         } else {
1014             NET_BUFFER_NEXT_NB(prevNb) = newNb;
1015         }
1016         prevNb = newNb;
1017 #ifdef DBG
1018         InterlockedIncrement((LONG volatile *)&ovsPool->nbCount);
1019 #endif
1020         status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL);
1021         ASSERT(status == NDIS_STATUS_SUCCESS);
1022
1023         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
1024                                                   &copiedSize);
1025         if (status != NDIS_STATUS_SUCCESS || size != copiedSize) {
1026             goto nblcopy_error;
1027         }
1028
1029         nb = NET_BUFFER_NEXT_NB(nb);
1030     }
1031
1032     NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb;
1033
1034     newNbl->SourceHandle = ovsPool->ndisHandle;
1035     status = context->NdisSwitchHandlers.
1036          AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl);
1037
1038     if (status != NDIS_STATUS_SUCCESS) {
1039         goto nblcopy_error;
1040     }
1041
1042     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo);
1043     if (status != NDIS_STATUS_SUCCESS) {
1044         goto nblcopy_error;
1045     }
1046
1047     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl);
1048     dstInfo->IsPacketDataSafe = TRUE;
1049
1050     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1051
1052     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
1053
1054     flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
1055              OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL |
1056              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
1057
1058     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb),
1059                       OVS_DPPORT_NUMBER_INVALID);
1060
1061 #ifdef DBG
1062     OvsDumpNetBufferList(nbl);
1063     OvsDumpForwardingDetails(nbl);
1064     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
1065 #endif
1066     OVS_LOG_LOUD("newNbl: %p", newNbl);
1067     return newNbl;
1068
1069 nblcopy_error:
1070     while (firstNb) {
1071 #ifdef DBG
1072         InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1073 #endif
1074         prevNb = firstNb;
1075         firstNb = NET_BUFFER_NEXT_NB(prevNb);
1076         mdl = NET_BUFFER_FIRST_MDL(prevNb);
1077         NET_BUFFER_FIRST_MDL(prevNb) = NULL;
1078         NdisFreeNetBuffer(prevNb);
1079         OvsFreeMDLAndData(mdl);
1080     }
1081     NdisFreeNetBufferList(newNbl);
1082     OVS_LOG_ERROR("OvsFullCopyNBL failed");
1083     return NULL;
1084 }
1085
1086 /*
1087  * --------------------------------------------------------------------------
1088  * GetSegmentHeaderInfo
1089  *
1090  *    Extract header size and sequence number for the segment.
1091  * --------------------------------------------------------------------------
1092  */
1093 static NDIS_STATUS
1094 GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
1095                      const POVS_PACKET_HDR_INFO hdrInfo,
1096                      UINT32 *hdrSize, UINT32 *seqNumber)
1097 {
1098     TCPHdr tcpStorage;
1099     const TCPHdr *tcp;
1100
1101     /* Parse the orginal Eth/IP/TCP header */
1102     tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
1103     if (tcp == NULL) {
1104         return NDIS_STATUS_FAILURE;
1105     }
1106     *seqNumber = ntohl(tcp->seq);
1107     *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp);
1108
1109     return NDIS_STATUS_SUCCESS;
1110 }
1111
1112
1113 /*
1114  * --------------------------------------------------------------------------
1115  * FixSegmentHeader
1116  *
1117  *    Fix IP length, IP checksum, TCP sequence number and TCP checksum
1118  *    in the segment.
1119  * --------------------------------------------------------------------------
1120  */
1121 static NDIS_STATUS
1122 FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
1123                  BOOLEAN lastPacket, UINT16 packetCounter)
1124 {
1125     EthHdr *dstEth;
1126     IPHdr *dstIP;
1127     TCPHdr *dstTCP;
1128     PMDL mdl;
1129     PUINT8 bufferStart;
1130
1131     mdl = NET_BUFFER_FIRST_MDL(nb);
1132
1133     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
1134     if (!bufferStart) {
1135         return NDIS_STATUS_RESOURCES;
1136     }
1137     dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb));
1138     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1139             >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
1140     dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth);
1141     dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
1142     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1143             >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1144
1145     /* Fix IP length and checksum */
1146     ASSERT(dstIP->protocol == IPPROTO_TCP);
1147     dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1148     dstIP->id += packetCounter;
1149     dstIP->check = 0;
1150     dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
1151
1152     /* Fix TCP checksum */
1153     dstTCP->seq = htonl(seqNumber);
1154
1155     /*
1156      * Set the TCP FIN and PSH bit only for the last packet
1157      * More information can be found under:
1158      * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx
1159      */
1160     if (dstTCP->fin) {
1161         dstTCP->fin = lastPacket;
1162     }
1163     if (dstTCP->psh) {
1164         dstTCP->psh = lastPacket;
1165     }
1166
1167     UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP);
1168     dstTCP->check = IPPseudoChecksum(&dstIP->saddr,
1169                                      &dstIP->daddr,
1170                                      IPPROTO_TCP,
1171                                      csumLength);
1172     dstTCP->check = CalculateChecksumNB(nb,
1173                                         csumLength,
1174                                         sizeof *dstEth + dstIP->ihl * 4);
1175
1176     return STATUS_SUCCESS;
1177 }
1178
1179 /*
1180  * --------------------------------------------------------------------------
1181  * OvsTcpSegmentyNBL --
1182  *
1183  *    Segment TCP payload, and prepend each segment with ether/IP/TCP header.
1184  *    Leave headRoom for additional encap.
1185  *
1186  *    Please note,
1187  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
1188  *       this function.
1189  *       The NBL should already have ref to itself so that during copy
1190  *       it will not be freed.
1191  *       Currently this API assert there is only one NB in an NBL, it needs
1192  *       to be fixed if we receive multiple NBs in an NBL.
1193  * --------------------------------------------------------------------------
1194  */
1195 PNET_BUFFER_LIST
1196 OvsTcpSegmentNBL(PVOID ovsContext,
1197                  PNET_BUFFER_LIST nbl,
1198                  POVS_PACKET_HDR_INFO hdrInfo,
1199                  UINT32 mss,
1200                  UINT32 headRoom)
1201 {
1202     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1203 #ifdef DBG
1204     POVS_NBL_POOL ovsPool = &context->ovsPool;
1205 #endif
1206     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
1207     UINT32 size, hdrSize, seqNumber;
1208     PNET_BUFFER_LIST newNbl;
1209     PNET_BUFFER nb, newNb;
1210     NDIS_STATUS status;
1211     UINT16 segmentSize;
1212     ULONG copiedSize;
1213     UINT16 packetCounter = 0;
1214
1215     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1216     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1217         OVS_LOG_INFO("src nbl must have ctx initialized");
1218         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1219         return NULL;
1220     }
1221
1222     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1223     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
1224
1225     /* Figure out the segment header size */
1226     status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
1227     if (status != NDIS_STATUS_SUCCESS) {
1228         OVS_LOG_INFO("Cannot parse NBL header");
1229         return NULL;
1230     }
1231
1232     size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
1233
1234     /* XXX add to ovsPool counters? */
1235     newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
1236             NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
1237     if (newNbl == NULL) {
1238         return NULL;
1239     }
1240
1241     /* Now deal with TCP payload */
1242     for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
1243             newNb = NET_BUFFER_NEXT_NB(newNb)) {
1244         segmentSize = (size > mss ? mss : size) & 0xffff;
1245         if (headRoom) {
1246             NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
1247         }
1248
1249         /* Now copy the eth/IP/TCP header and fix up */
1250         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0,
1251                                                   &copiedSize);
1252         if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) {
1253             goto nblcopy_error;
1254         }
1255
1256         status = FixSegmentHeader(newNb, segmentSize, seqNumber,
1257                                   NET_BUFFER_NEXT_NB(newNb) == NULL,
1258                                   packetCounter);
1259         if (status != NDIS_STATUS_SUCCESS) {
1260             goto nblcopy_error;
1261         }
1262
1263
1264         /* Move on to the next segment */
1265         size -= segmentSize;
1266         seqNumber += segmentSize;
1267         packetCounter++;
1268     }
1269
1270     status = OvsAllocateNBLContext(context, newNbl);
1271     if (status != NDIS_STATUS_SUCCESS) {
1272         goto nblcopy_error;
1273     }
1274
1275     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE);
1276     if (status != NDIS_STATUS_SUCCESS) {
1277         goto nbl_context_error;
1278     }
1279
1280     newNbl->ParentNetBufferList = nbl;
1281
1282     /* Remember it's a fragment NBL so we can free it properly */
1283     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1284     ASSERT(dstCtx != NULL);
1285     dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT |
1286         OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER;
1287     dstCtx->refCount = 1;
1288     dstCtx->magic = OVS_CTX_MAGIC;
1289     dstCtx->dataOffsetDelta = hdrSize + headRoom;
1290
1291     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
1292 #ifdef DBG
1293     InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount);
1294
1295     OvsDumpNetBufferList(nbl);
1296     OvsDumpForwardingDetails(nbl);
1297
1298     OvsDumpNetBufferList(newNbl);
1299     OvsDumpForwardingDetails(newNbl);
1300 #endif
1301     OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
1302     return newNbl;
1303
1304 nbl_context_error:
1305     OvsFreeNBLContext(context, newNbl);
1306 nblcopy_error:
1307 #ifdef DBG
1308     InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1309 #endif
1310     NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0);
1311     return NULL;
1312 }
1313
1314 /*
1315  * --------------------------------------------------------------------------
1316  * OvsAllocateNBLFromBuffer --
1317  *
1318  * This function allocates all the stuff necessary for creating an NBL from the
1319  * input buffer of specified length, namely, a nonpaged data buffer of size
1320  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
1321  * context yet. It also copies data from the specified buffer to the NBL.
1322  * --------------------------------------------------------------------------
1323  */
1324 PNET_BUFFER_LIST
1325 OvsAllocateNBLFromBuffer(PVOID context,
1326                          PVOID buffer,
1327                          ULONG length)
1328 {
1329     POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)context;
1330     UINT8 *data = NULL;
1331     PNET_BUFFER_LIST nbl = NULL;
1332     PNET_BUFFER nb;
1333     PMDL mdl;
1334
1335     if (length > OVS_DEFAULT_DATA_SIZE) {
1336         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
1337                                          OVS_DEFAULT_HEADROOM_SIZE);
1338
1339     } else {
1340         nbl = OvsAllocateFixSizeNBL(switchContext, length,
1341                                     OVS_DEFAULT_HEADROOM_SIZE);
1342     }
1343     if (nbl == NULL) {
1344         return NULL;
1345     }
1346
1347     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1348     mdl = NET_BUFFER_CURRENT_MDL(nb);
1349     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
1350                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
1351     if (!data) {
1352         OvsCompleteNBL(switchContext, nbl, TRUE);
1353         return NULL;
1354     }
1355
1356     NdisMoveMemory(data, buffer, length);
1357
1358     return nbl;
1359 }
1360
1361 /*
1362  * --------------------------------------------------------------------------
1363  * OvsFullCopyToMultipleNBLs --
1364  *
1365  *    Copy NBL to multiple NBLs, each NB will have its own NBL
1366  * --------------------------------------------------------------------------
1367  */
1368 PNET_BUFFER_LIST
1369 OvsFullCopyToMultipleNBLs(PVOID ovsContext,
1370                           PNET_BUFFER_LIST nbl,
1371                           UINT32 headRoom,
1372                           BOOLEAN copyNblInfo)
1373 {
1374
1375     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1376     PNET_BUFFER_LIST firstNbl, currNbl, newNbl;
1377     PNET_BUFFER nb;
1378     POVS_BUFFER_CONTEXT srcCtx;
1379
1380     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1381     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1382         OVS_LOG_INFO("src nbl must have ctx initialized");
1383         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1384         return NULL;
1385     }
1386
1387     nb =  NET_BUFFER_LIST_FIRST_NB(nbl);
1388     newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
1389
1390     if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) {
1391         return newNbl;
1392     } else {
1393         firstNbl = newNbl;
1394         currNbl = newNbl;
1395     }
1396
1397     while (nb) {
1398         newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom,
1399                                         copyNblInfo);
1400         if (newNbl == NULL) {
1401             goto copymultiple_error;
1402         }
1403         NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl;
1404         currNbl = newNbl;
1405         nb = NET_BUFFER_NEXT_NB(nb);
1406     }
1407     return firstNbl;
1408
1409 copymultiple_error:
1410     while (firstNbl) {
1411         currNbl = firstNbl;
1412         firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl);
1413         NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL;
1414         OvsCompleteNBL(context, currNbl, TRUE);
1415     }
1416     return NULL;
1417
1418 }
1419
1420
1421 /*
1422  * --------------------------------------------------------------------------
1423  * OvsCompleteNBL --
1424  *
1425  *     This function tries to free the NBL allocated by OVS buffer
1426  *     management module. If it trigger the completion of the parent
1427  *     NBL, it will recursively call itself. If it trigger the completion
1428  *     of external NBL, it will be returned to the caller. The caller
1429  *     is responsible to call API to return to upper layer.
1430  * --------------------------------------------------------------------------
1431  */
1432 PNET_BUFFER_LIST
1433 OvsCompleteNBL(POVS_SWITCH_CONTEXT context,
1434                PNET_BUFFER_LIST nbl,
1435                BOOLEAN updateRef)
1436 {
1437     POVS_BUFFER_CONTEXT ctx;
1438     UINT16 flags;
1439     PNET_BUFFER_LIST parent;
1440     NDIS_STATUS status;
1441     NDIS_HANDLE poolHandle;
1442     LONG value;
1443     POVS_NBL_POOL ovsPool = &context->ovsPool;
1444     PNET_BUFFER nb;
1445
1446
1447     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1448
1449     ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1450
1451     OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d",
1452                  nbl, ctx, ctx->refCount, updateRef);
1453
1454     if (updateRef) {
1455         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1456         if (value != 0) {
1457             return NULL;
1458         }
1459     } else {
1460         /*
1461          * This is a special case, the refCount must be zero
1462          */
1463         ASSERT(ctx->refCount == 0);
1464     }
1465
1466     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1467
1468     flags = ctx->flags;
1469     if (!(flags & OVS_BUFFER_FRAGMENT) &&
1470         NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) {
1471         UINT32 diff;
1472         if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) {
1473             diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb);
1474             status = NdisRetreatNetBufferListDataStart(nbl, diff, 0,
1475                                                        NULL, NULL);
1476             ASSERT(status == NDIS_STATUS_SUCCESS);
1477         } else {
1478             diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength;
1479             NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL);
1480         }
1481     }
1482
1483     if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) {
1484         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
1485     }
1486
1487     if (flags & OVS_BUFFER_NEED_COMPLETE) {
1488         /*
1489          * return to caller for completion
1490          */
1491 #ifdef DBG
1492         InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount);
1493 #endif
1494         return nbl;
1495     }
1496
1497     if (flags & OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) {
1498         context->NdisSwitchHandlers.
1499               FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
1500     }
1501
1502     if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) {
1503         PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1504         while (nb) {
1505             PMDL mdl = NET_BUFFER_FIRST_MDL(nb);
1506             NET_BUFFER_FIRST_MDL(nb) = NULL;
1507             ASSERT(mdl->Next == NULL);
1508             OvsFreeMDLAndData(mdl);
1509             nb = NET_BUFFER_NEXT_NB(nb);
1510         }
1511     }
1512
1513     if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) {
1514         PNET_BUFFER nb, nextNb;
1515
1516         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1517         while (nb) {
1518             nextNb = NET_BUFFER_NEXT_NB(nb);
1519             NdisFreeNetBuffer(nb);
1520 #ifdef DBG
1521             InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1522 #endif
1523             nb = nextNb;
1524         }
1525         NET_BUFFER_LIST_FIRST_NB(nbl) = NULL;
1526     }
1527
1528     parent = nbl->ParentNetBufferList;
1529
1530     poolHandle = NdisGetPoolFromNetBufferList(nbl);
1531     if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) {
1532         ASSERT(poolHandle == ovsPool->fixSizePool);
1533 #ifdef DBG
1534         InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount);
1535 #endif
1536         NdisFreeNetBufferList(nbl);
1537     } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) {
1538         ASSERT(poolHandle == ovsPool->zeroSizePool);
1539 #ifdef DBG
1540         InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount);
1541 #endif
1542         NdisFreeNetBufferList(nbl);
1543     } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) {
1544         ASSERT(poolHandle == ovsPool->nblOnlyPool);
1545 #ifdef DBG
1546         InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount);
1547 #endif
1548         NdisFreeCloneNetBufferList(nbl, 0);
1549     } else if (flags & OVS_BUFFER_FRAGMENT) {
1550         OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent);
1551 #ifdef DBG
1552         InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1553 #endif
1554         NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0);
1555     }
1556
1557     if (parent != NULL) {
1558         ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent);
1559         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1560         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1561         if (value == 0) {
1562             return OvsCompleteNBL(context, parent, FALSE);
1563         }
1564     }
1565     return NULL;
1566 }
1567
1568 /*
1569  * --------------------------------------------------------------------------
1570  * OvsSetCtxSourcePortNo --
1571  *      Setter function which stores the source port of an NBL in the NBL
1572  * Context Info.
1573  * --------------------------------------------------------------------------
1574  */
1575 NDIS_STATUS
1576 OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1577                       UINT32 portNo)
1578 {
1579     POVS_BUFFER_CONTEXT ctx;
1580     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1581     if (ctx == NULL) {
1582         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1583         return STATUS_INVALID_PARAMETER;
1584     }
1585
1586     ctx->srcPortNo = portNo;
1587     return NDIS_STATUS_SUCCESS;
1588 }
1589
1590 /*
1591  * --------------------------------------------------------------------------
1592  * OvsGetCtxSourcePortNo --
1593  *      Get source port of an NBL from its Context Info.
1594  * --------------------------------------------------------------------------
1595  */
1596 NDIS_STATUS
1597 OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1598                       UINT32 *portNo)
1599 {
1600     POVS_BUFFER_CONTEXT ctx;
1601     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1602     if (ctx == NULL || portNo == NULL) {
1603         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1604         return STATUS_INVALID_PARAMETER;
1605     }
1606     *portNo = ctx->srcPortNo;
1607     return NDIS_STATUS_SUCCESS;
1608 }