7ec073b700f78503300d320a0daecb72e91bb58e
[cascardo/ovs.git] / datapath-windows / ovsext / BufferMgmt.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ****************************************************************************
19  *
20  *       Simple Buffer Management framework for OVS
21  *
22  *  It introduces four NDIS buffer pools
23  *     **Fix size net buffer list pool--this is used for small buffer
24  *     One allocation will include NBL + NB + MDL + Data + CONTEXT.
25  *
26  *     **Variable size net buffer list pool--this is used for variable size
27  *     buffer. The allocation of net buffer list will include NBL + NB +
28  *     CONTEXT, a separate allocation of MDL + data buffer is required.
29  *
30  *     **NBL only net buffer list pool-- this is used for partial copy
31  *     (or clone). In this case we can not allocate net buffer list and
32  *     net buffer at the same time.
33  *
34  *     **Net buffer pool-- this is required when net buffer need to be
35  *     allocated separately.
36  *
37  *  A Buffer context is defined to track the buffer specific information
38  *  so that during NBL completion, proper action can be taken. Please see
39  *  code for details.
40  *
41  *  Here is the usage of the management API
42  *  Every external NBL must have its NBL context initialized by calling
43  *     OvsInitExternalNBLContext()
44  *
45  *  After the external NBL context is initialized, it can call the following
46  *  API to allocate, copy or partial copy NBL.
47  *
48  *     OvsAllocateFixSizeNBL()
49  *     OvsAllocateVariableSizeNBL()
50  *
51  *     OvsPartialCopyNBL()
52  *     OvsPartialCopyToMultipleNBLs()
53  *
54  *     OvsFullCopyNBL()
55  *     OvsFullCopyToMultipleNBLs()
56  *
57  *  See code comments for detail description of the functions.
58  *
59  *  All NBLs are completed through
60  *       OvsCompleteNBL()
61  *     If this API return non NULL value, then the returned NBL should be
62  *     returned to upper layer by calling
63  *     NdisFSendNetBufferListsComplete() if the buffer is from upper
64  *     layer. In case of WFP, it can call the corresponding completion routine
65  *     to return the NBL to the framework.
66  *
67  *  NOTE:
68  *     1. Copy or partial copy will not copy destination port array
69  *     2. Copy or partial copy will copy src port id and index
70  *     3. New Allocated NBL will have src port set to default port id
71  *     4. If original packet has direction flag set, the copied or partial
72  *        copied NBL will still be in same direction.
73  *     5. When you advance or retreat the buffer, you may need to update
74  *        relevant meta data to keep it consistent.
75  *
76  * ****************************************************************************
77  */
78
79 #include "precomp.h"
80 #include "Switch.h"
81
82 #ifdef OVS_DBG_MOD
83 #undef OVS_DBG_MOD
84 #endif
85 #define OVS_DBG_MOD OVS_DBG_BUFMGMT
86 #include "Debug.h"
87 #include "NetProto.h"
88 #include "Flow.h"
89 #include "Checksum.h"
90 #include "PacketParser.h"
91 #include "Vport.h"
92
93 /*
94  * --------------------------------------------------------------------------
95  * OvsInitBufferPool --
96  *
97  *    Allocate NBL and NB pool
98  *
99  * XXX: more optimization may be done for buffer management include local cache
100  * of NBL, NB, data, context, MDL.
101  * --------------------------------------------------------------------------
102  */
103 NDIS_STATUS
104 OvsInitBufferPool(PVOID ovsContext)
105 {
106     POVS_NBL_POOL ovsPool;
107     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
108     NET_BUFFER_LIST_POOL_PARAMETERS  nblParam;
109     NET_BUFFER_POOL_PARAMETERS nbParam;
110
111     C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8);
112
113     OVS_LOG_TRACE("Enter: context: %p", context);
114
115     ovsPool = &context->ovsPool;
116     RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL));
117     ovsPool->ndisHandle = context->NdisFilterHandle;
118     ovsPool->ndisContext = context->NdisSwitchContext;
119     /*
120      * fix size NBL pool includes
121      *    NBL + NB + MDL + DATA + Context
122      *    This is mainly used for Packet execute or slow path when copy is
123      *    required and size is less than OVS_DEFAULT_DATA_SIZE. We expect
124      *    Most of packet from user space will use this Pool. (This is
125      *    true for all bfd and cfm packet.
126      */
127     RtlZeroMemory(&nblParam, sizeof (nblParam));
128     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
129                            NDIS_OBJECT_TYPE_DEFAULT,
130                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
131                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
132     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
133     nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG;
134     nblParam.fAllocateNetBuffer = TRUE;
135     nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE;
136
137     ovsPool->fixSizePool =
138         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
139     if (ovsPool->fixSizePool == NULL) {
140         goto pool_cleanup;
141     }
142
143     /*
144      * Zero Size NBL Pool includes
145      *    NBL + NB + Context
146      *    This is mainly for packet with large data Size, in this case MDL and
147      *    Data will be allocate separately.
148      */
149     RtlZeroMemory(&nblParam, sizeof (nblParam));
150     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
151                            NDIS_OBJECT_TYPE_DEFAULT,
152                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
153                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
154
155     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
156     nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG;
157     nblParam.fAllocateNetBuffer = TRUE;
158     nblParam.DataSize = 0;
159
160     ovsPool->zeroSizePool =
161         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
162     if (ovsPool->zeroSizePool == NULL) {
163         goto pool_cleanup;
164     }
165
166     /*
167      * NBL only pool just includes
168      *    NBL (+ context)
169      *    This is mainly used for clone and partial copy
170      */
171     RtlZeroMemory(&nblParam, sizeof (nblParam));
172     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
173                            NDIS_OBJECT_TYPE_DEFAULT,
174                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
175                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
176
177     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
178     nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG;
179     nblParam.fAllocateNetBuffer = FALSE;
180     nblParam.DataSize = 0;
181
182     ovsPool->nblOnlyPool =
183         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
184     if (ovsPool->nblOnlyPool == NULL) {
185         goto pool_cleanup;
186     }
187
188     /* nb Pool
189      *    NB only pool, used for copy
190      */
191
192     OVS_INIT_OBJECT_HEADER(&nbParam.Header,
193                            NDIS_OBJECT_TYPE_DEFAULT,
194                            NET_BUFFER_POOL_PARAMETERS_REVISION_1,
195                            NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1);
196     nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG;
197     nbParam.DataSize = 0;
198     ovsPool->nbPool =
199         NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam);
200     if (ovsPool->nbPool == NULL) {
201         goto pool_cleanup;
202     }
203     OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p"
204                   "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool,
205                   ovsPool->nblOnlyPool, ovsPool->nbPool);
206     return NDIS_STATUS_SUCCESS;
207
208 pool_cleanup:
209     OvsCleanupBufferPool(context);
210     OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool");
211     return NDIS_STATUS_RESOURCES;
212 }
213
214
215 /*
216  * --------------------------------------------------------------------------
217  * OvsCleanupBufferPool --
218  *  Free Buffer pool for NBL and NB.
219  * --------------------------------------------------------------------------
220  */
221 VOID
222 OvsCleanupBufferPool(PVOID ovsContext)
223 {
224     POVS_NBL_POOL ovsPool;
225     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
226     ovsPool = &context->ovsPool;
227     OVS_LOG_TRACE("Enter: context: %p", context);
228 #ifdef DBG
229     ASSERT(ovsPool->fixNBLCount == 0);
230     ASSERT(ovsPool->zeroNBLCount == 0);
231     ASSERT(ovsPool->nblOnlyCount == 0);
232     ASSERT(ovsPool->nbCount == 0);
233     ASSERT(ovsPool->sysNBLCount == 0);
234     ASSERT(ovsPool->fragNBLCount == 0);
235 #endif
236
237     if (ovsPool->fixSizePool) {
238         NdisFreeNetBufferListPool(ovsPool->fixSizePool);
239         ovsPool->fixSizePool = NULL;
240     }
241     if (ovsPool->zeroSizePool) {
242         NdisFreeNetBufferListPool(ovsPool->zeroSizePool);
243         ovsPool->zeroSizePool = NULL;
244     }
245     if (ovsPool->nblOnlyPool) {
246         NdisFreeNetBufferListPool(ovsPool->nblOnlyPool);
247         ovsPool->nblOnlyPool = NULL;
248     }
249     if (ovsPool->nbPool) {
250         NdisFreeNetBufferPool(ovsPool->nbPool);
251         ovsPool->nbPool = NULL;
252     }
253     OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool");
254 }
255
256
257 static VOID
258 OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
259                   UINT16 flags,
260                   UINT32 origDataLength,
261                   UINT32 srcPortNo)
262 {
263     ctx->magic = OVS_CTX_MAGIC;
264     ctx->refCount = 1;
265     ctx->flags = flags;
266     ctx->srcPortNo = srcPortNo;
267     ctx->origDataLength = origDataLength;
268 }
269
270
271 static VOID
272 OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl)
273 {
274     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
275     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
276     if (info == NULL) {
277         return;
278     }
279     OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d "
280                  "isDataSafe: %s, safeDataSize: %d",
281                  nbl, info->NumAvailableDestinations, info->SourcePortId,
282                  info->SourceNicIndex,
283                  info->IsPacketDataSafe ? "TRUE" : "FALSE",
284                  info->IsPacketDataSafe ? 0 : info->SafePacketDataSize);
285
286 }
287
288 static VOID
289 OvsDumpNBLContext(PNET_BUFFER_LIST nbl)
290 {
291     PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context;
292     if (ctx == NULL) {
293         OVS_LOG_INFO("No Net Buffer List context");
294         return;
295     }
296     while (ctx) {
297         OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d",
298                      nbl, ctx, ctx->Size, ctx->Offset);
299         ctx = ctx->Next;
300     }
301 }
302
303
304 static VOID
305 OvsDumpMDLChain(PMDL mdl)
306 {
307     PMDL tmp;
308     tmp = mdl;
309     while (tmp) {
310         OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p"
311                      " ByteCount: %d, ByteOffset: %d",
312                      tmp, tmp->Size, tmp->MappedSystemVa,
313                      tmp->StartVa, tmp->ByteCount, tmp->ByteOffset);
314         tmp = tmp->Next;
315     }
316 }
317
318
319 static VOID
320 OvsDumpNetBuffer(PNET_BUFFER nb)
321 {
322     OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p "
323                  "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d",
324                  nb,
325                  NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle,
326                  NET_BUFFER_FIRST_MDL(nb),
327                  NET_BUFFER_CURRENT_MDL(nb),
328                  NET_BUFFER_CURRENT_MDL_OFFSET(nb),
329                  NET_BUFFER_DATA_LENGTH(nb),
330                  NET_BUFFER_DATA_OFFSET(nb));
331     OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb));
332 }
333
334
335 static VOID
336 OvsDumpNetBufferList(PNET_BUFFER_LIST nbl)
337 {
338     PNET_BUFFER nb;
339     OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d "
340                  "poolHandle: %p",
341                  nbl, nbl->ParentNetBufferList,
342                  nbl->SourceHandle, nbl->ChildRefCount,
343                  nbl->NdisPoolHandle);
344     OvsDumpNBLContext(nbl);
345     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
346     while (nb) {
347         OvsDumpNetBuffer(nb);
348         nb = NET_BUFFER_NEXT_NB(nb);
349     }
350 }
351
352 /*
353  * --------------------------------------------------------------------------
354  * OvsAllocateFixSizeNBL --
355  *
356  *    Allocate fix size NBL which include
357  *       NBL + NB + MBL + Data + Context
358  *    Please note:
359  *       * Forwarding Context is allocated, but forwarding detail information
360  *       is not initailized.
361  *       * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128
362  *       byte).
363  * --------------------------------------------------------------------------
364  */
365 PNET_BUFFER_LIST
366 OvsAllocateFixSizeNBL(PVOID ovsContext,
367                       UINT32 size,
368                       UINT32 headRoom)
369 {
370     PNET_BUFFER_LIST nbl = NULL;
371     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
372     POVS_BUFFER_CONTEXT ctx;
373     POVS_NBL_POOL ovsPool = &context->ovsPool;
374     NDIS_STATUS status;
375     UINT32 line;
376     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
377
378     if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) {
379         line = __LINE__;
380         goto allocate_done;
381     }
382
383     nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool,
384                                     (UINT16)sizeof (OVS_BUFFER_CONTEXT),
385                                     (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
386
387     if (nbl == NULL) {
388         line = __LINE__;
389         goto allocate_done;
390     }
391
392     nbl->SourceHandle = ovsPool->ndisHandle;
393     status = context->NdisSwitchHandlers.
394              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
395
396     if (status != NDIS_STATUS_SUCCESS) {
397         NdisFreeNetBufferList(nbl);
398         nbl = NULL;
399         line = __LINE__;
400         goto allocate_done;
401     }
402     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
403     ASSERT(info);
404     info->IsPacketDataSafe = TRUE;
405     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
406
407     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
408                                            size, 0, NULL);
409     ASSERT(status == NDIS_STATUS_SUCCESS);
410
411 #ifdef DBG
412     InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount);
413     OvsDumpNetBufferList(nbl);
414     OvsDumpForwardingDetails(nbl);
415 #endif
416
417     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
418     ASSERT(ctx);
419
420     OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL |
421                       OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size,
422                       OVS_DPPORT_NUMBER_INVALID);
423     line = __LINE__;
424 allocate_done:
425     OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line);
426     return nbl;
427 }
428
429
430 static PMDL
431 OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
432                       UINT32 dataSize)
433 {
434     PMDL mdl;
435     PVOID data;
436
437     data = OvsAllocateMemoryWithTag(dataSize, OVS_MDL_POOL_TAG);
438     if (data == NULL) {
439         return NULL;
440     }
441
442     mdl = NdisAllocateMdl(ndisHandle, data, dataSize);
443     if (mdl == NULL) {
444         OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
445     }
446
447     return mdl;
448 }
449
450
451 static VOID
452 OvsFreeMDLAndData(PMDL mdl)
453 {
454     PVOID data;
455
456     data = MmGetMdlVirtualAddress(mdl);
457     NdisFreeMdl(mdl);
458     OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
459 }
460
461
462 /*
463  * --------------------------------------------------------------------------
464  * OvsAllocateVariableSizeNBL --
465  *
466  *    Allocate variable size NBL, the NBL looks like
467  *      NBL + NB + Context
468  *      MDL + Data
469  * --------------------------------------------------------------------------
470  */
471 PNET_BUFFER_LIST
472 OvsAllocateVariableSizeNBL(PVOID ovsContext,
473                            UINT32 size,
474                            UINT32 headRoom)
475 {
476     PNET_BUFFER_LIST nbl = NULL;
477     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
478     POVS_NBL_POOL ovsPool = &context->ovsPool;
479     POVS_BUFFER_CONTEXT ctx;
480     UINT32 realSize;
481     PMDL mdl;
482     NDIS_STATUS status;
483     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
484     if (size == 0) {
485         return NULL;
486     }
487     realSize = MEM_ALIGN_SIZE(size + headRoom);
488
489     mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize);
490     if (mdl == NULL) {
491         return NULL;
492     }
493
494     nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool,
495                                          (UINT16)sizeof (OVS_BUFFER_CONTEXT),
496                                          (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL,
497                                                 mdl, realSize, 0);
498     if (nbl == NULL) {
499         OvsFreeMDLAndData(mdl);
500         return NULL;
501     }
502
503     nbl->SourceHandle = ovsPool->ndisHandle;
504     status = context->NdisSwitchHandlers.
505              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
506
507     if (status != NDIS_STATUS_SUCCESS) {
508        /*
509         * do we need to remove mdl from nbl XXX
510         */
511         OvsFreeMDLAndData(mdl);
512         NdisFreeNetBufferList(nbl);
513         return NULL;
514     }
515
516     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
517     ASSERT(info);
518     info->IsPacketDataSafe = TRUE;
519     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
520     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
521                                            size, 0, NULL);
522     ASSERT(status == NDIS_STATUS_SUCCESS);
523
524 #ifdef DBG
525     InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount);
526     OvsDumpNetBufferList(nbl);
527     OvsDumpForwardingDetails(nbl);
528 #endif
529
530     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
531
532     OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
533                            OVS_BUFFER_PRIVATE_FORWARD_CONTEXT |
534                            OVS_BUFFER_FROM_ZERO_SIZE_POOL,
535                            size, OVS_DPPORT_NUMBER_INVALID);
536
537     OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl);
538     return nbl;
539 }
540
541
542 /*
543  * --------------------------------------------------------------------------
544  * OvsInitExternalNBLContext --
545  *
546  *     For NBL not allocated by OVS, it will allocate and initialize
547  *     the NBL context.
548  * --------------------------------------------------------------------------
549  */
550 POVS_BUFFER_CONTEXT
551 OvsInitExternalNBLContext(PVOID ovsContext,
552                           PNET_BUFFER_LIST nbl,
553                           BOOLEAN isRecv)
554 {
555     NDIS_HANDLE poolHandle;
556     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
557     POVS_BUFFER_CONTEXT ctx;
558     PNET_BUFFER nb;
559     NDIS_STATUS status;
560     UINT16 flags;
561
562     poolHandle = NdisGetPoolFromNetBufferList(nbl);
563
564     if (poolHandle == context->ovsPool.ndisHandle ||
565         nbl->SourceHandle == context->ovsPool.ndisHandle) {
566         return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
567     }
568     status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
569                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
570                                               OVS_OTHER_POOL_TAG);
571     if (status != NDIS_STATUS_SUCCESS) {
572         return NULL;
573     }
574 #ifdef DBG
575     OvsDumpNBLContext(nbl);
576     InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount);
577 #endif
578     flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
579     flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
580     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
581
582     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
583     /*
584      * we use first nb to decide whether we need advance or retreat during
585      * complete.
586      */
587     OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb),
588                       OVS_DPPORT_NUMBER_INVALID);
589     return ctx;
590 }
591
592 /*
593  * --------------------------------------------------------------------------
594  * OvsAllocateNBLContext
595  *
596  *    Create NBL buffer context and forwarding context.
597  * --------------------------------------------------------------------------
598  */
599 NDIS_STATUS
600 OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context,
601                       PNET_BUFFER_LIST nbl)
602 {
603     POVS_NBL_POOL ovsPool = &context->ovsPool;
604     NDIS_STATUS status;
605
606     status = NdisAllocateNetBufferListContext(nbl,
607                                               sizeof (OVS_BUFFER_CONTEXT),
608                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
609                                               OVS_OTHER_POOL_TAG);
610     if (status != NDIS_STATUS_SUCCESS) {
611         return NDIS_STATUS_FAILURE;
612     }
613
614     nbl->SourceHandle = ovsPool->ndisHandle;
615     status = context->NdisSwitchHandlers.
616         AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
617
618     if (status != NDIS_STATUS_SUCCESS) {
619         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
620         return NDIS_STATUS_FAILURE;
621     }
622     return status;
623 }
624
625 /*
626  * --------------------------------------------------------------------------
627  * OvsFreeNBLContext
628  *
629  *    Free the NBL buffer context and forwarding context.
630  * --------------------------------------------------------------------------
631  */
632 NDIS_STATUS
633 OvsFreeNBLContext(POVS_SWITCH_CONTEXT context,
634                   PNET_BUFFER_LIST nbl)
635 {
636     POVS_NBL_POOL ovsPool = &context->ovsPool;
637
638     context->NdisSwitchHandlers.
639          FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
640     NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
641
642     return NDIS_STATUS_SUCCESS;
643 }
644
645 /*
646  * --------------------------------------------------------------------------
647  * OvsCopyNBLInfo
648  *
649  *    Copy NBL info from src to dst
650  * --------------------------------------------------------------------------
651  */
652 NDIS_STATUS
653 OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl,
654                POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize,
655                BOOLEAN copyNblInfo)
656 {
657     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo;
658     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
659
660     srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl);
661     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl);
662     if (srcInfo) {
663 #ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO
664         status = context->NdisSwitchHandlers.
665             CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0);
666
667         if (status != NDIS_STATUS_SUCCESS) {
668             return status;
669         }
670 #else
671         dstInfo->SourcePortId = srcInfo->SourcePortId;
672         dstInfo->SourceNicIndex = srcInfo->SourceNicIndex;
673         if (copyNblInfo) {
674             if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) {
675                 NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl);
676             } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) {
677                 NdisCopySendNetBufferListInfo(dstNbl, srcNbl);
678             }
679         }
680 #endif
681         dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe;
682         if (!srcInfo->IsPacketDataSafe && copySize >
683             srcInfo->SafePacketDataSize) {
684             srcInfo->SafePacketDataSize = copySize;
685         }
686     } else {
687         /*
688          * Assume all data are safe
689          */
690         dstInfo->IsPacketDataSafe = TRUE;
691         dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
692     }
693     return status;
694 }
695
696 /*
697  * --------------------------------------------------------------------------
698  * OvsPartialCopyNBL --
699  *
700  *    Partial copy NBL, if there is multiple NB in NBL, each one will be
701  *    copied. We also reserve headroom for the new NBL.
702  *
703  *    Please note,
704  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
705  *       this function.
706  *       The NBL should already have ref to itself so that during copy
707  *       it will not be freed.
708  * --------------------------------------------------------------------------
709  */
710 PNET_BUFFER_LIST
711 OvsPartialCopyNBL(PVOID ovsContext,
712                   PNET_BUFFER_LIST nbl,
713                   UINT32 copySize,
714                   UINT32 headRoom,
715                   BOOLEAN copyNblInfo)
716 {
717     PNET_BUFFER_LIST newNbl;
718     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
719     NDIS_STATUS status;
720     PNET_BUFFER srcNb, dstNb;
721     ULONG byteCopied;
722     POVS_NBL_POOL ovsPool = &context->ovsPool;
723     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
724     UINT16 flags;
725
726     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
727     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
728         OVS_LOG_INFO("src nbl must have ctx initialized");
729         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
730         return NULL;
731     }
732
733     if (copySize) {
734         NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL);
735     }
736     newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool,
737                                             NULL, 0);
738     if (copySize) {
739         status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0,
740                                                    NULL, NULL);
741         ASSERT(status == NDIS_STATUS_SUCCESS);
742     }
743
744     if (newNbl == NULL) {
745         return NULL;
746     }
747
748     /*
749      * Allocate private memory for copy
750      */
751     if (copySize + headRoom) {
752         status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom,
753                                                    0, NULL, NULL);
754         if (status != NDIS_STATUS_SUCCESS) {
755             goto retreat_error;
756         }
757
758         if (headRoom) {
759             NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL);
760         }
761         if (copySize) {
762             srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
763             dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
764
765             while (srcNb) {
766                 status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize,
767                                                           srcNb, 0,
768                                                           &byteCopied);
769                 if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) {
770                     goto nbl_context_error;
771                 }
772                 srcNb = NET_BUFFER_NEXT_NB(srcNb);
773                 dstNb = NET_BUFFER_NEXT_NB(dstNb);
774             }
775         }
776     }
777
778     status = OvsAllocateNBLContext(context, newNbl);
779     if (status != NDIS_STATUS_SUCCESS) {
780         goto nbl_context_error;
781     }
782
783     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo);
784     if (status != NDIS_STATUS_SUCCESS) {
785         goto copy_list_info_error;
786     }
787
788 #ifdef DBG
789     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
790 #endif
791
792     newNbl->ParentNetBufferList = nbl;
793
794     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
795     ASSERT(dstCtx != NULL);
796
797     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
798
799     flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT |
800              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
801
802     srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
803     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize,
804                       OVS_DPPORT_NUMBER_INVALID);
805
806     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
807
808 #ifdef DBG
809     OvsDumpNetBufferList(nbl);
810     OvsDumpForwardingDetails(nbl);
811
812     OvsDumpNetBufferList(newNbl);
813     OvsDumpForwardingDetails(newNbl);
814 #endif
815
816     OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl);
817     return newNbl;
818
819 copy_list_info_error:
820     OvsFreeNBLContext(context, newNbl);
821 nbl_context_error:
822     if (copySize) {
823         NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL);
824     }
825 retreat_error:
826     NdisFreeCloneNetBufferList(newNbl, 0);
827     return NULL;
828 }
829
830 /*
831  * --------------------------------------------------------------------------
832  * OvsPartialCopyToMultipleNBLs --
833  *
834  *     This is similar to OvsPartialCopyNBL() except that each NB will
835  *     have its own NBL.
836  * --------------------------------------------------------------------------
837  */
838 PNET_BUFFER_LIST
839 OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
840                              PNET_BUFFER_LIST nbl,
841                              UINT32 copySize,
842                              UINT32 headRoom,
843                              BOOLEAN copyNblInfo)
844 {
845     PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb;
846     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
847     PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL;
848
849     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
850     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
851         return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo);
852     }
853
854     firstNb = nb;
855     prevNb = nb;
856
857     while (nb) {
858         nextNb = NET_BUFFER_NEXT_NB(nb);
859         NET_BUFFER_NEXT_NB(nb) = NULL;
860
861         NET_BUFFER_LIST_FIRST_NB(nbl) = nb;
862
863         newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom,
864                                    copyNblInfo);
865         if (newNbl == NULL) {
866             goto cleanup;
867         }
868         if (prevNbl == NULL) {
869             firstNbl = newNbl;
870         } else {
871             NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
872             NET_BUFFER_NEXT_NB(prevNb) = nb;
873         }
874         prevNbl = newNbl;
875         prevNb = nb;
876         nb = nextNb;
877     }
878     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
879     return firstNbl;
880
881 cleanup:
882     NET_BUFFER_NEXT_NB(prevNb) = nb;
883     NET_BUFFER_NEXT_NB(nb) = nextNb;
884     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
885
886     newNbl = firstNbl;
887     while (newNbl) {
888         firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl);
889         NET_BUFFER_LIST_NEXT_NBL(newNbl) = NULL;
890         OvsCompleteNBL(context, newNbl, TRUE);
891         newNbl = firstNbl;
892     }
893     return NULL;
894 }
895
896
897 static PNET_BUFFER_LIST
898 OvsCopySinglePacketNBL(PVOID ovsContext,
899                        PNET_BUFFER_LIST nbl,
900                        PNET_BUFFER nb,
901                        UINT32 headRoom,
902                        BOOLEAN copyNblInfo)
903 {
904     UINT32 size;
905     ULONG copiedSize;
906     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
907     PNET_BUFFER_LIST newNbl;
908     PNET_BUFFER newNb;
909     NDIS_STATUS status;
910     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
911
912     size = NET_BUFFER_DATA_LENGTH(nb);
913     if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) {
914         newNbl = OvsAllocateFixSizeNBL(context, size, headRoom);
915     } else {
916         newNbl = OvsAllocateVariableSizeNBL(context, size, headRoom);
917     }
918     if (newNbl == NULL) {
919         return NULL;
920     }
921     newNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
922     status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
923                                               &copiedSize);
924
925     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
926     if (status == NDIS_STATUS_SUCCESS) {
927         status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo);
928     }
929
930     if (status != NDIS_STATUS_SUCCESS || copiedSize != size) {
931         OvsCompleteNBL(context, newNbl, TRUE);
932         return NULL;
933     }
934
935     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
936     ASSERT(dstCtx && srcCtx);
937     ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC);
938
939     dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER |
940                                       OVS_BUFFER_SEND_BUFFER);
941 #ifdef DBG
942     OvsDumpNetBufferList(newNbl);
943     OvsDumpForwardingDetails(newNbl);
944 #endif
945     OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl);
946     return newNbl;
947 }
948
949 /*
950  * --------------------------------------------------------------------------
951  * OvsFullCopyNBL --
952  *
953  *    Copy the NBL to a new NBL including data.
954  *
955  * Notes:
956  *     The NBL can have multiple NBs, but the final result is one NBL.
957  * --------------------------------------------------------------------------
958  */
959 PNET_BUFFER_LIST
960 OvsFullCopyNBL(PVOID ovsContext,
961                PNET_BUFFER_LIST nbl,
962                UINT32 headRoom,
963                BOOLEAN copyNblInfo)
964 {
965     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
966     POVS_NBL_POOL ovsPool = &context->ovsPool;
967     PNET_BUFFER_LIST newNbl;
968     PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL;
969     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
970     PMDL mdl;
971     NDIS_STATUS status;
972     UINT32 size, totalSize;
973     ULONG copiedSize;
974     UINT16 flags;
975     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo;
976
977     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
978     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
979         OVS_LOG_INFO("src nbl must have ctx initialized");
980         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
981         return NULL;
982     }
983
984     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
985
986     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
987         return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
988     }
989
990     newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool,
991                                        (UINT16)sizeof (OVS_BUFFER_CONTEXT),
992                                        (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
993     if (newNbl == NULL) {
994         return NULL;
995     }
996
997     while (nb) {
998         size = NET_BUFFER_DATA_LENGTH(nb);
999         totalSize = MEM_ALIGN_SIZE(size + headRoom);
1000         mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize);
1001
1002         if (mdl == NULL) {
1003             goto nblcopy_error;
1004         }
1005         newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0);
1006         if (newNb == NULL) {
1007             OvsFreeMDLAndData(mdl);
1008             goto nblcopy_error;
1009         }
1010         if (firstNb == NULL) {
1011             firstNb = newNb;
1012         } else {
1013             NET_BUFFER_NEXT_NB(prevNb) = newNb;
1014         }
1015         prevNb = newNb;
1016 #ifdef DBG
1017         InterlockedIncrement((LONG volatile *)&ovsPool->nbCount);
1018 #endif
1019         status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL);
1020         ASSERT(status == NDIS_STATUS_SUCCESS);
1021
1022         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
1023                                                   &copiedSize);
1024         if (status != NDIS_STATUS_SUCCESS || size != copiedSize) {
1025             goto nblcopy_error;
1026         }
1027
1028         nb = NET_BUFFER_NEXT_NB(nb);
1029     }
1030
1031     NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb;
1032
1033     newNbl->SourceHandle = ovsPool->ndisHandle;
1034     status = context->NdisSwitchHandlers.
1035          AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl);
1036
1037     if (status != NDIS_STATUS_SUCCESS) {
1038         goto nblcopy_error;
1039     }
1040
1041     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo);
1042     if (status != NDIS_STATUS_SUCCESS) {
1043         goto nblcopy_error;
1044     }
1045
1046     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl);
1047     dstInfo->IsPacketDataSafe = TRUE;
1048
1049     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1050
1051     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
1052
1053     flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
1054              OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL |
1055              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
1056
1057     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb),
1058                       OVS_DPPORT_NUMBER_INVALID);
1059
1060 #ifdef DBG
1061     OvsDumpNetBufferList(nbl);
1062     OvsDumpForwardingDetails(nbl);
1063     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
1064 #endif
1065     OVS_LOG_LOUD("newNbl: %p", newNbl);
1066     return newNbl;
1067
1068 nblcopy_error:
1069     while (firstNb) {
1070 #ifdef DBG
1071         InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1072 #endif
1073         prevNb = firstNb;
1074         firstNb = NET_BUFFER_NEXT_NB(prevNb);
1075         mdl = NET_BUFFER_FIRST_MDL(prevNb);
1076         NET_BUFFER_FIRST_MDL(prevNb) = NULL;
1077         NdisFreeNetBuffer(prevNb);
1078         OvsFreeMDLAndData(mdl);
1079     }
1080     NdisFreeNetBufferList(newNbl);
1081     OVS_LOG_ERROR("OvsFullCopyNBL failed");
1082     return NULL;
1083 }
1084
1085 /*
1086  * --------------------------------------------------------------------------
1087  * GetSegmentHeaderInfo
1088  *
1089  *    Extract header size and sequence number for the segment.
1090  * --------------------------------------------------------------------------
1091  */
1092 static NDIS_STATUS
1093 GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
1094                      const POVS_PACKET_HDR_INFO hdrInfo,
1095                      UINT32 *hdrSize, UINT32 *seqNumber)
1096 {
1097     TCPHdr tcpStorage;
1098     const TCPHdr *tcp;
1099
1100     /* Parse the orginal Eth/IP/TCP header */
1101     tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
1102     if (tcp == NULL) {
1103         return NDIS_STATUS_FAILURE;
1104     }
1105     *seqNumber = ntohl(tcp->seq);
1106     *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp);
1107
1108     return NDIS_STATUS_SUCCESS;
1109 }
1110
1111
1112 /*
1113  * --------------------------------------------------------------------------
1114  * FixSegmentHeader
1115  *
1116  *    Fix IP length, IP checksum, TCP sequence number and TCP checksum
1117  *    in the segment.
1118  * --------------------------------------------------------------------------
1119  */
1120 static NDIS_STATUS
1121 FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber,
1122                  BOOLEAN lastPacket, UINT16 packetCounter)
1123 {
1124     EthHdr *dstEth;
1125     IPHdr *dstIP;
1126     TCPHdr *dstTCP;
1127     PMDL mdl;
1128     PUINT8 bufferStart;
1129
1130     mdl = NET_BUFFER_FIRST_MDL(nb);
1131
1132     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
1133     if (!bufferStart) {
1134         return NDIS_STATUS_RESOURCES;
1135     }
1136     dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb));
1137     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1138             >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
1139     dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth);
1140     dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
1141     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1142             >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1143
1144     /* Fix IP length and checksum */
1145     ASSERT(dstIP->protocol == IPPROTO_TCP);
1146     dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1147     dstIP->id += packetCounter;
1148     dstIP->check = 0;
1149     dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
1150
1151     /* Fix TCP checksum */
1152     dstTCP->seq = htonl(seqNumber);
1153
1154     /*
1155      * Set the TCP FIN and PSH bit only for the last packet
1156      * More information can be found under:
1157      * https://msdn.microsoft.com/en-us/library/windows/hardware/ff568840%28v=vs.85%29.aspx
1158      */
1159     if (dstTCP->fin) {
1160         dstTCP->fin = lastPacket;
1161     }
1162     if (dstTCP->psh) {
1163         dstTCP->psh = lastPacket;
1164     }
1165
1166     UINT16 csumLength = segmentSize + TCP_HDR_LEN(dstTCP);
1167     dstTCP->check = IPPseudoChecksum(&dstIP->saddr,
1168                                      &dstIP->daddr,
1169                                      IPPROTO_TCP,
1170                                      csumLength);
1171     dstTCP->check = CalculateChecksumNB(nb,
1172                                         csumLength,
1173                                         sizeof *dstEth + dstIP->ihl * 4);
1174
1175     return STATUS_SUCCESS;
1176 }
1177
1178 /*
1179  * --------------------------------------------------------------------------
1180  * OvsTcpSegmentyNBL --
1181  *
1182  *    Segment TCP payload, and prepend each segment with ether/IP/TCP header.
1183  *    Leave headRoom for additional encap.
1184  *
1185  *    Please note,
1186  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
1187  *       this function.
1188  *       The NBL should already have ref to itself so that during copy
1189  *       it will not be freed.
1190  *       Currently this API assert there is only one NB in an NBL, it needs
1191  *       to be fixed if we receive multiple NBs in an NBL.
1192  * --------------------------------------------------------------------------
1193  */
1194 PNET_BUFFER_LIST
1195 OvsTcpSegmentNBL(PVOID ovsContext,
1196                  PNET_BUFFER_LIST nbl,
1197                  POVS_PACKET_HDR_INFO hdrInfo,
1198                  UINT32 mss,
1199                  UINT32 headRoom)
1200 {
1201     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1202 #ifdef DBG
1203     POVS_NBL_POOL ovsPool = &context->ovsPool;
1204 #endif
1205     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
1206     UINT32 size, hdrSize, seqNumber;
1207     PNET_BUFFER_LIST newNbl;
1208     PNET_BUFFER nb, newNb;
1209     NDIS_STATUS status;
1210     UINT16 segmentSize;
1211     ULONG copiedSize;
1212     UINT16 packetCounter = 0;
1213
1214     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1215     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1216         OVS_LOG_INFO("src nbl must have ctx initialized");
1217         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1218         return NULL;
1219     }
1220
1221     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1222     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
1223
1224     /* Figure out the segment header size */
1225     status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
1226     if (status != NDIS_STATUS_SUCCESS) {
1227         OVS_LOG_INFO("Cannot parse NBL header");
1228         return NULL;
1229     }
1230
1231     size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
1232
1233     /* XXX add to ovsPool counters? */
1234     newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
1235             NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
1236     if (newNbl == NULL) {
1237         return NULL;
1238     }
1239
1240     /* Now deal with TCP payload */
1241     for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
1242             newNb = NET_BUFFER_NEXT_NB(newNb)) {
1243         segmentSize = (size > mss ? mss : size) & 0xffff;
1244         if (headRoom) {
1245             NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
1246         }
1247
1248         /* Now copy the eth/IP/TCP header and fix up */
1249         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0,
1250                                                   &copiedSize);
1251         if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) {
1252             goto nblcopy_error;
1253         }
1254
1255         status = FixSegmentHeader(newNb, segmentSize, seqNumber,
1256                                   NET_BUFFER_NEXT_NB(newNb) == NULL,
1257                                   packetCounter);
1258         if (status != NDIS_STATUS_SUCCESS) {
1259             goto nblcopy_error;
1260         }
1261
1262
1263         /* Move on to the next segment */
1264         size -= segmentSize;
1265         seqNumber += segmentSize;
1266         packetCounter++;
1267     }
1268
1269     status = OvsAllocateNBLContext(context, newNbl);
1270     if (status != NDIS_STATUS_SUCCESS) {
1271         goto nblcopy_error;
1272     }
1273
1274     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE);
1275     if (status != NDIS_STATUS_SUCCESS) {
1276         goto nbl_context_error;
1277     }
1278
1279     newNbl->ParentNetBufferList = nbl;
1280
1281     /* Remember it's a fragment NBL so we can free it properly */
1282     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1283     ASSERT(dstCtx != NULL);
1284     dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT |
1285         OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER;
1286     dstCtx->refCount = 1;
1287     dstCtx->magic = OVS_CTX_MAGIC;
1288     dstCtx->dataOffsetDelta = hdrSize + headRoom;
1289
1290     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
1291 #ifdef DBG
1292     InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount);
1293
1294     OvsDumpNetBufferList(nbl);
1295     OvsDumpForwardingDetails(nbl);
1296
1297     OvsDumpNetBufferList(newNbl);
1298     OvsDumpForwardingDetails(newNbl);
1299 #endif
1300     OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
1301     return newNbl;
1302
1303 nbl_context_error:
1304     OvsFreeNBLContext(context, newNbl);
1305 nblcopy_error:
1306 #ifdef DBG
1307     InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1308 #endif
1309     NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0);
1310     return NULL;
1311 }
1312
1313 /*
1314  * --------------------------------------------------------------------------
1315  * OvsAllocateNBLFromBuffer --
1316  *
1317  * This function allocates all the stuff necessary for creating an NBL from the
1318  * input buffer of specified length, namely, a nonpaged data buffer of size
1319  * length, an MDL from it, and a NB and NBL from it. It does not allocate an NBL
1320  * context yet. It also copies data from the specified buffer to the NBL.
1321  * --------------------------------------------------------------------------
1322  */
1323 PNET_BUFFER_LIST
1324 OvsAllocateNBLFromBuffer(PVOID context,
1325                          PVOID buffer,
1326                          ULONG length)
1327 {
1328     POVS_SWITCH_CONTEXT switchContext = (POVS_SWITCH_CONTEXT)context;
1329     UINT8 *data = NULL;
1330     PNET_BUFFER_LIST nbl = NULL;
1331     PNET_BUFFER nb;
1332     PMDL mdl;
1333
1334     if (length > OVS_DEFAULT_DATA_SIZE) {
1335         nbl = OvsAllocateVariableSizeNBL(switchContext, length,
1336                                          OVS_DEFAULT_HEADROOM_SIZE);
1337
1338     } else {
1339         nbl = OvsAllocateFixSizeNBL(switchContext, length,
1340                                     OVS_DEFAULT_HEADROOM_SIZE);
1341     }
1342     if (nbl == NULL) {
1343         return NULL;
1344     }
1345
1346     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1347     mdl = NET_BUFFER_CURRENT_MDL(nb);
1348     data = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority) +
1349                     NET_BUFFER_CURRENT_MDL_OFFSET(nb);
1350     if (!data) {
1351         OvsCompleteNBL(switchContext, nbl, TRUE);
1352         return NULL;
1353     }
1354
1355     NdisMoveMemory(data, buffer, length);
1356
1357     return nbl;
1358 }
1359
1360 /*
1361  * --------------------------------------------------------------------------
1362  * OvsFullCopyToMultipleNBLs --
1363  *
1364  *    Copy NBL to multiple NBLs, each NB will have its own NBL
1365  * --------------------------------------------------------------------------
1366  */
1367 PNET_BUFFER_LIST
1368 OvsFullCopyToMultipleNBLs(PVOID ovsContext,
1369                           PNET_BUFFER_LIST nbl,
1370                           UINT32 headRoom,
1371                           BOOLEAN copyNblInfo)
1372 {
1373
1374     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1375     PNET_BUFFER_LIST firstNbl, currNbl, newNbl;
1376     PNET_BUFFER nb;
1377     POVS_BUFFER_CONTEXT srcCtx;
1378
1379     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1380     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1381         OVS_LOG_INFO("src nbl must have ctx initialized");
1382         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1383         return NULL;
1384     }
1385
1386     nb =  NET_BUFFER_LIST_FIRST_NB(nbl);
1387     newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
1388
1389     if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) {
1390         return newNbl;
1391     } else {
1392         firstNbl = newNbl;
1393         currNbl = newNbl;
1394     }
1395
1396     while (nb) {
1397         newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom,
1398                                         copyNblInfo);
1399         if (newNbl == NULL) {
1400             goto copymultiple_error;
1401         }
1402         NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl;
1403         currNbl = newNbl;
1404         nb = NET_BUFFER_NEXT_NB(nb);
1405     }
1406     return firstNbl;
1407
1408 copymultiple_error:
1409     while (firstNbl) {
1410         currNbl = firstNbl;
1411         firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl);
1412         NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL;
1413         OvsCompleteNBL(context, currNbl, TRUE);
1414     }
1415     return NULL;
1416
1417 }
1418
1419
1420 /*
1421  * --------------------------------------------------------------------------
1422  * OvsCompleteNBL --
1423  *
1424  *     This function tries to free the NBL allocated by OVS buffer
1425  *     management module. If it trigger the completion of the parent
1426  *     NBL, it will recursively call itself. If it trigger the completion
1427  *     of external NBL, it will be returned to the caller. The caller
1428  *     is responsible to call API to return to upper layer.
1429  * --------------------------------------------------------------------------
1430  */
1431 PNET_BUFFER_LIST
1432 OvsCompleteNBL(POVS_SWITCH_CONTEXT context,
1433                PNET_BUFFER_LIST nbl,
1434                BOOLEAN updateRef)
1435 {
1436     POVS_BUFFER_CONTEXT ctx;
1437     UINT16 flags;
1438     PNET_BUFFER_LIST parent;
1439     NDIS_STATUS status;
1440     NDIS_HANDLE poolHandle;
1441     LONG value;
1442     POVS_NBL_POOL ovsPool = &context->ovsPool;
1443     PNET_BUFFER nb;
1444
1445
1446     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1447
1448     ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1449
1450     OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d",
1451                  nbl, ctx, ctx->refCount, updateRef);
1452
1453     if (updateRef) {
1454         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1455         if (value != 0) {
1456             return NULL;
1457         }
1458     } else {
1459         /*
1460          * This is a special case, the refCount must be zero
1461          */
1462         ASSERT(ctx->refCount == 0);
1463     }
1464
1465     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1466
1467     flags = ctx->flags;
1468     if (!(flags & OVS_BUFFER_FRAGMENT) &&
1469         NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) {
1470         UINT32 diff;
1471         if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) {
1472             diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb);
1473             status = NdisRetreatNetBufferListDataStart(nbl, diff, 0,
1474                                                        NULL, NULL);
1475             ASSERT(status == NDIS_STATUS_SUCCESS);
1476         } else {
1477             diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength;
1478             NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL);
1479         }
1480     }
1481
1482     if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) {
1483         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
1484     }
1485
1486     if (flags & OVS_BUFFER_NEED_COMPLETE) {
1487         /*
1488          * return to caller for completion
1489          */
1490 #ifdef DBG
1491         InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount);
1492 #endif
1493         return nbl;
1494     }
1495
1496     if (flags & OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) {
1497         context->NdisSwitchHandlers.
1498               FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
1499     }
1500
1501     if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) {
1502         PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1503         while (nb) {
1504             PMDL mdl = NET_BUFFER_FIRST_MDL(nb);
1505             NET_BUFFER_FIRST_MDL(nb) = NULL;
1506             ASSERT(mdl->Next == NULL);
1507             OvsFreeMDLAndData(mdl);
1508             nb = NET_BUFFER_NEXT_NB(nb);
1509         }
1510     }
1511
1512     if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) {
1513         PNET_BUFFER nb, nextNb;
1514
1515         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1516         while (nb) {
1517             nextNb = NET_BUFFER_NEXT_NB(nb);
1518             NdisFreeNetBuffer(nb);
1519 #ifdef DBG
1520             InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1521 #endif
1522             nb = nextNb;
1523         }
1524         NET_BUFFER_LIST_FIRST_NB(nbl) = NULL;
1525     }
1526
1527     parent = nbl->ParentNetBufferList;
1528
1529     poolHandle = NdisGetPoolFromNetBufferList(nbl);
1530     if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) {
1531         ASSERT(poolHandle == ovsPool->fixSizePool);
1532 #ifdef DBG
1533         InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount);
1534 #endif
1535         NdisFreeNetBufferList(nbl);
1536     } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) {
1537         ASSERT(poolHandle == ovsPool->zeroSizePool);
1538 #ifdef DBG
1539         InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount);
1540 #endif
1541         NdisFreeNetBufferList(nbl);
1542     } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) {
1543         ASSERT(poolHandle == ovsPool->nblOnlyPool);
1544 #ifdef DBG
1545         InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount);
1546 #endif
1547         NdisFreeCloneNetBufferList(nbl, 0);
1548     } else if (flags & OVS_BUFFER_FRAGMENT) {
1549         OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent);
1550 #ifdef DBG
1551         InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1552 #endif
1553         NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0);
1554     }
1555
1556     if (parent != NULL) {
1557         ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent);
1558         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1559         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1560         if (value == 0) {
1561             return OvsCompleteNBL(context, parent, FALSE);
1562         }
1563     }
1564     return NULL;
1565 }
1566
1567 /*
1568  * --------------------------------------------------------------------------
1569  * OvsSetCtxSourcePortNo --
1570  *      Setter function which stores the source port of an NBL in the NBL
1571  * Context Info.
1572  * --------------------------------------------------------------------------
1573  */
1574 NDIS_STATUS
1575 OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1576                       UINT32 portNo)
1577 {
1578     POVS_BUFFER_CONTEXT ctx;
1579     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1580     if (ctx == NULL) {
1581         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1582         return STATUS_INVALID_PARAMETER;
1583     }
1584
1585     ctx->srcPortNo = portNo;
1586     return NDIS_STATUS_SUCCESS;
1587 }
1588
1589 /*
1590  * --------------------------------------------------------------------------
1591  * OvsGetCtxSourcePortNo --
1592  *      Get source port of an NBL from its Context Info.
1593  * --------------------------------------------------------------------------
1594  */
1595 NDIS_STATUS
1596 OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1597                       UINT32 *portNo)
1598 {
1599     POVS_BUFFER_CONTEXT ctx;
1600     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1601     if (ctx == NULL || portNo == NULL) {
1602         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1603         return STATUS_INVALID_PARAMETER;
1604     }
1605     *portNo = ctx->srcPortNo;
1606     return NDIS_STATUS_SUCCESS;
1607 }