datapath-windows: Solved BSOD when adding OVS ports
[cascardo/ovs.git] / datapath-windows / ovsext / BufferMgmt.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ****************************************************************************
19  *
20  *       Simple Buffer Management framework for OVS
21  *
22  *  It introduces four NDIS buffer pools
23  *     **Fix size net buffer list pool--this is used for small buffer
24  *     One allocation will include NBL + NB + MDL + Data + CONTEXT.
25  *
26  *     **Variable size net buffer list pool--this is used for variable size
27  *     buffer. The allocation of net buffer list will include NBL + NB +
28  *     CONTEXT, a separate allocation of MDL + data buffer is required.
29  *
30  *     **NBL only net buffer list pool-- this is used for partial copy
31  *     (or clone). In this case we can not allocate net buffer list and
32  *     net buffer at the same time.
33  *
34  *     **Net buffer pool-- this is required when net buffer need to be
35  *     allocated separately.
36  *
37  *  A Buffer context is defined to track the buffer specific information
38  *  so that during NBL completion, proper action can be taken. Please see
39  *  code for details.
40  *
41  *  Here is the usage of the management API
42  *  Every external NBL should have its NBL context initialized by calling
43  *     OvsInitExternalNBLContext()
44  *
45  *  After the external NBL context is initialized, it can call the following
46  *  API to allocate, copy or partial copy NBL.
47  *
48  *     OvsAllocateFixSizeNBL()
49  *     OvsAllocateVariableSizeNBL()
50  *
51  *     OvsPartialCopyNBL()
52  *     OvsPartialCopyToMultipleNBLs()
53  *
54  *     OvsFullCopyNBL()
55  *     OvsFullCopyToMultipleNBLs()
56  *
57  *  See code comments for detail description of the functions.
58  *
59  *  All NBLs are completed through
60  *       OvsCompleteNBL()
61  *     If this API return non NULL value, then the returned NBL should be
62  *     returned to upper layer by calling
63  *     NdisFSendNetBufferListsComplete() if the buffer is from upper
64  *     layer. In case of WFP, it can call the corresponding completion routine
65  *     to return the NBL to the framework.
66  *
67  *  NOTE:
68  *     1. Copy or partial copy will not copy destination port array
69  *     2. Copy or partial copy will copy src port id and index
70  *     3. New Allocated NBL will have src port set to default port id
71  *     4. If original packet has direction flag set, the copied or partial
72  *        copied NBL will still be in same direction.
73  *     5. When you advance or retreat the buffer, you may need to update
74  *        relevant meta data to keep it consistent.
75  *
76  * ****************************************************************************
77  */
78
79 #include "precomp.h"
80 #include "Switch.h"
81
82 #ifdef OVS_DBG_MOD
83 #undef OVS_DBG_MOD
84 #endif
85 #define OVS_DBG_MOD OVS_DBG_BUFMGMT
86 #include "Debug.h"
87 #include "NetProto.h"
88 #include "Flow.h"
89 #include "Checksum.h"
90 #include "PacketParser.h"
91
92 /*
93  * --------------------------------------------------------------------------
94  * OvsInitBufferPool --
95  *
96  *    Allocate NBL and NB pool
97  *
98  * XXX: more optimization may be done for buffer management include local cache
99  * of NBL, NB, data, context, MDL.
100  * --------------------------------------------------------------------------
101  */
102 NDIS_STATUS
103 OvsInitBufferPool(PVOID ovsContext)
104 {
105     POVS_NBL_POOL ovsPool;
106     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
107     NET_BUFFER_LIST_POOL_PARAMETERS  nblParam;
108     NET_BUFFER_POOL_PARAMETERS nbParam;
109
110     C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8);
111
112     OVS_LOG_TRACE("Enter: context: %p", context);
113
114     ovsPool = &context->ovsPool;
115     RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL));
116     ovsPool->ndisHandle = context->NdisFilterHandle;
117     ovsPool->ndisContext = context->NdisSwitchContext;
118     /*
119      * fix size NBL pool includes
120      *    NBL + NB + MDL + DATA + Context
121      *    This is mainly used for Packet execute or slow path when copy is
122      *    required and size is less than OVS_DEFAULT_DATA_SIZE. We expect
123      *    Most of packet from user space will use this Pool. (This is
124      *    true for all bfd and cfm packet.
125      */
126     RtlZeroMemory(&nblParam, sizeof (nblParam));
127     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
128                            NDIS_OBJECT_TYPE_DEFAULT,
129                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
130                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
131     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
132     nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG;
133     nblParam.fAllocateNetBuffer = TRUE;
134     nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE;
135
136     ovsPool->fixSizePool =
137         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
138     if (ovsPool->fixSizePool == NULL) {
139         goto pool_cleanup;
140     }
141
142     /*
143      * Zero Size NBL Pool includes
144      *    NBL + NB + Context
145      *    This is mainly for packet with large data Size, in this case MDL and
146      *    Data will be allocate separately.
147      */
148     RtlZeroMemory(&nblParam, sizeof (nblParam));
149     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
150                            NDIS_OBJECT_TYPE_DEFAULT,
151                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
152                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
153
154     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
155     nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG;
156     nblParam.fAllocateNetBuffer = TRUE;
157     nblParam.DataSize = 0;
158
159     ovsPool->zeroSizePool =
160         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
161     if (ovsPool->zeroSizePool == NULL) {
162         goto pool_cleanup;
163     }
164
165     /*
166      * NBL only pool just includes
167      *    NBL (+ context)
168      *    This is mainly used for clone and partial copy
169      */
170     RtlZeroMemory(&nblParam, sizeof (nblParam));
171     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
172                            NDIS_OBJECT_TYPE_DEFAULT,
173                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
174                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
175
176     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
177     nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG;
178     nblParam.fAllocateNetBuffer = FALSE;
179     nblParam.DataSize = 0;
180
181     ovsPool->nblOnlyPool =
182         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
183     if (ovsPool->nblOnlyPool == NULL) {
184         goto pool_cleanup;
185     }
186
187     /* nb Pool
188      *    NB only pool, used for copy
189      */
190
191     OVS_INIT_OBJECT_HEADER(&nbParam.Header,
192                            NDIS_OBJECT_TYPE_DEFAULT,
193                            NET_BUFFER_POOL_PARAMETERS_REVISION_1,
194                            NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1);
195     nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG;
196     nbParam.DataSize = 0;
197     ovsPool->nbPool =
198         NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam);
199     if (ovsPool->nbPool == NULL) {
200         goto pool_cleanup;
201     }
202     OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p"
203                   "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool,
204                   ovsPool->nblOnlyPool, ovsPool->nbPool);
205     return NDIS_STATUS_SUCCESS;
206
207 pool_cleanup:
208     OvsCleanupBufferPool(context);
209     OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool");
210     return NDIS_STATUS_RESOURCES;
211 }
212
213
214 /*
215  * --------------------------------------------------------------------------
216  * OvsCleanupBufferPool --
217  *  Free Buffer pool for NBL and NB.
218  * --------------------------------------------------------------------------
219  */
220 VOID
221 OvsCleanupBufferPool(PVOID ovsContext)
222 {
223     POVS_NBL_POOL ovsPool;
224     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
225     ovsPool = &context->ovsPool;
226     OVS_LOG_TRACE("Enter: context: %p", context);
227 #ifdef DBG
228     ASSERT(ovsPool->fixNBLCount == 0);
229     ASSERT(ovsPool->zeroNBLCount == 0);
230     ASSERT(ovsPool->nblOnlyCount == 0);
231     ASSERT(ovsPool->nbCount == 0);
232     ASSERT(ovsPool->sysNBLCount == 0);
233     ASSERT(ovsPool->fragNBLCount == 0);
234 #endif
235
236     if (ovsPool->fixSizePool) {
237         NdisFreeNetBufferListPool(ovsPool->fixSizePool);
238         ovsPool->fixSizePool = NULL;
239     }
240     if (ovsPool->zeroSizePool) {
241         NdisFreeNetBufferListPool(ovsPool->zeroSizePool);
242         ovsPool->zeroSizePool = NULL;
243     }
244     if (ovsPool->nblOnlyPool) {
245         NdisFreeNetBufferListPool(ovsPool->nblOnlyPool);
246         ovsPool->nblOnlyPool = NULL;
247     }
248     if (ovsPool->nbPool) {
249         NdisFreeNetBufferPool(ovsPool->nbPool);
250         ovsPool->nbPool = NULL;
251     }
252     OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool");
253 }
254
255
256 static VOID
257 OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
258                   UINT16 flags,
259                   UINT32 origDataLength,
260                   UINT32 srcPortNo)
261 {
262     ctx->magic = OVS_CTX_MAGIC;
263     ctx->refCount = 1;
264     ctx->flags = flags;
265     ctx->srcPortNo = srcPortNo;
266     ctx->origDataLength = origDataLength;
267 }
268
269
270 static VOID
271 OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl)
272 {
273     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
274     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
275     if (info == NULL) {
276         return;
277     }
278     OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d "
279                  "isDataSafe: %s, safeDataSize: %d",
280                  nbl, info->NumAvailableDestinations, info->SourcePortId,
281                  info->SourceNicIndex,
282                  info->IsPacketDataSafe ? "TRUE" : "FALSE",
283                  info->IsPacketDataSafe ? 0 : info->SafePacketDataSize);
284
285 }
286
287 static VOID
288 OvsDumpNBLContext(PNET_BUFFER_LIST nbl)
289 {
290     PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context;
291     if (ctx == NULL) {
292         OVS_LOG_INFO("No Net Buffer List context");
293         return;
294     }
295     while (ctx) {
296         OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d",
297                      nbl, ctx, ctx->Size, ctx->Offset);
298         ctx = ctx->Next;
299     }
300 }
301
302
303 static VOID
304 OvsDumpMDLChain(PMDL mdl)
305 {
306     PMDL tmp;
307     tmp = mdl;
308     while (tmp) {
309         OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p"
310                      " ByteCount: %d, ByteOffset: %d",
311                      tmp, tmp->Size, tmp->MappedSystemVa,
312                      tmp->StartVa, tmp->ByteCount, tmp->ByteOffset);
313         tmp = tmp->Next;
314     }
315 }
316
317
318 static VOID
319 OvsDumpNetBuffer(PNET_BUFFER nb)
320 {
321     OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p "
322                  "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d",
323                  nb,
324                  NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle,
325                  NET_BUFFER_FIRST_MDL(nb),
326                  NET_BUFFER_CURRENT_MDL(nb),
327                  NET_BUFFER_CURRENT_MDL_OFFSET(nb),
328                  NET_BUFFER_DATA_LENGTH(nb),
329                  NET_BUFFER_DATA_OFFSET(nb));
330     OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb));
331 }
332
333
334 static VOID
335 OvsDumpNetBufferList(PNET_BUFFER_LIST nbl)
336 {
337     PNET_BUFFER nb;
338     OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d "
339                  "poolHandle: %p",
340                  nbl, nbl->ParentNetBufferList,
341                  nbl->SourceHandle, nbl->ChildRefCount,
342                  nbl->NdisPoolHandle);
343     OvsDumpNBLContext(nbl);
344     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
345     while (nb) {
346         OvsDumpNetBuffer(nb);
347         nb = NET_BUFFER_NEXT_NB(nb);
348     }
349 }
350
351 /*
352  * --------------------------------------------------------------------------
353  * OvsAllocateFixSizeNBL --
354  *
355  *    Allocate fix size NBL which include
356  *       NBL + NB + MBL + Data + Context
357  *    Please note:
358  *       * Forwarding Context is allocated, but forwarding detail information
359  *       is not initailized.
360  *       * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128
361  *       byte).
362  * --------------------------------------------------------------------------
363  */
364 PNET_BUFFER_LIST
365 OvsAllocateFixSizeNBL(PVOID ovsContext,
366                       UINT32 size,
367                       UINT32 headRoom)
368 {
369     PNET_BUFFER_LIST nbl = NULL;
370     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
371     POVS_BUFFER_CONTEXT ctx;
372     POVS_NBL_POOL ovsPool = &context->ovsPool;
373     NDIS_STATUS status;
374     UINT32 line;
375     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
376
377     if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) {
378         line = __LINE__;
379         goto allocate_done;
380     }
381
382     nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool,
383                                     (UINT16)sizeof (OVS_BUFFER_CONTEXT),
384                                     (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
385
386     if (nbl == NULL) {
387         line = __LINE__;
388         goto allocate_done;
389     }
390
391     nbl->SourceHandle = ovsPool->ndisHandle;
392     status = context->NdisSwitchHandlers.
393              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
394
395     if (status != NDIS_STATUS_SUCCESS) {
396         NdisFreeNetBufferList(nbl);
397         nbl = NULL;
398         line = __LINE__;
399         goto allocate_done;
400     }
401     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
402     ASSERT(info);
403     info->IsPacketDataSafe = TRUE;
404     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
405
406     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
407                                            size, 0, NULL);
408     ASSERT(status == NDIS_STATUS_SUCCESS);
409
410 #ifdef DBG
411     InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount);
412     OvsDumpNetBufferList(nbl);
413     OvsDumpForwardingDetails(nbl);
414 #endif
415
416     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
417     ASSERT(ctx);
418
419     OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL |
420                       OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size,
421                       OVS_DEFAULT_PORT_NO);
422     line = __LINE__;
423 allocate_done:
424     OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line);
425     return nbl;
426 }
427
428
429 static PMDL
430 OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
431                       UINT32 dataSize)
432 {
433     PMDL mdl;
434     PVOID data;
435
436     data = OvsAllocateMemoryWithTag(dataSize, OVS_MDL_POOL_TAG);
437     if (data == NULL) {
438         return NULL;
439     }
440
441     mdl = NdisAllocateMdl(ndisHandle, data, dataSize);
442     if (mdl == NULL) {
443         OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
444     }
445
446     return mdl;
447 }
448
449
450 static VOID
451 OvsFreeMDLAndData(PMDL mdl)
452 {
453     PVOID data;
454
455     data = MmGetMdlVirtualAddress(mdl);
456     NdisFreeMdl(mdl);
457     OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
458 }
459
460
461 /*
462  * --------------------------------------------------------------------------
463  * OvsAllocateVariableSizeNBL --
464  *
465  *    Allocate variable size NBL, the NBL looks like
466  *      NBL + NB + Context
467  *      MDL + Data
468  * --------------------------------------------------------------------------
469  */
470 PNET_BUFFER_LIST
471 OvsAllocateVariableSizeNBL(PVOID ovsContext,
472                            UINT32 size,
473                            UINT32 headRoom)
474 {
475     PNET_BUFFER_LIST nbl = NULL;
476     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
477     POVS_NBL_POOL ovsPool = &context->ovsPool;
478     POVS_BUFFER_CONTEXT ctx;
479     UINT32 realSize;
480     PMDL mdl;
481     NDIS_STATUS status;
482     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
483     if (size == 0) {
484         return NULL;
485     }
486     realSize = MEM_ALIGN_SIZE(size + headRoom);
487
488     mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize);
489     if (mdl == NULL) {
490         return NULL;
491     }
492
493     nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool,
494                                          (UINT16)sizeof (OVS_BUFFER_CONTEXT),
495                                          (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL,
496                                                 mdl, realSize, 0);
497     if (nbl == NULL) {
498         OvsFreeMDLAndData(mdl);
499         return NULL;
500     }
501
502     nbl->SourceHandle = ovsPool->ndisHandle;
503     status = context->NdisSwitchHandlers.
504              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
505
506     if (status != NDIS_STATUS_SUCCESS) {
507        /*
508         * do we need to remove mdl from nbl XXX
509         */
510         OvsFreeMDLAndData(mdl);
511         NdisFreeNetBufferList(nbl);
512         return NULL;
513     }
514
515     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
516     ASSERT(info);
517     info->IsPacketDataSafe = TRUE;
518     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
519     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
520                                            size, 0, NULL);
521     ASSERT(status == NDIS_STATUS_SUCCESS);
522
523 #ifdef DBG
524     InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount);
525     OvsDumpNetBufferList(nbl);
526     OvsDumpForwardingDetails(nbl);
527 #endif
528
529     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
530
531     OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
532                            OVS_BUFFER_PRIVATE_FORWARD_CONTEXT |
533                            OVS_BUFFER_FROM_ZERO_SIZE_POOL,
534                       size, OVS_DEFAULT_PORT_NO);
535
536     OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl);
537     return nbl;
538 }
539
540
541 /*
542  * --------------------------------------------------------------------------
543  * OvsInitExternalNBLContext --
544  *
545  *     For NBL not allocated by OVS, it will allocate and initialize
546  *     the NBL context.
547  * --------------------------------------------------------------------------
548  */
549 POVS_BUFFER_CONTEXT
550 OvsInitExternalNBLContext(PVOID ovsContext,
551                           PNET_BUFFER_LIST nbl,
552                           BOOLEAN isRecv)
553 {
554     NDIS_HANDLE poolHandle;
555     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
556     POVS_BUFFER_CONTEXT ctx;
557     PNET_BUFFER nb;
558     NDIS_STATUS status;
559     UINT16 flags;
560
561     poolHandle = NdisGetPoolFromNetBufferList(nbl);
562
563     if (poolHandle == context->ovsPool.ndisHandle ||
564         nbl->SourceHandle == context->ovsPool.ndisHandle) {
565         return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
566     }
567     status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
568                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
569                                               OVS_OTHER_POOL_TAG);
570     if (status != NDIS_STATUS_SUCCESS) {
571         return NULL;
572     }
573 #ifdef DBG
574     OvsDumpNBLContext(nbl);
575     InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount);
576 #endif
577     flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
578     flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
579     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
580
581     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
582     /*
583      * we use first nb to decide whether we need advance or retreat during
584      * complete.
585      */
586     OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO);
587     return ctx;
588 }
589
590 /*
591  * --------------------------------------------------------------------------
592  * OvsAllocateNBLContext
593  *
594  *    Create NBL buffer context and forwarding context.
595  * --------------------------------------------------------------------------
596  */
597 NDIS_STATUS
598 OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context,
599                       PNET_BUFFER_LIST nbl)
600 {
601     POVS_NBL_POOL ovsPool = &context->ovsPool;
602     NDIS_STATUS status;
603
604     status = NdisAllocateNetBufferListContext(nbl,
605                                               sizeof (OVS_BUFFER_CONTEXT),
606                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
607                                               OVS_OTHER_POOL_TAG);
608     if (status != NDIS_STATUS_SUCCESS) {
609         return NDIS_STATUS_FAILURE;
610     }
611
612     nbl->SourceHandle = ovsPool->ndisHandle;
613     status = context->NdisSwitchHandlers.
614         AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
615
616     if (status != NDIS_STATUS_SUCCESS) {
617         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
618         return NDIS_STATUS_FAILURE;
619     }
620     return status;
621 }
622
623 /*
624  * --------------------------------------------------------------------------
625  * OvsFreeNBLContext
626  *
627  *    Free the NBL buffer context and forwarding context.
628  * --------------------------------------------------------------------------
629  */
630 NDIS_STATUS
631 OvsFreeNBLContext(POVS_SWITCH_CONTEXT context,
632                   PNET_BUFFER_LIST nbl)
633 {
634     POVS_NBL_POOL ovsPool = &context->ovsPool;
635
636     context->NdisSwitchHandlers.
637          FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
638     NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
639
640     return NDIS_STATUS_SUCCESS;
641 }
642
643 /*
644  * --------------------------------------------------------------------------
645  * OvsCopyNBLInfo
646  *
647  *    Copy NBL info from src to dst
648  * --------------------------------------------------------------------------
649  */
650 NDIS_STATUS
651 OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl,
652                POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize,
653                BOOLEAN copyNblInfo)
654 {
655     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo;
656     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
657
658     srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl);
659     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl);
660     if (srcInfo) {
661 #ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO
662         status = context->NdisSwitchHandlers.
663             CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0);
664
665         if (status != NDIS_STATUS_SUCCESS) {
666             return status;
667         }
668 #else
669         dstInfo->SourcePortId = srcInfo->SourcePortId;
670         dstInfo->SourceNicIndex = srcInfo->SourceNicIndex;
671         if (copyNblInfo) {
672             if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) {
673                 NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl);
674             } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) {
675                 NdisCopySendNetBufferListInfo(dstNbl, srcNbl);
676             }
677         }
678 #endif
679         dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe;
680         if (!srcInfo->IsPacketDataSafe && copySize >
681             srcInfo->SafePacketDataSize) {
682             srcInfo->SafePacketDataSize = copySize;
683         }
684     } else {
685         /*
686          * Assume all data are safe
687          */
688         dstInfo->IsPacketDataSafe = TRUE;
689         dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
690     }
691     return status;
692 }
693
/*
 * --------------------------------------------------------------------------
 * OvsPartialCopyNBL --
 *
 *    Partial copy NBL, if there is multiple NB in NBL, each one will be
 *    copied. We also reserve headroom for the new NBL.
 *
 *    The new NBL is a clone allocated from the NBL only pool. The first
 *    copySize bytes of every NB are copied into newly retreated space in
 *    the clone, and headRoom extra bytes are reserved in front of them.
 *    On success the clone's ParentNetBufferList points to nbl and the
 *    source context's refCount is incremented.
 *
 *    Returns the new NBL, or NULL if the source context is missing or
 *    invalid, or if any allocation or copy step fails.
 *
 *    Please note,
 *       NBL should have OVS_BUFFER_CONTEXT setup before calling
 *       this function.
 *       The NBL should already have ref to itself so that during copy
 *       it will not be freed.
 * --------------------------------------------------------------------------
 */
PNET_BUFFER_LIST
OvsPartialCopyNBL(PVOID ovsContext,
                  PNET_BUFFER_LIST nbl,
                  UINT32 copySize,
                  UINT32 headRoom,
                  BOOLEAN copyNblInfo)
{
    PNET_BUFFER_LIST newNbl;
    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
    NDIS_STATUS status;
    PNET_BUFFER srcNb, dstNb;
    ULONG byteCopied;
    POVS_NBL_POOL ovsPool = &context->ovsPool;
    POVS_BUFFER_CONTEXT srcCtx, dstCtx;
    UINT16 flags;

    /* The source must carry an initialized OVS buffer context. */
    srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
    if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
        OVS_LOG_INFO("src nbl must have ctx initialized");
        ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
        return NULL;
    }

    /*
     * Advance the source past the first copySize bytes while the clone is
     * created, then retreat the source back to where it started.
     */
    if (copySize) {
        NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL);
    }
    newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool,
                                            NULL, 0);
    if (copySize) {
        status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0,
                                                   NULL, NULL);
        ASSERT(status == NDIS_STATUS_SUCCESS);
    }

    if (newNbl == NULL) {
        return NULL;
    }

    /*
     * Allocate private memory for copy
     */
    if (copySize + headRoom) {
        status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom,
                                                   0, NULL, NULL);
        if (status != NDIS_STATUS_SUCCESS) {
            goto retreat_error;
        }

        /* Step back over the headroom so data starts at the copied bytes. */
        if (headRoom) {
            NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL);
        }
        if (copySize) {
            srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
            dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl);

            /* Walk both NB chains in step, copying copySize bytes each. */
            while (srcNb) {
                status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize,
                                                          srcNb, 0,
                                                          &byteCopied);
                if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) {
                    goto nbl_context_error;
                }
                srcNb = NET_BUFFER_NEXT_NB(srcNb);
                dstNb = NET_BUFFER_NEXT_NB(dstNb);
            }
        }
    }

    status = OvsAllocateNBLContext(context, newNbl);
    if (status != NDIS_STATUS_SUCCESS) {
        goto nbl_context_error;
    }

    status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo);
    if (status != NDIS_STATUS_SUCCESS) {
        goto copy_list_info_error;
    }

#ifdef DBG
    InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
#endif

    newNbl->ParentNetBufferList = nbl;

    dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
    ASSERT(dstCtx != NULL);

    /* Only the direction flags are inherited from the source. */
    flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);

    flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT |
             OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;

    /* Recorded length is the source's first NB length minus copySize. */
    srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
    OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize,
                      OVS_DEFAULT_PORT_NO);

    /* The source gains a reference on behalf of the new NBL. */
    InterlockedIncrement((LONG volatile *)&srcCtx->refCount);

#ifdef DBG
    OvsDumpNetBufferList(nbl);
    OvsDumpForwardingDetails(nbl);

    OvsDumpNetBufferList(newNbl);
    OvsDumpForwardingDetails(newNbl);
#endif

    OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl);
    return newNbl;

    /* Error unwinding: each label undoes one more step, falling through. */
copy_list_info_error:
    OvsFreeNBLContext(context, newNbl);
nbl_context_error:
    /* Advance over the copied region again, with the free-MDL flag set. */
    if (copySize) {
        NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL);
    }
retreat_error:
    NdisFreeCloneNetBufferList(newNbl, 0);
    return NULL;
}
827
/*
 * --------------------------------------------------------------------------
 * OvsPartialCopyToMultipleNBLs --
 *
 *     This is similar to OvsPartialCopyNBL() except that each NB will
 *     have its own NBL.
 *
 *     An NBL with a single NB is handed straight to OvsPartialCopyNBL().
 *     Otherwise the NB chain of nbl is temporarily split so that each NB is
 *     copied on its own; the chain is relinked before returning, on both
 *     the success and the failure path.
 *
 *     Returns the first NBL of the chain of new NBLs (linked through the
 *     next-NBL pointer), or NULL on failure, in which case the NBLs created
 *     so far are completed through OvsCompleteNBL().
 * --------------------------------------------------------------------------
 */
PNET_BUFFER_LIST
OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
                             PNET_BUFFER_LIST nbl,
                             UINT32 copySize,
                             UINT32 headRoom,
                             BOOLEAN copyNblInfo)
{
    PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb;
    POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
    PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL;

    nb = NET_BUFFER_LIST_FIRST_NB(nbl);
    if (NET_BUFFER_NEXT_NB(nb) == NULL) {
        return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo);
    }

    firstNb = nb;
    prevNb = nb;

    while (nb) {
        /* Detach nb and make it the only NB of nbl for this copy. */
        nextNb = NET_BUFFER_NEXT_NB(nb);
        NET_BUFFER_NEXT_NB(nb) = NULL;

        NET_BUFFER_LIST_FIRST_NB(nbl) = nb;

        newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom,
                                   copyNblInfo);
        if (newNbl == NULL) {
            goto cleanup;
        }
        if (prevNbl == NULL) {
            firstNbl = newNbl;
        } else {
            /* Append the new NBL and relink the previous NB to this one. */
            NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
            NET_BUFFER_NEXT_NB(prevNb) = nb;
        }
        prevNbl = newNbl;
        prevNb = nb;
        nb = nextNb;
    }
    NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
    return firstNbl;

cleanup:
    /*
     * Restore the original NB chain. On a first-iteration failure prevNb
     * and nb are the same buffer, so the second assignment must come last
     * to leave the correct next pointer in place.
     */
    NET_BUFFER_NEXT_NB(prevNb) = nb;
    NET_BUFFER_NEXT_NB(nb) = nextNb;
    NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;

    /* Unlink and complete every NBL created before the failure. */
    newNbl = firstNbl;
    while (newNbl) {
        firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl);
        NET_BUFFER_LIST_NEXT_NBL(newNbl) = NULL;
        OvsCompleteNBL(context, newNbl, TRUE);
        newNbl = firstNbl;
    }
    return NULL;
}
893
894
895 static PNET_BUFFER_LIST
896 OvsCopySinglePacketNBL(PVOID ovsContext,
897                        PNET_BUFFER_LIST nbl,
898                        PNET_BUFFER nb,
899                        UINT32 headRoom,
900                        BOOLEAN copyNblInfo)
901 {
902     UINT32 size;
903     ULONG copiedSize;
904     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
905     PNET_BUFFER_LIST newNbl;
906     PNET_BUFFER newNb;
907     NDIS_STATUS status;
908     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
909
910     size = NET_BUFFER_DATA_LENGTH(nb);
911     if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) {
912         newNbl = OvsAllocateFixSizeNBL(context, size, headRoom);
913     } else {
914         newNbl = OvsAllocateVariableSizeNBL(context, size, headRoom);
915     }
916     if (newNbl == NULL) {
917         return NULL;
918     }
919     newNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
920     status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
921                                               &copiedSize);
922
923     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
924     if (status == NDIS_STATUS_SUCCESS) {
925         status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo);
926     }
927
928     if (status != NDIS_STATUS_SUCCESS || copiedSize != size) {
929         OvsCompleteNBL(context, newNbl, TRUE);
930         return NULL;
931     }
932
933     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
934     ASSERT(dstCtx && srcCtx);
935     ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC);
936
937     dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER |
938                                       OVS_BUFFER_SEND_BUFFER);
939 #ifdef DBG
940     OvsDumpNetBufferList(newNbl);
941     OvsDumpForwardingDetails(newNbl);
942 #endif
943     OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl);
944     return newNbl;
945 }
946
947 /*
948  * --------------------------------------------------------------------------
949  * OvsFullCopyNBL --
950  *
951  *    Copy the NBL to a new NBL including data.
952  *
953  * Notes:
954  *     The NBL can have multiple NBs, but the final result is one NBL.
955  * --------------------------------------------------------------------------
956  */
957 PNET_BUFFER_LIST
958 OvsFullCopyNBL(PVOID ovsContext,
959                PNET_BUFFER_LIST nbl,
960                UINT32 headRoom,
961                BOOLEAN copyNblInfo)
962 {
963     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
964     POVS_NBL_POOL ovsPool = &context->ovsPool;
965     PNET_BUFFER_LIST newNbl;
966     PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL;
967     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
968     PMDL mdl;
969     NDIS_STATUS status;
970     UINT32 size, totalSize;
971     ULONG copiedSize;
972     UINT16 flags;
973     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo;
974
975     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
976     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
977         OVS_LOG_INFO("src nbl must have ctx initialized");
978         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
979         return NULL;
980     }
981
982     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
983
984     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
985         return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
986     }
987
988     newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool,
989                                        (UINT16)sizeof (OVS_BUFFER_CONTEXT),
990                                        (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
991     if (newNbl == NULL) {
992         return NULL;
993     }
994
995     while (nb) {
996         size = NET_BUFFER_DATA_LENGTH(nb);
997         totalSize = MEM_ALIGN_SIZE(size + headRoom);
998         mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize);
999
1000         if (mdl == NULL) {
1001             goto nblcopy_error;
1002         }
1003         newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0);
1004         if (newNb == NULL) {
1005             OvsFreeMDLAndData(mdl);
1006             goto nblcopy_error;
1007         }
1008         if (firstNb == NULL) {
1009             firstNb = newNb;
1010         } else {
1011             NET_BUFFER_NEXT_NB(prevNb) = newNb;
1012         }
1013         prevNb = newNb;
1014 #ifdef DBG
1015         InterlockedIncrement((LONG volatile *)&ovsPool->nbCount);
1016 #endif
1017         status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL);
1018         ASSERT(status == NDIS_STATUS_SUCCESS);
1019
1020         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
1021                                                   &copiedSize);
1022         if (status != NDIS_STATUS_SUCCESS || size != copiedSize) {
1023             goto nblcopy_error;
1024         }
1025
1026         nb = NET_BUFFER_NEXT_NB(nb);
1027     }
1028
1029     NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb;
1030
1031     newNbl->SourceHandle = ovsPool->ndisHandle;
1032     status = context->NdisSwitchHandlers.
1033          AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl);
1034
1035     if (status != NDIS_STATUS_SUCCESS) {
1036         goto nblcopy_error;
1037     }
1038
1039     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo);
1040     if (status != NDIS_STATUS_SUCCESS) {
1041         goto nblcopy_error;
1042     }
1043
1044     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl);
1045     dstInfo->IsPacketDataSafe = TRUE;
1046
1047     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1048
1049     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
1050
1051     flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
1052              OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL |
1053              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
1054
1055     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb),
1056                       OVS_DEFAULT_PORT_NO);
1057
1058 #ifdef DBG
1059     OvsDumpNetBufferList(nbl);
1060     OvsDumpForwardingDetails(nbl);
1061     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
1062 #endif
1063     OVS_LOG_LOUD("newNbl: %p", newNbl);
1064     return newNbl;
1065
1066 nblcopy_error:
1067     while (firstNb) {
1068 #ifdef DBG
1069         InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1070 #endif
1071         prevNb = firstNb;
1072         firstNb = NET_BUFFER_NEXT_NB(prevNb);
1073         mdl = NET_BUFFER_FIRST_MDL(prevNb);
1074         NET_BUFFER_FIRST_MDL(prevNb) = NULL;
1075         NdisFreeNetBuffer(prevNb);
1076         OvsFreeMDLAndData(mdl);
1077     }
1078     NdisFreeNetBufferList(newNbl);
1079     OVS_LOG_ERROR("OvsFullCopyNBL failed");
1080     return NULL;
1081 }
1082
1083 /*
1084  * --------------------------------------------------------------------------
1085  * GetSegmentHeaderInfo
1086  *
1087  *    Extract header size and sequence number for the segment.
1088  * --------------------------------------------------------------------------
1089  */
1090 static NDIS_STATUS
1091 GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
1092                      const POVS_PACKET_HDR_INFO hdrInfo,
1093                      UINT32 *hdrSize, UINT32 *seqNumber)
1094 {
1095     TCPHdr tcpStorage;
1096     const TCPHdr *tcp;
1097
1098     /* Parse the orginal Eth/IP/TCP header */
1099     tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
1100     if (tcp == NULL) {
1101         return NDIS_STATUS_FAILURE;
1102     }
1103     *seqNumber = ntohl(tcp->seq);
1104     *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp);
1105
1106     return NDIS_STATUS_SUCCESS;
1107 }
1108
1109
1110 /*
1111  * --------------------------------------------------------------------------
1112  * FixSegmentHeader
1113  *
1114  *    Fix IP length, IP checksum, TCP sequence number and TCP checksum
1115  *    in the segment.
1116  * --------------------------------------------------------------------------
1117  */
1118 static NDIS_STATUS
1119 FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber)
1120 {
1121     EthHdr *dstEth;
1122     IPHdr *dstIP;
1123     TCPHdr *dstTCP;
1124     PMDL mdl;
1125     PUINT8 bufferStart;
1126
1127     mdl = NET_BUFFER_FIRST_MDL(nb);
1128
1129     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
1130     if (!bufferStart) {
1131         return NDIS_STATUS_RESOURCES;
1132     }
1133     dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb));
1134     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1135             >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
1136     dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth);
1137     dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
1138     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1139             >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1140
1141     /* Fix IP length and checksum */
1142     ASSERT(dstIP->protocol == IPPROTO_TCP);
1143     dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1144     dstIP->check = 0;
1145     dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
1146
1147     /* Fix TCP checksum */
1148     dstTCP->seq = htonl(seqNumber);
1149     dstTCP->check =
1150         IPPseudoChecksum((UINT32 *)&dstIP->saddr,
1151                          (UINT32 *)&dstIP->daddr,
1152                          IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP));
1153     dstTCP->check = CalculateChecksumNB(nb,
1154             (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4),
1155             sizeof *dstEth + dstIP->ihl * 4);
1156     return STATUS_SUCCESS;
1157 }
1158
1159 /*
1160  * --------------------------------------------------------------------------
1161  * OvsTcpSegmentyNBL --
1162  *
1163  *    Segment TCP payload, and prepend each segment with ether/IP/TCP header.
1164  *    Leave headRoom for additional encap.
1165  *
1166  *    Please note,
1167  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
1168  *       this function.
1169  *       The NBL should already have ref to itself so that during copy
1170  *       it will not be freed.
1171  *       Currently this API assert there is only one NB in an NBL, it needs
1172  *       to be fixed if we receive multiple NBs in an NBL.
1173  * --------------------------------------------------------------------------
1174  */
1175 PNET_BUFFER_LIST
1176 OvsTcpSegmentNBL(PVOID ovsContext,
1177                  PNET_BUFFER_LIST nbl,
1178                  POVS_PACKET_HDR_INFO hdrInfo,
1179                  UINT32 mss,
1180                  UINT32 headRoom)
1181 {
1182     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1183 #ifdef DBG
1184     POVS_NBL_POOL ovsPool = &context->ovsPool;
1185 #endif
1186     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
1187     UINT32 size, hdrSize, seqNumber;
1188     PNET_BUFFER_LIST newNbl;
1189     PNET_BUFFER nb, newNb;
1190     NDIS_STATUS status;
1191     UINT16 segmentSize;
1192     ULONG copiedSize;
1193
1194     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1195     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1196         OVS_LOG_INFO("src nbl must have ctx initialized");
1197         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1198         return NULL;
1199     }
1200
1201     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1202     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
1203
1204     /* Figure out the segment header size */
1205     status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
1206     if (status != NDIS_STATUS_SUCCESS) {
1207         OVS_LOG_INFO("Cannot parse NBL header");
1208         return NULL;
1209     }
1210
1211     size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
1212
1213     /* XXX add to ovsPool counters? */
1214     newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
1215             NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
1216     if (newNbl == NULL) {
1217         return NULL;
1218     }
1219
1220     /* Now deal with TCP payload */
1221     for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
1222             newNb = NET_BUFFER_NEXT_NB(newNb)) {
1223         segmentSize = (size > mss ? mss : size) & 0xffff;
1224         if (headRoom) {
1225             NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
1226         }
1227
1228         /* Now copy the eth/IP/TCP header and fix up */
1229         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0,
1230                                                   &copiedSize);
1231         if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) {
1232             goto nblcopy_error;
1233         }
1234
1235         status = FixSegmentHeader(newNb, segmentSize, seqNumber);
1236         if (status != NDIS_STATUS_SUCCESS) {
1237             goto nblcopy_error;
1238         }
1239
1240
1241         /* Move on to the next segment */
1242         size -= segmentSize;
1243         seqNumber += segmentSize;
1244     }
1245
1246     status = OvsAllocateNBLContext(context, newNbl);
1247     if (status != NDIS_STATUS_SUCCESS) {
1248         goto nblcopy_error;
1249     }
1250
1251     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE);
1252     if (status != NDIS_STATUS_SUCCESS) {
1253         goto nbl_context_error;
1254     }
1255
1256     newNbl->ParentNetBufferList = nbl;
1257
1258     /* Remember it's a fragment NBL so we can free it properly */
1259     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1260     ASSERT(dstCtx != NULL);
1261     dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT |
1262         OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER;
1263     dstCtx->refCount = 1;
1264     dstCtx->magic = OVS_CTX_MAGIC;
1265     dstCtx->dataOffsetDelta = hdrSize + headRoom;
1266
1267     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
1268 #ifdef DBG
1269     InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount);
1270
1271     OvsDumpNetBufferList(nbl);
1272     OvsDumpForwardingDetails(nbl);
1273
1274     OvsDumpNetBufferList(newNbl);
1275     OvsDumpForwardingDetails(newNbl);
1276 #endif
1277     OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
1278     return newNbl;
1279
1280 nbl_context_error:
1281     OvsFreeNBLContext(context, newNbl);
1282 nblcopy_error:
1283 #ifdef DBG
1284     InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1285 #endif
1286     NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0);
1287     return NULL;
1288 }
1289
1290
1291 /*
1292  * --------------------------------------------------------------------------
1293  * OvsFullCopyToMultipleNBLs --
1294  *
1295  *    Copy NBL to multiple NBLs, each NB will have its own NBL
1296  * --------------------------------------------------------------------------
1297  */
1298 PNET_BUFFER_LIST
1299 OvsFullCopyToMultipleNBLs(PVOID ovsContext,
1300                           PNET_BUFFER_LIST nbl,
1301                           UINT32 headRoom,
1302                           BOOLEAN copyNblInfo)
1303 {
1304
1305     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1306     PNET_BUFFER_LIST firstNbl, currNbl, newNbl;
1307     PNET_BUFFER nb;
1308     POVS_BUFFER_CONTEXT srcCtx;
1309
1310     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1311     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1312         OVS_LOG_INFO("src nbl must have ctx initialized");
1313         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1314         return NULL;
1315     }
1316
1317     nb =  NET_BUFFER_LIST_FIRST_NB(nbl);
1318     newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
1319
1320     if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) {
1321         return newNbl;
1322     } else {
1323         firstNbl = newNbl;
1324         currNbl = newNbl;
1325     }
1326
1327     while (nb) {
1328         newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom,
1329                                         copyNblInfo);
1330         if (newNbl == NULL) {
1331             goto copymultiple_error;
1332         }
1333         NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl;
1334         currNbl = newNbl;
1335         nb = NET_BUFFER_NEXT_NB(nb);
1336     }
1337     return firstNbl;
1338
1339 copymultiple_error:
1340     while (firstNbl) {
1341         currNbl = firstNbl;
1342         firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl);
1343         NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL;
1344         OvsCompleteNBL(context, currNbl, TRUE);
1345     }
1346     return NULL;
1347
1348 }
1349
1350
1351 /*
1352  * --------------------------------------------------------------------------
1353  * OvsCompleteNBL --
1354  *
1355  *     This function tries to free the NBL allocated by OVS buffer
1356  *     management module. If it trigger the completion of the parent
1357  *     NBL, it will recursively call itself. If it trigger the completion
1358  *     of external NBL, it will be returned to the caller. The caller
1359  *     is responsible to call API to return to upper layer.
1360  * --------------------------------------------------------------------------
1361  */
1362 PNET_BUFFER_LIST
1363 OvsCompleteNBL(POVS_SWITCH_CONTEXT context,
1364                PNET_BUFFER_LIST nbl,
1365                BOOLEAN updateRef)
1366 {
1367     POVS_BUFFER_CONTEXT ctx;
1368     UINT16 flags;
1369     PNET_BUFFER_LIST parent;
1370     NDIS_STATUS status;
1371     NDIS_HANDLE poolHandle;
1372     LONG value;
1373     POVS_NBL_POOL ovsPool = &context->ovsPool;
1374     PNET_BUFFER nb;
1375
1376
1377     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1378
1379     ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1380
1381     OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d",
1382                  nbl, ctx, ctx->refCount, updateRef);
1383
1384     if (updateRef) {
1385         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1386         if (value != 0) {
1387             return NULL;
1388         }
1389     } else {
1390         /*
1391          * This is a special case, the refCount must be zero
1392          */
1393         ASSERT(ctx->refCount == 0);
1394     }
1395
1396     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1397
1398     flags = ctx->flags;
1399     if (!(flags & OVS_BUFFER_FRAGMENT) &&
1400         NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) {
1401         UINT32 diff;
1402         if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) {
1403             diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb);
1404             status = NdisRetreatNetBufferListDataStart(nbl, diff, 0,
1405                                                        NULL, NULL);
1406             ASSERT(status == NDIS_STATUS_SUCCESS);
1407         } else {
1408             diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength;
1409             NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL);
1410         }
1411     }
1412
1413     if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) {
1414         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
1415     }
1416
1417     if (flags & OVS_BUFFER_NEED_COMPLETE) {
1418         /*
1419          * return to caller for completion
1420          */
1421 #ifdef DBG
1422         InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount);
1423 #endif
1424         return nbl;
1425     }
1426
1427     if (flags & OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) {
1428         context->NdisSwitchHandlers.
1429               FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
1430     }
1431
1432     if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) {
1433         PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1434         while (nb) {
1435             PMDL mdl = NET_BUFFER_FIRST_MDL(nb);
1436             NET_BUFFER_FIRST_MDL(nb) = NULL;
1437             ASSERT(mdl->Next == NULL);
1438             OvsFreeMDLAndData(mdl);
1439             nb = NET_BUFFER_NEXT_NB(nb);
1440         }
1441     }
1442
1443     if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) {
1444         PNET_BUFFER nb, nextNb;
1445
1446         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1447         while (nb) {
1448             nextNb = NET_BUFFER_NEXT_NB(nb);
1449             NdisFreeNetBuffer(nb);
1450 #ifdef DBG
1451             InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1452 #endif
1453             nb = nextNb;
1454         }
1455         NET_BUFFER_LIST_FIRST_NB(nbl) = NULL;
1456     }
1457
1458     parent = nbl->ParentNetBufferList;
1459
1460     poolHandle = NdisGetPoolFromNetBufferList(nbl);
1461     if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) {
1462         ASSERT(poolHandle == ovsPool->fixSizePool);
1463 #ifdef DBG
1464         InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount);
1465 #endif
1466         NdisFreeNetBufferList(nbl);
1467     } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) {
1468         ASSERT(poolHandle == ovsPool->zeroSizePool);
1469 #ifdef DBG
1470         InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount);
1471 #endif
1472         NdisFreeNetBufferList(nbl);
1473     } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) {
1474         ASSERT(poolHandle == ovsPool->nblOnlyPool);
1475 #ifdef DBG
1476         InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount);
1477 #endif
1478         NdisFreeCloneNetBufferList(nbl, 0);
1479     } else if (flags & OVS_BUFFER_FRAGMENT) {
1480         OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent);
1481 #ifdef DBG
1482         InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1483 #endif
1484         NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0);
1485     }
1486
1487     if (parent != NULL) {
1488         ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent);
1489         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1490         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1491         if (value == 0) {
1492             return OvsCompleteNBL(context, parent, FALSE);
1493         }
1494     }
1495     return NULL;
1496 }
1497
1498 /*
1499  * --------------------------------------------------------------------------
1500  * OvsSetCtxSourcePortNo --
1501  *      Setter function which stores the source port of an NBL in the NBL
1502  * Context Info.
1503  * --------------------------------------------------------------------------
1504  */
1505 NDIS_STATUS
1506 OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1507                       UINT32 portNo)
1508 {
1509     POVS_BUFFER_CONTEXT ctx;
1510     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1511     if (ctx == NULL) {
1512         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1513         return STATUS_INVALID_PARAMETER;
1514     }
1515
1516     ctx->srcPortNo = portNo;
1517     return NDIS_STATUS_SUCCESS;
1518 }
1519
1520 /*
1521  * --------------------------------------------------------------------------
1522  * OvsGetCtxSourcePortNo --
1523  *      Get source port of an NBL from its Context Info.
1524  * --------------------------------------------------------------------------
1525  */
1526 NDIS_STATUS
1527 OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1528                       UINT32 *portNo)
1529 {
1530     POVS_BUFFER_CONTEXT ctx;
1531     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1532     if (ctx == NULL || portNo == NULL) {
1533         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1534         return STATUS_INVALID_PARAMETER;
1535     }
1536     *portNo = ctx->srcPortNo;
1537     return NDIS_STATUS_SUCCESS;
1538 }