5adbb25182a2972ae67e6da03807db6bc14f1e04
[cascardo/ovs.git] / datapath-windows / ovsext / BufferMgmt.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 /*
18  * ****************************************************************************
19  *
20  *       Simple Buffer Management framework for OVS
21  *
22  *  It introduces four NDIS buffer pools
23  *     **Fix size net buffer list pool--this is used for small buffer
24  *     One allocation will include NBL + NB + MDL + Data + CONTEXT.
25  *
26  *     **Variable size net buffer list pool--this is used for variable size
27  *     buffer. The allocation of net buffer list will include NBL + NB +
28  *     CONTEXT, a separate allocation of MDL + data buffer is required.
29  *
30  *     **NBL only net buffer list pool-- this is used for partial copy
31  *     (or clone). In this case we can not allocate net buffer list and
32  *     net buffer at the same time.
33  *
34  *     **Net buffer pool-- this is required when net buffer need to be
35  *     allocated separately.
36  *
37  *  A Buffer context is defined to track the buffer specific information
38  *  so that during NBL completion, proper action can be taken. Please see
39  *  code for details.
40  *
41  *  Here is the usage of the management API
42  *  Every external NBL must have its NBL context initialized by calling
43  *     OvsInitExternalNBLContext()
44  *
45  *  After the external NBL context is initialized, it can call the following
46  *  API to allocate, copy or partial copy NBL.
47  *
48  *     OvsAllocateFixSizeNBL()
49  *     OvsAllocateVariableSizeNBL()
50  *
51  *     OvsPartialCopyNBL()
52  *     OvsPartialCopyToMultipleNBLs()
53  *
54  *     OvsFullCopyNBL()
55  *     OvsFullCopyToMultipleNBLs()
56  *
57  *  See code comments for detail description of the functions.
58  *
59  *  All NBLs are completed through
60  *       OvsCompleteNBL()
61  *     If this API returns a non-NULL value, then the returned NBL should be
62  *     returned to the upper layer by calling
63  *     NdisFSendNetBufferListsComplete() if the buffer is from upper
64  *     layer. In case of WFP, it can call the corresponding completion routine
65  *     to return the NBL to the framework.
66  *
67  *  NOTE:
68  *     1. Copy or partial copy will not copy destination port array
69  *     2. Copy or partial copy will copy src port id and index
70  *     3. New Allocated NBL will have src port set to default port id
71  *     4. If original packet has direction flag set, the copied or partial
72  *        copied NBL will still be in same direction.
73  *     5. When you advance or retreat the buffer, you may need to update
74  *        relevant meta data to keep it consistent.
75  *
76  * ****************************************************************************
77  */
78
79 #include "precomp.h"
80 #include "Switch.h"
81
82 #ifdef OVS_DBG_MOD
83 #undef OVS_DBG_MOD
84 #endif
85 #define OVS_DBG_MOD OVS_DBG_BUFMGMT
86 #include "Debug.h"
87 #include "NetProto.h"
88 #include "Flow.h"
89 #include "Checksum.h"
90 #include "PacketParser.h"
91
92 /*
93  * --------------------------------------------------------------------------
94  * OvsInitBufferPool --
95  *
96  *    Allocate NBL and NB pool
97  *
98  * XXX: more optimization may be done for buffer management include local cache
99  * of NBL, NB, data, context, MDL.
100  * --------------------------------------------------------------------------
101  */
102 NDIS_STATUS
103 OvsInitBufferPool(PVOID ovsContext)
104 {
105     POVS_NBL_POOL ovsPool;
106     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
107     NET_BUFFER_LIST_POOL_PARAMETERS  nblParam;
108     NET_BUFFER_POOL_PARAMETERS nbParam;
109
110     C_ASSERT(MEMORY_ALLOCATION_ALIGNMENT >= 8);
111
112     OVS_LOG_TRACE("Enter: context: %p", context);
113
114     ovsPool = &context->ovsPool;
115     RtlZeroMemory(ovsPool, sizeof (OVS_NBL_POOL));
116     ovsPool->ndisHandle = context->NdisFilterHandle;
117     ovsPool->ndisContext = context->NdisSwitchContext;
118     /*
119      * fix size NBL pool includes
120      *    NBL + NB + MDL + DATA + Context
121      *    This is mainly used for Packet execute or slow path when copy is
122      *    required and size is less than OVS_DEFAULT_DATA_SIZE. We expect
123      *    Most of packet from user space will use this Pool. (This is
124      *    true for all bfd and cfm packet.
125      */
126     RtlZeroMemory(&nblParam, sizeof (nblParam));
127     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
128                            NDIS_OBJECT_TYPE_DEFAULT,
129                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
130                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
131     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
132     nblParam.PoolTag = OVS_FIX_SIZE_NBL_POOL_TAG;
133     nblParam.fAllocateNetBuffer = TRUE;
134     nblParam.DataSize = OVS_DEFAULT_DATA_SIZE + OVS_DEFAULT_HEADROOM_SIZE;
135
136     ovsPool->fixSizePool =
137         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
138     if (ovsPool->fixSizePool == NULL) {
139         goto pool_cleanup;
140     }
141
142     /*
143      * Zero Size NBL Pool includes
144      *    NBL + NB + Context
145      *    This is mainly for packet with large data Size, in this case MDL and
146      *    Data will be allocate separately.
147      */
148     RtlZeroMemory(&nblParam, sizeof (nblParam));
149     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
150                            NDIS_OBJECT_TYPE_DEFAULT,
151                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
152                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
153
154     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
155     nblParam.PoolTag = OVS_VARIABLE_SIZE_NBL_POOL_TAG;
156     nblParam.fAllocateNetBuffer = TRUE;
157     nblParam.DataSize = 0;
158
159     ovsPool->zeroSizePool =
160         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
161     if (ovsPool->zeroSizePool == NULL) {
162         goto pool_cleanup;
163     }
164
165     /*
166      * NBL only pool just includes
167      *    NBL (+ context)
168      *    This is mainly used for clone and partial copy
169      */
170     RtlZeroMemory(&nblParam, sizeof (nblParam));
171     OVS_INIT_OBJECT_HEADER(&nblParam.Header,
172                            NDIS_OBJECT_TYPE_DEFAULT,
173                            NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1,
174                            NDIS_SIZEOF_NET_BUFFER_LIST_POOL_PARAMETERS_REVISION_1);
175
176     nblParam.ContextSize = OVS_DEFAULT_NBL_CONTEXT_SIZE;
177     nblParam.PoolTag = OVS_NBL_ONLY_POOL_TAG;
178     nblParam.fAllocateNetBuffer = FALSE;
179     nblParam.DataSize = 0;
180
181     ovsPool->nblOnlyPool =
182         NdisAllocateNetBufferListPool(context->NdisSwitchContext, &nblParam);
183     if (ovsPool->nblOnlyPool == NULL) {
184         goto pool_cleanup;
185     }
186
187     /* nb Pool
188      *    NB only pool, used for copy
189      */
190
191     OVS_INIT_OBJECT_HEADER(&nbParam.Header,
192                            NDIS_OBJECT_TYPE_DEFAULT,
193                            NET_BUFFER_POOL_PARAMETERS_REVISION_1,
194                            NDIS_SIZEOF_NET_BUFFER_POOL_PARAMETERS_REVISION_1);
195     nbParam.PoolTag = OVS_NET_BUFFER_POOL_TAG;
196     nbParam.DataSize = 0;
197     ovsPool->nbPool =
198         NdisAllocateNetBufferPool(context->NdisSwitchContext, &nbParam);
199     if (ovsPool->nbPool == NULL) {
200         goto pool_cleanup;
201     }
202     OVS_LOG_TRACE("Exit: fixSizePool: %p zeroSizePool: %p nblOnlyPool: %p"
203                   "nbPool: %p", ovsPool->fixSizePool, ovsPool->zeroSizePool,
204                   ovsPool->nblOnlyPool, ovsPool->nbPool);
205     return NDIS_STATUS_SUCCESS;
206
207 pool_cleanup:
208     OvsCleanupBufferPool(context);
209     OVS_LOG_TRACE("Exit: Fail to initialize ovs buffer pool");
210     return NDIS_STATUS_RESOURCES;
211 }
212
213
214 /*
215  * --------------------------------------------------------------------------
216  * OvsCleanupBufferPool --
217  *  Free Buffer pool for NBL and NB.
218  * --------------------------------------------------------------------------
219  */
220 VOID
221 OvsCleanupBufferPool(PVOID ovsContext)
222 {
223     POVS_NBL_POOL ovsPool;
224     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
225     ovsPool = &context->ovsPool;
226     OVS_LOG_TRACE("Enter: context: %p", context);
227 #ifdef DBG
228     ASSERT(ovsPool->fixNBLCount == 0);
229     ASSERT(ovsPool->zeroNBLCount == 0);
230     ASSERT(ovsPool->nblOnlyCount == 0);
231     ASSERT(ovsPool->nbCount == 0);
232     ASSERT(ovsPool->sysNBLCount == 0);
233     ASSERT(ovsPool->fragNBLCount == 0);
234 #endif
235
236     if (ovsPool->fixSizePool) {
237         NdisFreeNetBufferListPool(ovsPool->fixSizePool);
238         ovsPool->fixSizePool = NULL;
239     }
240     if (ovsPool->zeroSizePool) {
241         NdisFreeNetBufferListPool(ovsPool->zeroSizePool);
242         ovsPool->zeroSizePool = NULL;
243     }
244     if (ovsPool->nblOnlyPool) {
245         NdisFreeNetBufferListPool(ovsPool->nblOnlyPool);
246         ovsPool->nblOnlyPool = NULL;
247     }
248     if (ovsPool->nbPool) {
249         NdisFreeNetBufferPool(ovsPool->nbPool);
250         ovsPool->nbPool = NULL;
251     }
252     OVS_LOG_TRACE("Exit: cleanup OVS Buffer pool");
253 }
254
255
256 static VOID
257 OvsInitNBLContext(POVS_BUFFER_CONTEXT ctx,
258                   UINT16 flags,
259                   UINT32 origDataLength,
260                   UINT32 srcPortNo)
261 {
262     ctx->magic = OVS_CTX_MAGIC;
263     ctx->refCount = 1;
264     ctx->flags = flags;
265     ctx->srcPortNo = srcPortNo;
266     ctx->origDataLength = origDataLength;
267 }
268
269
270 static VOID
271 OvsDumpForwardingDetails(PNET_BUFFER_LIST nbl)
272 {
273     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
274     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
275     if (info == NULL) {
276         return;
277     }
278     OVS_LOG_INFO("nbl: %p, numAvailableDest: %d, srcId:%d, srcIndex: %d "
279                  "isDataSafe: %s, safeDataSize: %d",
280                  nbl, info->NumAvailableDestinations, info->SourcePortId,
281                  info->SourceNicIndex,
282                  info->IsPacketDataSafe ? "TRUE" : "FALSE",
283                  info->IsPacketDataSafe ? 0 : info->SafePacketDataSize);
284
285 }
286
287 static VOID
288 OvsDumpNBLContext(PNET_BUFFER_LIST nbl)
289 {
290     PNET_BUFFER_LIST_CONTEXT ctx = nbl->Context;
291     if (ctx == NULL) {
292         OVS_LOG_INFO("No Net Buffer List context");
293         return;
294     }
295     while (ctx) {
296         OVS_LOG_INFO("nbl: %p, ctx: %p, TotalSize: %d, Offset: %d",
297                      nbl, ctx, ctx->Size, ctx->Offset);
298         ctx = ctx->Next;
299     }
300 }
301
302
303 static VOID
304 OvsDumpMDLChain(PMDL mdl)
305 {
306     PMDL tmp;
307     tmp = mdl;
308     while (tmp) {
309         OVS_LOG_INFO("MDL: %p, Size: %d, MappedSystemVa: %p, StartVa: %p"
310                      " ByteCount: %d, ByteOffset: %d",
311                      tmp, tmp->Size, tmp->MappedSystemVa,
312                      tmp->StartVa, tmp->ByteCount, tmp->ByteOffset);
313         tmp = tmp->Next;
314     }
315 }
316
317
318 static VOID
319 OvsDumpNetBuffer(PNET_BUFFER nb)
320 {
321     OVS_LOG_INFO("NET_BUFFER: %p, ChecksumBias: %d Handle: %p, MDLChain: %p "
322                  "CurrMDL: %p, CurrOffset: %d, DataLen: %d, Offset: %d",
323                  nb,
324                  NET_BUFFER_CHECKSUM_BIAS(nb), nb->NdisPoolHandle,
325                  NET_BUFFER_FIRST_MDL(nb),
326                  NET_BUFFER_CURRENT_MDL(nb),
327                  NET_BUFFER_CURRENT_MDL_OFFSET(nb),
328                  NET_BUFFER_DATA_LENGTH(nb),
329                  NET_BUFFER_DATA_OFFSET(nb));
330     OvsDumpMDLChain(NET_BUFFER_FIRST_MDL(nb));
331 }
332
333
334 static VOID
335 OvsDumpNetBufferList(PNET_BUFFER_LIST nbl)
336 {
337     PNET_BUFFER nb;
338     OVS_LOG_INFO("NBL: %p, parent: %p, SrcHandle: %p, ChildCount:%d "
339                  "poolHandle: %p",
340                  nbl, nbl->ParentNetBufferList,
341                  nbl->SourceHandle, nbl->ChildRefCount,
342                  nbl->NdisPoolHandle);
343     OvsDumpNBLContext(nbl);
344     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
345     while (nb) {
346         OvsDumpNetBuffer(nb);
347         nb = NET_BUFFER_NEXT_NB(nb);
348     }
349 }
350
351 /*
352  * --------------------------------------------------------------------------
353  * OvsAllocateFixSizeNBL --
354  *
355  *    Allocate fix size NBL which include
356  *       NBL + NB + MBL + Data + Context
357  *    Please note:
358  *       * Forwarding Context is allocated, but forwarding detail information
359  *       is not initailized.
360  *       * The headroom can not be larger than OVS_DEFAULT_HEADROOM_SIZE(128
361  *       byte).
362  * --------------------------------------------------------------------------
363  */
364 PNET_BUFFER_LIST
365 OvsAllocateFixSizeNBL(PVOID ovsContext,
366                       UINT32 size,
367                       UINT32 headRoom)
368 {
369     PNET_BUFFER_LIST nbl = NULL;
370     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
371     POVS_BUFFER_CONTEXT ctx;
372     POVS_NBL_POOL ovsPool = &context->ovsPool;
373     NDIS_STATUS status;
374     UINT32 line;
375     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
376
377     if ((headRoom + size) > OVS_FIX_NBL_DATA_SIZE || size == 0) {
378         line = __LINE__;
379         goto allocate_done;
380     }
381
382     nbl = NdisAllocateNetBufferList(ovsPool->fixSizePool,
383                                     (UINT16)sizeof (OVS_BUFFER_CONTEXT),
384                                     (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
385
386     if (nbl == NULL) {
387         line = __LINE__;
388         goto allocate_done;
389     }
390
391     nbl->SourceHandle = ovsPool->ndisHandle;
392     status = context->NdisSwitchHandlers.
393              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
394
395     if (status != NDIS_STATUS_SUCCESS) {
396         NdisFreeNetBufferList(nbl);
397         nbl = NULL;
398         line = __LINE__;
399         goto allocate_done;
400     }
401     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
402     ASSERT(info);
403     info->IsPacketDataSafe = TRUE;
404     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
405
406     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
407                                            size, 0, NULL);
408     ASSERT(status == NDIS_STATUS_SUCCESS);
409
410 #ifdef DBG
411     InterlockedIncrement((LONG volatile *)&ovsPool->fixNBLCount);
412     OvsDumpNetBufferList(nbl);
413     OvsDumpForwardingDetails(nbl);
414 #endif
415
416     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
417     ASSERT(ctx);
418
419     OvsInitNBLContext(ctx, OVS_BUFFER_FROM_FIX_SIZE_POOL |
420                       OVS_BUFFER_PRIVATE_FORWARD_CONTEXT, size,
421                       OVS_DEFAULT_PORT_NO);
422     line = __LINE__;
423 allocate_done:
424     OVS_LOG_LOUD("Allocate Fix NBL: %p, line: %d", nbl, line);
425     return nbl;
426 }
427
428
429 static PMDL
430 OvsAllocateMDLAndData(NDIS_HANDLE ndisHandle,
431                       UINT32 dataSize)
432 {
433     PMDL mdl;
434     PVOID data;
435
436     data = OvsAllocateMemoryWithTag(dataSize, OVS_MDL_POOL_TAG);
437     if (data == NULL) {
438         return NULL;
439     }
440
441     mdl = NdisAllocateMdl(ndisHandle, data, dataSize);
442     if (mdl == NULL) {
443         OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
444     }
445
446     return mdl;
447 }
448
449
450 static VOID
451 OvsFreeMDLAndData(PMDL mdl)
452 {
453     PVOID data;
454
455     data = MmGetMdlVirtualAddress(mdl);
456     NdisFreeMdl(mdl);
457     OvsFreeMemoryWithTag(data, OVS_MDL_POOL_TAG);
458 }
459
460
461 /*
462  * --------------------------------------------------------------------------
463  * OvsAllocateVariableSizeNBL --
464  *
465  *    Allocate variable size NBL, the NBL looks like
466  *      NBL + NB + Context
467  *      MDL + Data
468  * --------------------------------------------------------------------------
469  */
470 PNET_BUFFER_LIST
471 OvsAllocateVariableSizeNBL(PVOID ovsContext,
472                            UINT32 size,
473                            UINT32 headRoom)
474 {
475     PNET_BUFFER_LIST nbl = NULL;
476     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
477     POVS_NBL_POOL ovsPool = &context->ovsPool;
478     POVS_BUFFER_CONTEXT ctx;
479     UINT32 realSize;
480     PMDL mdl;
481     NDIS_STATUS status;
482     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO info;
483     if (size == 0) {
484         return NULL;
485     }
486     realSize = MEM_ALIGN_SIZE(size + headRoom);
487
488     mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, realSize);
489     if (mdl == NULL) {
490         return NULL;
491     }
492
493     nbl = NdisAllocateNetBufferAndNetBufferList(ovsPool->zeroSizePool,
494                                          (UINT16)sizeof (OVS_BUFFER_CONTEXT),
495                                          (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL,
496                                                 mdl, realSize, 0);
497     if (nbl == NULL) {
498         OvsFreeMDLAndData(mdl);
499         return NULL;
500     }
501
502     nbl->SourceHandle = ovsPool->ndisHandle;
503     status = context->NdisSwitchHandlers.
504              AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
505
506     if (status != NDIS_STATUS_SUCCESS) {
507        /*
508         * do we need to remove mdl from nbl XXX
509         */
510         OvsFreeMDLAndData(mdl);
511         NdisFreeNetBufferList(nbl);
512         return NULL;
513     }
514
515     info = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(nbl);
516     ASSERT(info);
517     info->IsPacketDataSafe = TRUE;
518     info->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
519     status = NdisRetreatNetBufferDataStart(NET_BUFFER_LIST_FIRST_NB(nbl),
520                                            size, 0, NULL);
521     ASSERT(status == NDIS_STATUS_SUCCESS);
522
523 #ifdef DBG
524     InterlockedIncrement((LONG volatile *)&ovsPool->zeroNBLCount);
525     OvsDumpNetBufferList(nbl);
526     OvsDumpForwardingDetails(nbl);
527 #endif
528
529     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
530
531     OvsInitNBLContext(ctx, OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
532                            OVS_BUFFER_PRIVATE_FORWARD_CONTEXT |
533                            OVS_BUFFER_FROM_ZERO_SIZE_POOL,
534                       size, OVS_DEFAULT_PORT_NO);
535
536     OVS_LOG_LOUD("Allocate variable size NBL: %p", nbl);
537     return nbl;
538 }
539
540
541 /*
542  * --------------------------------------------------------------------------
543  * OvsInitExternalNBLContext --
544  *
545  *     For NBL not allocated by OVS, it will allocate and initialize
546  *     the NBL context.
547  * --------------------------------------------------------------------------
548  */
549 POVS_BUFFER_CONTEXT
550 OvsInitExternalNBLContext(PVOID ovsContext,
551                           PNET_BUFFER_LIST nbl,
552                           BOOLEAN isRecv)
553 {
554     NDIS_HANDLE poolHandle;
555     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
556     POVS_BUFFER_CONTEXT ctx;
557     PNET_BUFFER nb;
558     NDIS_STATUS status;
559     UINT16 flags;
560
561     poolHandle = NdisGetPoolFromNetBufferList(nbl);
562
563     if (poolHandle == context->ovsPool.ndisHandle) {
564         return (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
565     }
566     status = NdisAllocateNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT),
567                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
568                                               OVS_OTHER_POOL_TAG);
569     if (status != NDIS_STATUS_SUCCESS) {
570         return NULL;
571     }
572 #ifdef DBG
573     OvsDumpNBLContext(nbl);
574     InterlockedIncrement((LONG volatile *)&context->ovsPool.sysNBLCount);
575 #endif
576     flags = isRecv ? OVS_BUFFER_RECV_BUFFER : OVS_BUFFER_SEND_BUFFER;
577     flags |= OVS_BUFFER_NEED_COMPLETE | OVS_BUFFER_PRIVATE_CONTEXT;
578     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
579
580     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
581     /*
582      * we use first nb to decide whether we need advance or retreat during
583      * complete.
584      */
585     OvsInitNBLContext(ctx, flags, NET_BUFFER_DATA_LENGTH(nb), OVS_DEFAULT_PORT_NO);
586     return ctx;
587 }
588
589 /*
590  * --------------------------------------------------------------------------
591  * OvsAllocateNBLContext
592  *
593  *    Create NBL buffer context and forwarding context.
594  * --------------------------------------------------------------------------
595  */
596 NDIS_STATUS
597 OvsAllocateNBLContext(POVS_SWITCH_CONTEXT context,
598                       PNET_BUFFER_LIST nbl)
599 {
600     POVS_NBL_POOL ovsPool = &context->ovsPool;
601     NDIS_STATUS status;
602
603     status = NdisAllocateNetBufferListContext(nbl,
604                                               sizeof (OVS_BUFFER_CONTEXT),
605                                               OVS_DEFAULT_NBL_CONTEXT_FILL,
606                                               OVS_OTHER_POOL_TAG);
607     if (status != NDIS_STATUS_SUCCESS) {
608         return NDIS_STATUS_FAILURE;
609     }
610
611     nbl->SourceHandle = ovsPool->ndisHandle;
612     status = context->NdisSwitchHandlers.
613         AllocateNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
614
615     if (status != NDIS_STATUS_SUCCESS) {
616         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
617         return NDIS_STATUS_FAILURE;
618     }
619     return status;
620 }
621
622 /*
623  * --------------------------------------------------------------------------
624  * OvsFreeNBLContext
625  *
626  *    Free the NBL buffer context and forwarding context.
627  * --------------------------------------------------------------------------
628  */
629 NDIS_STATUS
630 OvsFreeNBLContext(POVS_SWITCH_CONTEXT context,
631                   PNET_BUFFER_LIST nbl)
632 {
633     POVS_NBL_POOL ovsPool = &context->ovsPool;
634
635     context->NdisSwitchHandlers.
636          FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
637     NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
638
639     return NDIS_STATUS_SUCCESS;
640 }
641
642 /*
643  * --------------------------------------------------------------------------
644  * OvsCopyNBLInfo
645  *
646  *    Copy NBL info from src to dst
647  * --------------------------------------------------------------------------
648  */
649 NDIS_STATUS
650 OvsCopyNBLInfo(PNET_BUFFER_LIST srcNbl, PNET_BUFFER_LIST dstNbl,
651                POVS_BUFFER_CONTEXT srcCtx, UINT32 copySize,
652                BOOLEAN copyNblInfo)
653 {
654     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO srcInfo, dstInfo;
655     NDIS_STATUS status = NDIS_STATUS_SUCCESS;
656
657     srcInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(srcNbl);
658     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(dstNbl);
659     if (srcInfo) {
660 #ifdef OVS_USE_COPY_NET_BUFFER_LIST_INFO
661         status = context->NdisSwitchHandlers.
662             CopyNetBufferListInfo(ovsPool->ndisContext, dstNbl, srcNbl, 0);
663
664         if (status != NDIS_STATUS_SUCCESS) {
665             return status;
666         }
667 #else
668         dstInfo->SourcePortId = srcInfo->SourcePortId;
669         dstInfo->SourceNicIndex = srcInfo->SourceNicIndex;
670         if (copyNblInfo) {
671             if (srcCtx->flags & OVS_BUFFER_RECV_BUFFER) {
672                 NdisCopyReceiveNetBufferListInfo(dstNbl, srcNbl);
673             } else if (srcCtx->flags & OVS_BUFFER_SEND_BUFFER) {
674                 NdisCopySendNetBufferListInfo(dstNbl, srcNbl);
675             }
676         }
677 #endif
678         dstInfo->IsPacketDataSafe = srcInfo->IsPacketDataSafe;
679         if (!srcInfo->IsPacketDataSafe && copySize >
680             srcInfo->SafePacketDataSize) {
681             srcInfo->SafePacketDataSize = copySize;
682         }
683     } else {
684         /*
685          * Assume all data are safe
686          */
687         dstInfo->IsPacketDataSafe = TRUE;
688         dstInfo->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
689     }
690     return status;
691 }
692
693 /*
694  * --------------------------------------------------------------------------
695  * OvsPartialCopyNBL --
696  *
697  *    Partial copy NBL, if there is multiple NB in NBL, each one will be
698  *    copied. We also reserve headroom for the new NBL.
699  *
700  *    Please note,
701  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
702  *       this function.
703  *       The NBL should already have ref to itself so that during copy
704  *       it will not be freed.
705  * --------------------------------------------------------------------------
706  */
707 PNET_BUFFER_LIST
708 OvsPartialCopyNBL(PVOID ovsContext,
709                   PNET_BUFFER_LIST nbl,
710                   UINT32 copySize,
711                   UINT32 headRoom,
712                   BOOLEAN copyNblInfo)
713 {
714     PNET_BUFFER_LIST newNbl;
715     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
716     NDIS_STATUS status;
717     PNET_BUFFER srcNb, dstNb;
718     ULONG byteCopied;
719     POVS_NBL_POOL ovsPool = &context->ovsPool;
720     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
721     UINT16 flags;
722
723     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
724     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
725         OVS_LOG_INFO("src nbl must have ctx initialized");
726         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
727         return NULL;
728     }
729
730     if (copySize) {
731         NdisAdvanceNetBufferListDataStart(nbl, copySize, FALSE, NULL);
732     }
733     newNbl = NdisAllocateCloneNetBufferList(nbl, ovsPool->nblOnlyPool,
734                                             NULL, 0);
735     if (copySize) {
736         status = NdisRetreatNetBufferListDataStart(nbl, copySize, 0,
737                                                    NULL, NULL);
738         ASSERT(status == NDIS_STATUS_SUCCESS);
739     }
740
741     if (newNbl == NULL) {
742         return NULL;
743     }
744
745     /*
746      * Allocate private memory for copy
747      */
748     if (copySize + headRoom) {
749         status = NdisRetreatNetBufferListDataStart(newNbl, copySize + headRoom,
750                                                    0, NULL, NULL);
751         if (status != NDIS_STATUS_SUCCESS) {
752             goto retreat_error;
753         }
754
755         if (headRoom) {
756             NdisAdvanceNetBufferListDataStart(newNbl, headRoom, FALSE, NULL);
757         }
758         if (copySize) {
759             srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
760             dstNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
761
762             while (srcNb) {
763                 status = NdisCopyFromNetBufferToNetBuffer(dstNb, 0, copySize,
764                                                           srcNb, 0,
765                                                           &byteCopied);
766                 if (status != NDIS_STATUS_SUCCESS || copySize != byteCopied) {
767                     goto nbl_context_error;
768                 }
769                 srcNb = NET_BUFFER_NEXT_NB(srcNb);
770                 dstNb = NET_BUFFER_NEXT_NB(dstNb);
771             }
772         }
773     }
774
775     status = OvsAllocateNBLContext(context, newNbl);
776     if (status != NDIS_STATUS_SUCCESS) {
777         goto nbl_context_error;
778     }
779
780     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copySize, copyNblInfo);
781     if (status != NDIS_STATUS_SUCCESS) {
782         goto copy_list_info_error;
783     }
784
785 #ifdef DBG
786     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
787 #endif
788
789     newNbl->ParentNetBufferList = nbl;
790
791     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
792     ASSERT(dstCtx != NULL);
793
794     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
795
796     flags |= OVS_BUFFER_FROM_NBL_ONLY_POOL | OVS_BUFFER_PRIVATE_CONTEXT |
797              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
798
799     srcNb = NET_BUFFER_LIST_FIRST_NB(nbl);
800     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(srcNb) - copySize,
801                       OVS_DEFAULT_PORT_NO);
802
803     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
804 #ifdef DBG
805     OvsDumpNetBufferList(nbl);
806     OvsDumpForwardingDetails(nbl);
807
808     OvsDumpNetBufferList(newNbl);
809     OvsDumpForwardingDetails(newNbl);
810 #endif
811     OVS_LOG_LOUD("Partial Copy new NBL: %p", newNbl);
812     return newNbl;
813
814 copy_list_info_error:
815     OvsFreeNBLContext(context, newNbl);
816 nbl_context_error:
817     if (copySize) {
818         NdisAdvanceNetBufferListDataStart(newNbl, copySize, TRUE, NULL);
819     }
820 retreat_error:
821     NdisFreeCloneNetBufferList(newNbl, 0);
822     return NULL;
823 }
824
825 /*
826  * --------------------------------------------------------------------------
827  * OvsPartialCopyToMultipleNBLs --
828  *
829  *     This is similar to OvsPartialCopyNBL() except that each NB will
830  *     have its own NBL.
831  * --------------------------------------------------------------------------
832  */
833 PNET_BUFFER_LIST
834 OvsPartialCopyToMultipleNBLs(PVOID ovsContext,
835                              PNET_BUFFER_LIST nbl,
836                              UINT32 copySize,
837                              UINT32 headRoom,
838                              BOOLEAN copyNblInfo)
839 {
840     PNET_BUFFER nb, nextNb = NULL, firstNb, prevNb;
841     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
842     PNET_BUFFER_LIST firstNbl = NULL, newNbl, prevNbl = NULL;
843
844     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
845     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
846         return OvsPartialCopyNBL(context, nbl, copySize, headRoom, copyNblInfo);
847     }
848
849     firstNb = nb;
850     prevNb = nb;
851
852     while (nb) {
853         nextNb = NET_BUFFER_NEXT_NB(nb);
854         NET_BUFFER_NEXT_NB(nb) = NULL;
855
856         NET_BUFFER_LIST_FIRST_NB(nbl) = nb;
857
858         newNbl = OvsPartialCopyNBL(context, nbl, copySize, headRoom,
859                                    copyNblInfo);
860         if (newNbl == NULL) {
861             goto cleanup;
862         }
863         if (prevNbl == NULL) {
864             firstNbl = newNbl;
865         } else {
866             NET_BUFFER_LIST_NEXT_NBL(prevNbl) = newNbl;
867             NET_BUFFER_NEXT_NB(prevNb) = nb;
868         }
869         prevNbl = newNbl;
870         prevNb = nb;
871         nb = nextNb;
872     }
873     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
874     return firstNbl;
875
876 cleanup:
877     NET_BUFFER_NEXT_NB(prevNb) = nb;
878     NET_BUFFER_NEXT_NB(nb) = nextNb;
879     NET_BUFFER_LIST_FIRST_NB(nbl) = firstNb;
880
881     newNbl = firstNbl;
882     while (newNbl) {
883         firstNbl = NET_BUFFER_LIST_NEXT_NBL(newNbl);
884         NET_BUFFER_LIST_NEXT_NBL(firstNbl) = NULL;
885         OvsCompleteNBL(context, newNbl, TRUE);
886         newNbl = firstNbl;
887     }
888     return NULL;
889 }
890
891
892 static PNET_BUFFER_LIST
893 OvsCopySinglePacketNBL(PVOID ovsContext,
894                        PNET_BUFFER_LIST nbl,
895                        PNET_BUFFER nb,
896                        UINT32 headRoom,
897                        BOOLEAN copyNblInfo)
898 {
899     UINT32 size;
900     ULONG copiedSize;
901     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
902     PNET_BUFFER_LIST newNbl;
903     PNET_BUFFER newNb;
904     NDIS_STATUS status;
905     POVS_BUFFER_CONTEXT srcCtx, dstCtx;
906
907     size = NET_BUFFER_DATA_LENGTH(nb);
908     if ((size + headRoom) <= OVS_FIX_NBL_DATA_SIZE) {
909         newNbl = OvsAllocateFixSizeNBL(context, size, headRoom);
910     } else {
911         newNbl = OvsAllocateVariableSizeNBL(context, size, headRoom);
912     }
913     if (newNbl == NULL) {
914         return NULL;
915     }
916     newNb = NET_BUFFER_LIST_FIRST_NB(newNbl);
917     status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
918                                               &copiedSize);
919
920     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
921     if (status == NDIS_STATUS_SUCCESS) {
922         status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, copiedSize, copyNblInfo);
923     }
924
925     if (status != NDIS_STATUS_SUCCESS || copiedSize != size) {
926         OvsCompleteNBL(context, newNbl, TRUE);
927         return NULL;
928     }
929
930     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
931     ASSERT(dstCtx && srcCtx);
932     ASSERT(srcCtx->magic == OVS_CTX_MAGIC && dstCtx->magic == OVS_CTX_MAGIC);
933
934     dstCtx->flags |= srcCtx->flags & (OVS_BUFFER_RECV_BUFFER |
935                                       OVS_BUFFER_SEND_BUFFER);
936 #ifdef DBG
937     OvsDumpNetBufferList(newNbl);
938     OvsDumpForwardingDetails(newNbl);
939 #endif
940     OVS_LOG_LOUD("Copy single nb to new NBL: %p", newNbl);
941     return newNbl;
942 }
943
944 /*
945  * --------------------------------------------------------------------------
946  * OvsFullCopyNBL --
947  *
948  *    Copy the NBL to a new NBL including data.
949  *
950  * Notes:
951  *     The NBL can have multiple NBs, but the final result is one NBL.
952  * --------------------------------------------------------------------------
953  */
954 PNET_BUFFER_LIST
955 OvsFullCopyNBL(PVOID ovsContext,
956                PNET_BUFFER_LIST nbl,
957                UINT32 headRoom,
958                BOOLEAN copyNblInfo)
959 {
960     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
961     POVS_NBL_POOL ovsPool = &context->ovsPool;
962     PNET_BUFFER_LIST newNbl;
963     PNET_BUFFER nb, newNb, firstNb = NULL, prevNb = NULL;
964     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
965     PMDL mdl;
966     NDIS_STATUS status;
967     UINT32 size, totalSize;
968     ULONG copiedSize;
969     UINT16 flags;
970     PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO dstInfo;
971
972     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
973     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
974         OVS_LOG_INFO("src nbl must have ctx initialized");
975         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
976         return NULL;
977     }
978
979     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
980
981     if (NET_BUFFER_NEXT_NB(nb) == NULL) {
982         return OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
983     }
984
985     newNbl = NdisAllocateNetBufferList(ovsPool->nblOnlyPool,
986                                        (UINT16)sizeof (OVS_BUFFER_CONTEXT),
987                                        (UINT16)OVS_DEFAULT_NBL_CONTEXT_FILL);
988     if (newNbl == NULL) {
989         return NULL;
990     }
991
992     while (nb) {
993         size = NET_BUFFER_DATA_LENGTH(nb);
994         totalSize = MEM_ALIGN_SIZE(size + headRoom);
995         mdl = OvsAllocateMDLAndData(ovsPool->ndisHandle, totalSize);
996
997         if (mdl == NULL) {
998             goto nblcopy_error;
999         }
1000         newNb = NdisAllocateNetBuffer(ovsPool->nbPool, mdl, totalSize, 0);
1001         if (newNb == NULL) {
1002             OvsFreeMDLAndData(mdl);
1003             goto nblcopy_error;
1004         }
1005         if (firstNb == NULL) {
1006             firstNb = newNb;
1007         } else {
1008             NET_BUFFER_NEXT_NB(prevNb) = newNb;
1009         }
1010         prevNb = newNb;
1011 #ifdef DBG
1012         InterlockedIncrement((LONG volatile *)&ovsPool->nbCount);
1013 #endif
1014         status = NdisRetreatNetBufferDataStart(newNb, size, 0, NULL);
1015         ASSERT(status == NDIS_STATUS_SUCCESS);
1016
1017         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, size, nb, 0,
1018                                                   &copiedSize);
1019         if (status != NDIS_STATUS_SUCCESS || size != copiedSize) {
1020             goto nblcopy_error;
1021         }
1022
1023         nb = NET_BUFFER_NEXT_NB(nb);
1024     }
1025
1026     NET_BUFFER_LIST_FIRST_NB(newNbl) = firstNb;
1027
1028     newNbl->SourceHandle = ovsPool->ndisHandle;
1029     status = context->NdisSwitchHandlers.
1030          AllocateNetBufferListForwardingContext(ovsPool->ndisContext, newNbl);
1031
1032     if (status != NDIS_STATUS_SUCCESS) {
1033         goto nblcopy_error;
1034     }
1035
1036     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, 0, copyNblInfo);
1037     if (status != NDIS_STATUS_SUCCESS) {
1038         goto nblcopy_error;
1039     }
1040
1041     dstInfo = NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl);
1042     dstInfo->IsPacketDataSafe = TRUE;
1043
1044     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1045
1046     flags = srcCtx->flags & (OVS_BUFFER_RECV_BUFFER | OVS_BUFFER_SEND_BUFFER);
1047
1048     flags |= OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA |
1049              OVS_BUFFER_PRIVATE_NET_BUFFER | OVS_BUFFER_FROM_NBL_ONLY_POOL |
1050              OVS_BUFFER_PRIVATE_FORWARD_CONTEXT;
1051
1052     OvsInitNBLContext(dstCtx, flags, NET_BUFFER_DATA_LENGTH(firstNb),
1053                       OVS_DEFAULT_PORT_NO);
1054
1055 #ifdef DBG
1056     OvsDumpNetBufferList(nbl);
1057     OvsDumpForwardingDetails(nbl);
1058     InterlockedIncrement((LONG volatile *)&ovsPool->nblOnlyCount);
1059 #endif
1060     OVS_LOG_LOUD("newNbl: %p", newNbl);
1061     return newNbl;
1062
1063 nblcopy_error:
1064     while (firstNb) {
1065 #ifdef DBG
1066         InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1067 #endif
1068         prevNb = firstNb;
1069         firstNb = NET_BUFFER_NEXT_NB(prevNb);
1070         mdl = NET_BUFFER_FIRST_MDL(prevNb);
1071         NET_BUFFER_FIRST_MDL(prevNb) = NULL;
1072         NdisFreeNetBuffer(prevNb);
1073         OvsFreeMDLAndData(mdl);
1074     }
1075     NdisFreeNetBufferList(newNbl);
1076     OVS_LOG_ERROR("OvsFullCopyNBL failed");
1077     return NULL;
1078 }
1079
1080 /*
1081  * --------------------------------------------------------------------------
1082  * GetSegmentHeaderInfo
1083  *
1084  *    Extract header size and sequence number for the segment.
1085  * --------------------------------------------------------------------------
1086  */
1087 static NDIS_STATUS
1088 GetSegmentHeaderInfo(PNET_BUFFER_LIST nbl,
1089                      const POVS_PACKET_HDR_INFO hdrInfo,
1090                      UINT32 *hdrSize, UINT32 *seqNumber)
1091 {
1092     TCPHdr tcpStorage;
1093     const TCPHdr *tcp;
1094
1095     /* Parse the orginal Eth/IP/TCP header */
1096     tcp = OvsGetPacketBytes(nbl, sizeof *tcp, hdrInfo->l4Offset, &tcpStorage);
1097     if (tcp == NULL) {
1098         return NDIS_STATUS_FAILURE;
1099     }
1100     *seqNumber = ntohl(tcp->seq);
1101     *hdrSize = hdrInfo->l4Offset + TCP_HDR_LEN(tcp);
1102
1103     return NDIS_STATUS_SUCCESS;
1104 }
1105
1106
1107 /*
1108  * --------------------------------------------------------------------------
1109  * FixSegmentHeader
1110  *
1111  *    Fix IP length, IP checksum, TCP sequence number and TCP checksum
1112  *    in the segment.
1113  * --------------------------------------------------------------------------
1114  */
1115 static NDIS_STATUS
1116 FixSegmentHeader(PNET_BUFFER nb, UINT16 segmentSize, UINT32 seqNumber)
1117 {
1118     EthHdr *dstEth;
1119     IPHdr *dstIP;
1120     TCPHdr *dstTCP;
1121     PMDL mdl;
1122     PUINT8 bufferStart;
1123
1124     mdl = NET_BUFFER_FIRST_MDL(nb);
1125
1126     bufferStart = (PUINT8)MmGetSystemAddressForMdlSafe(mdl, LowPagePriority);
1127     if (!bufferStart) {
1128         return NDIS_STATUS_RESOURCES;
1129     }
1130     dstEth = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(nb));
1131     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1132             >= sizeof(EthHdr) + sizeof(IPHdr) + sizeof(TCPHdr));
1133     dstIP = (IPHdr *)((PCHAR)dstEth + sizeof *dstEth);
1134     dstTCP = (TCPHdr *)((PCHAR)dstIP + dstIP->ihl * 4);
1135     ASSERT((INT)MmGetMdlByteCount(mdl) - NET_BUFFER_CURRENT_MDL_OFFSET(nb)
1136             >= sizeof(EthHdr) + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1137
1138     /* Fix IP length and checksum */
1139     ASSERT(dstIP->protocol == IPPROTO_TCP);
1140     dstIP->tot_len = htons(segmentSize + dstIP->ihl * 4 + TCP_HDR_LEN(dstTCP));
1141     dstIP->check = 0;
1142     dstIP->check = IPChecksum((UINT8 *)dstIP, dstIP->ihl * 4, 0);
1143
1144     /* Fix TCP checksum */
1145     dstTCP->seq = htonl(seqNumber);
1146     dstTCP->check =
1147         IPPseudoChecksum((UINT32 *)&dstIP->saddr,
1148                          (UINT32 *)&dstIP->daddr,
1149                          IPPROTO_TCP, segmentSize + TCP_HDR_LEN(dstTCP));
1150     dstTCP->check = CalculateChecksumNB(nb,
1151             (UINT16)(NET_BUFFER_DATA_LENGTH(nb) - sizeof *dstEth - dstIP->ihl * 4),
1152             sizeof *dstEth + dstIP->ihl * 4);
1153     return STATUS_SUCCESS;
1154 }
1155
1156 /*
1157  * --------------------------------------------------------------------------
1158  * OvsTcpSegmentyNBL --
1159  *
1160  *    Segment TCP payload, and prepend each segment with ether/IP/TCP header.
1161  *    Leave headRoom for additional encap.
1162  *
1163  *    Please note,
1164  *       NBL should have OVS_BUFFER_CONTEXT setup before calling
1165  *       this function.
1166  *       The NBL should already have ref to itself so that during copy
1167  *       it will not be freed.
1168  *       Currently this API assert there is only one NB in an NBL, it needs
1169  *       to be fixed if we receive multiple NBs in an NBL.
1170  * --------------------------------------------------------------------------
1171  */
1172 PNET_BUFFER_LIST
1173 OvsTcpSegmentNBL(PVOID ovsContext,
1174                  PNET_BUFFER_LIST nbl,
1175                  POVS_PACKET_HDR_INFO hdrInfo,
1176                  UINT32 mss,
1177                  UINT32 headRoom)
1178 {
1179     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1180 #ifdef DBG
1181     POVS_NBL_POOL ovsPool = &context->ovsPool;
1182 #endif
1183     POVS_BUFFER_CONTEXT dstCtx, srcCtx;
1184     UINT32 size, hdrSize, seqNumber;
1185     PNET_BUFFER_LIST newNbl;
1186     PNET_BUFFER nb, newNb;
1187     NDIS_STATUS status;
1188     UINT16 segmentSize;
1189     ULONG copiedSize;
1190
1191     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1192     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1193         OVS_LOG_INFO("src nbl must have ctx initialized");
1194         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1195         return NULL;
1196     }
1197
1198     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1199     ASSERT(NET_BUFFER_NEXT_NB(nb) == NULL);
1200
1201     /* Figure out the segment header size */
1202     status = GetSegmentHeaderInfo(nbl, hdrInfo, &hdrSize, &seqNumber);
1203     if (status != NDIS_STATUS_SUCCESS) {
1204         OVS_LOG_INFO("Cannot parse NBL header");
1205         return NULL;
1206     }
1207
1208     size = NET_BUFFER_DATA_LENGTH(nb) - hdrSize;
1209
1210     /* XXX add to ovsPool counters? */
1211     newNbl = NdisAllocateFragmentNetBufferList(nbl, NULL,
1212             NULL, hdrSize, mss, hdrSize + headRoom , 0, 0);
1213     if (newNbl == NULL) {
1214         return NULL;
1215     }
1216
1217     /* Now deal with TCP payload */
1218     for (newNb = NET_BUFFER_LIST_FIRST_NB(newNbl); newNb != NULL;
1219             newNb = NET_BUFFER_NEXT_NB(newNb)) {
1220         segmentSize = (size > mss ? mss : size) & 0xffff;
1221         if (headRoom) {
1222             NdisAdvanceNetBufferDataStart(newNb, headRoom, FALSE, NULL);
1223         }
1224
1225         /* Now copy the eth/IP/TCP header and fix up */
1226         status = NdisCopyFromNetBufferToNetBuffer(newNb, 0, hdrSize, nb, 0,
1227                                                   &copiedSize);
1228         if (status != NDIS_STATUS_SUCCESS || hdrSize != copiedSize) {
1229             goto nblcopy_error;
1230         }
1231
1232         status = FixSegmentHeader(newNb, segmentSize, seqNumber);
1233         if (status != NDIS_STATUS_SUCCESS) {
1234             goto nblcopy_error;
1235         }
1236
1237
1238         /* Move on to the next segment */
1239         size -= segmentSize;
1240         seqNumber += segmentSize;
1241     }
1242
1243     status = OvsAllocateNBLContext(context, newNbl);
1244     if (status != NDIS_STATUS_SUCCESS) {
1245         goto nblcopy_error;
1246     }
1247
1248     status = OvsCopyNBLInfo(nbl, newNbl, srcCtx, hdrSize + headRoom, FALSE);
1249     if (status != NDIS_STATUS_SUCCESS) {
1250         goto nbl_context_error;
1251     }
1252
1253     newNbl->ParentNetBufferList = nbl;
1254
1255     /* Remember it's a fragment NBL so we can free it properly */
1256     dstCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(newNbl);
1257     ASSERT(dstCtx != NULL);
1258     dstCtx->flags = OVS_BUFFER_FRAGMENT | OVS_BUFFER_PRIVATE_CONTEXT |
1259         OVS_BUFFER_PRIVATE_FORWARD_CONTEXT | OVS_BUFFER_SEND_BUFFER;
1260     dstCtx->refCount = 1;
1261     dstCtx->magic = OVS_CTX_MAGIC;
1262     dstCtx->dataOffsetDelta = hdrSize + headRoom;
1263
1264     InterlockedIncrement((LONG volatile *)&srcCtx->refCount);
1265 #ifdef DBG
1266     InterlockedIncrement((LONG volatile *)&ovsPool->fragNBLCount);
1267
1268     OvsDumpNetBufferList(nbl);
1269     OvsDumpForwardingDetails(nbl);
1270
1271     OvsDumpNetBufferList(newNbl);
1272     OvsDumpForwardingDetails(newNbl);
1273 #endif
1274     OVS_LOG_TRACE("Segment nbl %p to newNbl: %p", nbl, newNbl);
1275     return newNbl;
1276
1277 nbl_context_error:
1278     OvsFreeNBLContext(context, newNbl);
1279 nblcopy_error:
1280 #ifdef DBG
1281     InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1282 #endif
1283     NdisFreeFragmentNetBufferList(newNbl, hdrSize + headRoom, 0);
1284     return NULL;
1285 }
1286
1287
1288 /*
1289  * --------------------------------------------------------------------------
1290  * OvsFullCopyToMultipleNBLs --
1291  *
1292  *    Copy NBL to multiple NBLs, each NB will have its own NBL
1293  * --------------------------------------------------------------------------
1294  */
1295 PNET_BUFFER_LIST
1296 OvsFullCopyToMultipleNBLs(PVOID ovsContext,
1297                           PNET_BUFFER_LIST nbl,
1298                           UINT32 headRoom,
1299                           BOOLEAN copyNblInfo)
1300 {
1301
1302     POVS_SWITCH_CONTEXT context = (POVS_SWITCH_CONTEXT)ovsContext;
1303     PNET_BUFFER_LIST firstNbl, currNbl, newNbl;
1304     PNET_BUFFER nb;
1305     POVS_BUFFER_CONTEXT srcCtx;
1306
1307     srcCtx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1308     if (srcCtx == NULL || srcCtx->magic != OVS_CTX_MAGIC) {
1309         OVS_LOG_INFO("src nbl must have ctx initialized");
1310         ASSERT(srcCtx && srcCtx->magic == OVS_CTX_MAGIC);
1311         return NULL;
1312     }
1313
1314     nb =  NET_BUFFER_LIST_FIRST_NB(nbl);
1315     newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom, copyNblInfo);
1316
1317     if (newNbl == NULL || NET_BUFFER_NEXT_NB(nb) == NULL) {
1318         return newNbl;
1319     } else {
1320         firstNbl = newNbl;
1321         currNbl = newNbl;
1322     }
1323
1324     while (nb) {
1325         newNbl = OvsCopySinglePacketNBL(context, nbl, nb, headRoom,
1326                                         copyNblInfo);
1327         if (newNbl == NULL) {
1328             goto copymultiple_error;
1329         }
1330         NET_BUFFER_LIST_NEXT_NBL(currNbl) = newNbl;
1331         currNbl = newNbl;
1332         nb = NET_BUFFER_NEXT_NB(nb);
1333     }
1334     return firstNbl;
1335
1336 copymultiple_error:
1337     while (firstNbl) {
1338         currNbl = firstNbl;
1339         firstNbl = NET_BUFFER_LIST_NEXT_NBL(firstNbl);
1340         NET_BUFFER_LIST_NEXT_NBL(currNbl) = NULL;
1341         OvsCompleteNBL(context, currNbl, TRUE);
1342     }
1343     return NULL;
1344
1345 }
1346
1347
1348 /*
1349  * --------------------------------------------------------------------------
1350  * OvsCompleteNBL --
1351  *
1352  *     This function tries to free the NBL allocated by OVS buffer
1353  *     management module. If it trigger the completion of the parent
1354  *     NBL, it will recursively call itself. If it trigger the completion
1355  *     of external NBL, it will be returned to the caller. The caller
1356  *     is responsible to call API to return to upper layer.
1357  * --------------------------------------------------------------------------
1358  */
1359 PNET_BUFFER_LIST
1360 OvsCompleteNBL(POVS_SWITCH_CONTEXT context,
1361                PNET_BUFFER_LIST nbl,
1362                BOOLEAN updateRef)
1363 {
1364     POVS_BUFFER_CONTEXT ctx;
1365     UINT16 flags;
1366     PNET_BUFFER_LIST parent;
1367     NDIS_STATUS status;
1368     NDIS_HANDLE poolHandle;
1369     LONG value;
1370     POVS_NBL_POOL ovsPool = &context->ovsPool;
1371     PNET_BUFFER nb;
1372
1373
1374     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1375
1376     ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1377
1378     OVS_LOG_TRACE("Enter: nbl: %p, ctx: %p, refCount: %d, updateRef:%d",
1379                  nbl, ctx, ctx->refCount, updateRef);
1380
1381     if (updateRef) {
1382         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1383         if (value != 0) {
1384             return NULL;
1385         }
1386     } else {
1387         /*
1388          * This is a special case, the refCount must be zero
1389          */
1390         ASSERT(ctx->refCount == 0);
1391     }
1392
1393     nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1394
1395     flags = ctx->flags;
1396     if (!(flags & OVS_BUFFER_FRAGMENT) &&
1397         NET_BUFFER_DATA_LENGTH(nb) != ctx->origDataLength) {
1398         UINT32 diff;
1399         if (NET_BUFFER_DATA_LENGTH(nb) < ctx->origDataLength) {
1400             diff = ctx->origDataLength -NET_BUFFER_DATA_LENGTH(nb);
1401             status = NdisRetreatNetBufferListDataStart(nbl, diff, 0,
1402                                                        NULL, NULL);
1403             ASSERT(status == NDIS_STATUS_SUCCESS);
1404         } else {
1405             diff = NET_BUFFER_DATA_LENGTH(nb) - ctx->origDataLength;
1406             NdisAdvanceNetBufferListDataStart(nbl, diff, TRUE, NULL);
1407         }
1408     }
1409
1410     if (ctx->flags & OVS_BUFFER_PRIVATE_CONTEXT) {
1411         NdisFreeNetBufferListContext(nbl, sizeof (OVS_BUFFER_CONTEXT));
1412     }
1413
1414     if (flags & OVS_BUFFER_NEED_COMPLETE) {
1415         /*
1416          * return to caller for completion
1417          */
1418 #ifdef DBG
1419         InterlockedDecrement((LONG volatile *)&ovsPool->sysNBLCount);
1420 #endif
1421         return nbl;
1422     }
1423
1424     if (flags & OVS_BUFFER_PRIVATE_FORWARD_CONTEXT) {
1425         context->NdisSwitchHandlers.
1426               FreeNetBufferListForwardingContext(ovsPool->ndisContext, nbl);
1427     }
1428
1429     if (flags & (OVS_BUFFER_PRIVATE_MDL | OVS_BUFFER_PRIVATE_DATA)) {
1430         PNET_BUFFER nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1431         while (nb) {
1432             PMDL mdl = NET_BUFFER_FIRST_MDL(nb);
1433             NET_BUFFER_FIRST_MDL(nb) = NULL;
1434             ASSERT(mdl->Next == NULL);
1435             OvsFreeMDLAndData(mdl);
1436             nb = NET_BUFFER_NEXT_NB(nb);
1437         }
1438     }
1439
1440     if (flags & OVS_BUFFER_PRIVATE_NET_BUFFER) {
1441         PNET_BUFFER nb, nextNb;
1442
1443         nb = NET_BUFFER_LIST_FIRST_NB(nbl);
1444         while (nb) {
1445             nextNb = NET_BUFFER_NEXT_NB(nb);
1446             NdisFreeNetBuffer(nb);
1447 #ifdef DBG
1448             InterlockedDecrement((LONG volatile *)&ovsPool->nbCount);
1449 #endif
1450             nb = nextNb;
1451         }
1452         NET_BUFFER_LIST_FIRST_NB(nbl) = NULL;
1453     }
1454
1455     parent = nbl->ParentNetBufferList;
1456
1457     poolHandle = NdisGetPoolFromNetBufferList(nbl);
1458     if (flags & OVS_BUFFER_FROM_FIX_SIZE_POOL) {
1459         ASSERT(poolHandle == ovsPool->fixSizePool);
1460 #ifdef DBG
1461         InterlockedDecrement((LONG volatile *)&ovsPool->fixNBLCount);
1462 #endif
1463         NdisFreeNetBufferList(nbl);
1464     } else if (flags & OVS_BUFFER_FROM_ZERO_SIZE_POOL) {
1465         ASSERT(poolHandle == ovsPool->zeroSizePool);
1466 #ifdef DBG
1467         InterlockedDecrement((LONG volatile *)&ovsPool->zeroNBLCount);
1468 #endif
1469         NdisFreeNetBufferList(nbl);
1470     } else if (flags & OVS_BUFFER_FROM_NBL_ONLY_POOL) {
1471         ASSERT(poolHandle == ovsPool->nblOnlyPool);
1472 #ifdef DBG
1473         InterlockedDecrement((LONG volatile *)&ovsPool->nblOnlyCount);
1474 #endif
1475         NdisFreeCloneNetBufferList(nbl, 0);
1476     } else if (flags & OVS_BUFFER_FRAGMENT) {
1477         OVS_LOG_TRACE("Free fragment %p parent %p", nbl, parent);
1478 #ifdef DBG
1479         InterlockedDecrement((LONG volatile *)&ovsPool->fragNBLCount);
1480 #endif
1481         NdisFreeFragmentNetBufferList(nbl, ctx->dataOffsetDelta, 0);
1482     }
1483
1484     if (parent != NULL) {
1485         ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(parent);
1486         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1487         value = InterlockedDecrement((LONG volatile *)&ctx->refCount);
1488         if (value == 0) {
1489             return OvsCompleteNBL(context, parent, FALSE);
1490         }
1491     }
1492     return NULL;
1493 }
1494
1495 /*
1496  * --------------------------------------------------------------------------
1497  * OvsSetCtxSourcePortNo --
1498  *      Setter function which stores the source port of an NBL in the NBL
1499  * Context Info.
1500  * --------------------------------------------------------------------------
1501  */
1502 NDIS_STATUS
1503 OvsSetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1504                       UINT32 portNo)
1505 {
1506     POVS_BUFFER_CONTEXT ctx;
1507     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1508     if (ctx == NULL) {
1509         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1510         return STATUS_INVALID_PARAMETER;
1511     }
1512
1513     ctx->srcPortNo = portNo;
1514     return NDIS_STATUS_SUCCESS;
1515 }
1516
1517 /*
1518  * --------------------------------------------------------------------------
1519  * OvsGetCtxSourcePortNo --
1520  *      Get source port of an NBL from its Context Info.
1521  * --------------------------------------------------------------------------
1522  */
1523 NDIS_STATUS
1524 OvsGetCtxSourcePortNo(PNET_BUFFER_LIST nbl,
1525                       UINT32 *portNo)
1526 {
1527     POVS_BUFFER_CONTEXT ctx;
1528     ctx = (POVS_BUFFER_CONTEXT)NET_BUFFER_LIST_CONTEXT_DATA_START(nbl);
1529     if (ctx == NULL || portNo == NULL) {
1530         ASSERT(ctx && ctx->magic == OVS_CTX_MAGIC);
1531         return STATUS_INVALID_PARAMETER;
1532     }
1533     *portNo = ctx->srcPortNo;
1534     return NDIS_STATUS_SUCCESS;
1535 }