2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/* Debug module tag for this file; presumably read by the OVS_LOG_* macros - TODO confirm. */
34 #define OVS_DBG_MOD OVS_DBG_ACTION
/*
 * Counters maintained by the action-pipeline code in this file.
 * NOTE(review): only some members are visible in this excerpt; other members
 * (for example noVport, rxGre, txVxlan) are incremented by the code below.
 */
37 typedef struct _OVS_ACTION_STATS {
/* Bumped by OvsDoFlowLookupOutput() when a missed packet cannot be queued. */
47 UINT32 failedFlowMiss;
/* Bumped by OvsDoFlowLookupOutput() when OvsExtractFlow() fails. */
49 UINT32 failedFlowExtract;
/* Bumped when growing or updating the NBL destination array fails. */
54 UINT32 cannotGrowDest;
/* Bumped by OvsTunnelPortRx() when OvsValidateIPChecksum() fails. */
56 UINT32 failedChecksum;
57 } OVS_ACTION_STATS, *POVS_ACTION_STATS;
/* The one counter instance that the functions in this file update. */
59 OVS_ACTION_STATS ovsActionStats;
62 * There is a lot of data that needs to be maintained while executing the pipeline
63 * as dictated by the actions of a flow, across different functions at different
64 * levels. Such data is put together in a 'context' structure. Care should be
65 * exercised while adding new members to the structure - only add ones that get
66 * used across multiple stages in the pipeline/get used in multiple functions.
/*
 * Size requested from GrowNetBufferListDestinations() by OvsAddPorts() when
 * the NBL has no destination array yet.
 */
68 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
/*
 * State shared by the action-pipeline helpers in this file.
 * NOTE(review): srcVportNo and sendFlags are assigned through this structure
 * by OvsInitForwardingCtx(), but their declarations fall on lines that are
 * not visible in this excerpt.
 */
69 typedef struct OvsForwardingContext {
/* Supplies NdisSwitchHandlers / NdisSwitchContext for the destination calls. */
70 POVS_SWITCH_CONTEXT switchContext;
71 /* The NBL currently used in the pipeline. */
72 PNET_BUFFER_LIST curNbl;
73 /* NDIS forwarding detail for 'curNbl'. */
74 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
75 /* Array of destination ports for 'curNbl'. */
76 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
77 /* send flags while sending 'curNbl' into NDIS. */
79 /* Total number of output ports, used + unused, in 'curNbl'. */
80 UINT32 destPortsSizeIn;
81 /* Total number of used output ports in 'curNbl'. */
82 UINT32 destPortsSizeOut;
84 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
87 OvsCompletionList *completionList;
89 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
90 * bridge. ie. during tunneling on the Rx side.
96 * - specified in actions during tunneling Tx
97 * - extracted from an NBL during tunneling Rx
99 OvsIPv4TunnelKey tunKey;
103 * To store the output port, when it is a tunneled port. We don't foresee
104 * multiple tunneled ports as outport for any given NBL.
106 POVS_VPORT_ENTRY tunnelTxNic;
110 * Points to the Internal port on the PIF Bridge, if the packet needs to be
113 POVS_VPORT_ENTRY tunnelRxNic;
115 /* header information */
116 OVS_PACKET_HDR_INFO layers;
117 } OvsForwardingContext;
121 * --------------------------------------------------------------------------
122 * OvsInitForwardingCtx --
123 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
127 * NDIS_STATUS_SUCCESS on success
128 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
129 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
130 * enough for OvsCompleteNBLForwardingCtx() to do its work.
131 * --------------------------------------------------------------------------
/*
 * Fills ovsFwdCtx for a new pass over curNbl:
 *  - stores curNbl, switchContext, completionList and fwdDetail;
 *  - fetches the NBL destination array when fwdDetail reports available
 *    destinations, and stores NULL in destinationPorts on the other path;
 *  - sets destPortsSizeIn to NumAvailableDestinations and destPortsSizeOut
 *    to 0;
 *  - copies *layers into the context or zeroes the stored header info (the
 *    selecting condition is not visible in this excerpt; presumably whether
 *    layers is non-NULL, since OvsLookupFlowOutput() passes NULL);
 *  - when resetTunnelInfo is set, clears tunnelTxNic, tunnelRxNic and tunKey.
 * The only return statement visible here yields NDIS_STATUS_SUCCESS.
 * NOTE(review): the srcVportNo and sendFlags parameters are used below but
 * are declared on lines not visible in this excerpt.
 */
133 static __inline NDIS_STATUS
134 OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
135 POVS_SWITCH_CONTEXT switchContext,
136 PNET_BUFFER_LIST curNbl,
139 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
140 OvsCompletionList *completionList,
141 OVS_PACKET_HDR_INFO *layers,
142 BOOLEAN resetTunnelInfo)
145 ASSERT(switchContext);
150 * Set values for curNbl and switchContext so upon failures, we have enough
151 * information to do cleanup.
153 ovsFwdCtx->curNbl = curNbl;
154 ovsFwdCtx->switchContext = switchContext;
155 ovsFwdCtx->completionList = completionList;
156 ovsFwdCtx->fwdDetail = fwdDetail;
/* Only ask NDIS for the destination array when it reports free slots. */
158 if (fwdDetail->NumAvailableDestinations > 0) {
160 * XXX: even though MSDN says GetNetBufferListDestinations() returns
161 * NDIS_STATUS, the header files say otherwise.
163 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
164 switchContext->NdisSwitchContext, curNbl,
165 &ovsFwdCtx->destinationPorts);
167 ASSERT(ovsFwdCtx->destinationPorts);
168 /* Ensure that none of the elements are consumed yet. */
169 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
170 fwdDetail->NumAvailableDestinations);
172 ovsFwdCtx->destinationPorts = NULL;
/* Capacity mirrors what NDIS reports as available; nothing is used yet. */
174 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
175 ovsFwdCtx->destPortsSizeOut = 0;
176 ovsFwdCtx->srcVportNo = srcVportNo;
177 ovsFwdCtx->sendFlags = sendFlags;
179 ovsFwdCtx->layers = *layers;
181 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
/* Tunnel state survives a re-init unless the caller asks for a reset. */
183 if (resetTunnelInfo) {
184 ovsFwdCtx->tunnelTxNic = NULL;
185 ovsFwdCtx->tunnelRxNic = NULL;
186 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
189 return NDIS_STATUS_SUCCESS;
193 * --------------------------------------------------------------------------
194 * OvsDetectTunnelRxPkt --
195 * Utility function for an RX packet to detect its tunnel type.
198 * True - if the tunnel type was detected.
199 * False - if not a tunnel packet or tunnel type not supported.
200 * --------------------------------------------------------------------------
/*
 * Looks for the tunnel vport that should receive an incoming packet.
 * Only packets whose flow key has nwFrag clear are examined. Depending on
 * the flow key IP protocol the lookup goes through
 * OvsFindTunnelVportByPortType() or OvsFindTunnelVportByDstPort(), and an
 * rxGre / rxStt / rxVxlan counter is incremented next to each lookup.
 * The vport is stored in ovsFwdCtx->tunnelRxNic, which is asserted to be
 * NULL beforehand.
 * NOTE(review): the case labels, the conditions guarding the counters and
 * the return statements are on lines not visible in this excerpt.
 */
202 static __inline BOOLEAN
203 OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
204 const OvsFlowKey *flowKey)
206 POVS_VPORT_ENTRY tunnelVport = NULL;
208 /* XXX: we should also check for the length of the UDP payload to pick
209 * packets only if they are at least VXLAN header size.
211 if (!flowKey->ipKey.nwFrag) {
212 UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
213 switch (flowKey->ipKey.nwProto) {
215 tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext,
218 ovsActionStats.rxGre++;
222 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
226 ovsActionStats.rxStt++;
230 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
232 OVS_VPORT_TYPE_VXLAN);
234 ovsActionStats.rxVxlan++;
240 // We might get tunnel packets even before the tunnel gets initialized.
242 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
243 ovsFwdCtx->tunnelRxNic = tunnelVport;
251 * --------------------------------------------------------------------------
252 * OvsDetectTunnelPkt --
253 * Utility function to detect if a packet is to be subjected to
254 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
255 * port, destination port, packet contents, and previously setup tunnel
259 * True - If the packet is to be subjected to tunneling.
260 * In case of invalid tunnel context, the tunneling functionality is
261 * a no-op and is completed within this function itself by consuming
262 * all of the tunneling context.
263 * False - If not a tunnel packet or tunnel type not supported. Caller should
264 * process the packet as a non-tunnel packet.
265 * --------------------------------------------------------------------------
// Decides whether the packet headed to dstVport enters the tunnel pipeline.
//  - dstVport of internal type: when the NBL source port is the virtual
//    external port or NDIS_SWITCH_DEFAULT_PORT_ID, OvsDetectTunnelRxPkt() is
//    consulted; on a match tunnelRxNic is expected to be set.
//  - dstVport of tunnel type: tunKey.dst is zeroed when the source vport is
//    neither of NETDEV type nor a bridge-internal vport (the start of that
//    condition is not visible in this excerpt). If tunKey.dst is still
//    non-zero, the per-type tx counter is bumped and dstVport is recorded as
//    tunnelTxNic.
// NOTE(review): the return statements are on lines not visible here.
267 static __inline BOOLEAN
268 OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
269 const POVS_VPORT_ENTRY dstVport,
270 const OvsFlowKey *flowKey)
272 if (OvsIsInternalVportType(dstVport->ovsType)) {
275 * The source of NBL during tunneling Rx could be the external
276 * port or if it is being executed from userspace, the source port is
279 BOOLEAN validSrcPort =
280 (ovsFwdCtx->fwdDetail->SourcePortId ==
281 ovsFwdCtx->switchContext->virtualExternalPortId) ||
282 (ovsFwdCtx->fwdDetail->SourcePortId ==
283 NDIS_SWITCH_DEFAULT_PORT_ID);
285 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
286 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
287 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
290 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
291 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
292 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
296 * The destination port is a tunnel port. Encapsulation must be
297 * performed only on packets that originate from:
299 * - a bridge-internal port (packets generated from userspace)
302 * If the packet will not be encapsulated, consume the tunnel context
// The source vport is only checked when a valid source port number is set.
305 if (ovsFwdCtx->srcVportNo != OVS_DPPORT_NUMBER_INVALID) {
307 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
308 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
311 (vport->ovsType != OVS_VPORT_TYPE_NETDEV &&
312 !OvsIsBridgeInternalVport(vport))) {
313 ovsFwdCtx->tunKey.dst = 0;
317 /* Tunnel the packet only if tunnel context is set. */
318 if (ovsFwdCtx->tunKey.dst != 0) {
319 switch(dstVport->ovsType) {
320 case OVS_VPORT_TYPE_GRE:
321 ovsActionStats.txGre++;
323 case OVS_VPORT_TYPE_VXLAN:
324 ovsActionStats.txVxlan++;
326 case OVS_VPORT_TYPE_STT:
327 ovsActionStats.txStt++;
330 ovsFwdCtx->tunnelTxNic = dstVport;
341 * --------------------------------------------------------------------------
343 * Add the specified destination vport into the forwarding context. If the
344 * vport is a VIF/external port, it is added directly to the NBL. If it is
345 * a tunneling port, it is NOT added to the NBL.
348 * NDIS_STATUS_SUCCESS on success
349 * Other NDIS_STATUS upon failure.
350 * --------------------------------------------------------------------------
/*
 * Adds the vport looked up from dstPortId as an output for
 * ovsFwdCtx->curNbl.
 *  - A missing or not-connected vport is counted in noVport and reported as
 *    success.
 *  - txPackets / txBytes of the vport are updated before any further checks.
 *  - Bridge-internal vports, and packets claimed by OvsDetectTunnelPkt(),
 *    return success without touching the NBL destination array.
 *  - Otherwise the vport is written into the next free slot of
 *    destinationPorts; when every slot is already used the array is created
 *    or grown first.
 * preserveVLAN and preservePriority are copied into the destination entry.
 * NOTE(review): the flowKey parameter and the status local are used below
 * but are declared on lines not visible in this excerpt, as are the returns
 * taken after each cannotGrowDest increment.
 */
352 static __inline NDIS_STATUS
353 OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
355 NDIS_SWITCH_PORT_ID dstPortId,
356 BOOLEAN preserveVLAN,
357 BOOLEAN preservePriority)
359 POVS_VPORT_ENTRY vport;
360 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
362 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
365 * We hold the dispatch lock that protects the list of vports, so vports
366 * validated here can be added as destinations safely before we call into
369 * Some of the vports can be tunnelled ports as well in which case
370 * they should be added to a separate list of tunnelled destination ports
371 * instead of the VIF ports. The context for the tunnel is settable
372 * in OvsForwardingContext.
374 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
375 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
377 * There may be some latency between a port disappearing, and userspace
378 * updating the recalculated flows. In the meantime, handle invalid
381 ovsActionStats.noVport++;
382 return NDIS_STATUS_SUCCESS;
384 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
/* Tx statistics use the data length of the first NB of the NBL only. */
385 vport->stats.txPackets++;
386 vport->stats.txBytes +=
387 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
389 if (OvsIsBridgeInternalVport(vport)) {
390 return NDIS_STATUS_SUCCESS;
393 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
394 return NDIS_STATUS_SUCCESS;
/* Every slot handed out so far is used: make room before adding. */
397 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
/* No array exists yet: request one of the minimum size. */
398 if (ovsFwdCtx->destPortsSizeIn == 0) {
399 ASSERT(ovsFwdCtx->destinationPorts == NULL);
400 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
402 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
403 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
404 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
405 &ovsFwdCtx->destinationPorts);
406 if (status != NDIS_STATUS_SUCCESS) {
407 ovsActionStats.cannotGrowDest++;
/* Take the new capacity from what the forwarding detail now reports. */
410 ovsFwdCtx->destPortsSizeIn =
411 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
412 ASSERT(ovsFwdCtx->destinationPorts);
414 ASSERT(ovsFwdCtx->destinationPorts != NULL);
417 * A ULONG value that specifies the total number of
418 * NDIS_SWITCH_PORT_DESTINATION elements in the
419 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
422 * A ULONG value that specifies the number of
423 * NDIS_SWITCH_PORT_DESTINATION elements in the
424 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
425 * specify port destinations.
427 * NumAvailableDestinations:
428 * A value that specifies the number of unused extensible switch
429 * destination ports elements within an NET_BUFFER_LIST structure.
431 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
432 ovsFwdCtx->destPortsSizeIn);
433 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
434 ovsFwdCtx->destPortsSizeOut -
435 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
436 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
438 * Before we grow the array of destination ports, the current set
439 * of ports needs to be committed. Only the ports added since the
440 * last commit need to be part of the new update.
442 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
443 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
444 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
445 ovsFwdCtx->destinationPorts);
446 if (status != NDIS_STATUS_SUCCESS) {
447 ovsActionStats.cannotGrowDest++;
450 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
451 ovsFwdCtx->destPortsSizeIn);
452 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
453 ovsFwdCtx->destPortsSizeOut);
454 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
/* Ask for as many additional slots as the array currently holds. */
456 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
457 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
458 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
459 if (status != NDIS_STATUS_SUCCESS) {
460 ovsActionStats.cannotGrowDest++;
463 ASSERT(ovsFwdCtx->destinationPorts != NULL);
/* Record the doubled capacity that matches the grow request above. */
464 ovsFwdCtx->destPortsSizeIn <<= 1;
468 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
/* Fill the next unused entry (index destPortsSizeOut) and count it as used. */
470 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
471 ovsFwdCtx->destPortsSizeOut);
473 fwdPort->PortId = vport->portId;
474 fwdPort->NicIndex = vport->nicIndex;
475 fwdPort->IsExcluded = 0;
476 fwdPort->PreserveVLAN = preserveVLAN;
477 fwdPort->PreservePriority = preservePriority;
478 ovsFwdCtx->destPortsSizeOut += 1;
480 return NDIS_STATUS_SUCCESS;
485 * --------------------------------------------------------------------------
486 * OvsClearTunTxCtx --
487 * Utility function to clear tx tunneling context.
488 * --------------------------------------------------------------------------
/*
 * Drops the Tx tunnel state: forgets the tunnel vport and zeroes tunKey.dst,
 * the field other functions in this file test to see whether a tunnel key
 * is set.
 */
491 OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
493 ovsFwdCtx->tunnelTxNic = NULL;
494 ovsFwdCtx->tunKey.dst = 0;
499 * --------------------------------------------------------------------------
500 * OvsClearTunRxCtx --
501 * Utility function to clear rx tunneling context.
502 * --------------------------------------------------------------------------
/*
 * Drops the Rx tunnel state: forgets the Rx tunnel vport and zeroes
 * tunKey.dst, the field other functions in this file test to see whether a
 * tunnel key is set.
 */
505 OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
507 ovsFwdCtx->tunnelRxNic = NULL;
508 ovsFwdCtx->tunKey.dst = 0;
513 * --------------------------------------------------------------------------
514 * OvsCompleteNBLForwardingCtx --
515 * This utility function is responsible for freeing/completing an NBL - either
516 * by adding it to a completion list or by freeing it.
519 * It also resets the necessary fields in 'ovsFwdCtx'.
520 * --------------------------------------------------------------------------
/*
 * Releases ovsFwdCtx->curNbl and resets the context fields tied to it.
 * With a completion list the NBL is appended to it through
 * OvsAddPktCompletionList(); without one the NBL is handed to
 * OvsCompleteNBL(), which is asserted to return NULL. Afterwards curNbl is
 * NULL, destPortsSizeOut is 0 and both tunnel vport pointers are NULL.
 * tunKey is not touched here.
 * NOTE(review): the dropReason parameter is used below but is declared on a
 * line not visible in this excerpt.
 */
523 OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
526 NDIS_STRING filterReason;
528 RtlInitUnicodeString(&filterReason, dropReason);
529 if (ovsFwdCtx->completionList) {
530 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
531 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
533 ovsFwdCtx->curNbl = NULL;
535 /* If there is no completionList, we assume this is ovs created NBL */
536 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
537 ovsFwdCtx->curNbl, TRUE);
538 ASSERT(ovsFwdCtx->curNbl == NULL);
540 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
541 * using these fields should reset the values at the end of the pipeline. */
542 ovsFwdCtx->destPortsSizeOut = 0;
543 ovsFwdCtx->tunnelTxNic = NULL;
544 ovsFwdCtx->tunnelRxNic = NULL;
548 * --------------------------------------------------------------------------
549 * OvsDoFlowLookupOutput --
550 * Function to be used for the second stage of a tunneling workflow, ie.:
551 * - On the encapsulated packet on Tx path, to do a flow extract, flow
552 * lookup and executing the actions.
553 * - On the decapsulated packet on Rx path, to do a flow extract, flow
554 * lookup and executing the actions.
556 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
557 * until the new buffer management framework is adopted.
560 * The NBL in 'ovsFwdCtx' is consumed.
561 * --------------------------------------------------------------------------
/*
 * Runs flow extract and flow lookup for ovsFwdCtx->curNbl, using srcVportNo
 * as the source port.
 *  - Source vport missing or not connected: the NBL is completed, noVport is
 *    counted and NDIS_STATUS_SUCCESS is returned.
 *  - OvsExtractFlow() failure: the NBL is completed and failedFlowExtract is
 *    counted.
 *  - Lookup hit (apparently; the test itself is not visible): the flow is
 *    marked used, datapath hits is bumped, OvsActionsExecute() runs the flow
 *    actions and curNbl is set to NULL.
 *  - Lookup miss: datapath misses is bumped and the packet is packaged via
 *    OvsCreateAndAddPackets() as OVS_PACKET_CMD_MISS and queued. The NBL is
 *    completed in both outcomes; flowMiss with NDIS_STATUS_SUCCESS on
 *    success, failedFlowMiss with NDIS_STATUS_FAILURE otherwise.
 * NOTE(review): key, hash, flow, num and status are used below but are
 * declared on lines not visible in this excerpt.
 */
563 static __inline NDIS_STATUS
564 OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
570 POVS_VPORT_ENTRY vport =
571 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
572 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
573 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
574 L"OVS-Dropped due to internal/tunnel port removal");
575 ovsActionStats.noVport++;
576 return NDIS_STATUS_SUCCESS;
578 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
580 /* Assert that in the Rx direction, key is always setup. */
581 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
/* The tunnel key is passed to the extractor only when its dst is non-zero. */
583 OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
584 &key, &ovsFwdCtx->layers,
585 ovsFwdCtx->tunKey.dst != 0 ? &ovsFwdCtx->tunKey : NULL);
586 if (status != NDIS_STATUS_SUCCESS) {
587 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
588 L"OVS-Flow extract failed");
589 ovsActionStats.failedFlowExtract++;
593 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
595 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
596 ovsFwdCtx->switchContext->datapath.hits++;
597 status = OvsActionsExecute(ovsFwdCtx->switchContext,
598 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
599 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
600 &key, &hash, &ovsFwdCtx->layers,
601 flow->actions, flow->actionsLen);
/* The NBL was handed to OvsActionsExecute(); drop the local reference. */
602 ovsFwdCtx->curNbl = NULL;
604 LIST_ENTRY missedPackets;
606 ovsFwdCtx->switchContext->datapath.misses++;
607 InitializeListHead(&missedPackets);
608 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
609 &key,ovsFwdCtx->curNbl,
610 FALSE, &ovsFwdCtx->layers,
611 ovsFwdCtx->switchContext, &missedPackets, &num);
613 OvsQueuePackets(&missedPackets, num);
615 if (status == NDIS_STATUS_SUCCESS) {
616 /* Complete the packet since it was copied to user buffer. */
617 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
618 L"OVS-Dropped since packet was copied to userspace");
619 ovsActionStats.flowMiss++;
620 status = NDIS_STATUS_SUCCESS;
622 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
623 L"OVS-Dropped due to failure to queue to userspace");
624 status = NDIS_STATUS_FAILURE;
625 ovsActionStats.failedFlowMiss++;
633 * --------------------------------------------------------------------------
635 * The start function for Tx tunneling - encapsulates the packet, and
636 * outputs the packet on the PIF bridge.
639 * The NBL in 'ovsFwdCtx' is consumed.
640 * --------------------------------------------------------------------------
/*
 * Starts Tx tunneling for ovsFwdCtx->curNbl.
 *  - If the switch has no internalVport or no virtualExternalVport, the Tx
 *    tunnel state is cleared, the NBL is completed and NDIS_STATUS_FAILURE
 *    is returned.
 *  - Otherwise srcVportNo and the NBL source port id / NIC index are
 *    rewritten to those of the internal port, and the encap routine matching
 *    tunnelTxNic->ovsType (GRE, VXLAN or STT) is called with newNbl as its
 *    output.
 *  - The Tx tunnel state is cleared right after the encap attempt.
 *  - Encap success: the original NBL is completed, newNbl becomes curNbl and
 *    OvsDoFlowLookupOutput() takes over.
 *  - Otherwise (the drop reason names an encap failure): the NBL is
 *    completed, failedEncap is counted and status is reset to
 *    NDIS_STATUS_SUCCESS.
 */
642 static __inline NDIS_STATUS
643 OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
645 NDIS_STATUS status = NDIS_STATUS_FAILURE;
646 PNET_BUFFER_LIST newNbl = NULL;
649 * Setup the source port to be the internal port to as to facilitate the
650 * second OvsLookupFlow.
652 if (ovsFwdCtx->switchContext->internalVport == NULL ||
653 ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
654 OvsClearTunTxCtx(ovsFwdCtx);
655 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
656 L"OVS-Dropped since either internal or external port is absent");
657 return NDIS_STATUS_FAILURE;
659 ovsFwdCtx->srcVportNo =
660 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
662 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
663 ovsFwdCtx->fwdDetail->SourceNicIndex =
664 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
666 /* Do the encap. Encap function does not consume the NBL. */
667 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
668 case OVS_VPORT_TYPE_GRE:
669 status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
670 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
671 &ovsFwdCtx->layers, &newNbl);
673 case OVS_VPORT_TYPE_VXLAN:
674 status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
675 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
676 &ovsFwdCtx->layers, &newNbl);
678 case OVS_VPORT_TYPE_STT:
679 status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
680 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
681 &ovsFwdCtx->layers, &newNbl);
684 ASSERT(! "Tx: Unhandled tunnel type");
687 /* Reset the tunnel context so that it doesn't get used after this point. */
688 OvsClearTunTxCtx(ovsFwdCtx);
690 if (status == NDIS_STATUS_SUCCESS) {
/* Release the un-encapsulated NBL, then continue with the encapsulated one. */
692 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
693 L"Complete after cloning NBL for encapsulation");
694 ovsFwdCtx->curNbl = newNbl;
695 status = OvsDoFlowLookupOutput(ovsFwdCtx);
696 ASSERT(ovsFwdCtx->curNbl == NULL);
699 * XXX: Temporary freeing of the packet until we register a
700 * callback to IP helper.
702 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
703 L"OVS-Dropped due to encap failure");
704 ovsActionStats.failedEncap++;
705 status = NDIS_STATUS_SUCCESS;
712 * --------------------------------------------------------------------------
714 * Decapsulate the incoming NBL based on the tunnel type and goes through
715 * the flow lookup for the inner packet.
717 * Note: IP checksum is validated here, but L4 checksum validation needs
718 * to be done by the corresponding tunnel types.
721 * The NBL in 'ovsFwdCtx' is consumed.
722 * --------------------------------------------------------------------------
/*
 * Starts Rx de-tunneling for ovsFwdCtx->curNbl.
 *  - An OvsValidateIPChecksum() failure is counted in failedChecksum and
 *    logged.
 *  - The decap routine matching tunnelRxNic->ovsType (GRE, VXLAN or STT) is
 *    called with tunKey and newNbl as outputs; an unknown type is logged and
 *    yields NDIS_STATUS_NOT_SUPPORTED.
 *  - STT may succeed without producing a new NBL; only the drop reason text
 *    changes in that case.
 *  - A decap failure is counted in failedDecap.
 *  - The original NBL is completed, the context is re-initialised around
 *    newNbl with the tunnel vport as source, the NBL source port is set to
 *    NDIS_SWITCH_DEFAULT_PORT_ID with NIC index 0, and
 *    OvsDoFlowLookupOutput() processes the inner packet.
 *  - The Rx tunnel state is cleared on both the normal and the failure exit.
 * NOTE(review): the gotos and labels linking these steps are on lines not
 * visible in this excerpt, so the exact control flow should be confirmed.
 */
724 static __inline NDIS_STATUS
725 OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
727 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
728 PNET_BUFFER_LIST newNbl = NULL;
/* Saved because OvsCompleteNBLForwardingCtx() resets tunnelRxNic. */
729 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
730 PCWSTR dropReason = L"OVS-dropped due to new decap packet";
732 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
733 != NDIS_STATUS_SUCCESS) {
734 ovsActionStats.failedChecksum++;
735 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
740 * Decap port functions should return a new NBL if it was copied, and
741 * this new NBL should be setup as the ovsFwdCtx->curNbl.
744 switch(tunnelRxVport->ovsType) {
745 case OVS_VPORT_TYPE_GRE:
746 status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
747 &ovsFwdCtx->tunKey, &newNbl);
749 case OVS_VPORT_TYPE_VXLAN:
750 status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
751 &ovsFwdCtx->tunKey, &newNbl);
753 case OVS_VPORT_TYPE_STT:
754 status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
755 &ovsFwdCtx->tunKey, &newNbl);
756 if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
757 /* This was an STT-LSO Fragment */
758 dropReason = L"OVS-STT segment is cached";
762 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
763 tunnelRxVport->ovsType);
764 ASSERT(! "Rx: Unhandled tunnel type");
765 status = NDIS_STATUS_NOT_SUPPORTED;
768 if (status != NDIS_STATUS_SUCCESS) {
769 ovsActionStats.failedDecap++;
774 * tunnelRxNic and other fields will be cleared, re-init the context
777 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
780 /* Decapsulated packet is in a new NBL */
/* Restore the Rx vport; the re-init below keeps it (resetTunnelInfo FALSE). */
781 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
782 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
783 newNbl, tunnelRxVport->portNo, 0,
784 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
785 ovsFwdCtx->completionList,
786 &ovsFwdCtx->layers, FALSE);
789 * Set the NBL's SourcePortId and SourceNicIndex to default values to
790 * keep NDIS happy when we forward the packet.
792 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
793 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
795 status = OvsDoFlowLookupOutput(ovsFwdCtx);
797 ASSERT(ovsFwdCtx->curNbl == NULL);
798 OvsClearTunRxCtx(ovsFwdCtx);
803 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
804 L"OVS-dropped due to decap failure");
805 OvsClearTunRxCtx(ovsFwdCtx);
811 * --------------------------------------------------------------------------
812 * OvsOutputForwardingCtx --
813 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
814 * the ports added so far into 'ovsFwdCtx'.
817 * This function consumes the NBL - either by forwarding it successfully to
818 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
820 * Also makes sure that the list of destination ports - tunnel or otherwise is
822 * --------------------------------------------------------------------------
/*
 * Sends ovsFwdCtx->curNbl to the destinations collected so far.
 *  - With at least one regular destination (destPortsSizeOut > 0): if a
 *    tunnel vport is also pending, a copy of the NBL is taken first for the
 *    tunnel pipeline. The pending destinations are then committed through
 *    UpdateNetBufferListDestinations() and the NBL is passed to
 *    OvsSendNBLIngress(). With a copy in hand, the context is re-initialised
 *    around it.
 *  - With a pending tunnel vport, OvsTunnelPortTx() or OvsTunnelPortRx() is
 *    run; each is expected to leave its tunnel vport NULL and tunKey.dst 0.
 *  - On a non-success status the NBL held by the context is completed with
 *    the recorded drop reason.
 * NOTE(review): nb and dropReason are used below but declared on lines not
 * visible in this excerpt, as are the gotos/labels joining the steps.
 */
824 static __inline NDIS_STATUS
825 OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
827 NDIS_STATUS status = STATUS_SUCCESS;
828 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
832 * Handle the case where the some of the destination ports are tunneled
833 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
834 * tunneling pipeline starts when we output the packet to tunneled port.
836 if (ovsFwdCtx->destPortsSizeOut > 0) {
837 PNET_BUFFER_LIST newNbl = NULL;
/* Entries added since the last commit: available minus still-unused slots. */
839 UINT32 portsToUpdate =
840 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
841 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
843 ASSERT(ovsFwdCtx->destinationPorts != NULL);
846 * Create a copy of the packet in order to do encap on it later. Also,
847 * don't copy the offload context since the encap'd packet has a
848 * different set of headers. This will change when we implement offloads
849 * before doing encapsulation.
851 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
852 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
853 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
854 0, 0, TRUE /*copy NBL info*/);
855 if (newNbl == NULL) {
856 status = NDIS_STATUS_RESOURCES;
857 ovsActionStats.noCopiedNbl++;
858 dropReason = L"Dropped due to failure to create NBL copy.";
863 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
864 ASSERT(portsToUpdate > 0);
865 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
866 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
867 portsToUpdate, ovsFwdCtx->destinationPorts);
868 if (status != NDIS_STATUS_SUCCESS) {
/*
 * NOTE(review): newNbl appears to be assigned only when a tunnel vport is
 * pending, so it may still be NULL here - confirm that OvsCompleteNBL()
 * tolerates a NULL NBL.
 */
869 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
870 ovsActionStats.cannotGrowDest++;
871 dropReason = L"Dropped due to failure to update destinations.";
875 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
876 ovsFwdCtx->sendFlags);
877 /* End this pipeline by resetting the corresponding context. */
878 ovsFwdCtx->destPortsSizeOut = 0;
879 ovsFwdCtx->curNbl = NULL;
/* Continue with the copy; tunnel state is kept (resetTunnelInfo FALSE). */
881 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
882 newNbl, ovsFwdCtx->srcVportNo, 0,
883 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
884 ovsFwdCtx->completionList,
885 &ovsFwdCtx->layers, FALSE);
886 if (status != NDIS_STATUS_SUCCESS) {
887 dropReason = L"Dropped due to resouces.";
893 if (ovsFwdCtx->tunnelTxNic != NULL) {
894 status = OvsTunnelPortTx(ovsFwdCtx);
895 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
896 ASSERT(ovsFwdCtx->tunKey.dst == 0);
897 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
898 status = OvsTunnelPortRx(ovsFwdCtx);
899 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
900 ASSERT(ovsFwdCtx->tunKey.dst == 0);
902 ASSERT(ovsFwdCtx->curNbl == NULL);
907 if (status != NDIS_STATUS_SUCCESS) {
908 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
916 * --------------------------------------------------------------------------
917 * OvsLookupFlowOutput --
918 * Utility function for external callers to do flow extract, lookup,
919 * actions execute on a given NBL.
921 * Note: If this is being used from a callback function, make sure that the
922 * arguments specified are still valid in the asynchronous context.
925 * This function consumes the NBL.
926 * --------------------------------------------------------------------------
/*
 * Entry point for callers outside the action pipeline: builds a local
 * forwarding context for curNbl with the internal vport as source, no header
 * info (layers NULL) and tunnel state reset, then runs
 * OvsDoFlowLookupOutput(). If the context cannot be initialised the NBL is
 * completed instead. switchContext->internalVport is asserted non-NULL, but
 * internalVport->portNo is read regardless.
 * NOTE(review): the compList parameter and the status local are used below
 * but declared on lines not visible in this excerpt.
 */
929 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
931 PNET_BUFFER_LIST curNbl)
934 OvsForwardingContext ovsFwdCtx;
935 POVS_VPORT_ENTRY internalVport =
936 (POVS_VPORT_ENTRY)switchContext->internalVport;
938 /* XXX: make sure comp list was not a stack variable previously. */
939 OvsCompletionList *completionList = (OvsCompletionList *)compList;
942 * XXX: can internal port disappear while we are busy doing ARP resolution?
943 * It could, but will we get this callback from IP helper in that case. Need
946 ASSERT(switchContext->internalVport);
947 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
948 internalVport->portNo, 0,
949 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
950 completionList, NULL, TRUE);
951 if (status != NDIS_STATUS_SUCCESS) {
952 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
953 L"OVS-Dropped due to resources");
959 * XXX: We need to acquire the dispatch lock and the datapath lock.
/* The status returned by the lookup/output stage is not captured here. */
962 OvsDoFlowLookupOutput(&ovsFwdCtx);
967 * --------------------------------------------------------------------------
968 * OvsOutputBeforeSetAction --
969 * Function to be called to complete one set of actions on an NBL, before
970 * we start the next one.
971 * --------------------------------------------------------------------------
/*
 * Flushes the outputs gathered so far before a set-type action modifies the
 * packet. A copy of curNbl is taken first, the original is sent through
 * OvsOutputForwardingCtx(), and the context is then re-initialised around
 * the copy with the source vport number saved before the send.
 *  - If the copy could not be made, noCopiedNbl is counted and
 *    NDIS_STATUS_RESOURCES is returned; the original has still been output.
 *  - If the output step failed, the copy is released via OvsCompleteNBL().
 * NOTE(review): the branch structure around the final re-init is on lines
 * not visible in this excerpt.
 */
973 static __inline NDIS_STATUS
974 OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
976 PNET_BUFFER_LIST newNbl;
977 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
980 * Create a copy and work on the copy after this point. The original NBL is
981 * forwarded. One reason to not use the copy for forwarding is that
982 * ports have already been added to the original NBL, and it might be
983 * inefficient/impossible to remove/re-add them to the copy. There's no
984 * notion of removing the ports, the ports need to be marked as
985 * "isExcluded". There's seems no real advantage to retaining the original
986 * and sending out the copy instead.
988 * XXX: We are copying the offload context here. This is to handle actions
990 * outport, pop_vlan(), outport, push_vlan(), outport
992 * copy size needs to include inner ether + IP + TCP, need to revisit
993 * if we support IP options.
994 * XXX Head room needs to include the additional encap.
995 * XXX copySize check is not considering multiple NBs.
997 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
998 0, 0, TRUE /*copy NBL info*/);
1000 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
1001 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
1003 /* Send the original packet out and save the original source port number */
1004 UINT32 tempVportNo = ovsFwdCtx->srcVportNo;
1005 status = OvsOutputForwardingCtx(ovsFwdCtx);
1006 ASSERT(ovsFwdCtx->curNbl == NULL);
1007 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
1008 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
1009 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
1011 /* If we didn't make a copy, can't continue. */
1012 if (newNbl == NULL) {
1013 ovsActionStats.noCopiedNbl++;
1014 return NDIS_STATUS_RESOURCES;
1017 /* Finish the remaining actions with the new NBL */
1018 if (status != NDIS_STATUS_SUCCESS) {
1019 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
1021 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1022 newNbl, tempVportNo, 0,
1023 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1024 ovsFwdCtx->completionList,
1025 &ovsFwdCtx->layers, FALSE);
1033 * --------------------------------------------------------------------------
1034 * OvsPopVlanInPktBuf --
1035 * Function to pop a VLAN tag when the tag is in the packet buffer.
1036 * --------------------------------------------------------------------------
/*
 * Removes a VLAN tag that sits in the packet data itself.
 * The NBL is copied, the original is completed, and the context is
 * re-initialised around the copy with a NULL completion list. The first
 * dataLength bytes of the frame (two DL_EUI48 addresses) are then staged in
 * tempBuffer and written back shiftLength bytes further into the buffer,
 * after which the NB data start is advanced by shiftLength, so the bytes
 * that followed the addresses are dropped from the front of the frame.
 * Returns NDIS_STATUS_RESOURCES when the copy or the re-init fails (and on
 * the path after NdisQueryMdl()), and NDIS_STATUS_FAILURE when the first MDL
 * does not hold sizeof (EthHdr) + shiftLength contiguous bytes.
 * NOTE(review): curNb, curMdl, bufferStart and status are used below but
 * declared on lines not visible in this excerpt.
 */
1038 static __inline NDIS_STATUS
1039 OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
1044 ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
1045 UINT32 packetLen, mdlLen;
1046 PNET_BUFFER_LIST newNbl;
1050 * Declare a dummy vlanTag structure since we need to compute the size
1051 * of shiftLength. The NDIS one is a unionized structure.
1053 NDIS_PACKET_8021Q_INFO vlanTag = {0};
1054 ULONG shiftLength = sizeof (vlanTag.TagHeader);
/*
 * NOTE(review): this is an array of PUINT8 (presumably a pointer typedef),
 * yet only dataLength bytes are staged through it below; an array of UINT8
 * of the same element count looks sufficient - confirm and shrink.
 */
1055 PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
1057 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1058 0, 0, TRUE /* copy NBL info */);
1060 ovsActionStats.noCopiedNbl++;
1061 return NDIS_STATUS_RESOURCES;
1064 /* Complete the original NBL and create a copy to modify. */
1065 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
/* NULL completion list: later completion treats the copy as OVS-created. */
1067 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1068 newNbl, ovsFwdCtx->srcVportNo, 0,
1069 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1070 NULL, &ovsFwdCtx->layers, FALSE);
1071 if (status != NDIS_STATUS_SUCCESS) {
1072 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1073 L"Dropped due to resouces");
1074 return NDIS_STATUS_RESOURCES;
1077 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1078 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1079 ASSERT(curNb->Next == NULL);
1080 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1081 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1083 return NDIS_STATUS_RESOURCES;
/* Only the part of the MDL at or after the current offset belongs to the NB. */
1085 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1086 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1087 if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
1089 return NDIS_STATUS_FAILURE;
1091 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/* Stage the addresses, rewrite them shiftLength bytes later, then skip ahead. */
1092 RtlCopyMemory(tempBuffer, bufferStart, dataLength);
1093 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
1094 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1096 return NDIS_STATUS_SUCCESS;
1100 * --------------------------------------------------------------------------
1101 * OvsTunnelAttrToIPv4TunnelKey --
1102 * Convert tunnel attribute to OvsIPv4TunnelKey.
1103 * --------------------------------------------------------------------------
1105 static __inline NDIS_STATUS
1106 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
1107 OvsIPv4TunnelKey *tunKey)
1112 tunKey->attr[0] = 0;
1113 tunKey->attr[1] = 0;
1114 tunKey->attr[2] = 0;
1115 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
1117 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1118 NlAttrGetSize(attr)) {
1119 switch (NlAttrType(a)) {
1120 case OVS_TUNNEL_KEY_ATTR_ID:
1121 tunKey->tunnelId = NlAttrGetBe64(a);
1122 tunKey->flags |= OVS_TNL_F_KEY;
1124 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1125 tunKey->src = NlAttrGetBe32(a);
1127 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1128 tunKey->dst = NlAttrGetBe32(a);
1130 case OVS_TUNNEL_KEY_ATTR_TOS:
1131 tunKey->tos = NlAttrGetU8(a);
1133 case OVS_TUNNEL_KEY_ATTR_TTL:
1134 tunKey->ttl = NlAttrGetU8(a);
1136 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1137 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1139 case OVS_TUNNEL_KEY_ATTR_CSUM:
1140 tunKey->flags |= OVS_TNL_F_CSUM;
1147 return NDIS_STATUS_SUCCESS;
1151 *----------------------------------------------------------------------------
1152 * OvsUpdateEthHeader --
1153 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1155 *----------------------------------------------------------------------------
1157 static __inline NDIS_STATUS
1158 OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1159 const struct ovs_key_ethernet *ethAttr)
1165 UINT32 packetLen, mdlLen;
1167 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1168 ASSERT(curNb->Next == NULL);
1169 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1170 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1171 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1173 ovsActionStats.noResource++;
1174 return NDIS_STATUS_RESOURCES;
1176 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1178 /* Bail out if the L2 header is not in a contiguous buffer. */
1179 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1181 return NDIS_STATUS_FAILURE;
1183 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1185 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1186 sizeof ethHdr->Destination);
1187 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1189 return NDIS_STATUS_SUCCESS;
1193 *----------------------------------------------------------------------------
1194 * OvsUpdateIPv4Header --
1195 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1197 *----------------------------------------------------------------------------
1199 static __inline NDIS_STATUS
1200 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1201 const struct ovs_key_ipv4 *ipAttr)
1207 UINT32 mdlLen, hdrSize, packetLen;
1208 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1211 TCPHdr *tcpHdr = NULL;
1212 UDPHdr *udpHdr = NULL;
1214 ASSERT(layers->value != 0);
1217 * Peek into the MDL to get a handle to the IP header and if required
1218 * the TCP/UDP header as well. We check if the required headers are in one
1219 * contiguous MDL, and if not, we copy them over to one MDL.
1221 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1222 ASSERT(curNb->Next == NULL);
1223 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1224 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1225 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1227 ovsActionStats.noResource++;
1228 return NDIS_STATUS_RESOURCES;
1230 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1231 mdlLen -= curMdlOffset;
1232 ASSERT((INT)mdlLen >= 0);
1234 if (layers->isTcp || layers->isUdp) {
1235 hdrSize = layers->l4Offset +
1236 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1238 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1241 /* Count of number of bytes of valid data there are in the first MDL. */
1242 mdlLen = MIN(packetLen, mdlLen);
1243 if (mdlLen < hdrSize) {
1244 PNET_BUFFER_LIST newNbl;
1245 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1246 hdrSize, 0, TRUE /*copy NBL info*/);
1248 ovsActionStats.noCopiedNbl++;
1249 return NDIS_STATUS_RESOURCES;
1251 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1252 L"Complete after partial copy.");
1254 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1255 newNbl, ovsFwdCtx->srcVportNo, 0,
1256 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1257 NULL, &ovsFwdCtx->layers, FALSE);
1258 if (status != NDIS_STATUS_SUCCESS) {
1259 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1260 L"OVS-Dropped due to resources");
1261 return NDIS_STATUS_RESOURCES;
1264 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1265 ASSERT(curNb->Next == NULL);
1266 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1267 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1269 ovsActionStats.noResource++;
1270 return NDIS_STATUS_RESOURCES;
1272 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1273 mdlLen -= curMdlOffset;
1274 ASSERT(mdlLen >= hdrSize);
1277 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1279 if (layers->isTcp) {
1280 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1281 } else if (layers->isUdp) {
1282 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1286 * Adjust the IP header inline as dictated by the action, nad also update
1287 * the IP and the TCP checksum for the data modified.
1289 * In the future, this could be optimized to make one call to
1290 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1291 * case, we only update the TTL.
1293 if (ipHdr->saddr != ipAttr->ipv4_src) {
1295 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1297 } else if (udpHdr && udpHdr->check) {
1298 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1302 if (ipHdr->check != 0) {
1303 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1306 ipHdr->saddr = ipAttr->ipv4_src;
1308 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1310 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1312 } else if (udpHdr && udpHdr->check) {
1313 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1317 if (ipHdr->check != 0) {
1318 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1321 ipHdr->daddr = ipAttr->ipv4_dst;
1323 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1324 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1325 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1327 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1328 } else if (udpHdr && udpHdr->check) {
1329 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1332 if (ipHdr->check != 0) {
1333 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1335 ipHdr->protocol = ipAttr->ipv4_proto;
1337 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1338 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1339 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1340 if (ipHdr->check != 0) {
1341 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1343 ipHdr->ttl = ipAttr->ipv4_ttl;
1346 return NDIS_STATUS_SUCCESS;
1350 * --------------------------------------------------------------------------
1351 * OvsExecuteSetAction --
1352 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1353 * --------------------------------------------------------------------------
1355 static __inline NDIS_STATUS
1356 OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1361 enum ovs_key_attr type = NlAttrType(a);
1362 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1365 case OVS_KEY_ATTR_ETHERNET:
1366 status = OvsUpdateEthHeader(ovsFwdCtx,
1367 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
1370 case OVS_KEY_ATTR_IPV4:
1371 status = OvsUpdateIPv4Header(ovsFwdCtx,
1372 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
1375 case OVS_KEY_ATTR_TUNNEL:
1377 OvsIPv4TunnelKey tunKey;
1378 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1379 ASSERT(status == NDIS_STATUS_SUCCESS);
1380 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
1381 tunKey.dst_port = key->ipKey.l4.tpDst;
1382 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
1387 OVS_LOG_INFO("Unhandled attribute %#x", type);
1394 * --------------------------------------------------------------------------
1395 * OvsActionsExecute --
1396 * Interpret and execute the specified 'actions' on the specified packet
1397 * 'curNbl'. The expectation is that if the packet needs to be dropped
1398 * (completed) for some reason, it is added to 'completionList' so that the
1399 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1400 * assumed to be generated by OVS and freed up. Otherwise, the function
1401 * consumes the NBL by generating a NDIS send indication for the packet.
1403 * There are one or more of "clone" NBLs that may get generated while
1404 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1405 * and the caller does not have to worry about them.
1407 * Success or failure is returned based on whether the specified actions
1408 * were executed successfully on the packet or not.
1409 * --------------------------------------------------------------------------
1412 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1413 OvsCompletionList *completionList,
1414 PNET_BUFFER_LIST curNbl,
1419 OVS_PACKET_HDR_INFO *layers,
1420 const PNL_ATTR actions,
1426 OvsForwardingContext ovsFwdCtx;
1427 PCWSTR dropReason = L"";
1429 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1430 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1432 /* XXX: ASSERT that the flow table lock is held. */
1433 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1434 sendFlags, fwdDetail, completionList,
1436 if (status != NDIS_STATUS_SUCCESS) {
1437 dropReason = L"OVS-initing destination port list failed";
1441 if (actionsLen == 0) {
1442 dropReason = L"OVS-Dropped due to Flow action";
1443 ovsActionStats.zeroActionLen++;
1447 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
1448 switch(NlAttrType(a)) {
1449 case OVS_ACTION_ATTR_OUTPUT:
1450 dstPortID = NlAttrGetU32(a);
1451 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1453 if (status != NDIS_STATUS_SUCCESS) {
1454 dropReason = L"OVS-adding destination port failed";
1459 case OVS_ACTION_ATTR_PUSH_VLAN:
1461 struct ovs_action_push_vlan *vlan;
1463 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1465 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1466 || ovsFwdCtx.tunnelRxNic != NULL) {
1467 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1468 if (status != NDIS_STATUS_SUCCESS) {
1469 dropReason = L"OVS-adding destination failed";
1474 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1475 Ieee8021QNetBufferListInfo);
1476 if (vlanTagValue != NULL) {
1478 * XXX: We don't support double VLAN tag offload. In such cases,
1479 * we need to insert the existing one into the packet buffer,
1480 * and add the new one as offload. This will take care of
1481 * guest tag-in-tag case as well as OVS rules that specify
1486 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
1487 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
1488 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1489 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1491 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1492 Ieee8021QNetBufferListInfo) = vlanTagValue;
1497 case OVS_ACTION_ATTR_POP_VLAN:
1499 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1500 || ovsFwdCtx.tunnelRxNic != NULL) {
1501 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1502 if (status != NDIS_STATUS_SUCCESS) {
1503 dropReason = L"OVS-adding destination failed";
1508 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1509 Ieee8021QNetBufferListInfo) != 0) {
1510 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1511 Ieee8021QNetBufferListInfo) = 0;
1514 * The VLAN tag is inserted into the packet buffer. Pop the tag
1515 * by packet buffer modification.
1517 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1518 if (status != NDIS_STATUS_SUCCESS) {
1519 dropReason = L"OVS-pop vlan action failed";
1526 case OVS_ACTION_ATTR_USERSPACE:
1528 PNL_ATTR userdataAttr;
1530 POVS_PACKET_QUEUE_ELEM elem;
1531 BOOLEAN isRecv = FALSE;
1533 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
1537 if (vport->isExternal ||
1538 OvsIsTunnelVportType(vport->ovsType)) {
1543 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1544 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
1546 elem = OvsCreateQueueNlPacket((PVOID)userdataAttr,
1547 userdataAttr->nlaLen,
1548 OVS_PACKET_CMD_ACTION,
1549 vport, key, ovsFwdCtx.curNbl,
1550 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
1554 LIST_ENTRY missedPackets;
1555 InitializeListHead(&missedPackets);
1556 InsertTailList(&missedPackets, &elem->link);
1557 OvsQueuePackets(&missedPackets, 1);
1558 dropReason = L"OVS-Completed since packet was copied to "
1561 dropReason = L"OVS-Dropped due to failure to queue to "
1567 case OVS_ACTION_ATTR_SET:
1569 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1570 || ovsFwdCtx.tunnelRxNic != NULL) {
1571 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1572 if (status != NDIS_STATUS_SUCCESS) {
1573 dropReason = L"OVS-adding destination failed";
1578 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
1579 (const PNL_ATTR)NlAttrGet
1580 ((const PNL_ATTR)a));
1581 if (status != NDIS_STATUS_SUCCESS) {
1582 dropReason = L"OVS-set action failed";
1587 case OVS_ACTION_ATTR_SAMPLE:
1589 status = NDIS_STATUS_NOT_SUPPORTED;
1594 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1595 || ovsFwdCtx.tunnelRxNic != NULL) {
1596 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1597 ASSERT(ovsFwdCtx.curNbl == NULL);
1600 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1601 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1602 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1606 * If curNbl != NULL, it implies the NBL has not been freed up so far.
1608 if (ovsFwdCtx.curNbl) {
1609 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);