2 * Copyright (c) 2014, 2016 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
/*
 * Debug-module selector plus the drop/failure counters used by the action
 * pipeline in this file.
 *
 * NOTE(review): the embedded numbering jumps inside this struct (38 -> 48 ->
 * 50 -> 55 -> 57), so some members are not visible in this listing. Code
 * further down also increments noVport, noCopiedNbl, flowMiss, failedEncap,
 * failedDecap and per-tunnel rx/tx counters; confirm their declarations in
 * the full file.
 */
36 #define OVS_DBG_MOD OVS_DBG_ACTION
38 typedef struct _OVS_ACTION_STATS {
/* Bumped in OvsDoFlowLookupOutput() on the failure-to-queue drop path. */
48 UINT32 failedFlowMiss;
/* Bumped in OvsDoFlowLookupOutput() on the flow-extract-failed drop path. */
50 UINT32 failedFlowExtract;
/* Bumped when growing or updating the NBL destination array fails. */
55 UINT32 cannotGrowDest;
/* Bumped in OvsTunnelPortRx() when the IP checksum check fails. */
57 UINT32 failedChecksum;
58 } OVS_ACTION_STATS, *POVS_ACTION_STATS;
/* Global instance that the functions in this file increment directly. */
60 OVS_ACTION_STATS ovsActionStats;
63 * There is a lot of data that needs to be maintained while executing the pipeline
64 * as dictated by the actions of a flow, across different functions at different
65 * levels. Such data is put together in a 'context' structure. Care should be
66 * exercised while adding new members to the structure - only add ones that get
67 * used across multiple stages in the pipeline/get used in multiple functions.
/* Size requested from NDIS when an NBL has no destination array yet. */
69 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
/*
 * Per-NBL state carried through the action pipeline: the NBL itself, its NDIS
 * forwarding detail and destination array, output-port bookkeeping, the
 * completion list, the tunnel key, the tunnel Tx/Rx vports and the parsed
 * header information.
 *
 * NOTE(review): functions in this file read and write ovsFwdCtx->sendFlags
 * and ovsFwdCtx->srcVportNo, but those member declarations are not visible
 * in this listing (embedded numbering jumps 78 -> 80 and 91 -> 97); confirm
 * against the full file.
 */
70 typedef struct OvsForwardingContext {
71 POVS_SWITCH_CONTEXT switchContext;
72 /* The NBL currently used in the pipeline. */
73 PNET_BUFFER_LIST curNbl;
74 /* NDIS forwarding detail for 'curNbl'. */
75 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
76 /* Array of destination ports for 'curNbl'. */
77 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
78 /* send flags while sending 'curNbl' into NDIS. */
80 /* Total number of output ports, used + unused, in 'curNbl'. */
81 UINT32 destPortsSizeIn;
82 /* Total number of used output ports in 'curNbl'. */
83 UINT32 destPortsSizeOut;
85 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
88 OvsCompletionList *completionList;
90 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
91 * bridge. ie. during tunneling on the Rx side.
97 * - specified in actions during tunneling Tx
98 * - extracted from an NBL during tunneling Rx
100 OvsIPv4TunnelKey tunKey;
104 * To store the output port, when it is a tunneled port. We don't foresee
105 * multiple tunneled ports as outport for any given NBL.
107 POVS_VPORT_ENTRY tunnelTxNic;
111 * Points to the Internal port on the PIF Bridge, if the packet needs to be
114 POVS_VPORT_ENTRY tunnelRxNic;
116 /* header information */
117 OVS_PACKET_HDR_INFO layers;
118 } OvsForwardingContext;
122 * --------------------------------------------------------------------------
123 * OvsInitForwardingCtx --
124 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
128 * NDIS_STATUS_SUCCESS on success
129 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
130 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
131 * enough for OvsCompleteNBLForwardingCtx() to do its work.
132 * --------------------------------------------------------------------------
/*
 * Records switchContext, curNbl, completionList and fwdDetail in ovsFwdCtx,
 * fetches the destination-port array from NDIS when the forwarding detail
 * reports available destinations, and resets the output-port counters.
 * The only return visible in this listing is NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): srcVportNo and sendFlags are assigned below, but their
 * parameter lines are not visible here (embedded numbering jumps 137 -> 140);
 * confirm against the full file.
 */
134 static __inline NDIS_STATUS
135 OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
136 POVS_SWITCH_CONTEXT switchContext,
137 PNET_BUFFER_LIST curNbl,
140 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
141 OvsCompletionList *completionList,
142 OVS_PACKET_HDR_INFO *layers,
143 BOOLEAN resetTunnelInfo)
/*
 * NOTE(review): fwdDetail is dereferenced below; the switchContext assert is
 * the only argument check visible in this listing.
 */
146 ASSERT(switchContext);
151 * Set values for curNbl and switchContext so upon failures, we have enough
152 * information to do cleanup.
154 ovsFwdCtx->curNbl = curNbl;
155 ovsFwdCtx->switchContext = switchContext;
156 ovsFwdCtx->completionList = completionList;
157 ovsFwdCtx->fwdDetail = fwdDetail;
/* Only ask NDIS for the destination array when there are free slots. */
159 if (fwdDetail->NumAvailableDestinations > 0) {
161 * XXX: even though MSDN says GetNetBufferListDestinations() returns
162 * NDIS_STATUS, the header files say otherwise.
164 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
165 switchContext->NdisSwitchContext, curNbl,
166 &ovsFwdCtx->destinationPorts);
168 ASSERT(ovsFwdCtx->destinationPorts);
169 /* Ensure that none of the elements are consumed yet. */
170 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
171 fwdDetail->NumAvailableDestinations);
/*
 * NOTE(review): presumably the else arm (no available destinations); the
 * else line itself is not visible in this listing.
 */
173 ovsFwdCtx->destinationPorts = NULL;
/* Capacity starts at the number of free slots; nothing is used yet. */
175 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
176 ovsFwdCtx->destPortsSizeOut = 0;
177 ovsFwdCtx->srcVportNo = srcVportNo;
178 ovsFwdCtx->sendFlags = sendFlags;
/*
 * NOTE(review): the copy and the zeroing below are presumably the two arms
 * of a NULL check on layers (OvsLookupFlowOutput passes NULL); the if/else
 * lines are not visible in this listing.
 */
180 ovsFwdCtx->layers = *layers;
182 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
/* Tunnel vports and the tunnel key survive re-init unless asked otherwise. */
184 if (resetTunnelInfo) {
185 ovsFwdCtx->tunnelTxNic = NULL;
186 ovsFwdCtx->tunnelRxNic = NULL;
187 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
190 return NDIS_STATUS_SUCCESS;
194 * --------------------------------------------------------------------------
195 * OvsDetectTunnelRxPkt --
196 * Utility function for an RX packet to detect its tunnel type.
199 * True - if the tunnel type was detected.
200 * False - if not a tunnel packet or tunnel type not supported.
201 * --------------------------------------------------------------------------
/*
 * Tries to find a tunnel vport for a received packet. Fragments are skipped
 * (nwFrag check). Depending on ipKey.nwProto the vport is looked up either by
 * port type or by destination port, an rx counter is bumped, and the vport is
 * stored in ovsFwdCtx->tunnelRxNic.
 *
 * NOTE(review): the case labels, the NULL checks on tunnelVport and the
 * return statements are not visible in this listing (embedded numbering has
 * gaps), so the exact conditions guarding the counter updates and the
 * tunnelRxNic assignment need to be confirmed against the full file.
 */
203 static __inline BOOLEAN
204 OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
205 const OvsFlowKey *flowKey)
207 POVS_VPORT_ENTRY tunnelVport = NULL;
209 /* XXX: we should also check for the length of the UDP payload to pick
210 * packets only if they are at least VXLAN header size.
// Fragmented packets are never considered for tunnel detection.
212 if (!flowKey->ipKey.nwFrag) {
// tpDst goes through htons() first; presumably dstPort is the key for the
// by-destination-port lookups below (their argument lines are not visible).
213 UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
214 switch (flowKey->ipKey.nwProto) {
216 tunnelVport = OvsFindTunnelVportByPortType(ovsFwdCtx->switchContext,
219 ovsActionStats.rxGre++;
223 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
227 ovsActionStats.rxStt++;
231 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
233 OVS_VPORT_TYPE_VXLAN);
235 ovsActionStats.rxVxlan++;
241 // We might get tunnel packets even before the tunnel gets initialized.
// The Rx tunnel vport must not already be set when a new one is recorded.
243 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
244 ovsFwdCtx->tunnelRxNic = tunnelVport;
252 * --------------------------------------------------------------------------
253 * OvsDetectTunnelPkt --
254 * Utility function to detect if a packet is to be subjected to
255 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
256 * port, destination port, packet contents, and previously setup tunnel
260 * True - If the packet is to be subjected to tunneling.
261 * In case of invalid tunnel context, the tunneling functionality is
262 * a no-op and is completed within this function itself by consuming
263 * all of the tunneling context.
264 * False - If not a tunnel packet or tunnel type not supported. Caller should
265 * process the packet as a non-tunnel packet.
266 * --------------------------------------------------------------------------
// Decides whether the packet headed to dstVport enters the tunnel pipeline.
//  - Internal destination vport: Rx detection runs only when the NBL source
//    port is the virtual external port or the default port id.
//  - Tunnel destination vport: the tunnel key destination is cleared when the
//    source vport check below rejects the packet; when the key destination is
//    still set, the matching tx counter is bumped and dstVport is recorded in
//    tunnelTxNic.
// NOTE(review): the return statements, break statements and several braces
// are not visible in this listing (embedded numbering has gaps); confirm the
// exact return values against the full file.
268 static __inline BOOLEAN
269 OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
270 const POVS_VPORT_ENTRY dstVport,
271 const OvsFlowKey *flowKey)
273 if (OvsIsInternalVportType(dstVport->ovsType)) {
276 * The source of NBL during tunneling Rx could be the external
277 * port or if it is being executed from userspace, the source port is
280 BOOLEAN validSrcPort =
281 (ovsFwdCtx->fwdDetail->SourcePortId ==
282 ovsFwdCtx->switchContext->virtualExternalPortId) ||
283 (ovsFwdCtx->fwdDetail->SourcePortId ==
284 NDIS_SWITCH_DEFAULT_PORT_ID);
// OvsDetectTunnelRxPkt() is only called when the source port qualifies.
286 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
287 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
288 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
291 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
// Neither tunnel direction may already be pending on this context.
292 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
293 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
297 * The destination port is a tunnel port. Encapsulation must be
298 * performed only on packets that originate from:
300 * - a bridge-internal port (packets generated from userspace)
303 * If the packet will not be encapsulated, consume the tunnel context
306 if (ovsFwdCtx->srcVportNo != OVS_DPPORT_NUMBER_INVALID) {
308 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
309 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
// NOTE(review): the first half of this condition (numbered 310-311) is not
// visible in this listing; vport is dereferenced here, so confirm that a
// NULL check precedes it.
312 (vport->ovsType != OVS_VPORT_TYPE_NETDEV &&
313 !OvsIsBridgeInternalVport(vport))) {
// Zeroing the key destination makes the tunnel-context check below fail.
314 ovsFwdCtx->tunKey.dst = 0;
318 /* Tunnel the packet only if tunnel context is set. */
319 if (ovsFwdCtx->tunKey.dst != 0) {
320 switch(dstVport->ovsType) {
321 case OVS_VPORT_TYPE_GRE:
322 ovsActionStats.txGre++;
324 case OVS_VPORT_TYPE_VXLAN:
325 ovsActionStats.txVxlan++;
327 case OVS_VPORT_TYPE_STT:
328 ovsActionStats.txStt++;
// Remember the tunnel vport; OvsOutputForwardingCtx() starts the Tx tunnel
// pipeline when tunnelTxNic is set.
331 ovsFwdCtx->tunnelTxNic = dstVport;
342 * --------------------------------------------------------------------------
344 * Add the specified destination vport into the forwarding context. If the
345 * vport is a VIF/external port, it is added directly to the NBL. If it is
346 * a tunneling port, it is NOT added to the NBL.
349 * NDIS_STATUS_SUCCESS on success
350 * Other NDIS_STATUS upon failure.
351 * --------------------------------------------------------------------------
/*
 * Adds the vport identified by dstPortId as an output of the NBL held in
 * ovsFwdCtx. A missing or not-connected vport is counted (noVport) and
 * treated as success. Bridge-internal vports and packets claimed by
 * OvsDetectTunnelPkt() return early without touching the NDIS destination
 * array. Otherwise the destination array is grown when full and the vport is
 * written into the next free slot.
 *
 * NOTE(review): flowKey and status are used below but their declarations are
 * not visible in this listing (embedded numbering jumps 354 -> 356 and
 * 361 -> 363); the failure returns after the cannotGrowDest increments are
 * not visible either. Confirm against the full file.
 */
353 static __inline NDIS_STATUS
354 OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
356 NDIS_SWITCH_PORT_ID dstPortId,
357 BOOLEAN preserveVLAN,
358 BOOLEAN preservePriority)
360 POVS_VPORT_ENTRY vport;
361 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
363 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
366 * We hold the dispatch lock that protects the list of vports, so vports
367 * validated here can be added as destinations safely before we call into
370 * Some of the vports can be tunnelled ports as well in which case
371 * they should be added to a separate list of tunnelled destination ports
372 * instead of the VIF ports. The context for the tunnel is settable
373 * in OvsForwardingContext.
375 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
376 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
378 * There may be some latency between a port disappearing, and userspace
379 * updating the recalculated flows. In the meantime, handle invalid
382 ovsActionStats.noVport++;
383 return NDIS_STATUS_SUCCESS;
385 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
/*
 * Tx stats are updated before the bridge-internal and tunnel checks, so they
 * count every connected destination vport, using the length of the first
 * NET_BUFFER only.
 */
386 vport->stats.txPackets++;
387 vport->stats.txBytes +=
388 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
390 if (OvsIsBridgeInternalVport(vport)) {
391 return NDIS_STATUS_SUCCESS;
/* Tunnel packets are tracked in ovsFwdCtx, not in the NDIS array. */
394 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
395 return NDIS_STATUS_SUCCESS;
/* All slots are used: make room before writing the next destination. */
398 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
/* No array yet: ask NDIS for OVS_DEST_PORTS_ARRAY_MIN_SIZE entries. */
399 if (ovsFwdCtx->destPortsSizeIn == 0) {
400 ASSERT(ovsFwdCtx->destinationPorts == NULL);
401 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
/*
 * NOTE(review): the status assignment for this call (numbered 402) is not
 * visible in this listing.
 */
403 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
404 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
405 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
406 &ovsFwdCtx->destinationPorts);
407 if (status != NDIS_STATUS_SUCCESS) {
408 ovsActionStats.cannotGrowDest++;
/* Capacity is re-read from the forwarding detail after the grow. */
411 ovsFwdCtx->destPortsSizeIn =
412 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
413 ASSERT(ovsFwdCtx->destinationPorts);
415 ASSERT(ovsFwdCtx->destinationPorts != NULL);
418 * A ULONG value that specifies the total number of
419 * NDIS_SWITCH_PORT_DESTINATION elements in the
420 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
423 * A ULONG value that specifies the number of
424 * NDIS_SWITCH_PORT_DESTINATION elements in the
425 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
426 * specify port destinations.
428 * NumAvailableDestinations:
429 * A value that specifies the number of unused extensible switch
430 * destination ports elements within an NET_BUFFER_LIST structure.
432 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
433 ovsFwdCtx->destPortsSizeIn);
434 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
435 ovsFwdCtx->destPortsSizeOut -
436 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
437 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
439 * Before we grow the array of destination ports, the current set
440 * of ports needs to be committed. Only the ports added since the
441 * last commit need to be part of the new update.
443 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
444 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
445 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
446 ovsFwdCtx->destinationPorts);
447 if (status != NDIS_STATUS_SUCCESS) {
448 ovsActionStats.cannotGrowDest++;
451 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
452 ovsFwdCtx->destPortsSizeIn);
453 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
454 ovsFwdCtx->destPortsSizeOut);
455 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
/* Request as many additional entries as the array currently holds. */
457 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
458 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
459 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
460 if (status != NDIS_STATUS_SUCCESS) {
461 ovsActionStats.cannotGrowDest++;
464 ASSERT(ovsFwdCtx->destinationPorts != NULL);
/* The grow above added destPortsSizeIn entries, so capacity doubles. */
465 ovsFwdCtx->destPortsSizeIn <<= 1;
469 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
/*
 * NOTE(review): the fwdPort assignment line (numbered 470) that receives the
 * result of this macro is not visible in this listing.
 */
471 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
472 ovsFwdCtx->destPortsSizeOut);
/* Fill the slot from the vport and the caller-supplied preserve flags. */
474 fwdPort->PortId = vport->portId;
475 fwdPort->NicIndex = vport->nicIndex;
476 fwdPort->IsExcluded = 0;
477 fwdPort->PreserveVLAN = preserveVLAN;
478 fwdPort->PreservePriority = preservePriority;
/*
 * The new entry is only counted here; UpdateNetBufferListDestinations() is
 * called when the array is grown (above) and in OvsOutputForwardingCtx().
 */
479 ovsFwdCtx->destPortsSizeOut += 1;
481 return NDIS_STATUS_SUCCESS;
486 * --------------------------------------------------------------------------
487 * OvsClearTunTxCtx --
488 * Utility function to clear tx tunneling context.
489 * --------------------------------------------------------------------------
/*
 * Forgets the Tx tunnel vport and zeroes the tunnel key destination, which
 * other code in this file tests (tunKey.dst != 0) to decide whether a tunnel
 * context is set.
 * NOTE(review): the return-type line (numbered 491) is not visible in this
 * listing.
 */
492 OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
494 ovsFwdCtx->tunnelTxNic = NULL;
495 ovsFwdCtx->tunKey.dst = 0;
500 * --------------------------------------------------------------------------
501 * OvsClearTunRxCtx --
502 * Utility function to clear rx tunneling context.
503 * --------------------------------------------------------------------------
/*
 * Forgets the Rx tunnel vport and zeroes the tunnel key destination, which
 * other code in this file tests (tunKey.dst != 0) to decide whether a tunnel
 * context is set.
 * NOTE(review): the return-type line (numbered 505) is not visible in this
 * listing.
 */
506 OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
508 ovsFwdCtx->tunnelRxNic = NULL;
509 ovsFwdCtx->tunKey.dst = 0;
514 * --------------------------------------------------------------------------
515 * OvsCompleteNBLForwardingCtx --
516 * This utility function is responsible for freeing/completing an NBL - either
517 * by adding it to a completion list or by freeing it.
520 * It also resets the necessary fields in 'ovsFwdCtx'.
521 * --------------------------------------------------------------------------
/*
 * Completes the NBL currently held in ovsFwdCtx. When a completion list is
 * present the NBL is appended to it via OvsAddPktCompletionList(); otherwise
 * it is released through OvsCompleteNBL(). In both paths curNbl ends up NULL
 * (assigned directly, or asserted after OvsCompleteNBL()), and the used
 * output-port count and both tunnel vport pointers are reset. tunKey.dst is
 * not touched here.
 *
 * NOTE(review): dropReason is used below but its parameter line and the
 * return-type line are not visible in this listing (embedded numbering jumps
 * 524 -> 527); the final argument of OvsAddPktCompletionList() and the else
 * line are missing as well. Confirm against the full file.
 */
524 OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
527 NDIS_STRING filterReason;
/* Wrap the caller-supplied wide string for use as the drop reason. */
529 RtlInitUnicodeString(&filterReason, dropReason);
530 if (ovsFwdCtx->completionList) {
/* fwdDetail is dereferenced here for the source port id. */
531 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
532 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
534 ovsFwdCtx->curNbl = NULL;
536 /* If there is no completionList, we assume this is ovs created NBL */
537 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
538 ovsFwdCtx->curNbl, TRUE);
539 ASSERT(ovsFwdCtx->curNbl == NULL);
541 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
542 * using these fields should reset the values at the end of the pipeline. */
543 ovsFwdCtx->destPortsSizeOut = 0;
544 ovsFwdCtx->tunnelTxNic = NULL;
545 ovsFwdCtx->tunnelRxNic = NULL;
549 * --------------------------------------------------------------------------
550 * OvsDoFlowLookupOutput --
551 * Function to be used for the second stage of a tunneling workflow, ie.:
552 * - On the encapsulated packet on Tx path, to do a flow extract, flow
553 * lookup and executing the actions.
554 * - On the decapsulated packet on Rx path, to do a flow extract, flow
555 * lookup and executing the actions.
557 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
558 * until the new buffer management framework is adopted.
561 * The NBL in 'ovsFwdCtx' is consumed.
562 * --------------------------------------------------------------------------
/*
 * Second-stage lookup for the NBL in ovsFwdCtx: validates the source vport,
 * extracts the flow key, then looks the flow up. On a hit the flow actions
 * are executed through OvsActionsExecute(); on a miss the packet is handed
 * to OvsCreateAndAddPackets() with OVS_PACKET_CMD_MISS, queued, and the NBL
 * is completed.
 *
 * NOTE(review): the declarations of key, hash, flow, num and status, the
 * branch lines separating the hit and miss paths, and the final return are
 * not visible in this listing (embedded numbering has gaps); confirm against
 * the full file.
 */
564 static __inline NDIS_STATUS
565 OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
571 POVS_VPORT_ENTRY vport =
572 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
/* A vanished source vport drops the NBL but is still reported as success. */
573 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
574 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
575 L"OVS-Dropped due to internal/tunnel port removal");
576 ovsActionStats.noVport++;
577 return NDIS_STATUS_SUCCESS;
579 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
581 /* Assert that in the Rx direction, key is always setup. */
582 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
/*
 * The tunnel key is passed only when tunKey.dst is non-zero.
 * NOTE(review): the status assignment line for this call (numbered 583) is
 * not visible in this listing.
 */
584 OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
585 &key, &ovsFwdCtx->layers,
586 ovsFwdCtx->tunKey.dst != 0 ? &ovsFwdCtx->tunKey : NULL);
587 if (status != NDIS_STATUS_SUCCESS) {
588 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
589 L"OVS-Flow extract failed");
590 ovsActionStats.failedFlowExtract++;
594 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
/* Hit path: account for the flow, then run its actions on curNbl. */
596 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
597 ovsFwdCtx->switchContext->datapath.hits++;
598 status = OvsActionsExecute(ovsFwdCtx->switchContext,
599 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
600 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
601 &key, &hash, &ovsFwdCtx->layers,
602 flow->actions, flow->actionsLen);
/*
 * curNbl is cleared without being completed here; presumably
 * OvsActionsExecute() consumes the NBL - confirm.
 */
603 ovsFwdCtx->curNbl = NULL;
/* Miss path: build a userspace packet and queue it. */
605 LIST_ENTRY missedPackets;
607 ovsFwdCtx->switchContext->datapath.misses++;
608 InitializeListHead(&missedPackets);
609 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
610 &key,ovsFwdCtx->curNbl,
611 FALSE, &ovsFwdCtx->layers,
612 ovsFwdCtx->switchContext, &missedPackets, &num);
614 OvsQueuePackets(&missedPackets, num);
616 if (status == NDIS_STATUS_SUCCESS) {
617 /* Complete the packet since it was copied to user buffer. */
618 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
619 L"OVS-Dropped since packet was copied to userspace");
620 ovsActionStats.flowMiss++;
/*
 * NOTE(review): status already equals NDIS_STATUS_SUCCESS inside this
 * branch, so this assignment looks redundant.
 */
621 status = NDIS_STATUS_SUCCESS;
623 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
624 L"OVS-Dropped due to failure to queue to userspace");
/* Any failure from OvsCreateAndAddPackets() is reported as a generic one. */
625 status = NDIS_STATUS_FAILURE;
626 ovsActionStats.failedFlowMiss++;
634 * --------------------------------------------------------------------------
636 * The start function for Tx tunneling - encapsulates the packet, and
637 * outputs the packet on the PIF bridge.
640 * The NBL in 'ovsFwdCtx' is consumed.
641 * --------------------------------------------------------------------------
/*
 * Tx tunneling entry point: rewrites the source of the NBL to the internal
 * port, encapsulates curNbl according to tunnelTxNic->ovsType, clears the Tx
 * tunnel context, then runs OvsDoFlowLookupOutput() on the NBL returned by
 * the encap function. On encap failure the NBL is dropped, failedEncap is
 * bumped and NDIS_STATUS_SUCCESS is reported.
 *
 * NOTE(review): break statements, the default label, the else line and the
 * final return are not visible in this listing (embedded numbering has
 * gaps); confirm against the full file.
 */
643 static __inline NDIS_STATUS
644 OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
/* Starts as failure, so an unhandled tunnel type takes the drop path. */
646 NDIS_STATUS status = NDIS_STATUS_FAILURE;
647 PNET_BUFFER_LIST newNbl = NULL;
650 * Setup the source port to be the internal port to as to facilitate the
651 * second OvsLookupFlow.
/* Without both the internal and the external vport the NBL is dropped. */
653 if (ovsFwdCtx->switchContext->internalVport == NULL ||
654 ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
655 OvsClearTunTxCtx(ovsFwdCtx);
656 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
657 L"OVS-Dropped since either internal or external port is absent");
658 return NDIS_STATUS_FAILURE;
660 ovsFwdCtx->srcVportNo =
661 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
663 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
664 ovsFwdCtx->fwdDetail->SourceNicIndex =
665 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
667 /* Do the encap. Encap function does not consume the NBL. */
/*
 * tunnelTxNic is dereferenced without a check here; the caller visible in
 * this file (OvsOutputForwardingCtx) only calls in when it is non-NULL.
 */
668 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
669 case OVS_VPORT_TYPE_GRE:
670 status = OvsEncapGre(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
671 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
672 &ovsFwdCtx->layers, &newNbl);
674 case OVS_VPORT_TYPE_VXLAN:
675 status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
676 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
677 &ovsFwdCtx->layers, &newNbl);
679 case OVS_VPORT_TYPE_STT:
680 status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
681 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
682 &ovsFwdCtx->layers, &newNbl);
685 ASSERT(! "Tx: Unhandled tunnel type");
688 /* Reset the tunnel context so that it doesn't get used after this point. */
689 OvsClearTunTxCtx(ovsFwdCtx);
691 if (status == NDIS_STATUS_SUCCESS) {
/* The original NBL is completed; the encapsulated one becomes curNbl. */
693 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
694 L"Complete after cloning NBL for encapsulation");
695 ovsFwdCtx->curNbl = newNbl;
696 status = OvsDoFlowLookupOutput(ovsFwdCtx);
697 ASSERT(ovsFwdCtx->curNbl == NULL);
700 * XXX: Temporary freeing of the packet until we register a
701 * callback to IP helper.
703 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
704 L"OVS-Dropped due to encap failure");
705 ovsActionStats.failedEncap++;
/* Encap failure is deliberately reported to the caller as success. */
706 status = NDIS_STATUS_SUCCESS;
713 * --------------------------------------------------------------------------
715 * Decapsulate the incoming NBL based on the tunnel type and goes through
716 * the flow lookup for the inner packet.
718 * Note: IP checksum is validated here, but L4 checksum validation needs
719 * to be done by the corresponding tunnel types.
722 * The NBL in 'ovsFwdCtx' is consumed.
723 * --------------------------------------------------------------------------
/*
 * Rx tunneling entry point: validates the IP checksum of curNbl, decapsulates
 * it according to the type of the saved Rx tunnel vport, completes the
 * original NBL, re-initialises ovsFwdCtx around the decapsulated NBL with the
 * default source port id, and runs OvsDoFlowLookupOutput() on it. The Rx
 * tunnel context is cleared on the paths visible at the end of the function.
 *
 * NOTE(review): the statements that leave the function after a checksum or
 * decap failure, the break/default lines, the labels (if any) and the final
 * return are not visible in this listing (embedded numbering has gaps);
 * confirm the control flow against the full file.
 */
725 static __inline NDIS_STATUS
726 OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
728 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
729 PNET_BUFFER_LIST newNbl = NULL;
/* Saved locally because completing the NBL below resets tunnelRxNic. */
730 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
731 PCWSTR dropReason = L"OVS-dropped due to new decap packet";
733 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
734 != NDIS_STATUS_SUCCESS) {
735 ovsActionStats.failedChecksum++;
736 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
741 * Decap port functions should return a new NBL if it was copied, and
742 * this new NBL should be setup as the ovsFwdCtx->curNbl.
745 switch(tunnelRxVport->ovsType) {
746 case OVS_VPORT_TYPE_GRE:
747 status = OvsDecapGre(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
748 &ovsFwdCtx->tunKey, &newNbl);
750 case OVS_VPORT_TYPE_VXLAN:
751 status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
752 &ovsFwdCtx->tunKey, &newNbl);
754 case OVS_VPORT_TYPE_STT:
755 status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
756 &ovsFwdCtx->tunKey, &newNbl);
/* Success without a new NBL only changes the drop reason that is reported. */
757 if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
758 /* This was an STT-LSO Fragment */
759 dropReason = L"OVS-STT segment is cached";
763 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
764 tunnelRxVport->ovsType);
765 ASSERT(! "Rx: Unhandled tunnel type");
766 status = NDIS_STATUS_NOT_SUPPORTED;
769 if (status != NDIS_STATUS_SUCCESS) {
770 ovsActionStats.failedDecap++;
775 * tunnelRxNic and other fields will be cleared, re-init the context
778 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
781 /* Decapsulated packet is in a new NBL */
782 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
/*
 * NOTE(review): the NDIS_STATUS returned by OvsInitForwardingCtx() is
 * ignored here, unlike at its other call sites in this file.
 */
783 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
784 newNbl, tunnelRxVport->portNo, 0,
785 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
786 ovsFwdCtx->completionList,
787 &ovsFwdCtx->layers, FALSE);
790 * Set the NBL's SourcePortId and SourceNicIndex to default values to
791 * keep NDIS happy when we forward the packet.
793 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
794 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
796 status = OvsDoFlowLookupOutput(ovsFwdCtx);
798 ASSERT(ovsFwdCtx->curNbl == NULL);
799 OvsClearTunRxCtx(ovsFwdCtx);
804 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
805 L"OVS-dropped due to decap failure");
806 OvsClearTunRxCtx(ovsFwdCtx);
812 * --------------------------------------------------------------------------
813 * OvsOutputForwardingCtx --
814 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
815 * the ports added so far into 'ovsFwdCtx'.
818 * This function consumes the NBL - either by forwarding it successfully to
819 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
821 * Also makes sure that the list of destination ports - tunnel or otherwise is
823 * --------------------------------------------------------------------------
/*
 * Emits the NBL in ovsFwdCtx. When destination ports were added, the pending
 * destinations are committed with UpdateNetBufferListDestinations() and the
 * NBL is sent through OvsSendNBLIngress(); if a tunnel vport is also pending,
 * a copy made with OvsPartialCopyNBL() is used to continue the tunnel
 * pipeline. A pending Tx tunnel vport leads to OvsTunnelPortTx(), otherwise a
 * pending Rx tunnel vport leads to OvsTunnelPortRx(). On failure the NBL is
 * completed with the recorded drop reason.
 *
 * NOTE(review): the declarations of nb and dropReason, several braces and
 * else lines, any goto/label lines and the final return are not visible in
 * this listing (embedded numbering has gaps); confirm the control flow
 * against the full file.
 */
825 static __inline NDIS_STATUS
826 OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
/*
 * NOTE(review): initialised with STATUS_SUCCESS while the rest of this file
 * uses NDIS_STATUS_SUCCESS for NDIS_STATUS values.
 */
828 NDIS_STATUS status = STATUS_SUCCESS;
829 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
833 * Handle the case where the some of the destination ports are tunneled
834 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
835 * tunneling pipeline starts when we output the packet to tunneled port.
837 if (ovsFwdCtx->destPortsSizeOut > 0) {
838 PNET_BUFFER_LIST newNbl = NULL;
/*
 * Destinations not yet committed: the free-slot count reported by NDIS minus
 * the slots this context still considers unused.
 */
840 UINT32 portsToUpdate =
841 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
842 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
844 ASSERT(ovsFwdCtx->destinationPorts != NULL);
847 * Create a copy of the packet in order to do encap on it later. Also,
848 * don't copy the offload context since the encap'd packet has a
849 * different set of headers. This will change when we implement offloads
850 * before doing encapsulation.
852 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
/* NOTE(review): no use of nb is visible in this listing. */
853 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
854 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
855 0, 0, TRUE /*copy NBL info*/);
856 if (newNbl == NULL) {
857 status = NDIS_STATUS_RESOURCES;
858 ovsActionStats.noCopiedNbl++;
859 dropReason = L"Dropped due to failure to create NBL copy.";
864 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
865 ASSERT(portsToUpdate > 0);
866 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
867 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
868 portsToUpdate, ovsFwdCtx->destinationPorts);
869 if (status != NDIS_STATUS_SUCCESS) {
/*
 * NOTE(review): newNbl is only assigned inside the tunnel branch above, so
 * it can still be NULL here; confirm that OvsCompleteNBL() accepts NULL or
 * that a guard exists on a line not visible in this listing.
 */
870 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
871 ovsActionStats.cannotGrowDest++;
872 dropReason = L"Dropped due to failure to update destinations.";
876 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
877 ovsFwdCtx->sendFlags);
878 /* End this pipeline by resetting the corresponding context. */
879 ovsFwdCtx->destPortsSizeOut = 0;
880 ovsFwdCtx->curNbl = NULL;
/*
 * Continue on the copy, keeping the tunnel info (resetTunnelInfo is FALSE).
 * NOTE(review): presumably guarded by a newNbl check on a line not visible
 * in this listing (numbered 881).
 */
882 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
883 newNbl, ovsFwdCtx->srcVportNo, 0,
884 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
885 ovsFwdCtx->completionList,
886 &ovsFwdCtx->layers, FALSE);
887 if (status != NDIS_STATUS_SUCCESS) {
888 dropReason = L"Dropped due to resouces.";
/* Tx takes precedence over Rx when deciding which tunnel pipeline to run. */
894 if (ovsFwdCtx->tunnelTxNic != NULL) {
895 status = OvsTunnelPortTx(ovsFwdCtx);
896 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
897 ASSERT(ovsFwdCtx->tunKey.dst == 0);
898 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
899 status = OvsTunnelPortRx(ovsFwdCtx);
900 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
901 ASSERT(ovsFwdCtx->tunKey.dst == 0);
903 ASSERT(ovsFwdCtx->curNbl == NULL);
/* Failure path: complete whatever NBL the context still holds. */
908 if (status != NDIS_STATUS_SUCCESS) {
909 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
917 * --------------------------------------------------------------------------
918 * OvsLookupFlowOutput --
919 * Utility function for external callers to do flow extract, lookup,
920 * actions execute on a given NBL.
922 * Note: If this is being used from a callback function, make sure that the
923 * arguments specified are still valid in the asynchronous context.
926 * This function consumes the NBL.
927 * --------------------------------------------------------------------------
/*
 * Entry point for external callers: builds a fresh forwarding context for
 * curNbl with the internal vport as source (tunnel info reset, no header
 * info supplied), then runs OvsDoFlowLookupOutput() on it. If context
 * initialisation fails the NBL is completed as a drop.
 *
 * NOTE(review): the return-type line, the compList parameter line, the
 * status declaration and the statements after the failure drop are not
 * visible in this listing (embedded numbering jumps 930 -> 932 and
 * 954 -> 960); confirm against the full file.
 */
930 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
932 PNET_BUFFER_LIST curNbl)
935 OvsForwardingContext ovsFwdCtx;
936 POVS_VPORT_ENTRY internalVport =
937 (POVS_VPORT_ENTRY)switchContext->internalVport;
939 /* XXX: make sure comp list was not a stack variable previously. */
940 OvsCompletionList *completionList = (OvsCompletionList *)compList;
943 * XXX: can internal port disappear while we are busy doing ARP resolution?
944 * It could, but will we get this callback from IP helper in that case. Need
/*
 * The assert is the only guard visible here before internalVport->portNo is
 * read below.
 */
947 ASSERT(switchContext->internalVport);
948 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
949 internalVport->portNo, 0,
950 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
951 completionList, NULL, TRUE);
952 if (status != NDIS_STATUS_SUCCESS) {
953 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
954 L"OVS-Dropped due to resources");
960 * XXX: We need to acquire the dispatch lock and the datapath lock.
/* The status returned by OvsDoFlowLookupOutput() is not used here. */
963 OvsDoFlowLookupOutput(&ovsFwdCtx);
968 * --------------------------------------------------------------------------
969 * OvsOutputBeforeSetAction --
970 * Function to be called to complete one set of actions on an NBL, before
971 * we start the next one.
972 * --------------------------------------------------------------------------
/*
 * Flushes the outputs accumulated so far before a set-type action modifies
 * the packet: copies the current NBL, sends the original through
 * OvsOutputForwardingCtx(), then re-initialises ovsFwdCtx around the copy
 * with the original source vport number. Returns NDIS_STATUS_RESOURCES when
 * the copy could not be made.
 *
 * NOTE(review): the else line before the re-init and the final return are
 * not visible in this listing (embedded numbering has gaps); confirm against
 * the full file.
 */
974 static __inline NDIS_STATUS
975 OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
977 PNET_BUFFER_LIST newNbl;
978 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
981 * Create a copy and work on the copy after this point. The original NBL is
982 * forwarded. One reason to not use the copy for forwarding is that
983 * ports have already been added to the original NBL, and it might be
984 * inefficient/impossible to remove/re-add them to the copy. There's no
985 * notion of removing the ports, the ports need to be marked as
986 * "isExcluded". There's seems no real advantage to retaining the original
987 * and sending out the copy instead.
989 * XXX: We are copying the offload context here. This is to handle actions
991 * outport, pop_vlan(), outport, push_vlan(), outport
993 * copy size needs to include inner ether + IP + TCP, need to revisit
994 * if we support IP options.
995 * XXX Head room needs to include the additional encap.
996 * XXX copySize check is not considering multiple NBs.
/* The copy is taken first; its NULL check happens after the send below. */
998 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
999 0, 0, TRUE /*copy NBL info*/);
/* There must be something pending to output: ports or a tunnel vport. */
1001 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
1002 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
1004 /* Send the original packet out and save the original source port number */
1005 UINT32 tempVportNo = ovsFwdCtx->srcVportNo;
1006 status = OvsOutputForwardingCtx(ovsFwdCtx);
1007 ASSERT(ovsFwdCtx->curNbl == NULL);
1008 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
1009 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
1010 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
1012 /* If we didn't make a copy, can't continue. */
1013 if (newNbl == NULL) {
1014 ovsActionStats.noCopiedNbl++;
1015 return NDIS_STATUS_RESOURCES;
1018 /* Finish the remaining actions with the new NBL */
/* If sending the original failed, the copy is released instead of used. */
1019 if (status != NDIS_STATUS_SUCCESS) {
1020 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
/* Tunnel info is kept (resetTunnelInfo is FALSE) for the remaining actions. */
1022 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1023 newNbl, tempVportNo, 0,
1024 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1025 ovsFwdCtx->completionList,
1026 &ovsFwdCtx->layers, FALSE);
1034 * --------------------------------------------------------------------------
1035 * OvsPopFieldInPacketBuf --
1036 * Function to pop a specified field of length 'shiftLength' located at
1037 * 'shiftOffset' from the ethernet header. The data on the left of the
1038 * 'shiftOffset' is right shifted.
1040 * Returns a pointer to the new start in 'bufferData'.
1041 * --------------------------------------------------------------------------
/*
 * Removes shiftLength bytes located at shiftOffset from the start of the
 * packet: works on a copy of the NBL, saves the first shiftOffset bytes,
 * writes them back shiftLength bytes further in, and advances the NET_BUFFER
 * data start by shiftLength. The new start address is stored through
 * bufferData.
 *
 * NOTE(review): the shiftOffset, shiftLength and bufferData parameter lines,
 * the declarations of curNb, curMdl, bufferStart and status, and the
 * conditions guarding several statements (the newNbl NULL check, the
 * bufferStart NULL check, and any guard around the bufferData store) are not
 * visible in this listing (embedded numbering has gaps). OvsPopVlanInPktBuf
 * passes NULL for bufferData, so a NULL guard before the store is presumably
 * on a missing line - confirm against the full file.
 */
1043 static __inline NDIS_STATUS
1044 OvsPopFieldInPacketBuf(OvsForwardingContext *ovsFwdCtx,
1052 UINT32 packetLen, mdlLen;
1053 PNET_BUFFER_LIST newNbl;
/*
 * NOTE(review): declared as an array of PUINT8 (pointers), yet it is only
 * used as scratch storage for shiftOffset bytes; UINT8 looks like the
 * intended element type. The only bound on shiftOffset visible here is the
 * lower-bound assert below; nothing caps it at the size of this buffer.
 */
1055 PUINT8 tempBuffer[ETH_HEADER_LENGTH];
1057 ASSERT(shiftOffset > ETH_ADDR_LENGTH);
1059 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1060 0, 0, TRUE /* copy NBL info */);
1062 ovsActionStats.noCopiedNbl++;
1063 return NDIS_STATUS_RESOURCES;
1066 /* Complete the original NBL and create a copy to modify. */
1067 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
/*
 * The context is rebuilt around the copy with a NULL completion list, so a
 * later OvsCompleteNBLForwardingCtx() takes its OvsCompleteNBL() path.
 */
1069 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext, newNbl,
1070 ovsFwdCtx->srcVportNo, 0,
1071 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1072 NULL, &ovsFwdCtx->layers, FALSE);
1073 if (status != NDIS_STATUS_SUCCESS) {
1074 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1075 L"Dropped due to resouces");
1076 return NDIS_STATUS_RESOURCES;
1079 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1080 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1081 ASSERT(curNb->Next == NULL);
1082 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1083 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
/*
 * NOTE(review): on this return and the NDIS_STATUS_FAILURE return below the
 * copy stays in ovsFwdCtx->curNbl; presumably the caller completes it.
 */
1085 return NDIS_STATUS_RESOURCES;
/* Only the bytes from the current MDL offset onwards belong to the packet. */
1087 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1088 /* Bail out if L2 + shiftLength is not contiguous in the first buffer. */
1089 if (MIN(packetLen, mdlLen) < sizeof(EthHdr) + shiftLength) {
1091 return NDIS_STATUS_FAILURE;
1093 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/*
 * Save the leading shiftOffset bytes, rewrite them shiftLength bytes further
 * in (overwriting the popped field), then drop the stale leading bytes.
 */
1094 RtlCopyMemory(tempBuffer, bufferStart, shiftOffset);
1095 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, shiftOffset);
1096 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1099 *bufferData = bufferStart + shiftLength;
1102 return NDIS_STATUS_SUCCESS;
1107 * --------------------------------------------------------------------------
1108 * OvsPopVlanInPktBuf --
1109 * Function to pop a VLAN tag when the tag is in the packet buffer.
1110 * --------------------------------------------------------------------------
/*
 * Thin wrapper around OvsPopFieldInPacketBuf(): the field removed is one
 * 802.1Q tag header, located after two DL_EUI48 values (presumably the
 * destination and source MAC addresses - confirm against the frame layout).
 * The shifted frame start is not needed here, hence the NULL last argument.
 * Returns the status of OvsPopFieldInPacketBuf() unchanged.
 */
1112 static __inline NDIS_STATUS
1113 OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
1116 * Declare a dummy vlanTag structure since we need to compute the size
1117 * of shiftLength. The NDIS one is a unionized structure.
1119 NDIS_PACKET_8021Q_INFO vlanTag = {0};
1120 UINT32 shiftLength = sizeof(vlanTag.TagHeader);
1121 UINT32 shiftOffset = sizeof(DL_EUI48) + sizeof(DL_EUI48);
1123 return OvsPopFieldInPacketBuf(ovsFwdCtx, shiftOffset, shiftLength, NULL);
1128 * --------------------------------------------------------------------------
1129 * OvsActionMplsPop --
1130 * Function to pop the first MPLS label from the current packet.
1131 * --------------------------------------------------------------------------
/*
 * Removes MPLS_HLEN bytes located right after the Ethernet header by calling
 * OvsPopFieldInPacketBuf(), which also reports where the shifted Ethernet
 * header now starts. On the success path the Ethernet type is replaced with
 * the 'ethertype' argument when the header still carries an MPLS ethertype,
 * and the cached L3/L4 offsets in 'ovsFwdCtx->layers' are reduced by
 * MPLS_HLEN.
 */
1133 static __inline NDIS_STATUS
1134 OvsActionMplsPop(OvsForwardingContext *ovsFwdCtx,
1137 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1138 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1139 EthHdr *ethHdr = NULL;
/* 'ethHdr' is an out-parameter: pass its address so the callee can set it. */
1141 status = OvsPopFieldInPacketBuf(ovsFwdCtx, sizeof(*ethHdr),
1142 MPLS_HLEN, (PUINT8*)&ethHdr);
1143 if (status == NDIS_STATUS_SUCCESS) {
1144 if (ethHdr && OvsEthertypeIsMpls(ethHdr->Type)) {
1145 ethHdr->Type = ethertype;
/* The headers behind the popped label moved MPLS_HLEN bytes closer. */
1148 layers->l3Offset -= MPLS_HLEN;
1149 layers->l4Offset -= MPLS_HLEN;
1157 * --------------------------------------------------------------------------
1158 * OvsActionMplsPush --
1159 * Function to push the MPLS label into the current packet.
1160 * --------------------------------------------------------------------------
/*
 * Inserts one MPLS header between the Ethernet header and the rest of the
 * frame. The packet is first replaced by an OVS-owned partial copy and
 * 'ovsFwdCtx' is re-initialized around it; the data start is then retreated by
 * MPLS_HLEN bytes, the Ethernet header is moved to the new start, and the
 * label stack entry from 'mpls' is written right behind it.
 *
 * Returns NDIS_STATUS_SUCCESS on success and NDIS_STATUS_RESOURCES on the
 * copy / context-init / MDL-query failure paths.
 */
1162 static __inline NDIS_STATUS
1163 OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx,
1164 const struct ovs_action_push_mpls *mpls)
1167 PNET_BUFFER curNb = NULL;
1169 PUINT8 bufferStart = NULL;
1170 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1171 EthHdr *ethHdr = NULL;
1172 MPLSHdr *mplsHdr = NULL;
1173 UINT32 mdlLen = 0, curMdlOffset = 0;
1174 PNET_BUFFER_LIST newNbl;
/*
 * NOTE(review): MPLS_HLEN is presumably the headroom reserved in the copy for
 * the new label - confirm against OvsPartialCopyNBL().
 */
1176 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1177 layers->l3Offset, MPLS_HLEN, TRUE);
1179 ovsActionStats.noCopiedNbl++;
1180 return NDIS_STATUS_RESOURCES;
/* The original NBL is completed; from here on only the copy is used. */
1182 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1183 L"Complete after partial copy.");
1185 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1186 newNbl, ovsFwdCtx->srcVportNo, 0,
1187 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1188 NULL, &ovsFwdCtx->layers, FALSE);
1189 if (status != NDIS_STATUS_SUCCESS) {
1190 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1191 L"OVS-Dropped due to resources");
1192 return NDIS_STATUS_RESOURCES;
1195 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1196 ASSERT(curNb->Next == NULL);
/* Grow the packet at the front by the size of one MPLS header. */
1198 status = NdisRetreatNetBufferDataStart(curNb, MPLS_HLEN, 0, NULL);
1199 if (status != NDIS_STATUS_SUCCESS) {
1203 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1204 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1206 ovsActionStats.noResource++;
1207 return NDIS_STATUS_RESOURCES;
1210 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1211 mdlLen -= curMdlOffset;
/*
 * NOTE(review): this only checks for MPLS_HLEN bytes, while the statements
 * below touch sizeof(*ethHdr) + MPLS_HLEN bytes from the data start.
 */
1212 ASSERT(mdlLen >= MPLS_HLEN);
/*
 * Slide the Ethernet header back to the new data start; source and
 * destination overlap, which is why RtlMoveMemory is used. The gap that opens
 * up right after the header receives the label.
 */
1214 ethHdr = (EthHdr *)(bufferStart + curMdlOffset);
1215 RtlMoveMemory(ethHdr, (UINT8*)ethHdr + MPLS_HLEN, sizeof(*ethHdr));
1216 ethHdr->Type = mpls->mpls_ethertype;
1218 mplsHdr = (MPLSHdr *)(ethHdr + 1);
1219 mplsHdr->lse = mpls->mpls_lse;
/* Everything behind the Ethernet header is now MPLS_HLEN bytes further in. */
1221 layers->l3Offset += MPLS_HLEN;
1222 layers->l4Offset += MPLS_HLEN;
1224 return NDIS_STATUS_SUCCESS;
1228 * --------------------------------------------------------------------------
1229 * OvsTunnelAttrToIPv4TunnelKey --
1230 * Convert tunnel attribute to OvsIPv4TunnelKey.
1231 * --------------------------------------------------------------------------
/*
 * Walks the attributes nested inside the OVS_KEY_ATTR_TUNNEL attribute 'attr'
 * and fills 'tunKey' from them: tunnel id, IPv4 source/destination, TOS, TTL,
 * and the key / don't-fragment / checksum flags. The visible exit returns
 * NDIS_STATUS_SUCCESS.
 */
1233 static __inline NDIS_STATUS
1234 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
1235 OvsIPv4TunnelKey *tunKey)
/*
 * NOTE(review): 'attr[0..2]' presumably overlays the key's fields, so clearing
 * it gives the '|=' on 'flags' below a zero starting point - confirm against
 * the OvsIPv4TunnelKey definition.
 */
1240 tunKey->attr[0] = 0;
1241 tunKey->attr[1] = 0;
1242 tunKey->attr[2] = 0;
1243 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
1245 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1246 NlAttrGetSize(attr)) {
1247 switch (NlAttrType(a)) {
1248 case OVS_TUNNEL_KEY_ATTR_ID:
1249 tunKey->tunnelId = NlAttrGetBe64(a);
/* A tunnel id was supplied, so mark the key as carrying one. */
1250 tunKey->flags |= OVS_TNL_F_KEY;
1252 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1253 tunKey->src = NlAttrGetBe32(a);
1255 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1256 tunKey->dst = NlAttrGetBe32(a);
1258 case OVS_TUNNEL_KEY_ATTR_TOS:
1259 tunKey->tos = NlAttrGetU8(a);
1261 case OVS_TUNNEL_KEY_ATTR_TTL:
1262 tunKey->ttl = NlAttrGetU8(a);
/* The two attributes below carry no value; their presence sets a flag. */
1264 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1265 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1267 case OVS_TUNNEL_KEY_ATTR_CSUM:
1268 tunKey->flags |= OVS_TNL_F_CSUM;
1275 return NDIS_STATUS_SUCCESS;
1279 *----------------------------------------------------------------------------
1280 * OvsUpdateEthHeader --
1281 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1283 *----------------------------------------------------------------------------
/*
 * Overwrites the destination and source MAC addresses of the frame in
 * 'ovsFwdCtx->curNbl' with the values from 'ethAttr'. The write goes straight
 * into the first MDL of the current NBL; no copy of the NBL is made in the
 * code visible here.
 *
 * Returns NDIS_STATUS_SUCCESS on success, NDIS_STATUS_RESOURCES on the
 * MDL-query failure path, and NDIS_STATUS_FAILURE when the Ethernet header is
 * not contiguous in the first MDL.
 */
1285 static __inline NDIS_STATUS
1286 OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1287 const struct ovs_key_ethernet *ethAttr)
1293 UINT32 packetLen, mdlLen;
1295 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1296 ASSERT(curNb->Next == NULL);
1297 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1298 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1299 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1301 ovsActionStats.noResource++;
1302 return NDIS_STATUS_RESOURCES;
/* 'mdlLen' now counts only the bytes from the current data start onwards. */
1304 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1306 /* Bail out if the L2 header is not in a contiguous buffer. */
1307 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1309 return NDIS_STATUS_FAILURE;
1311 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1313 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1314 sizeof ethHdr->Destination);
1315 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1317 return NDIS_STATUS_SUCCESS;
1321 *----------------------------------------------------------------------------
1322 * OvsUpdateIPv4Header --
1323 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1325 *----------------------------------------------------------------------------
/*
 * Rewrites the IPv4 source address, destination address, protocol and TTL of
 * the packet in 'ovsFwdCtx->curNbl' to the values in 'ipAttr', patching the IP
 * header checksum and, where a TCP/UDP header is tracked, the L4 checksum
 * incrementally for each field that changes.
 *
 * If the headers to be touched are not contiguous in the first MDL, the NBL is
 * replaced by a partial copy and 'ovsFwdCtx' is re-initialized around it.
 *
 * Returns NDIS_STATUS_SUCCESS on success and NDIS_STATUS_RESOURCES on the
 * MDL-query / copy / context-init failure paths.
 */
1327 static __inline NDIS_STATUS
1328 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1329 const struct ovs_key_ipv4 *ipAttr)
1335 UINT32 mdlLen, hdrSize, packetLen;
1336 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1339 TCPHdr *tcpHdr = NULL;
1340 UDPHdr *udpHdr = NULL;
1342 ASSERT(layers->value != 0);
1345 * Peek into the MDL to get a handle to the IP header and if required
1346 * the TCP/UDP header as well. We check if the required headers are in one
1347 * contiguous MDL, and if not, we copy them over to one MDL.
1349 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1350 ASSERT(curNb->Next == NULL);
1351 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1352 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1353 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1355 ovsActionStats.noResource++;
1356 return NDIS_STATUS_RESOURCES;
1358 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1359 mdlLen -= curMdlOffset;
1360 ASSERT((INT)mdlLen >= 0);
1362 if (layers->isTcp || layers->isUdp) {
/*
 * The parentheses are required: '+' binds tighter than '?:', and without them
 * the L4 offset would become part of the condition instead of the sum.
 */
1363 hdrSize = layers->l4Offset +
1364 (layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr));
1366 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1369 /* Count of number of bytes of valid data there are in the first MDL. */
1370 mdlLen = MIN(packetLen, mdlLen);
1371 if (mdlLen < hdrSize) {
1372 PNET_BUFFER_LIST newNbl;
1373 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1374 hdrSize, 0, TRUE /*copy NBL info*/);
1376 ovsActionStats.noCopiedNbl++;
1377 return NDIS_STATUS_RESOURCES;
1379 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1380 L"Complete after partial copy.");
1382 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1383 newNbl, ovsFwdCtx->srcVportNo, 0,
1384 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1385 NULL, &ovsFwdCtx->layers, FALSE);
1386 if (status != NDIS_STATUS_SUCCESS) {
1387 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1388 L"OVS-Dropped due to resources");
1389 return NDIS_STATUS_RESOURCES;
1392 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1393 ASSERT(curNb->Next == NULL);
1394 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1395 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1397 ovsActionStats.noResource++;
1398 return NDIS_STATUS_RESOURCES;
1400 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1401 mdlLen -= curMdlOffset;
1402 ASSERT(mdlLen >= hdrSize);
1405 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1407 if (layers->isTcp) {
1408 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1409 } else if (layers->isUdp) {
1410 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1414 * Adjust the IP header inline as dictated by the action, and also update
1415 * the IP and the TCP checksum for the data modified.
1417 * In the future, this could be optimized to make one call to
1418 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1419 * case, we only update the TTL.
1421 if (ipHdr->saddr != ipAttr->ipv4_src) {
1423 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1425 } else if (udpHdr && udpHdr->check) {
1426 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1430 if (ipHdr->check != 0) {
1431 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1434 ipHdr->saddr = ipAttr->ipv4_src;
1436 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1438 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1440 } else if (udpHdr && udpHdr->check) {
1441 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1445 if (ipHdr->check != 0) {
1446 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1449 ipHdr->daddr = ipAttr->ipv4_dst;
1451 if (ipHdr->protocol != ipAttr->ipv4_proto) {
/*
 * TTL and protocol share one 16-bit word of the IP header. The TTL branch
 * below uses the low byte of that word, so the protocol is its high byte:
 * shift by 8. A shift by 16 followed by the 0xff00 mask is always zero and
 * would leave every checksum untouched.
 */
1452 UINT16 oldProto = (ipHdr->protocol << 8) & 0xff00;
1453 UINT16 newProto = (ipAttr->ipv4_proto << 8) & 0xff00;
1455 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1456 } else if (udpHdr && udpHdr->check) {
1457 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1460 if (ipHdr->check != 0) {
1461 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1463 ipHdr->protocol = ipAttr->ipv4_proto;
1465 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
/* Only the IP header checksum is patched here, not the TCP/UDP one. */
1466 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1467 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1468 if (ipHdr->check != 0) {
1469 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1471 ipHdr->ttl = ipAttr->ipv4_ttl;
1474 return NDIS_STATUS_SUCCESS;
1478 * --------------------------------------------------------------------------
1479 * OvsExecuteSetAction --
1480 * Executes a set() action by applying it to the packet and state held in 'ovsFwdCtx'
1481 * --------------------------------------------------------------------------
/*
 * Dispatches on the type of the nested key attribute 'a':
 *   - OVS_KEY_ATTR_ETHERNET: rewrites the MAC addresses via
 *     OvsUpdateEthHeader().
 *   - OVS_KEY_ATTR_IPV4: rewrites the IPv4 header via OvsUpdateIPv4Header().
 *   - OVS_KEY_ATTR_TUNNEL: builds an OvsIPv4TunnelKey from the attribute and
 *     stores it in 'ovsFwdCtx->tunKey'; the packet itself is not touched in
 *     the code visible here.
 * Any other attribute type is only logged.
 */
1483 static __inline NDIS_STATUS
1484 OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1489 enum ovs_key_attr type = NlAttrType(a);
1490 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
1493 case OVS_KEY_ATTR_ETHERNET:
1494 status = OvsUpdateEthHeader(ovsFwdCtx,
1495 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
1498 case OVS_KEY_ATTR_IPV4:
1499 status = OvsUpdateIPv4Header(ovsFwdCtx,
1500 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
1503 case OVS_KEY_ATTR_TUNNEL:
1505 OvsIPv4TunnelKey tunKey;
1506 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1507 ASSERT(status == NDIS_STATUS_SUCCESS);
/* Use the caller-supplied hash when there is one, else hash the flow key. */
1508 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
1509 tunKey.dst_port = key->ipKey.l4.tpDst;
1510 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
1515 OVS_LOG_INFO("Unhandled attribute %#x", type);
1522 * --------------------------------------------------------------------------
1523 * OvsActionsExecute --
1524 * Interpret and execute the specified 'actions' on the specified packet
1525 * 'curNbl'. The expectation is that if the packet needs to be dropped
1526 * (completed) for some reason, it is added to 'completionList' so that the
1527 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1528 * assumed to be generated by OVS and freed up. Otherwise, the function
1529 * consumes the NBL by generating a NDIS send indication for the packet.
1531 * One or more "clone" NBLs may get generated while
1532 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1533 * and the caller does not have to worry about them.
1535 * Success or failure is returned based on whether the specified actions
1536 * were executed successfully on the packet or not.
1537 * --------------------------------------------------------------------------
1540 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1541 OvsCompletionList *completionList,
1542 PNET_BUFFER_LIST curNbl,
1547 OVS_PACKET_HDR_INFO *layers,
1548 const PNL_ATTR actions,
1554 OvsForwardingContext ovsFwdCtx;
1555 PCWSTR dropReason = L"";
1557 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1558 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1560 /* XXX: ASSERT that the flow table lock is held. */
1561 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1562 sendFlags, fwdDetail, completionList,
1564 if (status != NDIS_STATUS_SUCCESS) {
1565 dropReason = L"OVS-initing destination port list failed";
1569 if (actionsLen == 0) {
1570 dropReason = L"OVS-Dropped due to Flow action";
1571 ovsActionStats.zeroActionLen++;
1575 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
1576 switch(NlAttrType(a)) {
1577 case OVS_ACTION_ATTR_OUTPUT:
1578 dstPortID = NlAttrGetU32(a);
1579 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1581 if (status != NDIS_STATUS_SUCCESS) {
1582 dropReason = L"OVS-adding destination port failed";
1587 case OVS_ACTION_ATTR_PUSH_VLAN:
1589 struct ovs_action_push_vlan *vlan;
1591 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1593 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1594 || ovsFwdCtx.tunnelRxNic != NULL) {
1595 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1596 if (status != NDIS_STATUS_SUCCESS) {
1597 dropReason = L"OVS-adding destination failed";
1602 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1603 Ieee8021QNetBufferListInfo);
1604 if (vlanTagValue != NULL) {
1606 * XXX: We don't support double VLAN tag offload. In such cases,
1607 * we need to insert the existing one into the packet buffer,
1608 * and add the new one as offload. This will take care of
1609 * guest tag-in-tag case as well as OVS rules that specify
1614 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
1615 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
1616 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1617 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1619 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1620 Ieee8021QNetBufferListInfo) = vlanTagValue;
1625 case OVS_ACTION_ATTR_POP_VLAN:
1627 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1628 || ovsFwdCtx.tunnelRxNic != NULL) {
1629 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1630 if (status != NDIS_STATUS_SUCCESS) {
1631 dropReason = L"OVS-adding destination failed";
1636 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1637 Ieee8021QNetBufferListInfo) != 0) {
1638 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1639 Ieee8021QNetBufferListInfo) = 0;
1642 * The VLAN tag is inserted into the packet buffer. Pop the tag
1643 * by packet buffer modification.
1645 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1646 if (status != NDIS_STATUS_SUCCESS) {
1647 dropReason = L"OVS-pop vlan action failed";
1654 case OVS_ACTION_ATTR_PUSH_MPLS:
1656 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1657 || ovsFwdCtx.tunnelRxNic != NULL) {
1658 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1659 if (status != NDIS_STATUS_SUCCESS) {
1660 dropReason = L"OVS-adding destination failed";
1665 status = OvsActionMplsPush(&ovsFwdCtx,
1666 (struct ovs_action_push_mpls *)NlAttrGet
1667 ((const PNL_ATTR)a));
1668 if (status != NDIS_STATUS_SUCCESS) {
1669 dropReason = L"OVS-push MPLS action failed";
1672 layers->l3Offset += MPLS_HLEN;
1673 layers->l4Offset += MPLS_HLEN;
1677 case OVS_ACTION_ATTR_POP_MPLS:
1679 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1680 || ovsFwdCtx.tunnelRxNic != NULL) {
1681 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1682 if (status != NDIS_STATUS_SUCCESS) {
1683 dropReason = L"OVS-adding destination failed";
1688 status = OvsActionMplsPop(&ovsFwdCtx, NlAttrGetBe16(a));
1689 if (status != NDIS_STATUS_SUCCESS) {
1690 dropReason = L"OVS-pop MPLS action failed";
1693 layers->l3Offset -= MPLS_HLEN;
1694 layers->l4Offset -= MPLS_HLEN;
1698 case OVS_ACTION_ATTR_USERSPACE:
1700 PNL_ATTR userdataAttr;
1702 POVS_PACKET_QUEUE_ELEM elem;
1703 BOOLEAN isRecv = FALSE;
1705 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
1709 if (vport->isExternal ||
1710 OvsIsTunnelVportType(vport->ovsType)) {
1715 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1716 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
1718 elem = OvsCreateQueueNlPacket((PVOID)userdataAttr,
1719 userdataAttr->nlaLen,
1720 OVS_PACKET_CMD_ACTION,
1721 vport, key, ovsFwdCtx.curNbl,
1722 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
1726 LIST_ENTRY missedPackets;
1727 InitializeListHead(&missedPackets);
1728 InsertTailList(&missedPackets, &elem->link);
1729 OvsQueuePackets(&missedPackets, 1);
1730 dropReason = L"OVS-Completed since packet was copied to "
1733 dropReason = L"OVS-Dropped due to failure to queue to "
1739 case OVS_ACTION_ATTR_SET:
1741 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1742 || ovsFwdCtx.tunnelRxNic != NULL) {
1743 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1744 if (status != NDIS_STATUS_SUCCESS) {
1745 dropReason = L"OVS-adding destination failed";
1750 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
1751 (const PNL_ATTR)NlAttrGet
1752 ((const PNL_ATTR)a));
1753 if (status != NDIS_STATUS_SUCCESS) {
1754 dropReason = L"OVS-set action failed";
1759 case OVS_ACTION_ATTR_SAMPLE:
1761 status = NDIS_STATUS_NOT_SUPPORTED;
1766 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1767 || ovsFwdCtx.tunnelRxNic != NULL) {
1768 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1769 ASSERT(ovsFwdCtx.curNbl == NULL);
1772 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1773 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1774 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1778 * If curNbl != NULL, it implies the NBL has not been freed up so far.
1780 if (ovsFwdCtx.curNbl) {
1781 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);