2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
33 #define OVS_DBG_MOD OVS_DBG_ACTION
/*
 * Event counters bumped while actions are executed in this file. The single
 * global instance below is incremented with plain '++'.
 * NOTE(review): no locking or interlocked increment is visible at the update
 * sites in this file - confirm callers serialize access if exact counts
 * matter.
 * Other members used in this file (rxVxlan, rxStt, txVxlan, txStt, noVport,
 * flowMiss, failedEncap, failedDecap, noCopiedNbl) belong to this structure
 * as well.
 */
36 typedef struct _OVS_ACTION_STATS {
/* A missed packet could not be queued to userspace (OvsDoFlowLookupOutput). */
44 UINT32 failedFlowMiss;
/* Flow extraction returned a failure status (OvsDoFlowLookupOutput). */
46 UINT32 failedFlowExtract;
/* Growing or updating the NBL destination array returned a failure status. */
51 UINT32 cannotGrowDest;
/* OvsValidateIPChecksum() did not return success (OvsTunnelPortRx). */
53 UINT32 failedChecksum;
54 } OVS_ACTION_STATS, *POVS_ACTION_STATS;
/* File-wide counter instance referenced as 'ovsActionStats.<member>++'. */
56 OVS_ACTION_STATS ovsActionStats;
59 * There is a lot of data that needs to be maintained while executing the pipeline
60 * as dictated by the actions of a flow, across different functions at different
61 * levels. Such data is put together in a 'context' structure. Care should be
62 * exercised while adding new members to the structure - only add ones that get
63 * used across multiple stages in the pipeline/get used in multiple functions.
/*
 * Number of destination slots requested from NDIS the first time an NBL's
 * destination array is grown from empty (see OvsAddPorts()).
 */
65 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
/*
 * Per-NBL pipeline state. It is (re)initialised by OvsInitForwardingCtx() and
 * partly reset by OvsCompleteNBLForwardingCtx().
 */
66 typedef struct OvsForwardingContext {
/* Switch whose NDIS handlers, vports and datapath are used for 'curNbl'. */
67 POVS_SWITCH_CONTEXT switchContext;
68 /* The NBL currently used in the pipeline. */
69 PNET_BUFFER_LIST curNbl;
70 /* NDIS forwarding detail for 'curNbl'. */
71 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
72 /* Array of destination ports for 'curNbl'. */
73 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
74 /* send flags while sending 'curNbl' into NDIS. */
76 /* Total number of output ports, used + unused, in 'curNbl'. */
77 UINT32 destPortsSizeIn;
78 /* Total number of used output ports in 'curNbl'. */
79 UINT32 destPortsSizeOut;
81 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
/*
 * When non-NULL, OvsCompleteNBLForwardingCtx() appends 'curNbl' to this list;
 * when NULL, it hands 'curNbl' to OvsCompleteNBL() instead.
 */
84 OvsCompletionList *completionList;
86 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
87 * bridge. ie. during tunneling on the Rx side.
93 * - specified in actions during tunneling Tx
94 * - extracted from an NBL during tunneling Rx
/* Code in this file treats tunKey.dst == 0 as "no tunnel context set". */
96 OvsIPv4TunnelKey tunKey;
100 * To store the output port, when it is a tunneled port. We don't foresee
101 * multiple tunneled ports as outport for any given NBL.
/* Set by OvsDetectTunnelPkt(); read and cleared on the OvsTunnelPortTx() path. */
103 POVS_VPORT_ENTRY tunnelTxNic;
107 * Points to the Internal port on the PIF Bridge, if the packet needs to be
/* Set by OvsDetectTunnelRxPkt(); read and cleared on the OvsTunnelPortRx() path. */
110 POVS_VPORT_ENTRY tunnelRxNic;
112 /* header information */
113 OVS_PACKET_HDR_INFO layers;
114 } OvsForwardingContext;
118 * --------------------------------------------------------------------------
119 * OvsInitForwardingCtx --
120 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
124 * NDIS_STATUS_SUCCESS on success
125 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
126 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
127 * enough for OvsCompleteNBLForwardingCtx() to do its work.
128 * --------------------------------------------------------------------------
/*
 * Parameters, as used by the body below:
 *   ovsFwdCtx       - context to fill in; every field assigned below is
 *                     overwritten.
 *   switchContext   - must be non-NULL (asserted); supplies the NDIS handlers.
 *   curNbl          - NBL that becomes ovsFwdCtx->curNbl.
 *   srcVportNo      - stored verbatim in ovsFwdCtx->srcVportNo.
 *   sendFlags       - stored verbatim in ovsFwdCtx->sendFlags.
 *   fwdDetail       - forwarding detail of 'curNbl'; dereferenced
 *                     unconditionally.
 *   completionList  - stored verbatim; may be NULL (some callers pass NULL).
 *   layers          - header info copied into the context, or the context's
 *                     copy is zeroed. NOTE(review): OvsLookupFlowOutput()
 *                     passes NULL, so the selector is presumably a NULL
 *                     check - confirm.
 *   resetTunnelInfo - when TRUE, tunnelTxNic, tunnelRxNic and tunKey are
 *                     cleared; when FALSE, they keep their current values.
 */
130 static __inline NDIS_STATUS
131 OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
132 POVS_SWITCH_CONTEXT switchContext,
133 PNET_BUFFER_LIST curNbl,
136 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
137 OvsCompletionList *completionList,
138 OVS_PACKET_HDR_INFO *layers,
139 BOOLEAN resetTunnelInfo)
142 ASSERT(switchContext);
147 * Set values for curNbl and switchContext so upon failures, we have enough
148 * information to do cleanup.
150 ovsFwdCtx->curNbl = curNbl;
151 ovsFwdCtx->switchContext = switchContext;
152 ovsFwdCtx->completionList = completionList;
153 ovsFwdCtx->fwdDetail = fwdDetail;
/*
 * Fetch the existing destination array only when the NBL already has free
 * destination slots; the NULL assignment further down is presumably the
 * alternative branch (TODO confirm).
 */
155 if (fwdDetail->NumAvailableDestinations > 0) {
157 * XXX: even though MSDN says GetNetBufferListDestinations() returns
158 * NDIS_STATUS, the header files say otherwise.
160 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
161 switchContext->NdisSwitchContext, curNbl,
162 &ovsFwdCtx->destinationPorts);
164 ASSERT(ovsFwdCtx->destinationPorts);
165 /* Ensure that none of the elements are consumed yet. */
166 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
167 fwdDetail->NumAvailableDestinations);
169 ovsFwdCtx->destinationPorts = NULL;
/* Capacity starts at the free-slot count reported by NDIS; nothing used yet. */
171 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
172 ovsFwdCtx->destPortsSizeOut = 0;
173 ovsFwdCtx->srcVportNo = srcVportNo;
174 ovsFwdCtx->sendFlags = sendFlags;
/*
 * Several callers pass &ovsFwdCtx->layers as 'layers', which makes the copy
 * below a self-assignment that leaves the stored header info unchanged.
 */
176 ovsFwdCtx->layers = *layers;
178 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
/* Tunnel state is preserved across re-inits unless the caller asks for a reset. */
180 if (resetTunnelInfo) {
181 ovsFwdCtx->tunnelTxNic = NULL;
182 ovsFwdCtx->tunnelRxNic = NULL;
183 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
186 return NDIS_STATUS_SUCCESS;
190 * --------------------------------------------------------------------------
191 * OvsDetectTunnelRxPkt --
192 * Utility function for an RX packet to detect its tunnel type.
195 * True - if the tunnel type was detected.
196 * False - if not a tunnel packet or tunnel type not supported.
197 * --------------------------------------------------------------------------
/*
 * Looks for a tunnel vport matching the L4 destination port in 'flowKey':
 *   - non-fragmented UDP: lookup with type OVS_VPORT_TYPE_VXLAN, counted in
 *     ovsActionStats.rxVxlan;
 *   - non-fragmented TCP: lookup counted in ovsActionStats.rxStt (presumably
 *     type STT - TODO confirm the lookup's type argument).
 * The vport found is stored in ovsFwdCtx->tunnelRxNic, which is asserted to
 * be NULL beforehand.
 *
 * Parameters:
 *   ovsFwdCtx - supplies switchContext for the lookup; tunnelRxNic is written.
 *   flowKey   - only ipKey.nwFrag, ipKey.nwProto and ipKey.l4.tpDst are read.
 *
 * NOTE(review): the UDP branch converts tpDst with ntohs() while the TCP
 * branch uses htons(). Both do the same byte swap on typical platforms, but
 * ntohs() states the network-to-host intent - consider making them
 * consistent.
 */
199 static __inline BOOLEAN
200 OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
201 const OvsFlowKey *flowKey)
203 POVS_VPORT_ENTRY tunnelVport = NULL;
205 /* XXX: we should also check for the length of the UDP payload to pick
206 * packets only if they are at least VXLAN header size.
208 if (!flowKey->ipKey.nwFrag &&
209 flowKey->ipKey.nwProto == IPPROTO_UDP) {
210 UINT16 dstPort = ntohs(flowKey->ipKey.l4.tpDst);
211 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
213 OVS_VPORT_TYPE_VXLAN);
215 ovsActionStats.rxVxlan++;
217 } else if (!flowKey->ipKey.nwFrag &&
218 flowKey->ipKey.nwProto == IPPROTO_TCP) {
219 UINT16 dstPort = htons(flowKey->ipKey.l4.tpDst);
220 tunnelVport = OvsFindTunnelVportByDstPort(ovsFwdCtx->switchContext,
224 ovsActionStats.rxStt++;
229 // We might get tunnel packets even before the tunnel gets initialized.
231 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
232 ovsFwdCtx->tunnelRxNic = tunnelVport;
240 * --------------------------------------------------------------------------
241 * OvsDetectTunnelPkt --
242 * Utility function to detect if a packet is to be subjected to
243 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
244 * port, destination port, packet contents, and previously setup tunnel
248 * True - If the packet is to be subjected to tunneling.
249 * In case of invalid tunnel context, the tunneling functionality is
250 * a no-op and is completed within this function itself by consuming
251 * all of the tunneling context.
252 * False - If not a tunnel packet or tunnel type not supported. Caller should
253 * process the packet as a non-tunnel packet.
254 * --------------------------------------------------------------------------
/*
 * Classifies an output to 'dstVport' as tunnel Rx, tunnel Tx or neither, and
 * records the result in 'ovsFwdCtx'.
 *
 * Parameters:
 *   ovsFwdCtx - fwdDetail->SourcePortId, switchContext, srcVportNo and tunKey
 *               are read; tunnelRxNic (via OvsDetectTunnelRxPkt()),
 *               tunnelTxNic and tunKey.dst may be written.
 *   dstVport  - destination vport; only its ovsType is examined here.
 *   flowKey   - forwarded unchanged to OvsDetectTunnelRxPkt().
 */
256 static __inline BOOLEAN
257 OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
258 const POVS_VPORT_ENTRY dstVport,
259 const OvsFlowKey *flowKey)
/* Rx candidate: the destination is an internal-type vport. */
261 if (OvsIsInternalVportType(dstVport->ovsType)) {
264 * The source of NBL during tunneling Rx could be the external
265 * port or if it is being executed from userspace, the source port is
268 BOOLEAN validSrcPort =
269 (ovsFwdCtx->fwdDetail->SourcePortId ==
270 ovsFwdCtx->switchContext->virtualExternalPortId) ||
271 (ovsFwdCtx->fwdDetail->SourcePortId ==
272 NDIS_SWITCH_DEFAULT_PORT_ID);
/* On a match, tunnelRxNic has been populated by OvsDetectTunnelRxPkt(). */
274 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
275 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
276 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
279 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
280 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
281 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
285 * The destination port is a tunnel port. Encapsulation must be
286 * performed only on packets that originate from:
288 * - a bridge-internal port (packets generated from userspace)
291 * If the packet will not be encapsulated, consume the tunnel context
/* The source vport is only looked up when a real source port number is set. */
294 if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO) {
296 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
297 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
300 (vport->ovsType != OVS_VPORT_TYPE_NETDEV &&
301 !OvsIsBridgeInternalVport(vport))) {
/* Zeroing tunKey.dst is how the tunnel context is dropped for this NBL. */
302 ovsFwdCtx->tunKey.dst = 0;
306 /* Tunnel the packet only if tunnel context is set. */
307 if (ovsFwdCtx->tunKey.dst != 0) {
308 switch(dstVport->ovsType) {
309 case OVS_VPORT_TYPE_VXLAN:
310 ovsActionStats.txVxlan++;
312 case OVS_VPORT_TYPE_STT:
313 ovsActionStats.txStt++;
/* Remember the tunnel vport; OvsOutputForwardingCtx() later starts the Tx pipeline. */
316 ovsFwdCtx->tunnelTxNic = dstVport;
327 * --------------------------------------------------------------------------
329 * Add the specified destination vport into the forwarding context. If the
330 * vport is a VIF/external port, it is added directly to the NBL. If it is
331 * a tunneling port, it is NOT added to the NBL.
334 * NDIS_STATUS_SUCCESS on success
335 * Other NDIS_STATUS upon failure.
336 * --------------------------------------------------------------------------
/*
 * Parameters, as used by the body below:
 *   ovsFwdCtx        - pipeline context; destinationPorts, destPortsSizeIn and
 *                      destPortsSizeOut are updated here.
 *   flowKey          - forwarded unchanged to OvsDetectTunnelPkt().
 *   dstPortId        - looked up with OvsFindVportByPortNo().
 *   preserveVLAN     - copied into the destination entry's PreserveVLAN.
 *   preservePriority - copied into the destination entry's PreservePriority.
 *
 * A missing or non-connected vport is counted in noVport and reported as
 * NDIS_STATUS_SUCCESS, so the caller's action processing is not aborted.
 */
338 static __inline NDIS_STATUS
339 OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
341 NDIS_SWITCH_PORT_ID dstPortId,
342 BOOLEAN preserveVLAN,
343 BOOLEAN preservePriority)
345 POVS_VPORT_ENTRY vport;
346 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
348 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
351 * We hold the dispatch lock that protects the list of vports, so vports
352 * validated here can be added as destinations safely before we call into
355 * Some of the vports can be tunnelled ports as well in which case
356 * they should be added to a separate list of tunnelled destination ports
357 * instead of the VIF ports. The context for the tunnel is settable
358 * in OvsForwardingContext.
360 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
361 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
363 * There may be some latency between a port disappearing, and userspace
364 * updating the recalculated flows. In the meantime, handle invalid
367 ovsActionStats.noVport++;
368 return NDIS_STATUS_SUCCESS;
370 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
/*
 * Tx stats are bumped for every connected destination vport, including the
 * bridge-internal and tunnel cases that return early below. Only the first
 * NET_BUFFER's data length is counted.
 */
371 vport->stats.txPackets++;
372 vport->stats.txBytes +=
373 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
/* Bridge-internal vports are counted above but never added as NDIS destinations. */
375 if (OvsIsBridgeInternalVport(vport)) {
376 return NDIS_STATUS_SUCCESS;
/* Tunnel destinations are recorded in the context, not in the NBL. */
379 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
380 return NDIS_STATUS_SUCCESS;
/*
 * The destination array is full (or does not exist yet): grow it. The first
 * growth from empty requests OVS_DEST_PORTS_ARRAY_MIN_SIZE slots; later
 * growths commit the pending ports and request 'destPortsSizeIn' more slots.
 */
383 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
384 if (ovsFwdCtx->destPortsSizeIn == 0) {
385 ASSERT(ovsFwdCtx->destinationPorts == NULL);
386 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
388 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
389 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
390 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
391 &ovsFwdCtx->destinationPorts);
392 if (status != NDIS_STATUS_SUCCESS) {
393 ovsActionStats.cannotGrowDest++;
/* The new capacity is taken from what NDIS reports, not from the request. */
396 ovsFwdCtx->destPortsSizeIn =
397 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
398 ASSERT(ovsFwdCtx->destinationPorts);
400 ASSERT(ovsFwdCtx->destinationPorts != NULL);
403 * A ULONG value that specifies the total number of
404 * NDIS_SWITCH_PORT_DESTINATION elements in the
405 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
408 * A ULONG value that specifies the number of
409 * NDIS_SWITCH_PORT_DESTINATION elements in the
410 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
411 * specify port destinations.
413 * NumAvailableDestinations:
414 * A value that specifies the number of unused extensible switch
415 * destination ports elements within an NET_BUFFER_LIST structure.
417 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
418 ovsFwdCtx->destPortsSizeIn);
419 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
420 ovsFwdCtx->destPortsSizeOut -
421 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
422 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
424 * Before we grow the array of destination ports, the current set
425 * of ports needs to be committed. Only the ports added since the
426 * last commit need to be part of the new update.
428 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
429 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
430 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
431 ovsFwdCtx->destinationPorts);
432 if (status != NDIS_STATUS_SUCCESS) {
433 ovsActionStats.cannotGrowDest++;
436 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
437 ovsFwdCtx->destPortsSizeIn);
438 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
439 ovsFwdCtx->destPortsSizeOut);
440 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
442 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
443 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
444 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
445 if (status != NDIS_STATUS_SUCCESS) {
446 ovsActionStats.cannotGrowDest++;
449 ASSERT(ovsFwdCtx->destinationPorts != NULL);
/* Growing by 'destPortsSizeIn' slots doubles the tracked capacity. */
450 ovsFwdCtx->destPortsSizeIn <<= 1;
454 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
456 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
457 ovsFwdCtx->destPortsSizeOut);
/* Fill the next free slot; OvsOutputForwardingCtx() commits it to NDIS later. */
459 fwdPort->PortId = vport->portId;
460 fwdPort->NicIndex = vport->nicIndex;
461 fwdPort->IsExcluded = 0;
462 fwdPort->PreserveVLAN = preserveVLAN;
463 fwdPort->PreservePriority = preservePriority;
464 ovsFwdCtx->destPortsSizeOut += 1;
466 return NDIS_STATUS_SUCCESS;
471 * --------------------------------------------------------------------------
472 * OvsClearTunTxCtx --
473 * Utility function to clear tx tunneling context.
474 * --------------------------------------------------------------------------
/*
 * Drops the Tx tunnel state: forgets the tunnel output vport and zeroes
 * tunKey.dst. Only tunKey.dst is cleared; the other tunKey members are left
 * as they are.
 */
477 OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
479 ovsFwdCtx->tunnelTxNic = NULL;
480 ovsFwdCtx->tunKey.dst = 0;
485 * --------------------------------------------------------------------------
486 * OvsClearTunRxCtx --
487 * Utility function to clear rx tunneling context.
488 * --------------------------------------------------------------------------
/*
 * Drops the Rx tunnel state: forgets the tunnel Rx vport and zeroes
 * tunKey.dst. Only tunKey.dst is cleared; the other tunKey members are left
 * as they are.
 */
491 OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
493 ovsFwdCtx->tunnelRxNic = NULL;
494 ovsFwdCtx->tunKey.dst = 0;
499 * --------------------------------------------------------------------------
500 * OvsCompleteNBLForwardingCtx --
501 * This utility function is responsible for freeing/completing an NBL - either
502 * by adding it to a completion list or by freeing it.
505 * It also resets the necessary fields in 'ovsFwdCtx'.
506 * --------------------------------------------------------------------------
/*
 * Parameters, as used by the body below:
 *   ovsFwdCtx  - context whose 'curNbl' is released; curNbl, destPortsSizeOut,
 *                tunnelTxNic and tunnelRxNic are reset.
 *   dropReason - wide string wrapped in an NDIS_STRING through
 *                RtlInitUnicodeString().
 * tunKey is not modified here; callers use OvsClearTunTxCtx() or
 * OvsClearTunRxCtx() for that.
 */
509 OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
512 NDIS_STRING filterReason;
514 RtlInitUnicodeString(&filterReason, dropReason);
/* With a completion list the NBL is queued on it; otherwise it is completed directly. */
515 if (ovsFwdCtx->completionList) {
516 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
517 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
519 ovsFwdCtx->curNbl = NULL;
521 /* If there is no completionList, we assume this is ovs created NBL */
522 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
523 ovsFwdCtx->curNbl, TRUE);
524 ASSERT(ovsFwdCtx->curNbl == NULL);
526 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
527 * using these fields should reset the values at the end of the pipeline. */
528 ovsFwdCtx->destPortsSizeOut = 0;
529 ovsFwdCtx->tunnelTxNic = NULL;
530 ovsFwdCtx->tunnelRxNic = NULL;
534 * --------------------------------------------------------------------------
535 * OvsDoFlowLookupOutput --
536 * Function to be used for the second stage of a tunneling workflow, ie.:
537 * - On the encapsulated packet on Tx path, to do a flow extract, flow
538 * lookup and executing the actions.
539 * - On the decapsulated packet on Rx path, to do a flow extract, flow
540 * lookup and executing the actions.
542 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
543 * until the new buffer management framework is adopted.
546 * The NBL in 'ovsFwdCtx' is consumed.
547 * --------------------------------------------------------------------------
/*
 * Second-stage lookup on ovsFwdCtx->curNbl:
 *   1. resolve the source vport from ovsFwdCtx->srcVportNo;
 *   2. extract the flow key, passing the tunnel key only when tunKey.dst != 0;
 *   3. on a flow hit, run the flow's actions through OvsActionsExecute();
 *   4. on a miss, build an OVS_PACKET_CMD_MISS packet for userspace and
 *      complete the NBL.
 *
 * Parameter:
 *   ovsFwdCtx - context describing the NBL; 'curNbl' is NULL on the paths
 *               visible below (completed, or handed to OvsActionsExecute()).
 */
549 static __inline NDIS_STATUS
550 OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
556 POVS_VPORT_ENTRY vport =
557 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
/* A vanished source vport drops the packet but still reports success. */
558 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
559 ASSERT(FALSE); // XXX: let's catch this for now
560 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
561 L"OVS-Dropped due to internal/tunnel port removal");
562 ovsActionStats.noVport++;
563 return NDIS_STATUS_SUCCESS;
565 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
567 /* Assert that in the Rx direction, key is always setup. */
568 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
570 OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
571 &key, &ovsFwdCtx->layers,
572 ovsFwdCtx->tunKey.dst != 0 ? &ovsFwdCtx->tunKey : NULL);
573 if (status != NDIS_STATUS_SUCCESS) {
574 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
575 L"OVS-Flow extract failed");
576 ovsActionStats.failedFlowExtract++;
580 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
/* Hit path: update flow usage and hit counters, then execute the flow's actions. */
582 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
583 ovsFwdCtx->switchContext->datapath.hits++;
584 status = OvsActionsExecute(ovsFwdCtx->switchContext,
585 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
586 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
587 &key, &hash, &ovsFwdCtx->layers,
588 flow->actions, flow->actionsLen);
/* The NBL now belongs to OvsActionsExecute(); drop this context's reference. */
589 ovsFwdCtx->curNbl = NULL;
/* Miss path: packets destined for userspace are collected on a local list. */
591 LIST_ENTRY missedPackets;
593 ovsFwdCtx->switchContext->datapath.misses++;
594 InitializeListHead(&missedPackets);
595 status = OvsCreateAndAddPackets(NULL, 0, OVS_PACKET_CMD_MISS, vport,
596 &key,ovsFwdCtx->curNbl,
597 FALSE, &ovsFwdCtx->layers,
598 ovsFwdCtx->switchContext, &missedPackets, &num);
600 OvsQueuePackets(&missedPackets, num);
/* Either way the NBL is completed here; only the counter and status differ. */
602 if (status == NDIS_STATUS_SUCCESS) {
603 /* Complete the packet since it was copied to user buffer. */
604 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
605 L"OVS-Dropped since packet was copied to userspace");
606 ovsActionStats.flowMiss++;
607 status = NDIS_STATUS_SUCCESS;
609 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
610 L"OVS-Dropped due to failure to queue to userspace");
611 status = NDIS_STATUS_FAILURE;
612 ovsActionStats.failedFlowMiss++;
620 * --------------------------------------------------------------------------
622 * The start function for Tx tunneling - encapsulates the packet, and
623 * outputs the packet on the PIF bridge.
626 * The NBL in 'ovsFwdCtx' is consumed.
627 * --------------------------------------------------------------------------
/*
 * Encapsulates ovsFwdCtx->curNbl for the tunnel vport in
 * ovsFwdCtx->tunnelTxNic, then runs the second flow lookup on the
 * encapsulated NBL with the internal vport as source.
 *
 * Parameter:
 *   ovsFwdCtx - tunnelTxNic must be set; the Tx tunnel context is cleared on
 *               every path visible below.
 *
 * Status notes from the code below: a missing internal or external vport
 * returns NDIS_STATUS_FAILURE, whereas an encap failure is counted in
 * failedEncap and then reported as NDIS_STATUS_SUCCESS.
 */
629 static __inline NDIS_STATUS
630 OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
632 NDIS_STATUS status = NDIS_STATUS_FAILURE;
633 PNET_BUFFER_LIST newNbl = NULL;
636 * Setup the source port to be the internal port to as to facilitate the
637 * second OvsLookupFlow.
639 if (ovsFwdCtx->switchContext->internalVport == NULL ||
640 ovsFwdCtx->switchContext->virtualExternalVport == NULL) {
641 OvsClearTunTxCtx(ovsFwdCtx);
642 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
643 L"OVS-Dropped since either internal or external port is absent");
644 return NDIS_STATUS_FAILURE;
/* Re-source the packet from the internal vport, in the context and the NBL detail. */
646 ovsFwdCtx->srcVportNo =
647 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
649 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
650 ovsFwdCtx->fwdDetail->SourceNicIndex =
651 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
653 /* Do the encap. Encap function does not consume the NBL. */
654 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
655 case OVS_VPORT_TYPE_VXLAN:
656 status = OvsEncapVxlan(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
657 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
658 &ovsFwdCtx->layers, &newNbl);
660 case OVS_VPORT_TYPE_STT:
661 status = OvsEncapStt(ovsFwdCtx->tunnelTxNic, ovsFwdCtx->curNbl,
662 &ovsFwdCtx->tunKey, ovsFwdCtx->switchContext,
663 &ovsFwdCtx->layers, &newNbl);
/* Any other type leaves 'status' at its initial NDIS_STATUS_FAILURE. */
666 ASSERT(! "Tx: Unhandled tunnel type");
669 /* Reset the tunnel context so that it doesn't get used after this point. */
670 OvsClearTunTxCtx(ovsFwdCtx);
672 if (status == NDIS_STATUS_SUCCESS) {
/* Release the original NBL, then continue the pipeline on the encapsulated one. */
674 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
675 L"Complete after cloning NBL for encapsulation");
676 ovsFwdCtx->curNbl = newNbl;
677 status = OvsDoFlowLookupOutput(ovsFwdCtx);
678 ASSERT(ovsFwdCtx->curNbl == NULL);
681 * XXX: Temporary freeing of the packet until we register a
682 * callback to IP helper.
684 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
685 L"OVS-Dropped due to encap failure");
686 ovsActionStats.failedEncap++;
687 status = NDIS_STATUS_SUCCESS;
694 * --------------------------------------------------------------------------
696 * Decapsulate the incoming NBL based on the tunnel type and goes through
697 * the flow lookup for the inner packet.
699 * Note: IP checksum is validated here, but L4 checksum validation needs
700 * to be done by the corresponding tunnel types.
703 * The NBL in 'ovsFwdCtx' is consumed.
704 * --------------------------------------------------------------------------
/*
 * Decapsulates ovsFwdCtx->curNbl according to the type of
 * ovsFwdCtx->tunnelRxNic, re-initialises the context around the decapsulated
 * NBL and runs the second flow lookup on it.
 *
 * Parameter:
 *   ovsFwdCtx - tunnelRxNic must be set on entry. The Rx tunnel context is
 *               cleared through OvsClearTunRxCtx() on the paths visible
 *               below.
 */
706 static __inline NDIS_STATUS
707 OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
709 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
710 PNET_BUFFER_LIST newNbl = NULL;
/* Saved because OvsCompleteNBLForwardingCtx() resets ovsFwdCtx->tunnelRxNic. */
711 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
712 PCWSTR dropReason = L"OVS-dropped due to new decap packet";
714 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
715 != NDIS_STATUS_SUCCESS) {
716 ovsActionStats.failedChecksum++;
717 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
722 * Decap port functions should return a new NBL if it was copied, and
723 * this new NBL should be setup as the ovsFwdCtx->curNbl.
726 switch(tunnelRxVport->ovsType) {
727 case OVS_VPORT_TYPE_VXLAN:
728 status = OvsDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
729 &ovsFwdCtx->tunKey, &newNbl);
731 case OVS_VPORT_TYPE_STT:
732 status = OvsDecapStt(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
733 &ovsFwdCtx->tunKey, &newNbl);
/* Success without a new NBL is not an error; only the completion reason changes. */
734 if (status == NDIS_STATUS_SUCCESS && newNbl == NULL) {
735 /* This was an STT-LSO Fragment */
736 dropReason = L"OVS-STT segment is cached";
740 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
741 tunnelRxVport->ovsType);
742 ASSERT(! "Rx: Unhandled tunnel type");
743 status = NDIS_STATUS_NOT_SUPPORTED;
746 if (status != NDIS_STATUS_SUCCESS) {
747 ovsActionStats.failedDecap++;
752 * tunnelRxNic and other fields will be cleared, re-init the context
/* The encapsulated original is released with whichever 'dropReason' was chosen above. */
755 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
758 /* Decapsulated packet is in a new NBL */
759 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
/*
 * resetTunnelInfo is FALSE so the tunKey filled in by the decap call is kept.
 * The return value of this call is not checked.
 */
760 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
761 newNbl, tunnelRxVport->portNo, 0,
762 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
763 ovsFwdCtx->completionList,
764 &ovsFwdCtx->layers, FALSE);
767 * Set the NBL's SourcePortId and SourceNicIndex to default values to
768 * keep NDIS happy when we forward the packet.
770 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
771 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
773 status = OvsDoFlowLookupOutput(ovsFwdCtx);
775 ASSERT(ovsFwdCtx->curNbl == NULL);
776 OvsClearTunRxCtx(ovsFwdCtx);
/* Failure path: complete the NBL with a fixed reason and clear the Rx tunnel state. */
781 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
782 L"OVS-dropped due to decap failure");
783 OvsClearTunRxCtx(ovsFwdCtx);
789 * --------------------------------------------------------------------------
790 * OvsOutputForwardingCtx --
791 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
792 * the ports added so far into 'ovsFwdCtx'.
795 * This function consumes the NBL - either by forwarding it successfully to
796 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
798 * Also makes sure that the list of destination ports - tunnel or otherwise is
800 * --------------------------------------------------------------------------
/*
 * Flushes the outputs accumulated in 'ovsFwdCtx':
 *   - when destination ports were added, they are committed to NDIS and the
 *     NBL is sent with OvsSendNBLIngress(); if a tunnel output is also
 *     pending, a copy of the NBL is made first and the context is
 *     re-initialised around that copy;
 *   - a pending tunnel output is then handled by OvsTunnelPortTx() or
 *     OvsTunnelPortRx().
 * On a non-success status the current NBL is completed with 'dropReason'.
 *
 * Parameter:
 *   ovsFwdCtx - pipeline context; its NBL is consumed.
 */
802 static __inline NDIS_STATUS
803 OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
805 NDIS_STATUS status = STATUS_SUCCESS;
806 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
810 * Handle the case where the some of the destination ports are tunneled
811 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
812 * tunneling pipeline starts when we output the packet to tunneled port.
814 if (ovsFwdCtx->destPortsSizeOut > 0) {
815 PNET_BUFFER_LIST newNbl = NULL;
/*
 * Ports added since the last commit: the free slots NDIS still reports minus
 * the slots this context has not used yet.
 */
817 UINT32 portsToUpdate =
818 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
819 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
821 ASSERT(ovsFwdCtx->destinationPorts != NULL);
824 * Create a copy of the packet in order to do encap on it later. Also,
825 * don't copy the offload context since the encap'd packet has a
826 * different set of headers. This will change when we implement offloads
827 * before doing encapsulation.
829 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
/* NOTE(review): 'nb' is assigned here but no later use appears in this function - confirm it is needed. */
830 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
831 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
832 0, 0, TRUE /*copy NBL info*/);
833 if (newNbl == NULL) {
834 status = NDIS_STATUS_RESOURCES;
835 ovsActionStats.noCopiedNbl++;
836 dropReason = L"Dropped due to failure to create NBL copy.";
841 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
842 ASSERT(portsToUpdate > 0);
843 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
844 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
845 portsToUpdate, ovsFwdCtx->destinationPorts);
846 if (status != NDIS_STATUS_SUCCESS) {
/*
 * NOTE(review): 'newNbl' is still NULL here when no tunnel output was pending
 * (it is only assigned inside the tunnel branch above) - confirm that
 * OvsCompleteNBL() tolerates a NULL NBL.
 */
847 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
848 ovsActionStats.cannotGrowDest++;
849 dropReason = L"Dropped due to failure to update destinations.";
853 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
854 ovsFwdCtx->sendFlags);
855 /* End this pipeline by resetting the corresponding context. */
856 ovsFwdCtx->destPortsSizeOut = 0;
857 ovsFwdCtx->curNbl = NULL;
/* Continue with the copy; resetTunnelInfo is FALSE so the pending tunnel state survives. */
859 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
860 newNbl, ovsFwdCtx->srcVportNo, 0,
861 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
862 ovsFwdCtx->completionList,
863 &ovsFwdCtx->layers, FALSE);
864 if (status != NDIS_STATUS_SUCCESS) {
865 dropReason = L"Dropped due to resouces.";
/* Tx is checked before Rx; each callee is expected to clear its own tunnel state. */
871 if (ovsFwdCtx->tunnelTxNic != NULL) {
872 status = OvsTunnelPortTx(ovsFwdCtx);
873 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
874 ASSERT(ovsFwdCtx->tunKey.dst == 0);
875 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
876 status = OvsTunnelPortRx(ovsFwdCtx);
877 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
878 ASSERT(ovsFwdCtx->tunKey.dst == 0);
880 ASSERT(ovsFwdCtx->curNbl == NULL);
885 if (status != NDIS_STATUS_SUCCESS) {
886 OvsCompleteNBLForwardingCtx(ovsFwdCtx, dropReason);
894 * --------------------------------------------------------------------------
895 * OvsLookupFlowOutput --
896 * Utility function for external callers to do flow extract, lookup,
897 * actions execute on a given NBL.
899 * Note: If this is being used from a callback function, make sure that the
900 * arguments specified are still valid in the asynchronous context.
903 * This function consumes the NBL.
904 * --------------------------------------------------------------------------
/*
 * Builds a fresh forwarding context on the stack for 'curNbl', sourced from
 * the switch's internal vport, and runs OvsDoFlowLookupOutput() on it.
 *
 * Parameters, as used by the body below:
 *   switchContext - switch to operate on; its internalVport is dereferenced.
 *   compList      - cast to an OvsCompletionList pointer and stored in the
 *                   context.
 *   curNbl        - NBL to process.
 *
 * NOTE(review): 'internalVport' is guarded only by an ASSERT before
 * internalVport->portNo is read, so a NULL internal vport would be
 * dereferenced in builds where ASSERT compiles out - confirm callers
 * guarantee it is set.
 */
907 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
909 PNET_BUFFER_LIST curNbl)
912 OvsForwardingContext ovsFwdCtx;
913 POVS_VPORT_ENTRY internalVport =
914 (POVS_VPORT_ENTRY)switchContext->internalVport;
916 /* XXX: make sure comp list was not a stack variable previously. */
917 OvsCompletionList *completionList = (OvsCompletionList *)compList;
920 * XXX: can internal port disappear while we are busy doing ARP resolution?
921 * It could, but will we get this callback from IP helper in that case. Need
924 ASSERT(switchContext->internalVport);
/* No header info is supplied (layers == NULL) and the tunnel state starts clean (TRUE). */
925 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
926 internalVport->portNo, 0,
927 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
928 completionList, NULL, TRUE);
929 if (status != NDIS_STATUS_SUCCESS) {
930 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
931 L"OVS-Dropped due to resources");
937 * XXX: We need to acquire the dispatch lock and the datapath lock.
/* The status returned by this call is not captured. */
940 OvsDoFlowLookupOutput(&ovsFwdCtx);
945 * --------------------------------------------------------------------------
946 * OvsOutputBeforeSetAction --
947 * Function to be called to complete one set of actions on an NBL, before
948 * we start the next one.
949 * --------------------------------------------------------------------------
/*
 * Flushes the outputs gathered so far on the current NBL and switches the
 * context over to a copy, so that later actions do not touch the NBL that
 * was already sent.
 *
 * Parameter:
 *   ovsFwdCtx - pipeline context; on success it is re-initialised around the
 *               copy, keeping the original source vport number.
 *
 * Returns NDIS_STATUS_RESOURCES when the copy could not be made.
 */
951 static __inline NDIS_STATUS
952 OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
954 PNET_BUFFER_LIST newNbl;
955 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
958 * Create a copy and work on the copy after this point. The original NBL is
959 * forwarded. One reason to not use the copy for forwarding is that
960 * ports have already been added to the original NBL, and it might be
961 * inefficient/impossible to remove/re-add them to the copy. There's no
962 * notion of removing the ports, the ports need to be marked as
963 * "isExcluded". There's seems no real advantage to retaining the original
964 * and sending out the copy instead.
966 * XXX: We are copying the offload context here. This is to handle actions
968 * outport, pop_vlan(), outport, push_vlan(), outport
970 * copy size needs to include inner ether + IP + TCP, need to revisit
971 * if we support IP options.
972 * XXX Head room needs to include the additional encap.
973 * XXX copySize check is not considering multiple NBs.
/* The copy is taken before the original is sent, since sending consumes 'curNbl'. */
975 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
976 0, 0, TRUE /*copy NBL info*/);
978 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
979 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
981 /* Send the original packet out and save the original source port number */
982 UINT32 tempVportNo = ovsFwdCtx->srcVportNo;
/* The original is sent even when the copy failed; the NULL check comes afterwards. */
983 status = OvsOutputForwardingCtx(ovsFwdCtx);
984 ASSERT(ovsFwdCtx->curNbl == NULL);
985 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
986 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
987 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
989 /* If we didn't make a copy, can't continue. */
990 if (newNbl == NULL) {
991 ovsActionStats.noCopiedNbl++;
992 return NDIS_STATUS_RESOURCES;
995 /* Finish the remaining actions with the new NBL */
996 if (status != NDIS_STATUS_SUCCESS) {
997 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
/* 'tempVportNo' restores the source port saved before the send. */
999 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1000 newNbl, tempVportNo, 0,
1001 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1002 ovsFwdCtx->completionList,
1003 &ovsFwdCtx->layers, FALSE);
1011 * --------------------------------------------------------------------------
1012 * OvsPopVlanInPktBuf --
1013 * Function to pop a VLAN tag when the tag is in the packet buffer.
1014 * --------------------------------------------------------------------------
/*
 * Removes an in-buffer VLAN tag from a copy of the current NBL: the first
 * 'dataLength' bytes (two DL_EUI48 addresses) are moved forward by
 * 'shiftLength' bytes (the size of the 802.1Q tag header) and the NET_BUFFER
 * data start is then advanced by the same amount.
 *
 * Parameter:
 *   ovsFwdCtx - pipeline context; the original NBL is completed and the
 *               context is re-initialised around the copy.
 *
 * Returns NDIS_STATUS_SUCCESS on success, NDIS_STATUS_RESOURCES or
 * NDIS_STATUS_FAILURE on the error paths below.
 * NOTE(review): the error returns after the re-init leave the copied NBL in
 * ovsFwdCtx->curNbl without completing it here - presumably the caller
 * completes it; confirm.
 */
1016 static __inline NDIS_STATUS
1017 OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
/* Bytes that must slide over the tag: destination plus source MAC address. */
1022 ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
1023 UINT32 packetLen, mdlLen;
1024 PNET_BUFFER_LIST newNbl;
1028 * Declare a dummy vlanTag structure since we need to compute the size
1029 * of shiftLength. The NDIS one is a unionized structure.
1031 NDIS_PACKET_8021Q_INFO vlanTag = {0};
1032 ULONG shiftLength = sizeof (vlanTag.TagHeader);
/*
 * NOTE(review): this is an array of PUINT8 (pointers) although it is only
 * used as a 'dataLength'-byte scratch buffer; an array of UINT8 is presumably
 * what was meant. As declared it is larger than needed, not smaller, so the
 * copies below stay in bounds.
 */
1033 PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
1035 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1036 0, 0, TRUE /* copy NBL info */);
1038 ovsActionStats.noCopiedNbl++;
1039 return NDIS_STATUS_RESOURCES;
1042 /* Complete the original NBL and create a copy to modify. */
1043 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
/*
 * NOTE(review): completionList is passed as NULL here, whereas the other
 * re-init call sites in this file pass ovsFwdCtx->completionList - confirm
 * this is intentional.
 */
1045 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1046 newNbl, ovsFwdCtx->srcVportNo, 0,
1047 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1048 NULL, &ovsFwdCtx->layers, FALSE);
1049 if (status != NDIS_STATUS_SUCCESS) {
1050 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1051 L"Dropped due to resouces");
1052 return NDIS_STATUS_RESOURCES;
/* Only the first NET_BUFFER is handled; the ASSERT documents the single-NB assumption. */
1055 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1056 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1057 ASSERT(curNb->Next == NULL);
1058 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1059 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1061 return NDIS_STATUS_RESOURCES;
/* Bytes of packet data actually present in the first MDL. */
1063 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1064 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1065 if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
1067 return NDIS_STATUS_FAILURE;
1069 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
/* Staging through 'tempBuffer' keeps the two RtlCopyMemory() calls from overlapping. */
1070 RtlCopyMemory(tempBuffer, bufferStart, dataLength);
1071 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
/* Skip the now-unused leading bytes so the frame begins at the moved addresses. */
1072 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1074 return NDIS_STATUS_SUCCESS;
1078 * --------------------------------------------------------------------------
1079 * OvsTunnelAttrToIPv4TunnelKey --
1080 * Convert tunnel attribute to OvsIPv4TunnelKey.
1081 * --------------------------------------------------------------------------
/*
 * Fills 'tunKey' from the attributes nested inside 'attr', which is asserted
 * to be of type OVS_KEY_ATTR_TUNNEL. The only return visible in this listing
 * yields NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): this listing omits several short source lines (braces, the
 * declarations of 'a' and 'rem', and whatever terminates each case below);
 * confirm against the complete file before relying on these comments.
 */
1083 static __inline NDIS_STATUS
1084 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
1085 OvsIPv4TunnelKey *tunKey)
/*
 * Reset the three 'attr' slots before individual fields are populated
 * (presumably these overlay the whole key, which would also clear 'flags'
 * before it is OR-ed into below -- TODO confirm against the
 * OvsIPv4TunnelKey definition).
 */
1090 tunKey->attr[0] = 0;
1091 tunKey->attr[1] = 0;
1092 tunKey->attr[2] = 0;
1093 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
/* Visit each nested attribute and record its value or flag in 'tunKey'. */
1095 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1096 NlAttrGetSize(attr)) {
1097 switch (NlAttrType(a)) {
/* A tunnel ID is stored and also flagged as present via OVS_TNL_F_KEY. */
1098 case OVS_TUNNEL_KEY_ATTR_ID:
1099 tunKey->tunnelId = NlAttrGetBe64(a);
1100 tunKey->flags |= OVS_TNL_F_KEY;
1102 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1103 tunKey->src = NlAttrGetBe32(a);
1105 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1106 tunKey->dst = NlAttrGetBe32(a);
1108 case OVS_TUNNEL_KEY_ATTR_TOS:
1109 tunKey->tos = NlAttrGetU8(a);
1111 case OVS_TUNNEL_KEY_ATTR_TTL:
1112 tunKey->ttl = NlAttrGetU8(a);
/* The next two attributes carry no value here; they only set a flag. */
1114 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1115 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1117 case OVS_TUNNEL_KEY_ATTR_CSUM:
1118 tunKey->flags |= OVS_TNL_F_CSUM;
1125 return NDIS_STATUS_SUCCESS;
1129 *----------------------------------------------------------------------------
1130 * OvsUpdateEthHeader --
1131 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on the
1133 *----------------------------------------------------------------------------
/*
 * Overwrites the destination and source MAC addresses of the Ethernet header
 * at the start of the first NET_BUFFER of ovsFwdCtx->curNbl with
 * ethAttr->eth_dst and ethAttr->eth_src.
 *
 * Returns NDIS_STATUS_RESOURCES (after incrementing
 * ovsActionStats.noResource), NDIS_STATUS_FAILURE when the Ethernet header
 * does not fit in the valid part of the first MDL, or NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): this listing omits several short source lines (braces, local
 * declarations, and the condition guarding the RESOURCES return); confirm
 * against the complete file.
 */
1135 static __inline NDIS_STATUS
1136 OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1137 const struct ovs_key_ethernet *ethAttr)
1143 UINT32 packetLen, mdlLen;
/* A single NET_BUFFER per NBL is expected here, as the ASSERT states. */
1145 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1146 ASSERT(curNb->Next == NULL);
1147 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1148 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
/* Fetch the mapped address and the byte length of the current MDL. */
1149 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
/*
 * NOTE(review): the condition guarding this failure path is not visible in
 * this listing; presumably it tests 'bufferStart' for NULL -- confirm.
 */
1151 ovsActionStats.noResource++;
1152 return NDIS_STATUS_RESOURCES;
/* Exclude the bytes of the MDL that lie before the current data offset. */
1154 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1156 /* Bail out if the L2 header is not in a contiguous buffer. */
1157 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1159 return NDIS_STATUS_FAILURE;
/* The header starts at the current data offset inside the mapped MDL. */
1161 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
1163 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1164 sizeof ethHdr->Destination);
1165 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1167 return NDIS_STATUS_SUCCESS;
1171 *----------------------------------------------------------------------------
1172 * OvsUpdateIPv4Header --
1173 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on the
1175 *----------------------------------------------------------------------------
1177 static __inline NDIS_STATUS
1178 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1179 const struct ovs_key_ipv4 *ipAttr)
1185 UINT32 mdlLen, hdrSize, packetLen;
1186 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1189 TCPHdr *tcpHdr = NULL;
1190 UDPHdr *udpHdr = NULL;
1192 ASSERT(layers->value != 0);
1195 * Peek into the MDL to get a handle to the IP header and if required
1196 * the TCP/UDP header as well. We check if the required headers are in one
1197 * contiguous MDL, and if not, we copy them over to one MDL.
1199 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1200 ASSERT(curNb->Next == NULL);
1201 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1202 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1203 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1205 ovsActionStats.noResource++;
1206 return NDIS_STATUS_RESOURCES;
1208 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1209 mdlLen -= curMdlOffset;
1210 ASSERT((INT)mdlLen >= 0);
1212 if (layers->isTcp || layers->isUdp) {
1213 hdrSize = layers->l4Offset +
1214 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1216 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1219 /* Count of number of bytes of valid data there are in the first MDL. */
1220 mdlLen = MIN(packetLen, mdlLen);
1221 if (mdlLen < hdrSize) {
1222 PNET_BUFFER_LIST newNbl;
1223 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1224 hdrSize, 0, TRUE /*copy NBL info*/);
1226 ovsActionStats.noCopiedNbl++;
1227 return NDIS_STATUS_RESOURCES;
1229 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1230 L"Complete after partial copy.");
1232 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1233 newNbl, ovsFwdCtx->srcVportNo, 0,
1234 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1235 NULL, &ovsFwdCtx->layers, FALSE);
1236 if (status != NDIS_STATUS_SUCCESS) {
1237 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1238 L"OVS-Dropped due to resources");
1239 return NDIS_STATUS_RESOURCES;
1242 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1243 ASSERT(curNb->Next == NULL);
1244 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1245 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1247 ovsActionStats.noResource++;
1248 return NDIS_STATUS_RESOURCES;
1250 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1251 mdlLen -= curMdlOffset;
1252 ASSERT(mdlLen >= hdrSize);
1255 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1257 if (layers->isTcp) {
1258 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1259 } else if (layers->isUdp) {
1260 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1264 * Adjust the IP header inline as dictated by the action, nad also update
1265 * the IP and the TCP checksum for the data modified.
1267 * In the future, this could be optimized to make one call to
1268 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1269 * case, we only update the TTL.
1271 if (ipHdr->saddr != ipAttr->ipv4_src) {
1273 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1275 } else if (udpHdr && udpHdr->check) {
1276 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1280 if (ipHdr->check != 0) {
1281 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1284 ipHdr->saddr = ipAttr->ipv4_src;
1286 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1288 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1290 } else if (udpHdr && udpHdr->check) {
1291 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1295 if (ipHdr->check != 0) {
1296 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1299 ipHdr->daddr = ipAttr->ipv4_dst;
1301 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1302 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1303 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1305 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1306 } else if (udpHdr && udpHdr->check) {
1307 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1310 if (ipHdr->check != 0) {
1311 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1313 ipHdr->protocol = ipAttr->ipv4_proto;
1315 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1316 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1317 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1318 if (ipHdr->check != 0) {
1319 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1321 ipHdr->ttl = ipAttr->ipv4_ttl;
1324 return NDIS_STATUS_SUCCESS;
1328 * --------------------------------------------------------------------------
1329 * OvsExecuteSetAction --
1330 * Executes a set() action, but storing the actions into 'ovsFwdCtx'
1331 * --------------------------------------------------------------------------
/*
 * Dispatches one set() action on the type of attribute 'a' and returns the
 * resulting status, which starts out as NDIS_STATUS_SUCCESS.
 *
 * NOTE(review): this listing omits part of the parameter list, the switch
 * statement itself and the statements that end each case; the declarations
 * of 'key', 'hash' and 'a' are therefore not visible here. Confirm against
 * the complete file.
 */
1333 static __inline NDIS_STATUS
1334 OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1339 enum ovs_key_attr type = NlAttrType(a);
1340 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
/* Ethernet and IPv4 keys are applied to the packet by the header helpers. */
1343 case OVS_KEY_ATTR_ETHERNET:
1344 status = OvsUpdateEthHeader(ovsFwdCtx,
1345 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
1348 case OVS_KEY_ATTR_IPV4:
1349 status = OvsUpdateIPv4Header(ovsFwdCtx,
1350 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
/*
 * A tunnel key is not applied to the packet here: it is decoded into a
 * local key, completed with a flow hash ('*hash' when 'hash' is non-NULL,
 * otherwise OvsHashFlow(key)) and the destination port taken from 'key',
 * and then copied into ovsFwdCtx->tunKey.
 */
1353 case OVS_KEY_ATTR_TUNNEL:
1355 OvsIPv4TunnelKey tunKey;
1356 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1357 ASSERT(status == NDIS_STATUS_SUCCESS);
1358 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
1359 tunKey.dst_port = key->ipKey.l4.tpDst;
1360 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
1363 case OVS_KEY_ATTR_SKB_MARK:
1364 /* XXX: Not relevant to Hyper-V. Return OK */
/*
 * The attribute types listed below are grouped ahead of the "Unhandled
 * attribute" log statement (presumably together with a default label that is
 * not visible in this listing -- confirm).
 */
1366 case OVS_KEY_ATTR_UNSPEC:
1367 case OVS_KEY_ATTR_ENCAP:
1368 case OVS_KEY_ATTR_ETHERTYPE:
1369 case OVS_KEY_ATTR_IN_PORT:
1370 case OVS_KEY_ATTR_VLAN:
1371 case OVS_KEY_ATTR_ICMP:
1372 case OVS_KEY_ATTR_ICMPV6:
1373 case OVS_KEY_ATTR_ARP:
1374 case OVS_KEY_ATTR_ND:
1375 case __OVS_KEY_ATTR_MAX:
1377 OVS_LOG_INFO("Unhandled attribute %#x", type);
1384 * --------------------------------------------------------------------------
1385 * OvsActionsExecute --
1386 * Interpret and execute the specified 'actions' on the specified packet
1387 * 'curNbl'. The expectation is that if the packet needs to be dropped
1388 * (completed) for some reason, it is added to 'completionList' so that the
1389 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1390 * assumed to be generated by OVS and freed up. Otherwise, the function
1391 * consumes the NBL by generating a NDIS send indication for the packet.
1393 * There are one or more of "clone" NBLs that may get generated while
1394 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1395 * and the caller does not have to worry about them.
1397 * Success or failure is returned based on whether the specified actions
1398 * were executed successfully on the packet or not.
1399 * --------------------------------------------------------------------------
1402 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1403 OvsCompletionList *completionList,
1404 PNET_BUFFER_LIST curNbl,
1409 OVS_PACKET_HDR_INFO *layers,
1410 const PNL_ATTR actions,
1416 OvsForwardingContext ovsFwdCtx;
1417 PCWSTR dropReason = L"";
1419 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1420 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1422 /* XXX: ASSERT that the flow table lock is held. */
1423 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1424 sendFlags, fwdDetail, completionList,
1426 if (status != NDIS_STATUS_SUCCESS) {
1427 dropReason = L"OVS-initing destination port list failed";
1431 if (actionsLen == 0) {
1432 dropReason = L"OVS-Dropped due to Flow action";
1433 ovsActionStats.zeroActionLen++;
1437 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
1438 switch(NlAttrType(a)) {
1439 case OVS_ACTION_ATTR_OUTPUT:
1440 dstPortID = NlAttrGetU32(a);
1441 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1443 if (status != NDIS_STATUS_SUCCESS) {
1444 dropReason = L"OVS-adding destination port failed";
1449 case OVS_ACTION_ATTR_PUSH_VLAN:
1451 struct ovs_action_push_vlan *vlan;
1453 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
1455 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1456 || ovsFwdCtx.tunnelRxNic != NULL) {
1457 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1458 if (status != NDIS_STATUS_SUCCESS) {
1459 dropReason = L"OVS-adding destination failed";
1464 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1465 Ieee8021QNetBufferListInfo);
1466 if (vlanTagValue != NULL) {
1468 * XXX: We don't support double VLAN tag offload. In such cases,
1469 * we need to insert the existing one into the packet buffer,
1470 * and add the new one as offload. This will take care of
1471 * guest tag-in-tag case as well as OVS rules that specify
1476 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
1477 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
1478 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1479 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1481 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1482 Ieee8021QNetBufferListInfo) = vlanTagValue;
1487 case OVS_ACTION_ATTR_POP_VLAN:
1489 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1490 || ovsFwdCtx.tunnelRxNic != NULL) {
1491 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1492 if (status != NDIS_STATUS_SUCCESS) {
1493 dropReason = L"OVS-adding destination failed";
1498 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1499 Ieee8021QNetBufferListInfo) != 0) {
1500 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1501 Ieee8021QNetBufferListInfo) = 0;
1504 * The VLAN tag is inserted into the packet buffer. Pop the tag
1505 * by packet buffer modification.
1507 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1508 if (status != NDIS_STATUS_SUCCESS) {
1509 dropReason = L"OVS-pop vlan action failed";
1516 case OVS_ACTION_ATTR_USERSPACE:
1518 PNL_ATTR userdataAttr;
1520 POVS_PACKET_QUEUE_ELEM elem;
1521 BOOLEAN isRecv = FALSE;
1523 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
1527 if (vport->isExternal ||
1528 OvsIsTunnelVportType(vport->ovsType)) {
1533 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1534 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
1536 elem = OvsCreateQueueNlPacket((PVOID)userdataAttr,
1537 userdataAttr->nlaLen,
1538 OVS_PACKET_CMD_ACTION,
1539 vport, key, ovsFwdCtx.curNbl,
1540 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
1544 LIST_ENTRY missedPackets;
1545 InitializeListHead(&missedPackets);
1546 InsertTailList(&missedPackets, &elem->link);
1547 OvsQueuePackets(&missedPackets, 1);
1548 dropReason = L"OVS-Completed since packet was copied to "
1551 dropReason = L"OVS-Dropped due to failure to queue to "
1557 case OVS_ACTION_ATTR_SET:
1559 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1560 || ovsFwdCtx.tunnelRxNic != NULL) {
1561 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1562 if (status != NDIS_STATUS_SUCCESS) {
1563 dropReason = L"OVS-adding destination failed";
1568 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
1569 (const PNL_ATTR)NlAttrGet
1570 ((const PNL_ATTR)a));
1571 if (status != NDIS_STATUS_SUCCESS) {
1572 dropReason = L"OVS-set action failed";
1577 case OVS_ACTION_ATTR_SAMPLE:
1579 status = NDIS_STATUS_NOT_SUPPORTED;
1584 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1585 || ovsFwdCtx.tunnelRxNic != NULL) {
1586 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1587 ASSERT(ovsFwdCtx.curNbl == NULL);
1590 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1591 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1592 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1596 * If curNbl != NULL, it implies the NBL has not been freed up so far.
1598 if (ovsFwdCtx.curNbl) {
1599 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);