2 * Copyright (c) 2014 VMware, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
32 #define OVS_DBG_MOD OVS_DBG_ACTION
// Counters bumped by the action-execution code in this file, for example
// failedFlowExtract when OvsExtractFlow fails and cannotGrowDest when the
// NBL destination array cannot be updated or grown.
35 typedef struct _OVS_ACTION_STATS {
41 UINT32 failedFlowMiss;
43 UINT32 failedFlowExtract;
48 UINT32 cannotGrowDest;
50 UINT32 failedChecksum;
51 } OVS_ACTION_STATS, *POVS_ACTION_STATS;
// Single file-scope instance of the counters.
// NOTE(review): the increments below are plain ++ on this shared object with
// no synchronization visible in this file - confirm that is acceptable.
53 OVS_ACTION_STATS ovsActionStats;
56 * There is a lot of data that needs to be maintained while executing the pipeline
57 * as dictated by the actions of a flow, across different functions at different
58 * levels. Such data is put together in a 'context' structure. Care should be
59 * exercised while adding new members to the structure - only add ones that get
60 * used across multiple stages in the pipeline/get used in multiple functions.
// Number of destination slots requested by OvsAddPorts the first time it
// grows an NBL that had no destination array at all.
62 #define OVS_DEST_PORTS_ARRAY_MIN_SIZE 2
// Per-packet state that is threaded through the pipeline functions below.
63 typedef struct OvsForwardingContext {
// Switch instance; supplies the NDIS switch handlers and the vport lookups
// used throughout this file.
64 POVS_SWITCH_CONTEXT switchContext;
65 /* The NBL currently used in the pipeline. */
66 PNET_BUFFER_LIST curNbl;
67 /* NDIS forwarding detail for 'curNbl'. */
68 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail;
69 /* Array of destination ports for 'curNbl'. */
70 PNDIS_SWITCH_FORWARDING_DESTINATION_ARRAY destinationPorts;
71 /* send flags while sending 'curNbl' into NDIS. */
73 /* Total number of output ports, used + unused, in 'curNbl'. */
74 UINT32 destPortsSizeIn;
75 /* Total number of used output ports in 'curNbl'. */
76 UINT32 destPortsSizeOut;
// completionList: when this is NULL, OvsCompleteNBLForwardingCtx treats curNbl
// as an NBL created by OVS and frees it through OvsCompleteNBL instead of
// queueing it on a completion list.
78 * If 'curNbl' is not owned by OVS, they need to be tracked, if they need to
81 OvsCompletionList *completionList;
83 * vport number of 'curNbl' when it is passed from the PIF bridge to the INT
84 * bridge. ie. during tunneling on the Rx side.
// tunKey: a zero tunKey.dst is used across this file as the marker for
// no tunnel context being set.
90 * - specified in actions during tunneling Tx
91 * - extracted from an NBL during tunneling Rx
93 OvsIPv4TunnelKey tunKey;
// tunnelTxNic: set by OvsDetectTunnelPkt when the destination is a tunnel
// vport and a tunnel key is present; consumed by OvsTunnelPortTx.
97 * To store the output port, when it is a tunneled port. We don't foresee
98 * multiple tunneled ports as outport for any given NBL.
100 POVS_VPORT_ENTRY tunnelTxNic;
104 * Points to the Internal port on the PIF Bridge, if the packet needs to be
107 POVS_VPORT_ENTRY tunnelRxNic;
109 /* header information */
110 OVS_PACKET_HDR_INFO layers;
111 } OvsForwardingContext;
115 * --------------------------------------------------------------------------
116 * OvsInitForwardingCtx --
117 * Function to init/re-init the 'ovsFwdCtx' context as the actions pipeline
121 * NDIS_STATUS_SUCCESS on success
122 * Other NDIS_STATUS upon failure. Upon failure, it is safe to call
123 * OvsCompleteNBLForwardingCtx(), since 'ovsFwdCtx' has been initialized
124 * enough for OvsCompleteNBLForwardingCtx() to do its work.
125 * --------------------------------------------------------------------------
// Initializes or re-initializes ovsFwdCtx for curNbl. It records the switch
// context, completion list and forwarding detail, fetches the destination
// array from NDIS when the NBL already has free destination slots, resets the
// used-destination counter, and clears the tunnel fields only when
// resetTunnelInfo is set. The return statement here yields
// NDIS_STATUS_SUCCESS.
127 static __inline NDIS_STATUS
128 OvsInitForwardingCtx(OvsForwardingContext *ovsFwdCtx,
129 POVS_SWITCH_CONTEXT switchContext,
130 PNET_BUFFER_LIST curNbl,
133 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail,
134 OvsCompletionList *completionList,
135 OVS_PACKET_HDR_INFO *layers,
136 BOOLEAN resetTunnelInfo)
139 ASSERT(switchContext);
144 * Set values for curNbl and switchContext so upon failures, we have enough
145 * information to do cleanup.
147 ovsFwdCtx->curNbl = curNbl;
148 ovsFwdCtx->switchContext = switchContext;
149 ovsFwdCtx->completionList = completionList;
150 ovsFwdCtx->fwdDetail = fwdDetail;
// Ask NDIS for the destination array only when the NBL reports unused
// destination slots; otherwise destinationPorts is left NULL and OvsAddPorts
// grows the array on first use.
152 if (fwdDetail->NumAvailableDestinations > 0) {
154 * XXX: even though MSDN says GetNetBufferListDestinations() returns
155 * NDIS_STATUS, the header files say otherwise.
157 switchContext->NdisSwitchHandlers.GetNetBufferListDestinations(
158 switchContext->NdisSwitchContext, curNbl,
159 &ovsFwdCtx->destinationPorts);
161 ASSERT(ovsFwdCtx->destinationPorts);
162 /* Ensure that none of the elements are consumed yet. */
163 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
164 fwdDetail->NumAvailableDestinations);
166 ovsFwdCtx->destinationPorts = NULL;
// Capacity starts at whatever NDIS reports as available; nothing is used yet.
168 ovsFwdCtx->destPortsSizeIn = fwdDetail->NumAvailableDestinations;
169 ovsFwdCtx->destPortsSizeOut = 0;
170 ovsFwdCtx->srcVportNo = srcVportNo;
171 ovsFwdCtx->sendFlags = sendFlags;
// Header-layer info is either copied from the caller or zeroed.
// NOTE(review): OvsLookupFlowOutput passes NULL for layers, so presumably the
// zeroing path is the one taken for a NULL pointer - confirm.
173 ovsFwdCtx->layers = *layers;
175 RtlZeroMemory(&ovsFwdCtx->layers, sizeof ovsFwdCtx->layers);
// Tunnel state survives a re-init unless the caller explicitly resets it.
177 if (resetTunnelInfo) {
178 ovsFwdCtx->tunnelTxNic = NULL;
179 ovsFwdCtx->tunnelRxNic = NULL;
180 RtlZeroMemory(&ovsFwdCtx->tunKey, sizeof ovsFwdCtx->tunKey);
183 return NDIS_STATUS_SUCCESS;
187 * --------------------------------------------------------------------------
188 * OvsDetectTunnelRxPkt --
189 * Utility function for an RX packet to detect its tunnel type.
192 * True - if the tunnel type was detected.
193 * False - if not a tunnel packet or tunnel type not supported.
194 * --------------------------------------------------------------------------
// Classifies a received packet as VXLAN purely from the flow key: it must be
// an unfragmented IP packet carrying UDP with destination port
// VXLAN_UDP_PORT_NBO. On a match the switch vxlanVport is selected, the
// rxVxlan counter is bumped, and the vport is stored in tunnelRxNic.
// ovsFwdCtx: context whose tunnelRxNic is expected to be NULL on entry.
// flowKey: key already extracted from the packet; only ipKey fields are read.
196 static __inline BOOLEAN
197 OvsDetectTunnelRxPkt(OvsForwardingContext *ovsFwdCtx,
198 const OvsFlowKey *flowKey)
200 POVS_VPORT_ENTRY tunnelVport = NULL;
202 /* XXX: we should also check for the length of the UDP payload to pick
203 * packets only if they are at least VXLAN header size.
205 if (!flowKey->ipKey.nwFrag &&
206 flowKey->ipKey.nwProto == IPPROTO_UDP &&
207 flowKey->ipKey.l4.tpDst == VXLAN_UDP_PORT_NBO) {
208 tunnelVport = ovsFwdCtx->switchContext->vxlanVport;
209 ovsActionStats.rxVxlan++;
212 // We might get tunnel packets even before the tunnel gets initialized.
214 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
215 ovsFwdCtx->tunnelRxNic = tunnelVport;
223 * --------------------------------------------------------------------------
224 * OvsDetectTunnelPkt --
225 * Utility function to detect if a packet is to be subjected to
226 * tunneling (Tx) or de-tunneling (Rx). Various factors such as source
227 * port, destination port, packet contents, and previously setup tunnel
231 * True - If the packet is to be subjected to tunneling.
232 * In case of invalid tunnel context, the tunneling functionality is
233 * a no-op and is completed within this function itself by consuming
234 * all of the tunneling context.
235 * False - If not a tunnel packet or tunnel type not supported. Caller should
236 * process the packet as a non-tunnel packet.
237 * --------------------------------------------------------------------------
// Decides whether an output to dstVport starts a tunnel pipeline.
// - Internal destination vport: the packet is a tunnel Rx candidate only when
//   its NDIS source port is the external port or the default port, and
//   OvsDetectTunnelRxPkt recognizes it.
// - Tunnel destination vport: the packet is a tunnel Tx candidate; tunnelTxNic
//   is set only when a tunnel key destination is present.
// ovsFwdCtx: context whose tunnelTxNic, tunnelRxNic and tunKey may be updated.
// dstVport: destination vport already validated by the caller.
// flowKey: key of the packet, forwarded to OvsDetectTunnelRxPkt.
239 static __inline BOOLEAN
240 OvsDetectTunnelPkt(OvsForwardingContext *ovsFwdCtx,
241 const POVS_VPORT_ENTRY dstVport,
242 const OvsFlowKey *flowKey)
244 if (OvsIsInternalVportType(dstVport->ovsType)) {
247 * The source of NBL during tunneling Rx could be the external
248 * port or if it is being executed from userspace, the source port is
251 BOOLEAN validSrcPort = (ovsFwdCtx->fwdDetail->SourcePortId ==
252 ovsFwdCtx->switchContext->externalPortId) ||
253 (ovsFwdCtx->fwdDetail->SourcePortId ==
254 NDIS_SWITCH_DEFAULT_PORT_ID);
256 if (validSrcPort && OvsDetectTunnelRxPkt(ovsFwdCtx, flowKey)) {
257 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
258 ASSERT(ovsFwdCtx->tunnelRxNic != NULL);
261 } else if (OvsIsTunnelVportType(dstVport->ovsType)) {
262 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
263 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
267 * The destination port is a tunnel port. Encapsulation must be
268 * performed only on packets that originate from a VIF port or from
269 * userspace (default port)
271 * If the packet will not be encapsulated, consume the tunnel context
// For a source other than the default port, the source vport must exist and
// be of type NETDEV; otherwise tunKey.dst is zeroed so that the check further
// down skips encapsulation.
274 if (ovsFwdCtx->srcVportNo != OVS_DEFAULT_PORT_NO) {
276 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(
277 ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
279 if (!vport || vport->ovsType != OVS_VPORT_TYPE_NETDEV) {
280 ovsFwdCtx->tunKey.dst = 0;
284 /* Tunnel the packet only if tunnel context is set. */
285 if (ovsFwdCtx->tunKey.dst != 0) {
286 ovsActionStats.txVxlan++;
287 ovsFwdCtx->tunnelTxNic = dstVport;
298 * --------------------------------------------------------------------------
300 * Add the specified destination vport into the forwarding context. If the
301 * vport is a VIF/external port, it is added directly to the NBL. If it is
302 * a tunneling port, it is NOT added to the NBL.
305 * NDIS_STATUS_SUCCESS on success
306 * Other NDIS_STATUS upon failure.
307 * --------------------------------------------------------------------------
// Adds the vport identified by dstPortId as an output for the current NBL.
// A missing or disconnected vport is counted in noVport and silently ignored.
// A port that OvsDetectTunnelPkt claims for tunneling is recorded in the
// context only and is not written into the NBL destination array. Any other
// port is appended to the array, which is first created or grown through the
// NDIS switch handlers when it is full.
// preserveVLAN and preservePriority are copied verbatim into the new
// destination entry.
309 static __inline NDIS_STATUS
310 OvsAddPorts(OvsForwardingContext *ovsFwdCtx,
312 NDIS_SWITCH_PORT_ID dstPortId,
313 BOOLEAN preserveVLAN,
314 BOOLEAN preservePriority)
316 POVS_VPORT_ENTRY vport;
317 PNDIS_SWITCH_PORT_DESTINATION fwdPort;
319 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
322 * We hold the dispatch lock that protects the list of vports, so vports
323 * validated here can be added as destinations safely before we call into
326 * Some of the vports can be tunnelled ports as well in which case
327 * they should be added to a separate list of tunnelled destination ports
328 * instead of the VIF ports. The context for the tunnel is settable
329 * in OvsForwardingContext.
331 vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, dstPortId);
// An unknown or not-yet-connected port is not an error for the caller.
332 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
334 * There may be some latency between a port disappearing, and userspace
335 * updating the recalculated flows. In the meantime, handle invalid
338 ovsActionStats.noVport++;
339 return NDIS_STATUS_SUCCESS;
341 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
// Tx statistics are charged before the tunnel check, and the byte count uses
// the data length of the first NET_BUFFER only.
342 vport->stats.txPackets++;
343 vport->stats.txBytes +=
344 NET_BUFFER_DATA_LENGTH(NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl));
// Tunnel candidates are tracked in the context, so nothing is appended here.
346 if (OvsDetectTunnelPkt(ovsFwdCtx, vport, flowKey)) {
347 return NDIS_STATUS_SUCCESS;
// No free slot left (or no array yet): make room before appending.
350 if (ovsFwdCtx->destPortsSizeOut == ovsFwdCtx->destPortsSizeIn) {
351 if (ovsFwdCtx->destPortsSizeIn == 0) {
352 ASSERT(ovsFwdCtx->destinationPorts == NULL);
353 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
// First allocation: request OVS_DEST_PORTS_ARRAY_MIN_SIZE slots.
355 switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
356 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
357 OVS_DEST_PORTS_ARRAY_MIN_SIZE,
358 &ovsFwdCtx->destinationPorts);
359 if (status != NDIS_STATUS_SUCCESS) {
360 ovsActionStats.cannotGrowDest++;
363 ovsFwdCtx->destPortsSizeIn =
364 ovsFwdCtx->fwdDetail->NumAvailableDestinations;
365 ASSERT(ovsFwdCtx->destinationPorts);
367 ASSERT(ovsFwdCtx->destinationPorts != NULL);
370 * A ULONG value that specifies the total number of
371 * NDIS_SWITCH_PORT_DESTINATION elements in the
372 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure.
375 * A ULONG value that specifies the number of
376 * NDIS_SWITCH_PORT_DESTINATION elements in the
377 * NDIS_SWITCH_FORWARDING_DESTINATION_ARRAY structure that
378 * specify port destinations.
380 * NumAvailableDestinations:
381 * A value that specifies the number of unused extensible switch
382 * destination ports elements within an NET_BUFFER_LIST structure.
384 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
385 ovsFwdCtx->destPortsSizeIn);
386 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
387 ovsFwdCtx->destPortsSizeOut -
388 ovsFwdCtx->fwdDetail->NumAvailableDestinations);
389 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations > 0);
391 * Before we grow the array of destination ports, the current set
392 * of ports needs to be committed. Only the ports added since the
393 * last commit need to be part of the new update.
395 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
396 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
397 ovsFwdCtx->fwdDetail->NumAvailableDestinations,
398 ovsFwdCtx->destinationPorts);
399 if (status != NDIS_STATUS_SUCCESS) {
400 ovsActionStats.cannotGrowDest++;
403 ASSERT(ovsFwdCtx->destinationPorts->NumElements ==
404 ovsFwdCtx->destPortsSizeIn);
405 ASSERT(ovsFwdCtx->destinationPorts->NumDestinations ==
406 ovsFwdCtx->destPortsSizeOut);
407 ASSERT(ovsFwdCtx->fwdDetail->NumAvailableDestinations == 0);
// Grow by the current capacity, which is why the tracked size is doubled
// with the shift below.
409 status = switchContext->NdisSwitchHandlers.GrowNetBufferListDestinations(
410 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
411 ovsFwdCtx->destPortsSizeIn, &ovsFwdCtx->destinationPorts);
412 if (status != NDIS_STATUS_SUCCESS) {
413 ovsActionStats.cannotGrowDest++;
416 ASSERT(ovsFwdCtx->destinationPorts != NULL);
417 ovsFwdCtx->destPortsSizeIn <<= 1;
// Fill in the next unused slot and account for it.
421 ASSERT(ovsFwdCtx->destPortsSizeOut < ovsFwdCtx->destPortsSizeIn);
423 NDIS_SWITCH_PORT_DESTINATION_AT_ARRAY_INDEX(ovsFwdCtx->destinationPorts,
424 ovsFwdCtx->destPortsSizeOut);
426 fwdPort->PortId = vport->portId;
427 fwdPort->NicIndex = vport->nicIndex;
428 fwdPort->IsExcluded = 0;
429 fwdPort->PreserveVLAN = preserveVLAN;
430 fwdPort->PreservePriority = preservePriority;
431 ovsFwdCtx->destPortsSizeOut += 1;
433 return NDIS_STATUS_SUCCESS;
438 * --------------------------------------------------------------------------
439 * OvsClearTunTxCtx --
440 * Utility function to clear tx tunneling context.
441 * --------------------------------------------------------------------------
// Drops the pending Tx tunnel port and marks the tunnel key as unset by
// zeroing tunKey.dst. The other tunKey fields are left as they are.
444 OvsClearTunTxCtx(OvsForwardingContext *ovsFwdCtx)
446 ovsFwdCtx->tunnelTxNic = NULL;
447 ovsFwdCtx->tunKey.dst = 0;
452 * --------------------------------------------------------------------------
453 * OvsClearTunRxCtx --
454 * Utility function to clear rx tunneling context.
455 * --------------------------------------------------------------------------
// Drops the pending Rx tunnel port and marks the tunnel key as unset by
// zeroing tunKey.dst. The other tunKey fields are left as they are.
458 OvsClearTunRxCtx(OvsForwardingContext *ovsFwdCtx)
460 ovsFwdCtx->tunnelRxNic = NULL;
461 ovsFwdCtx->tunKey.dst = 0;
466 * --------------------------------------------------------------------------
467 * OvsCompleteNBLForwardingCtx --
468 * This utility function is responsible for freeing/completing an NBL - either
469 * by adding it to a completion list or by freeing it.
472 * It also resets the necessary fields in 'ovsFwdCtx'.
473 * --------------------------------------------------------------------------
// Releases curNbl and resets the per-NBL bookkeeping in ovsFwdCtx.
// With a completion list, the NBL is queued on it together with the source
// port id from fwdDetail and a filter reason built from dropReason.
// Without one, the NBL is handed to OvsCompleteNBL.
// Afterwards curNbl is NULL, destPortsSizeOut is 0 and both tunnel port
// pointers are NULL.
// NOTE(review): tunKey is not touched here; callers that need tunKey.dst
// cleared use OvsClearTunTxCtx or OvsClearTunRxCtx.
476 OvsCompleteNBLForwardingCtx(OvsForwardingContext *ovsFwdCtx,
479 NDIS_STRING filterReason;
481 RtlInitUnicodeString(&filterReason, dropReason);
482 if (ovsFwdCtx->completionList) {
483 OvsAddPktCompletionList(ovsFwdCtx->completionList, TRUE,
484 ovsFwdCtx->fwdDetail->SourcePortId, ovsFwdCtx->curNbl, 1,
486 ovsFwdCtx->curNbl = NULL;
488 /* If there is no completionList, we assume this is ovs created NBL */
489 ovsFwdCtx->curNbl = OvsCompleteNBL(ovsFwdCtx->switchContext,
490 ovsFwdCtx->curNbl, TRUE);
491 ASSERT(ovsFwdCtx->curNbl == NULL);
493 /* XXX: these can be made debug only to save cycles. Ideally the pipeline
494 * using these fields should reset the values at the end of the pipeline. */
495 ovsFwdCtx->destPortsSizeOut = 0;
496 ovsFwdCtx->tunnelTxNic = NULL;
497 ovsFwdCtx->tunnelRxNic = NULL;
501 * --------------------------------------------------------------------------
502 * OvsDoFlowLookupOutput --
503 * Function to be used for the second stage of a tunneling workflow, ie.:
504 * - On the encapsulated packet on Tx path, to do a flow extract, flow
505 * lookup and executing the actions.
506 * - On the decapsulated packet on Rx path, to do a flow extract, flow
507 * lookup and executing the actions.
509 * XXX: It is assumed that the NBL in 'ovsFwdCtx' is owned by OVS. This is
510 * until the new buffer management framework is adopted.
513 * The NBL in 'ovsFwdCtx' is consumed.
514 * --------------------------------------------------------------------------
// Second-stage lookup for a tunneled packet: extracts a flow key from curNbl
// using srcVportNo (and the tunnel key when tunKey.dst is non-zero), looks the
// key up in the datapath, and then either executes the actions of the matching
// flow or queues the packet to userspace as a miss.
// On every path shown here the context no longer owns curNbl afterwards.
516 static __inline NDIS_STATUS
517 OvsDoFlowLookupOutput(OvsForwardingContext *ovsFwdCtx)
523 POVS_VPORT_ENTRY vport =
524 OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo);
// Source vport vanished or is not connected: drop the packet, count it, and
// still report success to the caller.
525 if (vport == NULL || vport->ovsState != OVS_STATE_CONNECTED) {
526 ASSERT(FALSE); // XXX: let's catch this for now
527 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
528 L"OVS-Dropped due to internal/tunnel port removal");
529 ovsActionStats.noVport++;
530 return NDIS_STATUS_SUCCESS;
532 ASSERT(vport->nicState == NdisSwitchNicStateConnected);
534 /* Assert that in the Rx direction, key is always setup. */
535 ASSERT(ovsFwdCtx->tunnelRxNic == NULL || ovsFwdCtx->tunKey.dst != 0);
// The tunnel key is passed to the extractor only when it is set.
536 status = OvsExtractFlow(ovsFwdCtx->curNbl, ovsFwdCtx->srcVportNo,
537 &key, &ovsFwdCtx->layers, ovsFwdCtx->tunKey.dst != 0 ?
538 &ovsFwdCtx->tunKey : NULL);
539 if (status != NDIS_STATUS_SUCCESS) {
540 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
541 L"OVS-Flow extract failed");
542 ovsActionStats.failedFlowExtract++;
546 flow = OvsLookupFlow(&ovsFwdCtx->switchContext->datapath, &key, &hash, FALSE);
// Hit path: update flow usage and the datapath hit counter, then run the
// actions of the flow on curNbl.
548 OvsFlowUsed(flow, ovsFwdCtx->curNbl, &ovsFwdCtx->layers);
549 ovsFwdCtx->switchContext->datapath.hits++;
550 status = OvsActionsExecute(ovsFwdCtx->switchContext,
551 ovsFwdCtx->completionList, ovsFwdCtx->curNbl,
552 ovsFwdCtx->srcVportNo, ovsFwdCtx->sendFlags,
553 &key, &hash, &ovsFwdCtx->layers,
554 flow->actions, flow->actionsLen);
// The NBL was handed to OvsActionsExecute, so the context drops its pointer.
555 ovsFwdCtx->curNbl = NULL;
// Miss path: build an OVS_PACKET_CMD_MISS packet for the default queue.
557 LIST_ENTRY missedPackets;
559 ovsFwdCtx->switchContext->datapath.misses++;
560 InitializeListHead(&missedPackets);
561 status = OvsCreateAndAddPackets(
562 OVS_DEFAULT_PACKET_QUEUE, NULL, 0, OVS_PACKET_CMD_MISS,
563 ovsFwdCtx->srcVportNo,
564 key.tunKey.dst != 0 ?
565 (OvsIPv4TunnelKey *)&key.tunKey : NULL,
567 ovsFwdCtx->tunnelRxNic != NULL, &ovsFwdCtx->layers,
568 ovsFwdCtx->switchContext, &missedPackets, &num);
570 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, num);
// Either way the original NBL is completed here; only the counter and the
// returned status differ.
572 if (status == NDIS_STATUS_SUCCESS) {
573 /* Complete the packet since it was copied to user buffer. */
574 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
575 L"OVS-Dropped since packet was copied to userspace");
576 ovsActionStats.flowMiss++;
577 status = NDIS_STATUS_SUCCESS;
579 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
580 L"OVS-Dropped due to failure to queue to userspace");
581 status = NDIS_STATUS_FAILURE;
582 ovsActionStats.failedFlowMiss++;
590 * --------------------------------------------------------------------------
592 * The start function for Tx tunneling - encapsulates the packet, and
593 * outputs the packet on the PIF bridge.
596 * The NBL in 'ovsFwdCtx' is consumed.
597 * --------------------------------------------------------------------------
// Tx tunneling entry point. Rewrites the packet source to the internal port,
// encapsulates curNbl according to the type of tunnelTxNic (only VXLAN is
// handled), completes the original NBL, and runs the second-stage flow lookup
// on the encapsulated NBL.
// Returns NDIS_STATUS_FAILURE when the switch has no internal vport. An
// encapsulation failure is counted in failedEncap, the packet is dropped, and
// status is then set to NDIS_STATUS_SUCCESS.
599 static __inline NDIS_STATUS
600 OvsTunnelPortTx(OvsForwardingContext *ovsFwdCtx)
602 NDIS_STATUS status = NDIS_STATUS_FAILURE;
603 PNET_BUFFER_LIST newNbl = NULL;
606 * Setup the source port to be the internal port to as to facilitate the
607 * second OvsLookupFlow.
609 if (ovsFwdCtx->switchContext->internalVport == NULL) {
610 OvsClearTunTxCtx(ovsFwdCtx);
611 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
612 L"OVS-Dropped since internal port is absent");
613 return NDIS_STATUS_FAILURE;
// Both the OVS-level source port number and the NDIS source port fields in
// the forwarding detail are switched over to the internal port.
615 ovsFwdCtx->srcVportNo =
616 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->portNo;
618 ovsFwdCtx->fwdDetail->SourcePortId = ovsFwdCtx->switchContext->internalPortId;
619 ovsFwdCtx->fwdDetail->SourceNicIndex =
620 ((POVS_VPORT_ENTRY)ovsFwdCtx->switchContext->internalVport)->nicIndex;
622 /* Do the encap. Encap function does not consume the NBL. */
623 switch(ovsFwdCtx->tunnelTxNic->ovsType) {
624 case OVS_VPORT_TYPE_VXLAN:
625 status = OvsEncapVxlan(ovsFwdCtx->curNbl, &ovsFwdCtx->tunKey,
626 ovsFwdCtx->switchContext,
627 (VOID *)ovsFwdCtx->completionList,
628 &ovsFwdCtx->layers, &newNbl);
631 ASSERT(! "Tx: Unhandled tunnel type");
634 /* Reset the tunnel context so that it doesn't get used after this point. */
635 OvsClearTunTxCtx(ovsFwdCtx);
// Success: release the pre-encap NBL and continue with the encapsulated one.
637 if (status == NDIS_STATUS_SUCCESS) {
639 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
640 L"Complete after cloning NBL for encapsulation");
641 ovsFwdCtx->curNbl = newNbl;
642 status = OvsDoFlowLookupOutput(ovsFwdCtx);
643 ASSERT(ovsFwdCtx->curNbl == NULL);
646 * XXX: Temporary freeing of the packet until we register a
647 * callback to IP helper.
649 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
650 L"OVS-Dropped due to encap failure");
651 ovsActionStats.failedEncap++;
652 status = NDIS_STATUS_SUCCESS;
659 * --------------------------------------------------------------------------
661 * Decapsulate the incoming NBL based on the tunnel type and goes through
662 * the flow lookup for the inner packet.
664 * Note: IP checksum is validated here, but L4 checksum validation needs
665 * to be done by the corresponding tunnel types.
668 * The NBL in 'ovsFwdCtx' is consumed.
669 * --------------------------------------------------------------------------
// Rx tunneling entry point. Validates the IP checksum, decapsulates curNbl
// according to the type of tunnelRxNic (only VXLAN is handled), completes the
// outer NBL, re-initializes the context around the decapsulated NBL with the
// tunnel vport as source, and runs the second-stage flow lookup.
// Checksum and decap failures are counted in failedChecksum and failedDecap.
671 static __inline NDIS_STATUS
672 OvsTunnelPortRx(OvsForwardingContext *ovsFwdCtx)
674 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
675 PNET_BUFFER_LIST newNbl = NULL;
// Saved because OvsCompleteNBLForwardingCtx clears tunnelRxNic further down.
676 POVS_VPORT_ENTRY tunnelRxVport = ovsFwdCtx->tunnelRxNic;
678 if (OvsValidateIPChecksum(ovsFwdCtx->curNbl, &ovsFwdCtx->layers)
679 != NDIS_STATUS_SUCCESS) {
680 ovsActionStats.failedChecksum++;
681 OVS_LOG_INFO("Packet dropped due to IP checksum failure.");
685 switch(tunnelRxVport->ovsType) {
686 case OVS_VPORT_TYPE_VXLAN:
688 * OvsDoDecapVxlan should return a new NBL if it was copied, and
689 * this new NBL should be setup as the ovsFwdCtx->curNbl.
691 status = OvsDoDecapVxlan(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
692 &ovsFwdCtx->tunKey, &newNbl);
695 OVS_LOG_ERROR("Rx: Unhandled tunnel type: %d\n",
696 tunnelRxVport->ovsType);
697 ASSERT(! "Rx: Unhandled tunnel type");
698 status = NDIS_STATUS_NOT_SUPPORTED;
701 if (status != NDIS_STATUS_SUCCESS) {
702 ovsActionStats.failedDecap++;
707 * tunnelRxNic and other fields will be cleared, re-init the context
710 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
711 L"OVS-dropped due to new decap packet");
713 /* Decapsulated packet is in a new NBL */
// Restore the Rx tunnel port that the completion call just cleared.
714 ovsFwdCtx->tunnelRxNic = tunnelRxVport;
// NOTE(review): the status returned by OvsInitForwardingCtx is discarded here.
// resetTunnelInfo is FALSE so the decoded tunKey is kept for the lookup.
715 OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
716 newNbl, tunnelRxVport->portNo, 0,
717 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
718 ovsFwdCtx->completionList,
719 &ovsFwdCtx->layers, FALSE);
722 * Set the NBL's SourcePortId and SourceNicIndex to default values to
723 * keep NDIS happy when we forward the packet.
725 ovsFwdCtx->fwdDetail->SourcePortId = NDIS_SWITCH_DEFAULT_PORT_ID;
726 ovsFwdCtx->fwdDetail->SourceNicIndex = 0;
728 status = OvsDoFlowLookupOutput(ovsFwdCtx);
729 ASSERT(ovsFwdCtx->curNbl == NULL);
730 OvsClearTunRxCtx(ovsFwdCtx);
// Failure exit: drop the packet and clear the Rx tunnel state.
735 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
736 L"OVS-dropped due to decap failure");
737 OvsClearTunRxCtx(ovsFwdCtx);
743 * --------------------------------------------------------------------------
744 * OvsOutputForwardingCtx --
745 * This function outputs an NBL to NDIS or to a tunneling pipeline based on
746 * the ports added so far into 'ovsFwdCtx'.
749 * This function consumes the NBL - either by forwarding it successfully to
750 * NDIS, or adding it to the completion list in 'ovsFwdCtx', or freeing it.
752 * Also makes sure that the list of destination ports - tunnel or otherwise is
754 * --------------------------------------------------------------------------
// Flushes the outputs accumulated in ovsFwdCtx.
// When regular destination ports were added, they are committed to the NBL
// through UpdateNetBufferListDestinations and the NBL is sent with
// OvsSendNBLIngress. If a tunnel Tx or Rx port is pending as well, a copy of
// the NBL is made first and the context is re-initialized around that copy.
// A pending tunnel port is then served by OvsTunnelPortTx or OvsTunnelPortRx.
// When status is not NDIS_STATUS_SUCCESS the NBL is completed as dropped.
756 static __inline NDIS_STATUS
757 OvsOutputForwardingCtx(OvsForwardingContext *ovsFwdCtx)
759 NDIS_STATUS status = STATUS_SUCCESS;
760 POVS_SWITCH_CONTEXT switchContext = ovsFwdCtx->switchContext;
763 * Handle the case where the some of the destination ports are tunneled
764 * ports - the non-tunneled ports get a unmodified copy of the NBL, and the
765 * tunneling pipeline starts when we output the packet to tunneled port.
767 if (ovsFwdCtx->destPortsSizeOut > 0) {
768 PNET_BUFFER_LIST newNbl = NULL;
// Number of entries written since the last commit: the free slots NDIS
// still reports minus the slots this context has not used.
770 UINT32 portsToUpdate =
771 ovsFwdCtx->fwdDetail->NumAvailableDestinations -
772 (ovsFwdCtx->destPortsSizeIn - ovsFwdCtx->destPortsSizeOut);
774 ASSERT(ovsFwdCtx->destinationPorts != NULL);
777 * Create a copy of the packet in order to do encap on it later. Also,
778 * don't copy the offload context since the encap'd packet has a
779 * different set of headers. This will change when we implement offloads
780 * before doing encapsulation.
782 if (ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL) {
783 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
784 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
785 0, 0, TRUE /*copy NBL info*/);
786 if (newNbl == NULL) {
787 status = NDIS_STATUS_RESOURCES;
788 ovsActionStats.noCopiedNbl++;
793 /* It does not seem like we'll get here unless 'portsToUpdate' > 0. */
794 ASSERT(portsToUpdate > 0);
795 status = switchContext->NdisSwitchHandlers.UpdateNetBufferListDestinations(
796 switchContext->NdisSwitchContext, ovsFwdCtx->curNbl,
797 portsToUpdate, ovsFwdCtx->destinationPorts);
// NOTE(review): newNbl is still NULL here when no tunnel port was pending;
// confirm that OvsCompleteNBL tolerates a NULL NBL.
798 if (status != NDIS_STATUS_SUCCESS) {
799 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
800 ovsActionStats.cannotGrowDest++;
804 OvsSendNBLIngress(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
805 ovsFwdCtx->sendFlags);
806 /* End this pipeline by resetting the corresponding context. */
807 ovsFwdCtx->destPortsSizeOut = 0;
808 ovsFwdCtx->curNbl = NULL;
// Continue with the copy; tunnel fields are kept (resetTunnelInfo is FALSE)
// and the send flags are reset to 0.
810 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
811 newNbl, ovsFwdCtx->srcVportNo, 0,
812 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
813 ovsFwdCtx->completionList,
814 &ovsFwdCtx->layers, FALSE);
815 if (status != NDIS_STATUS_SUCCESS) {
816 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
817 L"Dropped due to resouces");
// Tx takes precedence over Rx; both handlers are expected to leave the
// tunnel state cleared.
823 if (ovsFwdCtx->tunnelTxNic != NULL) {
824 status = OvsTunnelPortTx(ovsFwdCtx);
825 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
826 ASSERT(ovsFwdCtx->tunKey.dst == 0);
827 } else if (ovsFwdCtx->tunnelRxNic != NULL) {
828 status = OvsTunnelPortRx(ovsFwdCtx);
829 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
830 ASSERT(ovsFwdCtx->tunKey.dst == 0);
832 ASSERT(ovsFwdCtx->curNbl == NULL);
837 if (status != NDIS_STATUS_SUCCESS) {
838 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"Dropped due to XXX");
846 * --------------------------------------------------------------------------
847 * OvsLookupFlowOutput --
848 * Utility function for external callers to do flow extract, lookup,
849 * actions execute on a given NBL.
851 * Note: If this is being used from a callback function, make sure that the
852 * arguments specified are still valid in the asynchronous context.
855 * This function consumes the NBL.
856 * --------------------------------------------------------------------------
// Entry point for external callers: builds a fresh forwarding context on the
// stack for curNbl, using the internal vport as the packet source, and runs
// OvsDoFlowLookupOutput on it. Tunnel state in the new context is cleared
// (resetTunnelInfo is TRUE), no header-layer info is passed in (layers is
// NULL), and compList is used as the completion list after a cast.
859 OvsLookupFlowOutput(POVS_SWITCH_CONTEXT switchContext,
861 PNET_BUFFER_LIST curNbl)
864 OvsForwardingContext ovsFwdCtx;
865 POVS_VPORT_ENTRY internalVport =
866 (POVS_VPORT_ENTRY)switchContext->internalVport;
868 /* XXX: make sure comp list was not a stack variable previously. */
869 OvsCompletionList *completionList = (OvsCompletionList *)compList;
872 * XXX: can internal port disappear while we are busy doing ARP resolution?
873 * It could, but will we get this callback from IP helper in that case. Need
// NOTE(review): only this ASSERT guards the internalVport dereference in the
// call below - confirm the pointer cannot be NULL in release builds.
876 ASSERT(switchContext->internalVport);
877 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl,
878 internalVport->portNo, 0,
879 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl),
880 completionList, NULL, TRUE);
881 if (status != NDIS_STATUS_SUCCESS) {
882 OvsCompleteNBLForwardingCtx(&ovsFwdCtx,
883 L"OVS-Dropped due to resources");
889 * XXX: We need to acquire the dispatch lock and the datapath lock.
// NOTE(review): the status returned by this call does not appear to be
// captured on this line - confirm whether callers need it.
892 OvsDoFlowLookupOutput(&ovsFwdCtx);
897 * --------------------------------------------------------------------------
898 * OvsOutputBeforeSetAction --
899 * Function to be called to complete one set of actions on an NBL, before
900 * we start the next one.
901 * --------------------------------------------------------------------------
// Flushes the outputs gathered so far before a following action modifies the
// packet. A copy of curNbl is made up front, the original is sent through
// OvsOutputForwardingCtx, and the context is then re-initialized around the
// copy so the remaining actions operate on it.
// Returns NDIS_STATUS_RESOURCES (and counts noCopiedNbl) when the copy could
// not be made; the original NBL has still been output in that case.
// If the output step failed, the copy is released with OvsCompleteNBL.
903 static __inline NDIS_STATUS
904 OvsOutputBeforeSetAction(OvsForwardingContext *ovsFwdCtx)
906 PNET_BUFFER_LIST newNbl;
907 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
911 * Create a copy and work on the copy after this point. The original NBL is
912 * forwarded. One reason to not use the copy for forwarding is that
913 * ports have already been added to the original NBL, and it might be
914 * inefficient/impossible to remove/re-add them to the copy. There's no
915 * notion of removing the ports, the ports need to be marked as
916 * "isExcluded". There's seems no real advantage to retaining the original
917 * and sending out the copy instead.
919 * XXX: We are copying the offload context here. This is to handle actions
921 * outport, pop_vlan(), outport, push_vlan(), outport
923 * copy size needs to include inner ether + IP + TCP, need to revisit
924 * if we support IP options.
925 * XXX Head room needs to include the additional encap.
926 * XXX copySize check is not considering multiple NBs.
928 nb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
929 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
930 0, 0, TRUE /*copy NBL info*/);
932 ASSERT(ovsFwdCtx->destPortsSizeOut > 0 ||
933 ovsFwdCtx->tunnelTxNic != NULL || ovsFwdCtx->tunnelRxNic != NULL);
935 /* Send the original packet out */
936 status = OvsOutputForwardingCtx(ovsFwdCtx);
937 ASSERT(ovsFwdCtx->curNbl == NULL);
938 ASSERT(ovsFwdCtx->destPortsSizeOut == 0);
939 ASSERT(ovsFwdCtx->tunnelRxNic == NULL);
940 ASSERT(ovsFwdCtx->tunnelTxNic == NULL);
942 /* If we didn't make a copy, can't continue. */
943 if (newNbl == NULL) {
944 ovsActionStats.noCopiedNbl++;
945 return NDIS_STATUS_RESOURCES;
948 /* Finish the remaining actions with the new NBL */
949 if (status != NDIS_STATUS_SUCCESS) {
950 OvsCompleteNBL(ovsFwdCtx->switchContext, newNbl, TRUE);
// The send flags are reset to 0 for the copy and tunnel fields are kept.
952 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
953 newNbl, ovsFwdCtx->srcVportNo, 0,
954 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
955 ovsFwdCtx->completionList,
956 &ovsFwdCtx->layers, FALSE);
964 * --------------------------------------------------------------------------
965 * OvsPopVlanInPktBuf --
966 * Function to pop a VLAN tag when the tag is in the packet buffer.
967 * --------------------------------------------------------------------------
// Removes an in-band VLAN tag from the packet. The NBL is first replaced by a
// copy (the original is completed), then the two MAC addresses at the start
// of the frame are moved forward by the size of the tag header and the data
// start of the NET_BUFFER is advanced by the same amount.
// Returns NDIS_STATUS_RESOURCES when the copy or the MDL query fails, and
// NDIS_STATUS_FAILURE when the Ethernet plus VLAN header does not fit in the
// first MDL.
969 static __inline NDIS_STATUS
970 OvsPopVlanInPktBuf(OvsForwardingContext *ovsFwdCtx)
// Bytes to relocate: destination MAC plus source MAC.
975 ULONG dataLength = sizeof (DL_EUI48) + sizeof (DL_EUI48);
976 UINT32 packetLen, mdlLen;
977 PNET_BUFFER_LIST newNbl;
981 * Declare a dummy vlanTag structure since we need to compute the size
982 * of shiftLength. The NDIS one is a unionized structure.
984 NDIS_PACKET_8021Q_INFO vlanTag = {0};
985 ULONG shiftLength = sizeof (vlanTag.TagHeader);
// NOTE(review): this declares an array of PUINT8 (pointers), not bytes, so it
// is several times larger than the dataLength bytes actually copied through
// it. Harmless for the copy, but probably meant to be UINT8 - confirm.
986 PUINT8 tempBuffer[sizeof (DL_EUI48) + sizeof (DL_EUI48)];
988 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
989 0, 0, TRUE /* copy NBL info */);
991 ovsActionStats.noCopiedNbl++;
992 return NDIS_STATUS_RESOURCES;
995 /* Complete the original NBL and create a copy to modify. */
996 OvsCompleteNBLForwardingCtx(ovsFwdCtx, L"OVS-Dropped due to copy");
// NOTE(review): completionList is replaced by NULL here, unlike the other
// re-init call sites in this file which pass ovsFwdCtx->completionList.
998 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
999 newNbl, ovsFwdCtx->srcVportNo, 0,
1000 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1001 NULL, &ovsFwdCtx->layers, FALSE);
1002 if (status != NDIS_STATUS_SUCCESS) {
1003 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1004 L"Dropped due to resouces");
1005 return NDIS_STATUS_RESOURCES;
// Only a single NET_BUFFER per NBL is expected from here on.
1008 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1009 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1010 ASSERT(curNb->Next == NULL);
1011 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1012 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1014 return NDIS_STATUS_RESOURCES;
// mdlLen becomes the number of bytes usable after the current MDL offset.
1016 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1017 /* Bail out if L2 + VLAN header is not contiguous in the first buffer. */
1018 if (MIN(packetLen, mdlLen) < sizeof (EthHdr) + shiftLength) {
1020 return NDIS_STATUS_FAILURE;
// Source and destination ranges overlap, hence the bounce through tempBuffer.
1022 bufferStart += NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1023 RtlCopyMemory(tempBuffer, bufferStart, dataLength);
1024 RtlCopyMemory(bufferStart + shiftLength, tempBuffer, dataLength);
1025 NdisAdvanceNetBufferDataStart(curNb, shiftLength, FALSE, NULL);
1027 return NDIS_STATUS_SUCCESS;
1031 * --------------------------------------------------------------------------
1032 * OvsTunnelAttrToIPv4TunnelKey --
1033 * Convert tunnel attribute to OvsIPv4TunnelKey.
1034 * --------------------------------------------------------------------------
/*
 * Fills '*tunKey' from the nested attributes carried inside 'attr', which
 * is asserted to be of type OVS_KEY_ATTR_TUNNEL.
 *
 * The three tunKey->attr[] words are zeroed before the walk, so any field
 * not named by a nested attribute is left at whatever that zeroing gives it
 * (presumably attr[] overlays the key's fields -- TODO confirm against the
 * OvsIPv4TunnelKey definition).
 *
 * The visible return path always yields NDIS_STATUS_SUCCESS.
 */
1036 static __inline NDIS_STATUS
1037 OvsTunnelAttrToIPv4TunnelKey(PNL_ATTR attr,
1038 OvsIPv4TunnelKey *tunKey)
/* Start from an all-zero key. */
1043 tunKey->attr[0] = 0;
1044 tunKey->attr[1] = 0;
1045 tunKey->attr[2] = 0;
1046 ASSERT(NlAttrType(attr) == OVS_KEY_ATTR_TUNNEL);
/* Iterate over the payload of 'attr', one nested attribute at a time. */
1048 NL_ATTR_FOR_EACH_UNSAFE (a, rem, NlAttrData(attr),
1049 NlAttrGetSize(attr)) {
1050 switch (NlAttrType(a)) {
/* Tunnel id: stored as read (64-bit big-endian getter), and flagged present. */
1051 case OVS_TUNNEL_KEY_ATTR_ID:
1052 tunKey->tunnelId = NlAttrGetBe64(a);
1053 tunKey->flags |= OVS_TNL_F_KEY;
/* Outer IPv4 source address (32-bit big-endian getter). */
1055 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
1056 tunKey->src = NlAttrGetBe32(a);
/* Outer IPv4 destination address (32-bit big-endian getter). */
1058 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
1059 tunKey->dst = NlAttrGetBe32(a);
/* Outer ToS byte. */
1061 case OVS_TUNNEL_KEY_ATTR_TOS:
1062 tunKey->tos = NlAttrGetU8(a);
/* Outer TTL byte. */
1064 case OVS_TUNNEL_KEY_ATTR_TTL:
1065 tunKey->ttl = NlAttrGetU8(a);
/* Flag-only attributes: their presence alone sets the corresponding bit. */
1067 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
1068 tunKey->flags |= OVS_TNL_F_DONT_FRAGMENT;
1070 case OVS_TUNNEL_KEY_ATTR_CSUM:
1071 tunKey->flags |= OVS_TNL_F_CSUM;
1078 return NDIS_STATUS_SUCCESS;
1082 *----------------------------------------------------------------------------
1083 * OvsUpdateEthHeader --
1084 * Updates the ethernet header in ovsFwdCtx.curNbl inline based on 'ethAttr'.
1086 *----------------------------------------------------------------------------
/*
 * Overwrites the destination and source MAC addresses of the packet in
 * 'ovsFwdCtx->curNbl' with 'ethAttr->eth_dst' and 'ethAttr->eth_src'.
 *
 * Only the first NET_BUFFER of the NBL is touched (the code asserts there is
 * exactly one), and the header is edited in place through the mapping of the
 * NET_BUFFER's current MDL.
 *
 * Returns:
 *   NDIS_STATUS_SUCCESS    both addresses were written.
 *   NDIS_STATUS_RESOURCES  returned after bumping ovsActionStats.noResource
 *                          (presumably when NdisQueryMdl could not map the
 *                          MDL -- the guard itself is not shown here).
 *   NDIS_STATUS_FAILURE    fewer than sizeof(EthHdr) valid bytes are
 *                          available in the first MDL.
 */
1088 static __inline NDIS_STATUS
1089 OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
1090 const struct ovs_key_ethernet *ethAttr)
1096 UINT32 packetLen, mdlLen;
/* Locate the single NET_BUFFER and map the MDL that holds its data start. */
1098 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1099 ASSERT(curNb->Next == NULL);
1100 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1101 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1102 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1104 ovsActionStats.noResource++;
1105 return NDIS_STATUS_RESOURCES;
/* Bytes of the MDL that lie at or after the packet's data start. */
1107 mdlLen -= NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1109 /* Bail out if the L2 header is not in a contiguous buffer. */
1110 if (MIN(packetLen, mdlLen) < sizeof *ethHdr) {
1112 return NDIS_STATUS_FAILURE;
/* The Ethernet header begins at the data start inside the mapped MDL. */
1114 ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
/* Copy sizes come from the header fields, not from 'ethAttr'. */
1116 RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
1117 sizeof ethHdr->Destination);
1118 RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
1120 return NDIS_STATUS_SUCCESS;
1124 *----------------------------------------------------------------------------
1125 * OvsUpdateIPv4Header --
1126 * Updates the IPv4 header in ovsFwdCtx.curNbl inline based on 'ipAttr'.
1128 *----------------------------------------------------------------------------
1130 static __inline NDIS_STATUS
1131 OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
1132 const struct ovs_key_ipv4 *ipAttr)
1138 UINT32 mdlLen, hdrSize, packetLen;
1139 OVS_PACKET_HDR_INFO *layers = &ovsFwdCtx->layers;
1142 TCPHdr *tcpHdr = NULL;
1143 UDPHdr *udpHdr = NULL;
1145 ASSERT(layers->value != 0);
1148 * Peek into the MDL to get a handle to the IP header and if required
1149 * the TCP/UDP header as well. We check if the required headers are in one
1150 * contiguous MDL, and if not, we copy them over to one MDL.
1152 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1153 ASSERT(curNb->Next == NULL);
1154 packetLen = NET_BUFFER_DATA_LENGTH(curNb);
1155 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1156 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1158 ovsActionStats.noResource++;
1159 return NDIS_STATUS_RESOURCES;
1161 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1162 mdlLen -= curMdlOffset;
1163 ASSERT((INT)mdlLen >= 0);
1165 if (layers->isTcp || layers->isUdp) {
1166 hdrSize = layers->l4Offset +
1167 layers->isTcp ? sizeof (*tcpHdr) : sizeof (*udpHdr);
1169 hdrSize = layers->l3Offset + sizeof (*ipHdr);
1172 /* Count of number of bytes of valid data there are in the first MDL. */
1173 mdlLen = MIN(packetLen, mdlLen);
1174 if (mdlLen < hdrSize) {
1175 PNET_BUFFER_LIST newNbl;
1176 newNbl = OvsPartialCopyNBL(ovsFwdCtx->switchContext, ovsFwdCtx->curNbl,
1177 hdrSize, 0, TRUE /*copy NBL info*/);
1179 ovsActionStats.noCopiedNbl++;
1180 return NDIS_STATUS_RESOURCES;
1182 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1183 L"Complete after partial copy.");
1185 status = OvsInitForwardingCtx(ovsFwdCtx, ovsFwdCtx->switchContext,
1186 newNbl, ovsFwdCtx->srcVportNo, 0,
1187 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(newNbl),
1188 NULL, &ovsFwdCtx->layers, FALSE);
1189 if (status != NDIS_STATUS_SUCCESS) {
1190 OvsCompleteNBLForwardingCtx(ovsFwdCtx,
1191 L"OVS-Dropped due to resources");
1192 return NDIS_STATUS_RESOURCES;
1195 curNb = NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl);
1196 ASSERT(curNb->Next == NULL);
1197 curMdl = NET_BUFFER_CURRENT_MDL(curNb);
1198 NdisQueryMdl(curMdl, &bufferStart, &mdlLen, LowPagePriority);
1200 ovsActionStats.noResource++;
1201 return NDIS_STATUS_RESOURCES;
1203 curMdlOffset = NET_BUFFER_CURRENT_MDL_OFFSET(curNb);
1204 mdlLen -= curMdlOffset;
1205 ASSERT(mdlLen >= hdrSize);
1208 ipHdr = (IPHdr *)(bufferStart + curMdlOffset + layers->l3Offset);
1210 if (layers->isTcp) {
1211 tcpHdr = (TCPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1212 } else if (layers->isUdp) {
1213 udpHdr = (UDPHdr *)(bufferStart + curMdlOffset + layers->l4Offset);
1217 * Adjust the IP header inline as dictated by the action, nad also update
1218 * the IP and the TCP checksum for the data modified.
1220 * In the future, this could be optimized to make one call to
1221 * ChecksumUpdate32(). Ignoring this for now, since for the most common
1222 * case, we only update the TTL.
1224 if (ipHdr->saddr != ipAttr->ipv4_src) {
1226 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->saddr,
1228 } else if (udpHdr && udpHdr->check) {
1229 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->saddr,
1233 if (ipHdr->check != 0) {
1234 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->saddr,
1237 ipHdr->saddr = ipAttr->ipv4_src;
1239 if (ipHdr->daddr != ipAttr->ipv4_dst) {
1241 tcpHdr->check = ChecksumUpdate32(tcpHdr->check, ipHdr->daddr,
1243 } else if (udpHdr && udpHdr->check) {
1244 udpHdr->check = ChecksumUpdate32(udpHdr->check, ipHdr->daddr,
1248 if (ipHdr->check != 0) {
1249 ipHdr->check = ChecksumUpdate32(ipHdr->check, ipHdr->daddr,
1252 ipHdr->daddr = ipAttr->ipv4_dst;
1254 if (ipHdr->protocol != ipAttr->ipv4_proto) {
1255 UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
1256 UINT16 newProto = (ipAttr->ipv4_proto << 16) & 0xff00;
1258 tcpHdr->check = ChecksumUpdate16(tcpHdr->check, oldProto, newProto);
1259 } else if (udpHdr && udpHdr->check) {
1260 udpHdr->check = ChecksumUpdate16(udpHdr->check, oldProto, newProto);
1263 if (ipHdr->check != 0) {
1264 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
1266 ipHdr->protocol = ipAttr->ipv4_proto;
1268 if (ipHdr->ttl != ipAttr->ipv4_ttl) {
1269 UINT16 oldTtl = (ipHdr->ttl) & 0xff;
1270 UINT16 newTtl = (ipAttr->ipv4_ttl) & 0xff;
1271 if (ipHdr->check != 0) {
1272 ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
1274 ipHdr->ttl = ipAttr->ipv4_ttl;
1277 return NDIS_STATUS_SUCCESS;
1281 * --------------------------------------------------------------------------
1282 * OvsExecuteSetAction --
1283 * Executes a set() action: rewrites the packet headers in place or, for a tunnel key, stores the key into 'ovsFwdCtx'.
1284 * --------------------------------------------------------------------------
/*
 * Applies one set() action, described by attribute 'a', to the packet
 * tracked by 'ovsFwdCtx'.  The attribute type selects what is done:
 *
 *   OVS_KEY_ATTR_ETHERNET  rewrite the MAC addresses via OvsUpdateEthHeader.
 *   OVS_KEY_ATTR_IPV4      rewrite the IPv4 header via OvsUpdateIPv4Header.
 *   OVS_KEY_ATTR_TUNNEL    decode the tunnel key and store it in
 *                          'ovsFwdCtx->tunKey'; the packet is not touched.
 *   OVS_KEY_ATTR_SKB_MARK  accepted and ignored.
 *   remaining listed types logged as unhandled.
 *
 * 'status' starts out as NDIS_STATUS_SUCCESS and is overwritten by the
 * helper's result in the first three cases.
 */
1286 static __inline NDIS_STATUS
1287 OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
1292 enum ovs_key_attr type = NlAttrType(a);
1293 NDIS_STATUS status = NDIS_STATUS_SUCCESS;
/* The attribute payload is read as a struct ovs_key_ethernet. */
1296 case OVS_KEY_ATTR_ETHERNET:
1297 status = OvsUpdateEthHeader(ovsFwdCtx,
1298 NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
/* The attribute payload is read as a struct ovs_key_ipv4. */
1301 case OVS_KEY_ATTR_IPV4:
1302 status = OvsUpdateIPv4Header(ovsFwdCtx,
1303 NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
/*
 * Build the key in a local first, then copy it into the context in one go.
 * The flow hash is taken from '*hash' when the caller supplied one and is
 * otherwise computed from 'key'; either way it is truncated to 16 bits.
 * A conversion failure is only caught by the ASSERT.
 */
1306 case OVS_KEY_ATTR_TUNNEL:
1308 OvsIPv4TunnelKey tunKey;
1310 status = OvsTunnelAttrToIPv4TunnelKey((PNL_ATTR)a, &tunKey);
1311 ASSERT(status == NDIS_STATUS_SUCCESS);
1312 tunKey.flow_hash = (uint16)(hash ? *hash : OvsHashFlow(key));
1313 RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
1317 case OVS_KEY_ATTR_SKB_MARK:
1318 /* XXX: Not relevant to Hyper-V. Return OK */
/* Key attribute types that share the "unhandled" log message below. */
1320 case OVS_KEY_ATTR_UNSPEC:
1321 case OVS_KEY_ATTR_ENCAP:
1322 case OVS_KEY_ATTR_ETHERTYPE:
1323 case OVS_KEY_ATTR_IN_PORT:
1324 case OVS_KEY_ATTR_VLAN:
1325 case OVS_KEY_ATTR_ICMP:
1326 case OVS_KEY_ATTR_ICMPV6:
1327 case OVS_KEY_ATTR_ARP:
1328 case OVS_KEY_ATTR_ND:
1329 case __OVS_KEY_ATTR_MAX:
1331 OVS_LOG_INFO("Unhandled attribute %#x", type);
1338 * --------------------------------------------------------------------------
1339 * OvsActionsExecute --
1340 * Interpret and execute the specified 'actions' on the specified packet
1341 * 'curNbl'. The expectation is that if the packet needs to be dropped
1342 * (completed) for some reason, it is added to 'completionList' so that the
1343 * caller can complete the packet. If 'completionList' is NULL, the NBL is
1344 * assumed to be generated by OVS and freed up. Otherwise, the function
1345 * consumes the NBL by generating a NDIS send indication for the packet.
1347 * There are one or more of "clone" NBLs that may get generated while
1348 * executing the actions. Upon any failures, the "cloned" NBLs are freed up,
1349 * and the caller does not have to worry about them.
1351 * Success or failure is returned based on whether the specified actions
1352 * were executed successfully on the packet or not.
1353 * --------------------------------------------------------------------------
/*
 * Entry point of the action pipeline.  A local forwarding context is set up
 * around 'curNbl', then every attribute in 'actions' is dispatched on its
 * type.  Output actions only accumulate destinations in the context; actions
 * that modify the packet first push out whatever has been accumulated so
 * that earlier outputs see the unmodified packet.  After the list has been
 * walked, remaining destinations are sent, and if the context still owns an
 * NBL it is completed with the recorded drop reason.
 */
1356 OvsActionsExecute(POVS_SWITCH_CONTEXT switchContext,
1357 OvsCompletionList *completionList,
1358 PNET_BUFFER_LIST curNbl,
1363 OVS_PACKET_HDR_INFO *layers,
1364 const PNL_ATTR actions,
1370 OvsForwardingContext ovsFwdCtx;
1371 PCWSTR dropReason = L"";
1373 PNDIS_SWITCH_FORWARDING_DETAIL_NET_BUFFER_LIST_INFO fwdDetail =
1374 NET_BUFFER_LIST_SWITCH_FORWARDING_DETAIL(curNbl);
1376 /* XXX: ASSERT that the flow table lock is held. */
1377 status = OvsInitForwardingCtx(&ovsFwdCtx, switchContext, curNbl, portNo,
1378 sendFlags, fwdDetail, completionList,
1380 if (status != NDIS_STATUS_SUCCESS) {
1381 dropReason = L"OVS-initing destination port list failed";
/* An empty action list is counted and tagged with its own drop reason. */
1385 if (actionsLen == 0) {
1386 dropReason = L"OVS-Dropped due to Flow action";
1387 ovsActionStats.zeroActionLen++;
/* Walk the action list one attribute at a time. */
1391 NL_ATTR_FOR_EACH_UNSAFE (a, rem, actions, actionsLen) {
1392 switch(NlAttrType(a)) {
/* Output: the attribute payload is the destination port number. */
1393 case OVS_ACTION_ATTR_OUTPUT:
1394 dstPortID = NlAttrGetU32(a);
1395 status = OvsAddPorts(&ovsFwdCtx, key, dstPortID,
1397 if (status != NDIS_STATUS_SUCCESS) {
1398 dropReason = L"OVS-adding destination port failed";
/*
 * Push VLAN: the tag is not written into the packet bytes; it is stored in
 * the NBL's Ieee8021QNetBufferListInfo out-of-band slot.
 */
1403 case OVS_ACTION_ATTR_PUSH_VLAN:
1405 struct ovs_action_push_vlan *vlan;
1407 PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag;
/* Pending destinations or tunnel NICs are handled before the packet changes. */
1409 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1410 || ovsFwdCtx.tunnelRxNic != NULL) {
1411 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1412 if (status != NDIS_STATUS_SUCCESS) {
1413 dropReason = L"OVS-adding destination failed";
1418 vlanTagValue = NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1419 Ieee8021QNetBufferListInfo);
1420 if (vlanTagValue != NULL) {
1422 * XXX: We don't support double VLAN tag offload. In such cases,
1423 * we need to insert the existing one into the packet buffer,
1424 * and add the new one as offload. This will take care of
1425 * guest tag-in-tag case as well as OVS rules that specify
/*
 * View the local pointer-sized value as the 802.1Q info union, fill in the
 * VLAN id (low 12 bits of the host-order TCI) and the priority (top 3
 * bits), then store the value back into the NBL.
 */
1430 vlanTag = (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
1431 vlan = (struct ovs_action_push_vlan *)NlAttrGet((const PNL_ATTR)a);
1432 vlanTag->TagHeader.VlanId = ntohs(vlan->vlan_tci) & 0xfff;
1433 vlanTag->TagHeader.UserPriority = ntohs(vlan->vlan_tci) >> 13;
1435 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1436 Ieee8021QNetBufferListInfo) = vlanTagValue;
/* Pop VLAN: same flush-before-modify step as push. */
1441 case OVS_ACTION_ATTR_POP_VLAN:
1443 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1444 || ovsFwdCtx.tunnelRxNic != NULL) {
1445 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1446 if (status != NDIS_STATUS_SUCCESS) {
1447 dropReason = L"OVS-adding destination failed";
/* A tag held out-of-band is removed simply by clearing the info slot. */
1452 if (NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1453 Ieee8021QNetBufferListInfo) != 0) {
1454 NET_BUFFER_LIST_INFO(ovsFwdCtx.curNbl,
1455 Ieee8021QNetBufferListInfo) = 0;
1458 * The VLAN tag is inserted into the packet buffer. Pop the tag
1459 * by packet buffer modification.
1461 status = OvsPopVlanInPktBuf(&ovsFwdCtx);
1462 if (status != NDIS_STATUS_SUCCESS) {
1463 dropReason = L"OVS-pop vlan action failed";
/*
 * Userspace: build a queue element from the current packet plus the
 * USERDATA attribute and queue it on the default packet queue.
 */
1470 case OVS_ACTION_ATTR_USERSPACE:
1472 PNL_ATTR userdataAttr;
1474 POVS_PACKET_QUEUE_ELEM elem;
1475 UINT32 queueId = OVS_DEFAULT_PACKET_QUEUE;
1476 BOOLEAN isRecv = FALSE;
1478 POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(switchContext,
/* NOTE(review): no NULL check on 'vport' is visible before this dereference -- confirm. */
1482 if (vport->isExternal ||
1483 OvsIsTunnelVportType(vport->ovsType)) {
1488 queueAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_PID);
1489 userdataAttr = NlAttrFindNested(a, OVS_USERSPACE_ATTR_USERDATA);
/*
 * NOTE(review): 'userdataAttr' is dereferenced for nlaLen below without a
 * visible NULL check; presumably NlAttrFindNested returns NULL when the
 * attribute is absent -- confirm and guard if so.
 */
1491 elem = OvsCreateQueuePacket(queueId, (PVOID)userdataAttr,
1492 userdataAttr->nlaLen,
1493 OVS_PACKET_CMD_ACTION,
1494 portNo, (OvsIPv4TunnelKey *)&key->tunKey,
1496 NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx.curNbl),
/* Queue the element through a one-entry local list. */
1500 LIST_ENTRY missedPackets;
1501 InitializeListHead(&missedPackets);
1502 InsertTailList(&missedPackets, &elem->link);
1503 OvsQueuePackets(OVS_DEFAULT_PACKET_QUEUE, &missedPackets, 1);
1504 dropReason = L"OVS-Completed since packet was copied to "
1507 dropReason = L"OVS-Dropped due to failure to queue to "
/* Set: flush pending outputs, then apply the nested key attribute. */
1513 case OVS_ACTION_ATTR_SET:
1515 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1516 || ovsFwdCtx.tunnelRxNic != NULL) {
1517 status = OvsOutputBeforeSetAction(&ovsFwdCtx);
1518 if (status != NDIS_STATUS_SUCCESS) {
1519 dropReason = L"OVS-adding destination failed";
1524 status = OvsExecuteSetAction(&ovsFwdCtx, key, hash,
1525 (const PNL_ATTR)NlAttrGet
1526 ((const PNL_ATTR)a));
1527 if (status != NDIS_STATUS_SUCCESS) {
1528 dropReason = L"OVS-set action failed";
/* Action types with no dedicated handling visible here. */
1533 case OVS_ACTION_ATTR_SAMPLE:
1535 case OVS_ACTION_ATTR_UNSPEC:
1536 case __OVS_ACTION_ATTR_MAX:
/*
 * End of the list: send to whatever destinations are still pending.  The
 * output call is expected to consume the NBL, hence the ASSERT on curNbl.
 */
1542 if (ovsFwdCtx.destPortsSizeOut > 0 || ovsFwdCtx.tunnelTxNic != NULL
1543 || ovsFwdCtx.tunnelRxNic != NULL) {
1544 status = OvsOutputForwardingCtx(&ovsFwdCtx);
1545 ASSERT(ovsFwdCtx.curNbl == NULL);
/* Nothing may be left pending in the context at this point. */
1548 ASSERT(ovsFwdCtx.destPortsSizeOut == 0);
1549 ASSERT(ovsFwdCtx.tunnelRxNic == NULL);
1550 ASSERT(ovsFwdCtx.tunnelTxNic == NULL);
1554 * If curNbl != NULL, it implies the NBL has not been not freed up so far.
1556 if (ovsFwdCtx.curNbl) {
1557 OvsCompleteNBLForwardingCtx(&ovsFwdCtx, dropReason);