dae1dca852802e6dfee09b95fc46c16283ccf207
[cascardo/ovs.git] / datapath-windows / ovsext / Flow.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Util.h"
20 #include "Jhash.h"
21 #include "Flow.h"
22 #include "PacketParser.h"
23
24 #ifdef OVS_DBG_MOD
25 #undef OVS_DBG_MOD
26 #endif
27 #define OVS_DBG_MOD OVS_DBG_FLOW
28 #include "Debug.h"
29
30 #pragma warning( push )
31 #pragma warning( disable:4127 )
32
/*
 * Globals defined elsewhere in the driver.
 *
 * gOvsCtrlLock is the spin lock the ioctl handlers in this file hold while
 * they use gOvsSwitchContext.  ovsTimeIncrementPerTick is multiplied with the
 * kernel tick count in OvsFlowUsed() to produce a flow's 'used' time stamp.
 */
extern PNDIS_SPIN_LOCK gOvsCtrlLock;
extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
extern UINT64 ovsTimeIncrementPerTick;

/* Forward declarations of the file-local helpers defined further below. */
static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags,
                               UINT32 getActionsLen, OvsFlowInfo *info);
static NTSTATUS HandleFlowPut(OvsFlowPut *put,
                                  OVS_DATAPATH *datapath,
                                  struct OvsFlowStats *stats);
static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put,
                               UINT64 hash);
static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow);
static VOID DeleteAllFlows(OVS_DATAPATH *datapath);
static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow);
static VOID FreeFlow(OvsFlow *flow);
static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB);
49
/* Number of hash buckets (list heads) in a datapath's flow table. */
#define OVS_FLOW_TABLE_SIZE 2048
/*
 * Bit mask applied by HASH_BUCKET().  Masking only behaves like a modulo
 * because the table size is a power of two; keep it that way if it changes.
 */
#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1)
/* Maps a flow hash to the index of its bucket in the flow table. */
#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK)
53
54 /*
55  *----------------------------------------------------------------------------
56  * OvsDeleteFlowTable --
57  * Results:
58  *    NDIS_STATUS_SUCCESS always.
59  *----------------------------------------------------------------------------
60  */
61 NDIS_STATUS
62 OvsDeleteFlowTable(OVS_DATAPATH *datapath)
63 {
64     if (datapath == NULL || datapath->flowTable == NULL) {
65         return NDIS_STATUS_SUCCESS;
66     }
67
68     DeleteAllFlows(datapath);
69     OvsFreeMemory(datapath->flowTable);
70     datapath->flowTable = NULL;
71     NdisFreeRWLock(datapath->lock);
72
73     return NDIS_STATUS_SUCCESS;
74 }
75
76 /*
77  *----------------------------------------------------------------------------
78  * OvsAllocateFlowTable --
79  * Results:
80  *    NDIS_STATUS_SUCCESS on success.
81  *    NDIS_STATUS_RESOURCES if memory couldn't be allocated
82  *----------------------------------------------------------------------------
83  */
84 NDIS_STATUS
85 OvsAllocateFlowTable(OVS_DATAPATH *datapath,
86                      POVS_SWITCH_CONTEXT switchContext)
87 {
88     PLIST_ENTRY bucket;
89     int i;
90
91     datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE *
92                                             sizeof (LIST_ENTRY));
93     if (!datapath->flowTable) {
94         return NDIS_STATUS_RESOURCES;
95     }
96     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
97         bucket = &(datapath->flowTable[i]);
98         InitializeListHead(bucket);
99     }
100     datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
101
102     return NDIS_STATUS_SUCCESS;
103 }
104
105
106 /*
107  *----------------------------------------------------------------------------
108  *  GetStartAddrNBL --
109  *    Get the virtual address of the frame.
110  *
111  *  Results:
112  *    Virtual address of the frame.
113  *----------------------------------------------------------------------------
114  */
115 static __inline VOID *
116 GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
117 {
118     PMDL curMdl;
119     PUINT8 curBuffer;
120     PEthHdr curHeader;
121
122     ASSERT(_pNB);
123
124     // Ethernet Header is a guaranteed safe access.
125     curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
126     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
127     if (!curBuffer) {
128         return NULL;
129     }
130
131     curHeader = (PEthHdr)
132     (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
133
134     return (VOID *) curHeader;
135 }
136
137 VOID
138 OvsFlowUsed(OvsFlow *flow,
139             const NET_BUFFER_LIST *packet,
140             const POVS_PACKET_HDR_INFO layers)
141 {
142     LARGE_INTEGER tickCount;
143
144     KeQueryTickCount(&tickCount);
145     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
146     flow->packetCount++;
147     flow->byteCount += OvsPacketLenNBL(packet);
148     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
149 }
150
151
152 VOID
153 DeleteAllFlows(OVS_DATAPATH *datapath)
154 {
155     INT i;
156     PLIST_ENTRY bucket;
157
158     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
159         PLIST_ENTRY next;
160         bucket = &(datapath->flowTable[i]);
161         while (!IsListEmpty(bucket)) {
162             OvsFlow *flow;
163             next = bucket->Flink;
164             flow = CONTAINING_RECORD(next, OvsFlow, ListEntry);
165             RemoveFlow(datapath, &flow);
166         }
167     }
168 }
169
170 /*
171  *----------------------------------------------------------------------------
172  * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and
173  * 'ofp_in_port'.
174  *
175  * Initializes 'packet' header pointers as follows:
176  *
177  *    - packet->l2 to the start of the Ethernet header.
178  *
179  *    - packet->l3 to just past the Ethernet header, or just past the
180  *      vlan_header if one is present, to the first byte of the payload of the
181  *      Ethernet frame.
182  *
183  *    - packet->l4 to just past the IPv4 header, if one is present and has a
184  *      correct length, and otherwise NULL.
185  *
186  *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
187  *      present and has a correct length, and otherwise NULL.
188  *
189  * Returns NDIS_STATUS_SUCCESS normally.  Fails only if packet data cannot be accessed
190  * (e.g. if Pkt_CopyBytesOut() returns an error).
191  *----------------------------------------------------------------------------
192  */
193 NDIS_STATUS
194 OvsExtractFlow(const NET_BUFFER_LIST *packet,
195                UINT32 inPort,
196                OvsFlowKey *flow,
197                POVS_PACKET_HDR_INFO layers,
198                OvsIPv4TunnelKey *tunKey)
199 {
200     struct Eth_Header *eth;
201     UINT8 offset = 0;
202     PVOID vlanTagValue;
203
204     layers->value = 0;
205
206     if (tunKey) {
207         ASSERT(tunKey->dst != 0);
208         RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey);
209         flow->l2.offset = 0;
210     } else {
211         flow->tunKey.dst = 0;
212         flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE;
213     }
214
215     flow->l2.inPort = inPort;
216
217     if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
218         flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
219         return NDIS_STATUS_SUCCESS;
220     }
221
222     /* Link layer. */
223     eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
224     memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
225     memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
226
227     /*
228      * vlan_tci.
229      */
230     vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
231     if (vlanTagValue) {
232         PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
233             (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
234         flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI |
235                                  (vlanTag->TagHeader.UserPriority << 13));
236     } else {
237         if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) {
238             Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)&eth->dix.typeNBO;
239             flow->l2.vlanTci = ((UINT16)tag->priority << 13) |
240                                OVSWIN_VLAN_CFI |
241                                ((UINT16)tag->vidHi << 8)  | tag->vidLo;
242             offset = sizeof (Eth_802_1pq_Tag);
243         } else {
244             flow->l2.vlanTci = 0;
245         }
246         /*
247         * XXX
248         * Please note after this point, src mac and dst mac should
249         * not be accessed through eth
250         */
251         eth = (Eth_Header *)((UINT8 *)eth + offset);
252     }
253
254     /*
255      * dl_type.
256      *
257      * XXX assume that at least the first
258      * 12 bytes of received packets are mapped.  This code has the stronger
259      * assumption that at least the first 22 bytes of 'packet' is mapped (if my
260      * arithmetic is right).
261      */
262     if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
263         flow->l2.dlType = eth->dix.typeNBO;
264         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
265     } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
266               eth->e802_3.llc.dsap == 0xaa &&
267               eth->e802_3.llc.ssap == 0xaa &&
268               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
269               eth->e802_3.snap.snapOrg[0] == 0x00 &&
270               eth->e802_3.snap.snapOrg[1] == 0x00 &&
271               eth->e802_3.snap.snapOrg[2] == 0x00) {
272         flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO;
273         layers->l3Offset = ETH_HEADER_LEN_802_3 + offset;
274     } else {
275         flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE);
276         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
277     }
278
279     flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset;
280     /* Network layer. */
281     if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) {
282         struct IPHdr ip_storage;
283         const struct IPHdr *nh;
284         IpKey *ipKey = &flow->ipKey;
285
286         flow->l2.keyLen += OVS_IP_KEY_SIZE;
287         layers->isIPv4 = 1;
288         nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
289         if (nh) {
290             layers->l4Offset = layers->l3Offset + nh->ihl * 4;
291
292             ipKey->nwSrc = nh->saddr;
293             ipKey->nwDst = nh->daddr;
294             ipKey->nwProto = nh->protocol;
295
296             ipKey->nwTos = nh->tos;
297             if (nh->frag_off & htons(IP_MF | IP_OFFSET)) {
298                 ipKey->nwFrag = OVSWIN_NW_FRAG_ANY;
299                 if (nh->frag_off & htons(IP_OFFSET)) {
300                     ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER;
301                 }
302             } else {
303                 ipKey->nwFrag = 0;
304             }
305
306             ipKey->nwTtl = nh->ttl;
307             ipKey->l4.tpSrc = 0;
308             ipKey->l4.tpDst = 0;
309
310             if (!(nh->frag_off & htons(IP_OFFSET))) {
311                 if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
312                     OvsParseTcp(packet, &ipKey->l4, layers);
313                 } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
314                     OvsParseUdp(packet, &ipKey->l4, layers);
315                 } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
316                     ICMPHdr icmpStorage;
317                     const ICMPHdr *icmp;
318
319                     icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
320                     if (icmp) {
321                         ipKey->l4.tpSrc = htons(icmp->type);
322                         ipKey->l4.tpDst = htons(icmp->code);
323                         layers->l7Offset = layers->l4Offset + sizeof *icmp;
324                     }
325                 }
326             }
327         } else {
328             ((UINT64 *)ipKey)[0] = 0;
329             ((UINT64 *)ipKey)[1] = 0;
330         }
331     } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
332         NDIS_STATUS status;
333         flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
334         status = OvsParseIPv6(packet, flow, layers);
335         if (status != NDIS_STATUS_SUCCESS) {
336             memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
337             return status;
338         }
339         layers->isIPv6 = 1;
340         flow->ipv6Key.l4.tpSrc = 0;
341         flow->ipv6Key.l4.tpDst = 0;
342         flow->ipv6Key.pad = 0;
343
344         if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
345             OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
346         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
347             OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
348         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
349             OvsParseIcmpV6(packet, flow, layers);
350             flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
351         }
352     } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
353         EtherArp arpStorage;
354         const EtherArp *arp;
355         ArpKey *arpKey = &flow->arpKey;
356         ((UINT64 *)arpKey)[0] = 0;
357         ((UINT64 *)arpKey)[1] = 0;
358         ((UINT64 *)arpKey)[2] = 0;
359         flow->l2.keyLen += OVS_ARP_KEY_SIZE;
360         arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
361         if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
362             arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
363             arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
364             arp->ea_hdr.ar_pln == 4) {
365             /* We only match on the lower 8 bits of the opcode. */
366             if (ntohs(arp->ea_hdr.ar_op) <= 0xff) {
367                 arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op);
368             }
369             if (arpKey->nwProto == ARPOP_REQUEST
370                 || arpKey->nwProto == ARPOP_REPLY) {
371                 memcpy(&arpKey->nwSrc, arp->arp_spa, 4);
372                 memcpy(&arpKey->nwDst, arp->arp_tpa, 4);
373                 memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
374                 memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
375             }
376         }
377     }
378
379     return NDIS_STATUS_SUCCESS;
380 }
381
382 __inline BOOLEAN
383 FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size)
384 {
385     UINT32 i;
386     ASSERT((size & 0x7) == 0);
387     ASSERT(((UINT64)src & 0x7) == 0);
388     ASSERT(((UINT64)dst & 0x7) == 0);
389     for (i = 0; i < (size >> 3); i++) {
390         if (src[i] != dst[i]) {
391             return FALSE;
392         }
393     }
394     return TRUE;
395 }
396
397
398 /*
399  * ----------------------------------------------------------------------------
400  * AddFlow --
401  *    Add a flow to flow table.
402  *
403  * Results:
404  *   NDIS_STATUS_SUCCESS if no same flow in the flow table.
405  * ----------------------------------------------------------------------------
406  */
407 NTSTATUS
408 AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
409 {
410     PLIST_ENTRY head;
411
412     if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) {
413         return STATUS_INVALID_HANDLE;
414     }
415
416     head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]);
417     /*
418      * We need fence here to make sure flow's nextPtr is updated before
419      * head->nextPtr is updated.
420      */
421     KeMemoryBarrier();
422
423     //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql);
424     InsertTailList(head, &flow->ListEntry);
425     //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql);
426
427     datapath->nFlows++;
428
429     return STATUS_SUCCESS;
430 }
431
432
433 /* ----------------------------------------------------------------------------
434  * RemoveFlow --
435  *   Remove a flow from flow table, and added to wait list
436  * ----------------------------------------------------------------------------
437  */
438 VOID
439 RemoveFlow(OVS_DATAPATH *datapath,
440            OvsFlow **flow)
441 {
442     OvsFlow *f = *flow;
443     *flow = NULL;
444     UNREFERENCED_PARAMETER(datapath);
445
446     ASSERT(datapath->nFlows);
447     datapath->nFlows--;
448     // Remove the flow  from queue
449     RemoveEntryList(&f->ListEntry);
450     FreeFlow(f);
451 }
452
453
454 /*
455  * ----------------------------------------------------------------------------
456  * OvsLookupFlow --
457  *
458  *    Find flow from flow table based on flow key.
459  *    Caller should either hold portset handle or should
460  *    have a flowRef in datapath or Acquired datapath.
461  *
462  * Results:
463  *    Flow pointer if lookup successful.
464  *    NULL if not exists.
465  * ----------------------------------------------------------------------------
466  */
467 OvsFlow *
468 OvsLookupFlow(OVS_DATAPATH *datapath,
469               const OvsFlowKey *key,
470               UINT64 *hash,
471               BOOLEAN hashValid)
472 {
473     PLIST_ENTRY link, head;
474     UINT16 offset = key->l2.offset;
475     UINT16 size = key->l2.keyLen;
476     UINT8 *start;
477
478     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
479     ASSERT(!key->tunKey.dst || offset == 0);
480
481     start = (UINT8 *)key + offset;
482
483     if (!hashValid) {
484         *hash = OvsJhashBytes(start, size, 0);
485     }
486
487     head = &datapath->flowTable[HASH_BUCKET(*hash)];
488     link  = head->Flink;
489     while (link != head) {
490         OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry);
491
492         if (flow->hash == *hash &&
493             flow->key.l2.val == key->l2.val &&
494             FlowEqual((UINT64 *)((uint8 *)&flow->key + offset),
495                          (UINT64 *)start, size)) {
496             return flow;
497         }
498         link = link->Flink;
499     }
500     return NULL;
501 }
502
503
504 /*
505  * ----------------------------------------------------------------------------
506  * OvsHashFlow --
507  *    Calculate the hash for the given flow key.
508  * ----------------------------------------------------------------------------
509  */
510 UINT64
511 OvsHashFlow(const OvsFlowKey *key)
512 {
513     UINT16 offset = key->l2.offset;
514     UINT16 size = key->l2.keyLen;
515     UINT8 *start;
516
517     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
518     ASSERT(!key->tunKey.dst || offset == 0);
519     start = (UINT8 *)key + offset;
520     return OvsJhashBytes(start, size, 0);
521 }
522
523
524 /*
525  * ----------------------------------------------------------------------------
526  * FreeFlow --
527  *    Free a flow and its actions.
528  * ----------------------------------------------------------------------------
529  */
530 VOID
531 FreeFlow(OvsFlow *flow)
532 {
533     ASSERT(flow);
534     OvsFreeMemory(flow);
535 }
536
537 NTSTATUS
538 OvsDoDumpFlows(OvsFlowDumpInput *dumpInput,
539                OvsFlowDumpOutput *dumpOutput,
540                UINT32 *replyLen)
541 {
542     UINT32 dpNo;
543     OVS_DATAPATH *datapath = NULL;
544     OvsFlow *flow;
545     PLIST_ENTRY node, head;
546     UINT32 column = 0;
547     UINT32 rowIndex, columnIndex;
548     LOCK_STATE_EX dpLockState;
549     NTSTATUS status = STATUS_SUCCESS;
550     BOOLEAN findNextNonEmpty = FALSE;
551
552     dpNo = dumpInput->dpNo;
553     NdisAcquireSpinLock(gOvsCtrlLock);
554     if (gOvsSwitchContext == NULL ||
555         gOvsSwitchContext->dpNo != dpNo) {
556         status = STATUS_INVALID_PARAMETER;
557         goto unlock;
558     }
559
560     rowIndex = dumpInput->position[0];
561     if (rowIndex >= OVS_FLOW_TABLE_SIZE) {
562         dumpOutput->n = 0;
563         *replyLen = sizeof(*dumpOutput);
564         goto unlock;
565     }
566
567     columnIndex = dumpInput->position[1];
568
569     datapath = &gOvsSwitchContext->datapath;
570     ASSERT(datapath);
571     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
572
573     head = &datapath->flowTable[rowIndex];
574     node = head->Flink;
575
576     while (column < columnIndex) {
577         if (node == head) {
578             break;
579         }
580         node = node->Flink;
581         column++;
582     }
583
584     if (node == head) {
585         findNextNonEmpty = TRUE;
586         columnIndex = 0;
587     }
588
589     if (findNextNonEmpty) {
590         while (head == node) {
591             if (++rowIndex >= OVS_FLOW_TABLE_SIZE) {
592                 dumpOutput->n = 0;
593                 goto dp_unlock;
594             }
595             head = &datapath->flowTable[rowIndex];
596             node = head->Flink;
597         }
598     }
599
600     ASSERT(node != head);
601     ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE);
602
603     flow = CONTAINING_RECORD(node, OvsFlow, ListEntry);
604     status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen,
605                                                             &dumpOutput->flow);
606
607     if (status == STATUS_BUFFER_TOO_SMALL) {
608         dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen;
609         *replyLen = sizeof(*dumpOutput);
610     } else {
611         dumpOutput->n = 1; //one flow reported.
612         *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen;
613     }
614
615     dumpOutput->position[0] = rowIndex;
616     dumpOutput->position[1] = ++columnIndex;
617
618 dp_unlock:
619     OvsReleaseDatapath(datapath, &dpLockState);
620
621 unlock:
622     NdisReleaseSpinLock(gOvsCtrlLock);
623     return status;
624 }
625
626 NTSTATUS
627 OvsDumpFlowIoctl(PVOID inputBuffer,
628                  UINT32 inputLength,
629                  PVOID outputBuffer,
630                  UINT32 outputLength,
631                  UINT32 *replyLen)
632 {
633     OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer;
634     OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer;
635
636     if (inputBuffer == NULL || outputBuffer == NULL) {
637         return STATUS_INVALID_PARAMETER;
638     }
639
640     if ((inputLength != sizeof(OvsFlowDumpInput))
641         || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) {
642         return STATUS_INFO_LENGTH_MISMATCH;
643     }
644
645     return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen);
646 }
647
648 static NTSTATUS
649 ReportFlowInfo(OvsFlow *flow,
650                UINT32 getFlags,
651                UINT32 getActionsLen,
652                OvsFlowInfo *info)
653 {
654     NTSTATUS status = STATUS_SUCCESS;
655
656     if (getFlags & FLOW_GET_KEY) {
657         // always copy the tunnel key part
658         RtlCopyMemory(&info->key, &flow->key,
659                             flow->key.l2.keyLen + flow->key.l2.offset);
660     }
661
662     if (getFlags & FLOW_GET_STATS) {
663         OvsFlowStats *stats = &info->stats;
664         stats->packetCount = flow->packetCount;
665         stats->byteCount = flow->byteCount;
666         stats->used = (UINT32)flow->used;
667         stats->tcpFlags = flow->tcpFlags;
668     }
669
670     if (getFlags & FLOW_GET_ACTIONS) {
671         if (flow->actionsLen == 0) {
672             info->actionsLen = 0;
673         } else if (flow->actionsLen > getActionsLen) {
674             info->actionsLen = 0;
675             status = STATUS_BUFFER_TOO_SMALL;
676         } else {
677             RtlCopyMemory(info->actions, flow->actions, flow->actionsLen);
678             info->actionsLen = flow->actionsLen;
679         }
680     }
681
682     return status;
683 }
684
685 NTSTATUS
686 OvsPutFlowIoctl(PVOID inputBuffer,
687                 UINT32 inputLength,
688                 PVOID outputBuffer,
689                 UINT32 outputLength,
690                 UINT32 *replyLen)
691 {
692     NTSTATUS status = STATUS_SUCCESS;
693     OVS_DATAPATH *datapath = NULL;
694     struct OvsFlowStats stats;
695     ULONG actionsLen;
696     OvsFlowPut *put;
697     UINT32 dpNo;
698     LOCK_STATE_EX dpLockState;
699
700     if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) {
701         return STATUS_INFO_LENGTH_MISMATCH;
702     }
703
704     if ((outputLength != sizeof(stats)) || (outputBuffer == NULL)) {
705         return STATUS_INFO_LENGTH_MISMATCH;
706     }
707
708     put = (OvsFlowPut *)inputBuffer;
709     if (put->actionsLen > 0) {
710         actionsLen = put->actionsLen;
711     } else {
712         actionsLen = 0;
713     }
714     if (inputLength != actionsLen + sizeof(*put)) {
715         return STATUS_INFO_LENGTH_MISMATCH;
716     }
717
718     dpNo = put->dpNo;
719     NdisAcquireSpinLock(gOvsCtrlLock);
720     if (gOvsSwitchContext == NULL ||
721         gOvsSwitchContext->dpNo != dpNo) {
722         status = STATUS_INVALID_PARAMETER;
723         goto unlock;
724     }
725
726     datapath = &gOvsSwitchContext->datapath;
727     ASSERT(datapath);
728     RtlZeroMemory(&stats, sizeof(stats));
729     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
730     status = HandleFlowPut(put, datapath, &stats);
731     OvsReleaseDatapath(datapath, &dpLockState);
732
733     if (status == STATUS_SUCCESS) {
734         // Copy stats to User mode app
735         NdisMoveMemory(outputBuffer, (PVOID)&stats, sizeof(stats));
736         *replyLen = sizeof stats;
737     }
738
739 unlock:
740     NdisReleaseSpinLock(gOvsCtrlLock);
741     return status;
742 }
743
744
745 /* Handles flow add, modify as well as delete */
746 static NTSTATUS
747 HandleFlowPut(OvsFlowPut *put,
748               OVS_DATAPATH *datapath,
749               struct OvsFlowStats *stats)
750 {
751     BOOLEAN   mayCreate, mayModify, mayDelete;
752     OvsFlow   *KernelFlow;
753     UINT64    hash;
754     NTSTATUS  status = STATUS_SUCCESS;
755
756     mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0;
757     mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0;
758     mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0;
759
760     if ((mayCreate || mayModify) == mayDelete) {
761         return STATUS_INVALID_PARAMETER;
762     }
763
764     KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE);
765     if (!KernelFlow) {
766         if (!mayCreate) {
767             return STATUS_INVALID_PARAMETER;
768         }
769
770         status = OvsPrepareFlow(&KernelFlow, put, hash);
771         if (status != STATUS_SUCCESS) {
772             FreeFlow(KernelFlow);
773             return STATUS_UNSUCCESSFUL;
774         }
775
776         status = AddFlow(datapath, KernelFlow);
777         if (status != STATUS_SUCCESS) {
778             FreeFlow(KernelFlow);
779             return STATUS_UNSUCCESSFUL;
780         }
781
782         /* Validate the flow addition */
783         {
784             UINT64 newHash;
785             OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash,
786                                                                     FALSE);
787             ASSERT(flow);
788             ASSERT(newHash == hash);
789             if (!flow || newHash != hash) {
790                 return STATUS_UNSUCCESSFUL;
791             }
792         }
793     } else {
794         stats->packetCount = KernelFlow->packetCount;
795         stats->byteCount = KernelFlow->byteCount;
796         stats->tcpFlags = KernelFlow->tcpFlags;
797         stats->used = (UINT32)KernelFlow->used;
798
799         if (mayModify) {
800             OvsFlow *newFlow;
801             status = OvsPrepareFlow(&newFlow, put, hash);
802             if (status != STATUS_SUCCESS) {
803                 return STATUS_UNSUCCESSFUL;
804             }
805
806             KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE);
807             if (KernelFlow)  {
808                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) {
809                     newFlow->packetCount = KernelFlow->packetCount;
810                     newFlow->byteCount = KernelFlow->byteCount;
811                     newFlow->tcpFlags = KernelFlow->tcpFlags;
812                 }
813                 RemoveFlow(datapath, &KernelFlow);
814             }  else  {
815                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0)  {
816                     newFlow->packetCount = stats->packetCount;
817                     newFlow->byteCount = stats->byteCount;
818                     newFlow->tcpFlags = stats->tcpFlags;
819                 }
820             }
821             status = AddFlow(datapath, newFlow);
822             ASSERT(status == STATUS_SUCCESS);
823
824             /* Validate the flow addition */
825             {
826                 UINT64 newHash;
827                 OvsFlow *testflow = OvsLookupFlow(datapath, &put->key,
828                                                             &newHash, FALSE);
829                 ASSERT(testflow);
830                 ASSERT(newHash == hash);
831                 if (!testflow || newHash != hash) {
832                     FreeFlow(newFlow);
833                     return STATUS_UNSUCCESSFUL;
834                 }
835             }
836         } else {
837             if (mayDelete) {
838                 if (KernelFlow) {
839                     RemoveFlow(datapath, &KernelFlow);
840                 }
841             } else {
842                 return STATUS_UNSUCCESSFUL;
843             }
844         }
845     }
846     return STATUS_SUCCESS;
847 }
848
849 static NTSTATUS
850 OvsPrepareFlow(OvsFlow **flow,
851                const OvsFlowPut *put,
852                UINT64 hash)
853 {
854     OvsFlow     *localFlow = *flow;
855     NTSTATUS status = STATUS_SUCCESS;
856
857     do {
858         *flow = localFlow =
859             OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen);
860         if (localFlow == NULL) {
861             status = STATUS_NO_MEMORY;
862             break;
863         }
864
865         localFlow->key = put->key;
866         localFlow->actionsLen = put->actionsLen;
867         if (put->actionsLen) {
868             NdisMoveMemory((PUCHAR)localFlow->actions, put->actions,
869                                        put->actionsLen);
870         }
871         localFlow->userActionsLen = 0;  // 0 indicate no conversion is made
872         localFlow->used = 0;
873         localFlow->packetCount = 0;
874         localFlow->byteCount = 0;
875         localFlow->tcpFlags = 0;
876         localFlow->hash = hash;
877     } while(FALSE);
878
879     return status;
880 }
881
882 NTSTATUS
883 OvsGetFlowIoctl(PVOID inputBuffer,
884                 UINT32 inputLength,
885                 PVOID outputBuffer,
886                 UINT32 outputLength,
887                 UINT32 *replyLen)
888 {
889     NTSTATUS status = STATUS_SUCCESS;
890     OVS_DATAPATH *datapath = NULL;
891     OvsFlow *flow;
892     UINT32 getFlags, getActionsLen;
893     OvsFlowGetInput *getInput;
894     OvsFlowGetOutput *getOutput;
895     UINT64 hash;
896     UINT32 dpNo;
897     LOCK_STATE_EX dpLockState;
898
899     if (inputLength != sizeof(OvsFlowGetInput)
900         || inputBuffer == NULL) {
901         return STATUS_INFO_LENGTH_MISMATCH;
902     }
903
904     getInput = (OvsFlowGetInput *) inputBuffer;
905     getFlags = getInput->getFlags;
906     getActionsLen = getInput->actionsLen;
907     if (getInput->getFlags & FLOW_GET_KEY) {
908         return STATUS_INVALID_PARAMETER;
909     }
910
911     if (outputBuffer == NULL
912         || outputLength != (sizeof *getOutput +
913                             getInput->actionsLen)) {
914         return STATUS_INFO_LENGTH_MISMATCH;
915     }
916
917     dpNo = getInput->dpNo;
918     NdisAcquireSpinLock(gOvsCtrlLock);
919     if (gOvsSwitchContext == NULL ||
920         gOvsSwitchContext->dpNo != dpNo) {
921         status = STATUS_INVALID_PARAMETER;
922         goto unlock;
923     }
924
925     datapath = &gOvsSwitchContext->datapath;
926     ASSERT(datapath);
927     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
928     flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE);
929     if (!flow) {
930         status = STATUS_INVALID_PARAMETER;
931         goto dp_unlock;
932     }
933
934     // XXX: can be optimized to return only how much is written out
935     *replyLen = outputLength;
936     getOutput = (OvsFlowGetOutput *)outputBuffer;
937     ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info);
938
939 dp_unlock:
940     OvsReleaseDatapath(datapath, &dpLockState);
941 unlock:
942     NdisReleaseSpinLock(gOvsCtrlLock);
943     return status;
944 }
945
946 NTSTATUS
947 OvsFlushFlowIoctl(PVOID inputBuffer,
948                   UINT32 inputLength)
949 {
950     NTSTATUS status = STATUS_SUCCESS;
951     OVS_DATAPATH *datapath = NULL;
952     UINT32 dpNo;
953     LOCK_STATE_EX dpLockState;
954
955     if (inputLength != sizeof(UINT32) || inputBuffer == NULL) {
956         return STATUS_INFO_LENGTH_MISMATCH;
957     }
958
959     dpNo = *(UINT32 *)inputBuffer;
960     NdisAcquireSpinLock(gOvsCtrlLock);
961     if (gOvsSwitchContext == NULL ||
962         gOvsSwitchContext->dpNo != dpNo) {
963         status = STATUS_INVALID_PARAMETER;
964         goto unlock;
965     }
966
967     datapath = &gOvsSwitchContext->datapath;
968     ASSERT(datapath);
969     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
970     DeleteAllFlows(datapath);
971     OvsReleaseDatapath(datapath, &dpLockState);
972
973 unlock:
974     NdisReleaseSpinLock(gOvsCtrlLock);
975     return status;
976 }
977
978 #pragma warning( pop )