datapath-windows/Flow.c: FLOW_NEW command handler.
[cascardo/ovs.git] / datapath-windows / ovsext / Flow.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Util.h"
20 #include "Jhash.h"
21 #include "Flow.h"
22 #include "PacketParser.h"
23
24 #ifdef OVS_DBG_MOD
25 #undef OVS_DBG_MOD
26 #endif
27 #define OVS_DBG_MOD OVS_DBG_FLOW
28 #include "Debug.h"
29
30 #pragma warning( push )
31 #pragma warning( disable:4127 )
32
33 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
34 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
35 extern UINT64 ovsTimeIncrementPerTick;
36
37 static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags,
38                                UINT32 getActionsLen, OvsFlowInfo *info);
39 static NTSTATUS HandleFlowPut(OvsFlowPut *put,
40                                   OVS_DATAPATH *datapath,
41                                   struct OvsFlowStats *stats);
42 static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put,
43                                UINT64 hash);
44 static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow);
45 static VOID DeleteAllFlows(OVS_DATAPATH *datapath);
46 static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow);
47 static VOID FreeFlow(OvsFlow *flow);
48 static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB);
49 static NTSTATUS _MapNlToFlowPut(POVS_MESSAGE msgIn, PNL_ATTR keyAttr,
50                                 PNL_ATTR actionAttr,
51                                 PNL_ATTR flowAttrClear,
52                                 OvsFlowPut *mappedFlow);
53 static VOID _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
54                                  PNL_ATTR *tunnelAttrs,
55                                  OvsFlowPut *mappedFlow);
56
57 static VOID _MapTunAttrToFlowPut(PNL_ATTR *keyAttrs,
58                                  PNL_ATTR *tunnelAttrs,
59                                  OvsFlowKey *destKey);
60 static VOID _MapNlToFlowPutFlags(PGENL_MSG_HDR genlMsgHdr,
61                                  PNL_ATTR flowAttrClear,
62                                  OvsFlowPut *mappedFlow);
63
/* Number of hash buckets allocated for a datapath flow table. */
#define OVS_FLOW_TABLE_SIZE 2048
/* Bit mask derived from the table size; reducing a hash with '&' only
 * reaches every bucket while OVS_FLOW_TABLE_SIZE is a power of two. */
#define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1)
/* Reduces a flow hash to a bucket index in [0, OVS_FLOW_TABLE_SIZE). */
#define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK)
67
68 /* Flow family related netlink policies */
69
70 /* For Parsing attributes in FLOW_* commands */
71 static const NL_POLICY nlFlowPolicy[] = {
72     [OVS_FLOW_ATTR_KEY] = {.type = NL_A_NESTED, .optional = FALSE},
73     [OVS_FLOW_ATTR_MASK] = {.type = NL_A_NESTED, .optional = TRUE},
74     [OVS_FLOW_ATTR_ACTIONS] = {.type = NL_A_NESTED, .optional = TRUE},
75     [OVS_FLOW_ATTR_STATS] = {.type = NL_A_UNSPEC,
76                              .minLen = sizeof(struct ovs_flow_stats),
77                              .maxLen = sizeof(struct ovs_flow_stats),
78                              .optional = TRUE},
79     [OVS_FLOW_ATTR_TCP_FLAGS] = {NL_A_U8, .optional = TRUE},
80     [OVS_FLOW_ATTR_USED] = {NL_A_U64, .optional = TRUE}
81 };
82
83 /* For Parsing nested OVS_FLOW_ATTR_KEY attributes.
84  * Some of the attributes like OVS_KEY_ATTR_RECIRC_ID
85  * & OVS_KEY_ATTR_MPLS are not supported yet. */
86
87 static const NL_POLICY nlFlowKeyPolicy[] = {
88     [OVS_KEY_ATTR_ENCAP] = {.type = NL_A_VAR_LEN, .optional = TRUE},
89     [OVS_KEY_ATTR_PRIORITY] = {.type = NL_A_UNSPEC, .minLen = 4,
90                                .maxLen = 4, .optional = TRUE},
91     [OVS_KEY_ATTR_IN_PORT] = {.type = NL_A_UNSPEC, .minLen = 4,
92                               .maxLen = 4, .optional = FALSE},
93     [OVS_KEY_ATTR_ETHERNET] = {.type = NL_A_UNSPEC,
94                                .minLen = sizeof(struct ovs_key_ethernet),
95                                .maxLen = sizeof(struct ovs_key_ethernet),
96                                .optional = FALSE},
97     [OVS_KEY_ATTR_VLAN] = {.type = NL_A_UNSPEC, .minLen = 2,
98                            .maxLen = 2, .optional = TRUE},
99     [OVS_KEY_ATTR_ETHERTYPE] = {.type = NL_A_UNSPEC, .minLen = 2,
100                                 .maxLen = 2, .optional = TRUE},
101     [OVS_KEY_ATTR_IPV4] = {.type = NL_A_UNSPEC,
102                            .minLen = sizeof(struct ovs_key_ipv4),
103                            .maxLen = sizeof(struct ovs_key_ipv4),
104                            .optional = TRUE},
105     [OVS_KEY_ATTR_IPV6] = {.type = NL_A_UNSPEC,
106                            .minLen = sizeof(struct ovs_key_ipv6),
107                            .maxLen = sizeof(struct ovs_key_ipv6),
108                            .optional = TRUE},
109     [OVS_KEY_ATTR_TCP] = {.type = NL_A_UNSPEC,
110                           .minLen = sizeof(struct ovs_key_tcp),
111                           .maxLen = sizeof(struct ovs_key_tcp),
112                           .optional = TRUE},
113     [OVS_KEY_ATTR_UDP] = {.type = NL_A_UNSPEC,
114                           .minLen = sizeof(struct ovs_key_udp),
115                           .maxLen = sizeof(struct ovs_key_udp),
116                           .optional = TRUE},
117     [OVS_KEY_ATTR_ICMP] = {.type = NL_A_UNSPEC,
118                            .minLen = sizeof(struct ovs_key_icmp),
119                            .maxLen = sizeof(struct ovs_key_icmp),
120                            .optional = TRUE},
121     [OVS_KEY_ATTR_ICMPV6] = {.type = NL_A_UNSPEC,
122                              .minLen = sizeof(struct ovs_key_icmpv6),
123                              .maxLen = sizeof(struct ovs_key_icmpv6),
124                              .optional = TRUE},
125     [OVS_KEY_ATTR_ARP] = {.type = NL_A_UNSPEC,
126                           .minLen = sizeof(struct ovs_key_arp),
127                           .maxLen = sizeof(struct ovs_key_arp),
128                           .optional = TRUE},
129     [OVS_KEY_ATTR_ND] = {.type = NL_A_UNSPEC,
130                          .minLen = sizeof(struct ovs_key_nd),
131                          .maxLen = sizeof(struct ovs_key_nd),
132                          .optional = TRUE},
133     [OVS_KEY_ATTR_SKB_MARK] = {.type = NL_A_UNSPEC, .minLen = 4,
134                                .maxLen = 4, .optional = TRUE},
135     [OVS_KEY_ATTR_TUNNEL] = {.type = NL_A_VAR_LEN, .optional = TRUE},
136     [OVS_KEY_ATTR_SCTP] = {.type = NL_A_UNSPEC,
137                            .minLen = sizeof(struct ovs_key_sctp),
138                            .maxLen = sizeof(struct ovs_key_sctp),
139                            .optional = TRUE},
140     [OVS_KEY_ATTR_TCP_FLAGS] = {.type = NL_A_UNSPEC,
141                                 .minLen = 2, .maxLen = 2,
142                                 .optional = TRUE},
143     [OVS_KEY_ATTR_DP_HASH] = {.type = NL_A_UNSPEC, .minLen = 4,
144                               .maxLen = 4, .optional = TRUE},
145     [OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
146                                 .maxLen = 4, .optional = TRUE},
147     [OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE}
148 };
149
150 /* For Parsing nested OVS_KEY_ATTR_TUNNEL attributes */
151 static const NL_POLICY nlFlowTunnelKeyPolicy[] = {
152     [OVS_TUNNEL_KEY_ATTR_ID] = {.type = NL_A_UNSPEC, .minLen = 8,
153                                 .maxLen = 8, .optional = TRUE},
154     [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = {.type = NL_A_UNSPEC, .minLen = 4,
155                                       .maxLen = 4, .optional = TRUE},
156     [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = {.type = NL_A_UNSPEC, .minLen = 4 ,
157                                       .maxLen = 4, .optional = FALSE},
158     [OVS_TUNNEL_KEY_ATTR_TOS] = {.type = NL_A_UNSPEC, .minLen = 1,
159                                  .maxLen = 1, .optional = TRUE},
160     [OVS_TUNNEL_KEY_ATTR_TTL] = {.type = NL_A_UNSPEC, .minLen = 1,
161                                  .maxLen = 1, .optional = TRUE},
162     [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = {.type = NL_A_UNSPEC, .minLen = 0,
163                                            .maxLen = 0, .optional = TRUE},
164     [OVS_TUNNEL_KEY_ATTR_CSUM] = {.type = NL_A_UNSPEC, .minLen = 0,
165                                   .maxLen = 0, .optional = TRUE},
166     [OVS_TUNNEL_KEY_ATTR_OAM] = {.type = NL_A_UNSPEC, .minLen = 0,
167                                  .maxLen = 0, .optional = TRUE},
168     [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = {.type = NL_A_VAR_LEN,
169                                          .optional = TRUE}
170 };
171
172 /* For Parsing nested OVS_FLOW_ATTR_ACTIONS attributes */
173 static const NL_POLICY nlFlowActionPolicy[] = {
174     [OVS_ACTION_ATTR_OUTPUT] = {.type = NL_A_UNSPEC, .minLen = sizeof(UINT32),
175                                 .maxLen = sizeof(UINT32), .optional = TRUE},
176     [OVS_ACTION_ATTR_USERSPACE] = {.type = NL_A_VAR_LEN, .optional = TRUE},
177     [OVS_ACTION_ATTR_PUSH_VLAN] = {.type = NL_A_UNSPEC,
178                                    .minLen =
179                                    sizeof(struct ovs_action_push_vlan),
180                                    .maxLen =
181                                    sizeof(struct ovs_action_push_vlan),
182                                    .optional = TRUE},
183     [OVS_ACTION_ATTR_POP_VLAN] = {.type = NL_A_UNSPEC, .optional = TRUE},
184     [OVS_ACTION_ATTR_PUSH_MPLS] = {.type = NL_A_UNSPEC,
185                                    .minLen =
186                                    sizeof(struct ovs_action_push_mpls),
187                                    .maxLen =
188                                    sizeof(struct ovs_action_push_mpls),
189                                    .optional = TRUE},
190     [OVS_ACTION_ATTR_POP_MPLS] = {.type = NL_A_UNSPEC,
191                                   .minLen = sizeof(UINT16),
192                                   .maxLen = sizeof(UINT16),
193                                   .optional = TRUE},
194     [OVS_ACTION_ATTR_RECIRC] = {.type = NL_A_UNSPEC,
195                                 .minLen = sizeof(UINT32),
196                                 .maxLen = sizeof(UINT32),
197                                 .optional = TRUE},
198     [OVS_ACTION_ATTR_HASH] = {.type = NL_A_UNSPEC,
199                               .minLen = sizeof(struct ovs_action_hash),
200                               .maxLen = sizeof(struct ovs_action_hash),
201                               .optional = TRUE},
202     [OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
203     [OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE}
204 };
205
206 /*
207  *----------------------------------------------------------------------------
208  * Netlink interface for flow commands.
209  *----------------------------------------------------------------------------
210  */
211
212 /*
213  *----------------------------------------------------------------------------
214  *  OvsFlowNlNewCmdHandler --
215  *    Handler for OVS_FLOW_CMD_NEW command.
216  *----------------------------------------------------------------------------
217  */
218 NTSTATUS
219 OvsFlowNlNewCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
220                        UINT32 *replyLen)
221 {
222     NTSTATUS rc = STATUS_SUCCESS;
223     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
224     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
225     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
226     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
227     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
228     PNL_ATTR nlAttrs[__OVS_FLOW_ATTR_MAX];
229     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
230     OvsFlowPut mappedFlow;
231     OvsFlowStats stats;
232     struct ovs_flow_stats replyStats;
233
234     NL_BUFFER nlBuf;
235
236     RtlZeroMemory(&mappedFlow, sizeof(OvsFlowPut));
237     RtlZeroMemory(&stats, sizeof(stats));
238     RtlZeroMemory(&replyStats, sizeof(replyStats));
239
240     *replyLen = 0;
241
242     /* Get all the top level Flow attributes */
243     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
244                      nlFlowPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
245                      != TRUE) {
246         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
247                        nlMsgHdr);
248         rc = STATUS_UNSUCCESSFUL;
249         goto done;
250     }
251
252     if ((_MapNlToFlowPut(msgIn, nlAttrs[OVS_FLOW_ATTR_KEY],
253          nlAttrs[OVS_FLOW_ATTR_ACTIONS], nlAttrs[OVS_FLOW_ATTR_CLEAR],
254          &mappedFlow))
255         != STATUS_SUCCESS) {
256         OVS_LOG_ERROR("Conversion to OvsFlowPut failed");
257         goto done;
258     }
259
260     rc = OvsPutFlowIoctl(&mappedFlow, sizeof (struct OvsFlowPut),
261                          &stats);
262     if (rc != STATUS_SUCCESS) {
263         OVS_LOG_ERROR("OvsFlowPut failed.");
264         goto done;
265     }
266
267     if (!(usrParamsCtx->outputBuffer)) {
268         /* No output buffer */
269         OVS_LOG_ERROR("outputBuffer NULL.");
270         goto done;
271     }
272
273     replyStats.n_packets = stats.packetCount;
274     replyStats.n_bytes = stats.byteCount;
275
276     /* So far so good. Prepare the reply for userspace */
277     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
278               usrParamsCtx->outputLength);
279
280     /* Prepare nl Msg headers */
281     rc = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
282                       nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
283                       genlMsgHdr->cmd, OVS_FLOW_VERSION,
284                       ovsHdr->dp_ifindex);
285     ASSERT(rc);
286
287     /* Append OVS_FLOW_ATTR_STATS attribute */
288     if (!NlMsgPutTailUnspec(&nlBuf, OVS_FLOW_ATTR_STATS,
289         (PCHAR)(&replyStats), sizeof(replyStats))) {
290         OVS_LOG_ERROR("Adding OVS_FLOW_ATTR_STATS attribute failed.");
291         rc = STATUS_UNSUCCESSFUL;
292     }
293
294     *replyLen = msgOut->nlMsg.nlmsgLen;
295
296 done:
297     return rc;
298 }
299
300 /*
301  *----------------------------------------------------------------------------
302  *  _MapNlToFlowPut --
303  *    Maps input netlink message to OvsFlowPut.
304  *----------------------------------------------------------------------------
305  */
306 static NTSTATUS
307 _MapNlToFlowPut(POVS_MESSAGE msgIn, PNL_ATTR keyAttr,
308                 PNL_ATTR actionAttr, PNL_ATTR flowAttrClear,
309                 OvsFlowPut *mappedFlow)
310 {
311     NTSTATUS rc = STATUS_SUCCESS;
312     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
313     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
314     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
315
316     UINT32 keyAttrOffset = (UINT32)((PCHAR)keyAttr - (PCHAR)nlMsgHdr);
317     UINT32 tunnelKeyAttrOffset;
318
319     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
320     PNL_ATTR tunnelAttrs[__OVS_TUNNEL_KEY_ATTR_MAX] = {NULL};
321
322     /* Get flow keys attributes */
323     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset, NlAttrLen(keyAttr),
324                            nlFlowKeyPolicy, keyAttrs, ARRAY_SIZE(keyAttrs)))
325                            != TRUE) {
326         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p",
327                        nlMsgHdr);
328         rc = STATUS_UNSUCCESSFUL;
329         goto done;
330     }
331
332     if (keyAttrs[OVS_KEY_ATTR_TUNNEL]) {
333         tunnelKeyAttrOffset = (UINT32)((PCHAR)
334                               (keyAttrs[OVS_KEY_ATTR_TUNNEL])
335                               - (PCHAR)nlMsgHdr);
336
337         OVS_LOG_ERROR("Parse Flow Tunnel Key Policy");
338
339         /* Get tunnel keys attributes */
340         if ((NlAttrParseNested(nlMsgHdr, tunnelKeyAttrOffset,
341                                NlAttrLen(keyAttr), nlFlowTunnelKeyPolicy,
342                                tunnelAttrs, ARRAY_SIZE(tunnelAttrs)))
343                                != TRUE) {
344             OVS_LOG_ERROR("Tunnel key Attr Parsing failed for msg: %p",
345                            nlMsgHdr);
346             rc = STATUS_UNSUCCESSFUL;
347             goto done;
348         }
349     }
350
351     _MapKeyAttrToFlowPut(keyAttrs, tunnelAttrs,
352                          mappedFlow);
353
354     /* Map the action */
355     mappedFlow->actionsLen = NlAttrGetSize(actionAttr);
356     mappedFlow->actions = NlAttrGet(actionAttr);
357     mappedFlow->dpNo = ovsHdr->dp_ifindex;
358
359     _MapNlToFlowPutFlags(genlMsgHdr, flowAttrClear,
360                                 mappedFlow);
361
362 done:
363     return rc;
364 }
365
366 /*
367  *----------------------------------------------------------------------------
368  *  _MapNlToFlowPutFlags --
369  *    Maps netlink message to OvsFlowPut->flags.
370  *----------------------------------------------------------------------------
371  */
372 static VOID
373 _MapNlToFlowPutFlags(PGENL_MSG_HDR genlMsgHdr,
374                      PNL_ATTR flowAttrClear, OvsFlowPut *mappedFlow)
375 {
376     uint32_t flags = 0;
377
378     switch (genlMsgHdr->cmd) {
379     case OVS_FLOW_CMD_NEW:
380          flags |= OVSWIN_FLOW_PUT_CREATE;
381          break;
382     case OVS_FLOW_CMD_DEL:
383          flags |= OVSWIN_FLOW_PUT_DELETE;
384          break;
385     case OVS_FLOW_CMD_SET:
386          flags |= OVSWIN_FLOW_PUT_MODIFY;
387          break;
388     default:
389          ASSERT(0);
390     }
391
392     if (flowAttrClear) {
393         flags |= OVSWIN_FLOW_PUT_CLEAR;
394     }
395
396     mappedFlow->flags = flags;
397 }
398
399 /*
400  *----------------------------------------------------------------------------
401  *  _MapKeyAttrToFlowPut --
402  *    Converts FLOW_KEY attribute to OvsFlowPut->key.
403  *----------------------------------------------------------------------------
404  */
405 static VOID
406 _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
407                      PNL_ATTR *tunnelAttrs,
408                      OvsFlowPut *mappedFlow)
409 {
410     const struct ovs_key_ethernet *eth_key;
411     OvsFlowKey *destKey = &(mappedFlow->key);
412
413     _MapTunAttrToFlowPut(keyAttrs, tunnelAttrs, destKey);
414
415     /* ===== L2 headers ===== */
416     destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
417     eth_key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ETHERNET]);
418     RtlCopyMemory(destKey->l2.dlSrc, eth_key->eth_src, ETH_ADDR_LEN);
419     RtlCopyMemory(destKey->l2.dlDst, eth_key->eth_dst, ETH_ADDR_LEN);
420
421     destKey->l2.dlType = ntohs((NlAttrGetU16(keyAttrs
422                           [OVS_KEY_ATTR_ETHERTYPE])));
423
424     if (keyAttrs[OVS_KEY_ATTR_VLAN]) {
425         destKey->l2.vlanTci = NlAttrGetU16(keyAttrs
426                               [OVS_KEY_ATTR_VLAN]);
427     }
428
429     /* ==== L3 + L4. ==== */
430     destKey->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE
431                          - destKey->l2.offset;
432
433     switch (destKey->l2.dlType) {
434     case ETH_TYPE_IPV4: {
435
436         if (keyAttrs[OVS_KEY_ATTR_IPV4]) {
437             const struct ovs_key_ipv4 *ipv4Key;
438
439             ipv4Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_IPV4]);
440             IpKey *ipv4FlowPutKey = &(destKey->ipKey);
441             ipv4FlowPutKey->nwSrc = ipv4Key->ipv4_src;
442             ipv4FlowPutKey->nwDst = ipv4Key->ipv4_dst;
443             ipv4FlowPutKey->nwProto = ipv4Key->ipv4_proto;
444             ipv4FlowPutKey->nwTos = ipv4Key->ipv4_tos;
445             ipv4FlowPutKey->nwTtl = ipv4Key->ipv4_ttl;
446             ipv4FlowPutKey->nwFrag = ipv4Key->ipv4_frag;
447
448             if (keyAttrs[OVS_KEY_ATTR_TCP]) {
449                 const struct ovs_key_tcp *tcpKey;
450                 tcpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_TCP]);
451                 ipv4FlowPutKey->l4.tpSrc = tcpKey->tcp_src;
452                 ipv4FlowPutKey->l4.tpDst = tcpKey->tcp_dst;
453             }
454
455             if (keyAttrs[OVS_KEY_ATTR_UDP]) {
456                 const struct ovs_key_udp *udpKey;
457                 udpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_UDP]);
458                 ipv4FlowPutKey->l4.tpSrc = udpKey->udp_src;
459                 ipv4FlowPutKey->l4.tpDst = udpKey->udp_dst;
460             }
461
462             if (keyAttrs[OVS_KEY_ATTR_SCTP]) {
463                 const struct ovs_key_sctp *sctpKey;
464                 sctpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_SCTP]);
465                 ipv4FlowPutKey->l4.tpSrc = sctpKey->sctp_src;
466                 ipv4FlowPutKey->l4.tpDst = sctpKey->sctp_dst;
467             }
468
469             destKey->l2.keyLen += OVS_IP_KEY_SIZE;
470         }
471         break;
472     }
473     case ETH_TYPE_IPV6: {
474
475         if (keyAttrs[OVS_KEY_ATTR_IPV6]) {
476             const struct ovs_key_ipv6 *ipv6Key;
477
478             ipv6Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_IPV6]);
479             Ipv6Key *ipv6FlowPutKey = &(destKey->ipv6Key);
480
481             RtlCopyMemory(&ipv6FlowPutKey->ipv6Src, ipv6Key->ipv6_src,
482                           sizeof ipv6Key->ipv6_src);
483             RtlCopyMemory(&ipv6FlowPutKey->ipv6Dst, ipv6Key->ipv6_dst,
484                           sizeof ipv6Key->ipv6_dst);
485
486             ipv6FlowPutKey->ipv6Label = ipv6Key->ipv6_label;
487             ipv6FlowPutKey->nwProto  = ipv6Key->ipv6_proto;
488             ipv6FlowPutKey->nwTos = ipv6Key->ipv6_tclass;
489             ipv6FlowPutKey->nwTtl = ipv6Key->ipv6_hlimit;
490             ipv6FlowPutKey->nwFrag = ipv6Key->ipv6_frag;
491
492             if (keyAttrs[OVS_KEY_ATTR_TCP]) {
493                 const struct ovs_key_tcp *tcpKey;
494                 tcpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_TCP]);
495                 ipv6FlowPutKey->l4.tpSrc = tcpKey->tcp_src;
496                 ipv6FlowPutKey->l4.tpDst = tcpKey->tcp_dst;
497             }
498
499             if (keyAttrs[OVS_KEY_ATTR_UDP]) {
500                 const struct ovs_key_udp *udpKey;
501                 udpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_UDP]);
502                 ipv6FlowPutKey->l4.tpSrc = udpKey->udp_src;
503                 ipv6FlowPutKey->l4.tpDst = udpKey->udp_dst;
504             }
505
506             if (keyAttrs[OVS_KEY_ATTR_SCTP]) {
507                 const struct ovs_key_sctp *sctpKey;
508                 sctpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_SCTP]);
509                 ipv6FlowPutKey->l4.tpSrc = sctpKey->sctp_src;
510                 ipv6FlowPutKey->l4.tpDst = sctpKey->sctp_dst;
511             }
512
513             if (keyAttrs[OVS_KEY_ATTR_ICMPV6]) {
514                 const struct ovs_key_icmpv6 *icmpv6Key;
515
516                 Icmp6Key *icmp6FlowPutKey= &(destKey->icmp6Key);
517
518                 icmpv6Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ICMPV6]);
519
520                 icmp6FlowPutKey->l4.tpSrc = icmpv6Key->icmpv6_type;
521                 icmp6FlowPutKey->l4.tpDst = icmpv6Key->icmpv6_code;
522
523                 if (keyAttrs[OVS_KEY_ATTR_ND]) {
524                     const struct ovs_key_nd *ndKey;
525
526                     ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]);
527                     RtlCopyMemory(&icmp6FlowPutKey->ndTarget,
528                                   ndKey->nd_target, sizeof (icmp6FlowPutKey->ndTarget));
529                     RtlCopyMemory(icmp6FlowPutKey->arpSha,
530                                   ndKey->nd_sll, ETH_ADDR_LEN);
531                     RtlCopyMemory(icmp6FlowPutKey->arpTha,
532                                   ndKey->nd_tll, ETH_ADDR_LEN);
533                 }
534
535                 destKey->l2.keyLen += OVS_ICMPV6_KEY_SIZE;
536
537             } else {
538
539                 destKey->l2.keyLen += OVS_IPV6_KEY_SIZE;
540             }
541
542             ipv6FlowPutKey->pad = 0;
543         }
544         break;
545     }
546     case ETH_TYPE_ARP:
547     case ETH_TYPE_RARP: {
548         ArpKey *arpFlowPutKey = &destKey->arpKey;
549         const struct ovs_key_arp *arpKey;
550
551         arpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ARP]);
552
553         arpFlowPutKey->nwSrc = arpKey->arp_sip;
554         arpFlowPutKey->nwDst = arpKey->arp_tip;
555
556         RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, ETH_ADDR_LEN);
557         RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, ETH_ADDR_LEN);
558         arpFlowPutKey->nwProto = (UINT8)(arpKey->arp_op);
559         arpFlowPutKey->pad[0] = 0;
560         arpFlowPutKey->pad[1] = 0;
561         arpFlowPutKey->pad[2] = 0;
562         destKey->l2.keyLen += OVS_ARP_KEY_SIZE;
563         break;
564     }
565     }
566 }
567
568 /*
569  *----------------------------------------------------------------------------
570  *  _MapTunAttrToFlowPut --
571  *    Converts FLOW_TUNNEL_KEY attribute to OvsFlowKey->tunKey.
572  *----------------------------------------------------------------------------
573  */
574 static VOID
575 _MapTunAttrToFlowPut(PNL_ATTR *keyAttrs,
576                      PNL_ATTR *tunAttrs,
577                      OvsFlowKey *destKey)
578 {
579     if (keyAttrs[OVS_KEY_ATTR_TUNNEL]) {
580
581         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_ID]) {
582             destKey->tunKey.tunnelId = NlAttrGetU64
583                                        (tunAttrs[OVS_TUNNEL_KEY_ATTR_ID]);
584             destKey->tunKey.flags |= OVS_TNL_F_KEY;
585         }
586
587         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]) {
588         destKey->tunKey.dst = NlAttrGetU32
589                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]);
590         }
591
592         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]) {
593         destKey->tunKey.src = NlAttrGetU32
594                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]);
595         }
596
597         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT]) {
598             destKey->tunKey.flags |= OVS_TNL_F_DONT_FRAGMENT;
599         }
600
601         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_CSUM]) {
602             destKey->tunKey.flags |= OVS_TNL_F_CSUM;
603         }
604
605         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_TOS]) {
606         destKey->tunKey.tos = NlAttrGetU8
607                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_TOS]);
608         }
609
610         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_TTL]) {
611         destKey->tunKey.ttl = NlAttrGetU8
612                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_TTL]);
613         }
614
615         destKey->tunKey.pad = 0;
616         destKey->l2.offset = 0;
617     } else {
618         destKey->tunKey.attr[0] = 0;
619         destKey->tunKey.attr[1] = 0;
620         destKey->tunKey.attr[2] = 0;
621         destKey->l2.offset = sizeof destKey->tunKey;
622     }
623 }
624
625 /*
626  *----------------------------------------------------------------------------
627  * OvsDeleteFlowTable --
628  * Results:
629  *    NDIS_STATUS_SUCCESS always.
630  *----------------------------------------------------------------------------
631  */
632 NDIS_STATUS
633 OvsDeleteFlowTable(OVS_DATAPATH *datapath)
634 {
635     if (datapath == NULL || datapath->flowTable == NULL) {
636         return NDIS_STATUS_SUCCESS;
637     }
638
639     DeleteAllFlows(datapath);
640     OvsFreeMemory(datapath->flowTable);
641     datapath->flowTable = NULL;
642     NdisFreeRWLock(datapath->lock);
643
644     return NDIS_STATUS_SUCCESS;
645 }
646
647 /*
648  *----------------------------------------------------------------------------
649  * OvsAllocateFlowTable --
650  * Results:
651  *    NDIS_STATUS_SUCCESS on success.
652  *    NDIS_STATUS_RESOURCES if memory couldn't be allocated
653  *----------------------------------------------------------------------------
654  */
655 NDIS_STATUS
656 OvsAllocateFlowTable(OVS_DATAPATH *datapath,
657                      POVS_SWITCH_CONTEXT switchContext)
658 {
659     PLIST_ENTRY bucket;
660     int i;
661
662     datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE *
663                                             sizeof (LIST_ENTRY));
664     if (!datapath->flowTable) {
665         return NDIS_STATUS_RESOURCES;
666     }
667     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
668         bucket = &(datapath->flowTable[i]);
669         InitializeListHead(bucket);
670     }
671     datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
672
673     return NDIS_STATUS_SUCCESS;
674 }
675
676
677 /*
678  *----------------------------------------------------------------------------
679  *  GetStartAddrNBL --
680  *    Get the virtual address of the frame.
681  *
682  *  Results:
683  *    Virtual address of the frame.
684  *----------------------------------------------------------------------------
685  */
686 static __inline VOID *
687 GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
688 {
689     PMDL curMdl;
690     PUINT8 curBuffer;
691     PEthHdr curHeader;
692
693     ASSERT(_pNB);
694
695     // Ethernet Header is a guaranteed safe access.
696     curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
697     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
698     if (!curBuffer) {
699         return NULL;
700     }
701
702     curHeader = (PEthHdr)
703     (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
704
705     return (VOID *) curHeader;
706 }
707
708 VOID
709 OvsFlowUsed(OvsFlow *flow,
710             const NET_BUFFER_LIST *packet,
711             const POVS_PACKET_HDR_INFO layers)
712 {
713     LARGE_INTEGER tickCount;
714
715     KeQueryTickCount(&tickCount);
716     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
717     flow->packetCount++;
718     flow->byteCount += OvsPacketLenNBL(packet);
719     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
720 }
721
722
723 VOID
724 DeleteAllFlows(OVS_DATAPATH *datapath)
725 {
726     INT i;
727     PLIST_ENTRY bucket;
728
729     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
730         PLIST_ENTRY next;
731         bucket = &(datapath->flowTable[i]);
732         while (!IsListEmpty(bucket)) {
733             OvsFlow *flow;
734             next = bucket->Flink;
735             flow = CONTAINING_RECORD(next, OvsFlow, ListEntry);
736             RemoveFlow(datapath, &flow);
737         }
738     }
739 }
740
/*
 *----------------------------------------------------------------------------
 * OvsExtractFlow --
 *    Fills in the flow key 'flow' from 'packet', 'inPort' and the optional
 *    tunnel key 'tunKey', and records header offsets and protocol flags in
 *    'layers' (which is reset to 0 first).
 *
 *    - When 'tunKey' is non-NULL it is copied into flow->tunKey and
 *      flow->l2.offset is set to 0; otherwise flow->tunKey.dst is cleared
 *      and flow->l2.offset is set to OVS_WIN_TUNNEL_KEY_SIZE.
 *
 *    - layers->l3Offset is set to the length of the Ethernet header (DIX or
 *      802.3 LLC/SNAP) plus the size of an in-band 802.1Q tag when one is
 *      parsed here.
 *
 *    - layers->l4Offset is set for IPv4 packets whose IP header could be
 *      read, to l3Offset plus the IP header length (ihl * 4).
 *
 *    - layers->l7Offset is set in this function only for IPv4 ICMP packets
 *      whose ICMP header could be read; TCP, UDP and IPv6 parsing is
 *      delegated to OvsParseTcp(), OvsParseUdp(), OvsParseIPv6() and
 *      OvsParseIcmpV6(), which receive 'layers'.
 *
 *    - flow->l2.keyLen is set according to the key sections that were
 *      filled in (L2 only, or L2 plus IPv4 / IPv6 / ICMPv6 / ARP).
 *
 * Results:
 *    NDIS_STATUS_SUCCESS normally, including for frames shorter than
 *    ETH_HEADER_LEN_DIX (in which case only the tunnel key, offset, inPort
 *    and keyLen are written).  If OvsParseIPv6() fails, its status is
 *    returned after the IPv6 key has been zeroed.
 *----------------------------------------------------------------------------
 */
NDIS_STATUS
OvsExtractFlow(const NET_BUFFER_LIST *packet,
               UINT32 inPort,
               OvsFlowKey *flow,
               POVS_PACKET_HDR_INFO layers,
               OvsIPv4TunnelKey *tunKey)
{
    struct Eth_Header *eth;
    UINT8 offset = 0;       /* Size of an in-band 802.1Q tag, else 0. */
    PVOID vlanTagValue;

    layers->value = 0;

    /* Tunnel key: present keys start at offset 0, otherwise it is skipped. */
    if (tunKey) {
        ASSERT(tunKey->dst != 0);
        RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey);
        flow->l2.offset = 0;
    } else {
        flow->tunKey.dst = 0;
        flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE;
    }

    flow->l2.inPort = inPort;

    /*
     * Frame too short to hold a DIX Ethernet header: report a key that stops
     * before the MAC addresses.
     * NOTE(review): the literal 8 presumably covers the leading L2 key fields
     * (offset/keyLen/inPort) -- confirm against the OvsFlowKey layout.
     */
    if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
        flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
        return NDIS_STATUS_SUCCESS;
    }

    /* Link layer. */
    eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
    memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
    memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);

    /*
     * vlan_tci.
     *
     * Prefer the out-of-band 802.1Q information attached to the
     * NET_BUFFER_LIST; fall back to a tag found in the frame itself.
     */
    vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
    if (vlanTagValue) {
        /*
         * The info slot holds the tag value itself, not a pointer to it, so
         * the address of the local copy is reinterpreted as the 802.1Q info.
         */
        PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
            (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
        flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI |
                                 (vlanTag->TagHeader.UserPriority << 13));
    } else {
        if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) {
            Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)&eth->dix.typeNBO;
            /*
             * NOTE(review): unlike the out-of-band branch above, this value
             * is not passed through htons() -- confirm the intended byte
             * order of vlanTci.
             */
            flow->l2.vlanTci = ((UINT16)tag->priority << 13) |
                               OVSWIN_VLAN_CFI |
                               ((UINT16)tag->vidHi << 8)  | tag->vidLo;
            offset = sizeof (Eth_802_1pq_Tag);
        } else {
            flow->l2.vlanTci = 0;
        }
        /*
        * XXX
        * Please note after this point, src mac and dst mac should
        * not be accessed through eth
        */
        /* Shift 'eth' past the in-band tag so the type fields line up. */
        eth = (Eth_Header *)((UINT8 *)eth + offset);
    }

    /*
     * dl_type.
     *
     * XXX assume that at least the first
     * 12 bytes of received packets are mapped.  This code has the stronger
     * assumption that at least the first 22 bytes of 'packet' is mapped (if my
     * arithmetic is right).
     */
    if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
        /* DIX (Ethernet II) frame: the type field is the EtherType. */
        flow->l2.dlType = eth->dix.typeNBO;
        layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
    } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
              eth->e802_3.llc.dsap == 0xaa &&
              eth->e802_3.llc.ssap == 0xaa &&
              eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
              eth->e802_3.snap.snapOrg[0] == 0x00 &&
              eth->e802_3.snap.snapOrg[1] == 0x00 &&
              eth->e802_3.snap.snapOrg[2] == 0x00) {
        /* 802.3 frame with LLC/SNAP and a zero OUI: use the SNAP type. */
        flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO;
        layers->l3Offset = ETH_HEADER_LEN_802_3 + offset;
    } else {
        /* Any other 802.3 frame: no usable EtherType. */
        flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE);
        layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
    }

    /* Key length so far covers L2 only; protocol sections are added below. */
    flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset;
    /* Network layer. */
    if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) {
        struct IPHdr ip_storage;
        const struct IPHdr *nh;
        IpKey *ipKey = &flow->ipKey;

        flow->l2.keyLen += OVS_IP_KEY_SIZE;
        layers->isIPv4 = 1;
        nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
        if (nh) {
            layers->l4Offset = layers->l3Offset + nh->ihl * 4;

            ipKey->nwSrc = nh->saddr;
            ipKey->nwDst = nh->daddr;
            ipKey->nwProto = nh->protocol;

            ipKey->nwTos = nh->tos;
            /* Any fragment sets FRAG_ANY; a non-zero offset adds FRAG_LATER. */
            if (nh->frag_off & htons(IP_MF | IP_OFFSET)) {
                ipKey->nwFrag = OVSWIN_NW_FRAG_ANY;
                if (nh->frag_off & htons(IP_OFFSET)) {
                    ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER;
                }
            } else {
                ipKey->nwFrag = 0;
            }

            ipKey->nwTtl = nh->ttl;
            ipKey->l4.tpSrc = 0;
            ipKey->l4.tpDst = 0;

            /* L4 is parsed only when the fragment offset is zero. */
            if (!(nh->frag_off & htons(IP_OFFSET))) {
                if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
                    OvsParseTcp(packet, &ipKey->l4, layers);
                } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
                    OvsParseUdp(packet, &ipKey->l4, layers);
                } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
                    ICMPHdr icmpStorage;
                    const ICMPHdr *icmp;

                    icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
                    if (icmp) {
                        /* ICMP type/code are carried in the port fields. */
                        ipKey->l4.tpSrc = htons(icmp->type);
                        ipKey->l4.tpDst = htons(icmp->code);
                        layers->l7Offset = layers->l4Offset + sizeof *icmp;
                    }
                }
            }
        } else {
            /* IP header could not be read: zero the first 16 bytes of the key. */
            ((UINT64 *)ipKey)[0] = 0;
            ((UINT64 *)ipKey)[1] = 0;
        }
    } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
        NDIS_STATUS status;
        flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
        status = OvsParseIPv6(packet, flow, layers);
        if (status != NDIS_STATUS_SUCCESS) {
            /* The only failure exit of this function. */
            memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
            return status;
        }
        layers->isIPv6 = 1;
        flow->ipv6Key.l4.tpSrc = 0;
        flow->ipv6Key.l4.tpDst = 0;
        flow->ipv6Key.pad = 0;

        if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
            OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
        } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
            OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
        } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
            OvsParseIcmpV6(packet, flow, layers);
            /* Replace the IPv6 key size added above with the ICMPv6 one. */
            flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
        }
    } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
        EtherArp arpStorage;
        const EtherArp *arp;
        ArpKey *arpKey = &flow->arpKey;
        /* Zero the first 24 bytes of the ARP key before filling it in. */
        ((UINT64 *)arpKey)[0] = 0;
        ((UINT64 *)arpKey)[1] = 0;
        ((UINT64 *)arpKey)[2] = 0;
        flow->l2.keyLen += OVS_ARP_KEY_SIZE;
        arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
        /* Only Ethernet (hrd 1) / IPv4 ARP with standard lengths is keyed. */
        if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
            arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
            arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
            arp->ea_hdr.ar_pln == 4) {
            /* We only match on the lower 8 bits of the opcode. */
            if (ntohs(arp->ea_hdr.ar_op) <= 0xff) {
                arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op);
            }
            if (arpKey->nwProto == ARPOP_REQUEST
                || arpKey->nwProto == ARPOP_REPLY) {
                memcpy(&arpKey->nwSrc, arp->arp_spa, 4);
                memcpy(&arpKey->nwDst, arp->arp_tpa, 4);
                memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
                memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
            }
        }
    }

    return NDIS_STATUS_SUCCESS;
}
952
953 __inline BOOLEAN
954 FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size)
955 {
956     UINT32 i;
957     ASSERT((size & 0x7) == 0);
958     ASSERT(((UINT64)src & 0x7) == 0);
959     ASSERT(((UINT64)dst & 0x7) == 0);
960     for (i = 0; i < (size >> 3); i++) {
961         if (src[i] != dst[i]) {
962             return FALSE;
963         }
964     }
965     return TRUE;
966 }
967
968
969 /*
970  * ----------------------------------------------------------------------------
971  * AddFlow --
972  *    Add a flow to flow table.
973  *
974  * Results:
975  *   NDIS_STATUS_SUCCESS if no same flow in the flow table.
976  * ----------------------------------------------------------------------------
977  */
978 NTSTATUS
979 AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
980 {
981     PLIST_ENTRY head;
982
983     if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) {
984         return STATUS_INVALID_HANDLE;
985     }
986
987     head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]);
988     /*
989      * We need fence here to make sure flow's nextPtr is updated before
990      * head->nextPtr is updated.
991      */
992     KeMemoryBarrier();
993
994     //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql);
995     InsertTailList(head, &flow->ListEntry);
996     //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql);
997
998     datapath->nFlows++;
999
1000     return STATUS_SUCCESS;
1001 }
1002
1003
1004 /* ----------------------------------------------------------------------------
1005  * RemoveFlow --
1006  *   Remove a flow from flow table, and added to wait list
1007  * ----------------------------------------------------------------------------
1008  */
1009 VOID
1010 RemoveFlow(OVS_DATAPATH *datapath,
1011            OvsFlow **flow)
1012 {
1013     OvsFlow *f = *flow;
1014     *flow = NULL;
1015     UNREFERENCED_PARAMETER(datapath);
1016
1017     ASSERT(datapath->nFlows);
1018     datapath->nFlows--;
1019     // Remove the flow  from queue
1020     RemoveEntryList(&f->ListEntry);
1021     FreeFlow(f);
1022 }
1023
1024
1025 /*
1026  * ----------------------------------------------------------------------------
1027  * OvsLookupFlow --
1028  *
1029  *    Find flow from flow table based on flow key.
1030  *    Caller should either hold portset handle or should
1031  *    have a flowRef in datapath or Acquired datapath.
1032  *
1033  * Results:
1034  *    Flow pointer if lookup successful.
1035  *    NULL if not exists.
1036  * ----------------------------------------------------------------------------
1037  */
1038 OvsFlow *
1039 OvsLookupFlow(OVS_DATAPATH *datapath,
1040               const OvsFlowKey *key,
1041               UINT64 *hash,
1042               BOOLEAN hashValid)
1043 {
1044     PLIST_ENTRY link, head;
1045     UINT16 offset = key->l2.offset;
1046     UINT16 size = key->l2.keyLen;
1047     UINT8 *start;
1048
1049     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
1050     ASSERT(!key->tunKey.dst || offset == 0);
1051
1052     start = (UINT8 *)key + offset;
1053
1054     if (!hashValid) {
1055         *hash = OvsJhashBytes(start, size, 0);
1056     }
1057
1058     head = &datapath->flowTable[HASH_BUCKET(*hash)];
1059     link  = head->Flink;
1060     while (link != head) {
1061         OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry);
1062
1063         if (flow->hash == *hash &&
1064             flow->key.l2.val == key->l2.val &&
1065             FlowEqual((UINT64 *)((uint8 *)&flow->key + offset),
1066                          (UINT64 *)start, size)) {
1067             return flow;
1068         }
1069         link = link->Flink;
1070     }
1071     return NULL;
1072 }
1073
1074
1075 /*
1076  * ----------------------------------------------------------------------------
1077  * OvsHashFlow --
1078  *    Calculate the hash for the given flow key.
1079  * ----------------------------------------------------------------------------
1080  */
1081 UINT64
1082 OvsHashFlow(const OvsFlowKey *key)
1083 {
1084     UINT16 offset = key->l2.offset;
1085     UINT16 size = key->l2.keyLen;
1086     UINT8 *start;
1087
1088     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
1089     ASSERT(!key->tunKey.dst || offset == 0);
1090     start = (UINT8 *)key + offset;
1091     return OvsJhashBytes(start, size, 0);
1092 }
1093
1094
1095 /*
1096  * ----------------------------------------------------------------------------
1097  * FreeFlow --
1098  *    Free a flow and its actions.
1099  * ----------------------------------------------------------------------------
1100  */
1101 VOID
1102 FreeFlow(OvsFlow *flow)
1103 {
1104     ASSERT(flow);
1105     OvsFreeMemory(flow);
1106 }
1107
1108 NTSTATUS
1109 OvsDoDumpFlows(OvsFlowDumpInput *dumpInput,
1110                OvsFlowDumpOutput *dumpOutput,
1111                UINT32 *replyLen)
1112 {
1113     UINT32 dpNo;
1114     OVS_DATAPATH *datapath = NULL;
1115     OvsFlow *flow;
1116     PLIST_ENTRY node, head;
1117     UINT32 column = 0;
1118     UINT32 rowIndex, columnIndex;
1119     LOCK_STATE_EX dpLockState;
1120     NTSTATUS status = STATUS_SUCCESS;
1121     BOOLEAN findNextNonEmpty = FALSE;
1122
1123     dpNo = dumpInput->dpNo;
1124     NdisAcquireSpinLock(gOvsCtrlLock);
1125     if (gOvsSwitchContext == NULL ||
1126         gOvsSwitchContext->dpNo != dpNo) {
1127         status = STATUS_INVALID_PARAMETER;
1128         goto unlock;
1129     }
1130
1131     rowIndex = dumpInput->position[0];
1132     if (rowIndex >= OVS_FLOW_TABLE_SIZE) {
1133         dumpOutput->n = 0;
1134         *replyLen = sizeof(*dumpOutput);
1135         goto unlock;
1136     }
1137
1138     columnIndex = dumpInput->position[1];
1139
1140     datapath = &gOvsSwitchContext->datapath;
1141     ASSERT(datapath);
1142     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
1143
1144     head = &datapath->flowTable[rowIndex];
1145     node = head->Flink;
1146
1147     while (column < columnIndex) {
1148         if (node == head) {
1149             break;
1150         }
1151         node = node->Flink;
1152         column++;
1153     }
1154
1155     if (node == head) {
1156         findNextNonEmpty = TRUE;
1157         columnIndex = 0;
1158     }
1159
1160     if (findNextNonEmpty) {
1161         while (head == node) {
1162             if (++rowIndex >= OVS_FLOW_TABLE_SIZE) {
1163                 dumpOutput->n = 0;
1164                 goto dp_unlock;
1165             }
1166             head = &datapath->flowTable[rowIndex];
1167             node = head->Flink;
1168         }
1169     }
1170
1171     ASSERT(node != head);
1172     ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE);
1173
1174     flow = CONTAINING_RECORD(node, OvsFlow, ListEntry);
1175     status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen,
1176                                                             &dumpOutput->flow);
1177
1178     if (status == STATUS_BUFFER_TOO_SMALL) {
1179         dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen;
1180         *replyLen = sizeof(*dumpOutput);
1181     } else {
1182         dumpOutput->n = 1; //one flow reported.
1183         *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen;
1184     }
1185
1186     dumpOutput->position[0] = rowIndex;
1187     dumpOutput->position[1] = ++columnIndex;
1188
1189 dp_unlock:
1190     OvsReleaseDatapath(datapath, &dpLockState);
1191
1192 unlock:
1193     NdisReleaseSpinLock(gOvsCtrlLock);
1194     return status;
1195 }
1196
1197 NTSTATUS
1198 OvsDumpFlowIoctl(PVOID inputBuffer,
1199                  UINT32 inputLength,
1200                  PVOID outputBuffer,
1201                  UINT32 outputLength,
1202                  UINT32 *replyLen)
1203 {
1204     OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer;
1205     OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer;
1206
1207     if (inputBuffer == NULL || outputBuffer == NULL) {
1208         return STATUS_INVALID_PARAMETER;
1209     }
1210
1211     if ((inputLength != sizeof(OvsFlowDumpInput))
1212         || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) {
1213         return STATUS_INFO_LENGTH_MISMATCH;
1214     }
1215
1216     return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen);
1217 }
1218
1219 static NTSTATUS
1220 ReportFlowInfo(OvsFlow *flow,
1221                UINT32 getFlags,
1222                UINT32 getActionsLen,
1223                OvsFlowInfo *info)
1224 {
1225     NTSTATUS status = STATUS_SUCCESS;
1226
1227     if (getFlags & FLOW_GET_KEY) {
1228         // always copy the tunnel key part
1229         RtlCopyMemory(&info->key, &flow->key,
1230                             flow->key.l2.keyLen + flow->key.l2.offset);
1231     }
1232
1233     if (getFlags & FLOW_GET_STATS) {
1234         OvsFlowStats *stats = &info->stats;
1235         stats->packetCount = flow->packetCount;
1236         stats->byteCount = flow->byteCount;
1237         stats->used = (UINT32)flow->used;
1238         stats->tcpFlags = flow->tcpFlags;
1239     }
1240
1241     if (getFlags & FLOW_GET_ACTIONS) {
1242         if (flow->actionsLen == 0) {
1243             info->actionsLen = 0;
1244         } else if (flow->actionsLen > getActionsLen) {
1245             info->actionsLen = 0;
1246             status = STATUS_BUFFER_TOO_SMALL;
1247         } else {
1248             RtlCopyMemory(info->actions, flow->actions, flow->actionsLen);
1249             info->actionsLen = flow->actionsLen;
1250         }
1251     }
1252
1253     return status;
1254 }
1255
1256 NTSTATUS
1257 OvsPutFlowIoctl(PVOID inputBuffer,
1258                 UINT32 inputLength,
1259                 struct OvsFlowStats *stats)
1260 {
1261     NTSTATUS status = STATUS_SUCCESS;
1262     OVS_DATAPATH *datapath = NULL;
1263     ULONG actionsLen;
1264     OvsFlowPut *put;
1265     UINT32 dpNo;
1266     LOCK_STATE_EX dpLockState;
1267
1268     if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) {
1269         return STATUS_INFO_LENGTH_MISMATCH;
1270     }
1271
1272     put = (OvsFlowPut *)inputBuffer;
1273     if (put->actionsLen > 0) {
1274         actionsLen = put->actionsLen;
1275     } else {
1276         actionsLen = 0;
1277     }
1278
1279     dpNo = put->dpNo;
1280     NdisAcquireSpinLock(gOvsCtrlLock);
1281     if (gOvsSwitchContext == NULL ||
1282         gOvsSwitchContext->dpNo != dpNo) {
1283         status = STATUS_INVALID_PARAMETER;
1284         goto unlock;
1285     }
1286
1287     datapath = &gOvsSwitchContext->datapath;
1288     ASSERT(datapath);
1289     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
1290     status = HandleFlowPut(put, datapath, stats);
1291     OvsReleaseDatapath(datapath, &dpLockState);
1292
1293 unlock:
1294     NdisReleaseSpinLock(gOvsCtrlLock);
1295     return status;
1296 }
1297
1298
1299 /* Handles flow add, modify as well as delete */
1300 static NTSTATUS
1301 HandleFlowPut(OvsFlowPut *put,
1302               OVS_DATAPATH *datapath,
1303               struct OvsFlowStats *stats)
1304 {
1305     BOOLEAN   mayCreate, mayModify, mayDelete;
1306     OvsFlow   *KernelFlow;
1307     UINT64    hash;
1308     NTSTATUS  status = STATUS_SUCCESS;
1309
1310     mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0;
1311     mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0;
1312     mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0;
1313
1314     if ((mayCreate || mayModify) == mayDelete) {
1315         return STATUS_INVALID_PARAMETER;
1316     }
1317
1318     KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE);
1319     if (!KernelFlow) {
1320         if (!mayCreate) {
1321             return STATUS_INVALID_PARAMETER;
1322         }
1323
1324         status = OvsPrepareFlow(&KernelFlow, put, hash);
1325         if (status != STATUS_SUCCESS) {
1326             FreeFlow(KernelFlow);
1327             return STATUS_UNSUCCESSFUL;
1328         }
1329
1330         status = AddFlow(datapath, KernelFlow);
1331         if (status != STATUS_SUCCESS) {
1332             FreeFlow(KernelFlow);
1333             return STATUS_UNSUCCESSFUL;
1334         }
1335
1336         /* Validate the flow addition */
1337         {
1338             UINT64 newHash;
1339             OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash,
1340                                                                     FALSE);
1341             ASSERT(flow);
1342             ASSERT(newHash == hash);
1343             if (!flow || newHash != hash) {
1344                 return STATUS_UNSUCCESSFUL;
1345             }
1346         }
1347     } else {
1348         stats->packetCount = KernelFlow->packetCount;
1349         stats->byteCount = KernelFlow->byteCount;
1350         stats->tcpFlags = KernelFlow->tcpFlags;
1351         stats->used = (UINT32)KernelFlow->used;
1352
1353         if (mayModify) {
1354             OvsFlow *newFlow;
1355             status = OvsPrepareFlow(&newFlow, put, hash);
1356             if (status != STATUS_SUCCESS) {
1357                 return STATUS_UNSUCCESSFUL;
1358             }
1359
1360             KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE);
1361             if (KernelFlow)  {
1362                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) {
1363                     newFlow->packetCount = KernelFlow->packetCount;
1364                     newFlow->byteCount = KernelFlow->byteCount;
1365                     newFlow->tcpFlags = KernelFlow->tcpFlags;
1366                 }
1367                 RemoveFlow(datapath, &KernelFlow);
1368             }  else  {
1369                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0)  {
1370                     newFlow->packetCount = stats->packetCount;
1371                     newFlow->byteCount = stats->byteCount;
1372                     newFlow->tcpFlags = stats->tcpFlags;
1373                 }
1374             }
1375             status = AddFlow(datapath, newFlow);
1376             ASSERT(status == STATUS_SUCCESS);
1377
1378             /* Validate the flow addition */
1379             {
1380                 UINT64 newHash;
1381                 OvsFlow *testflow = OvsLookupFlow(datapath, &put->key,
1382                                                             &newHash, FALSE);
1383                 ASSERT(testflow);
1384                 ASSERT(newHash == hash);
1385                 if (!testflow || newHash != hash) {
1386                     FreeFlow(newFlow);
1387                     return STATUS_UNSUCCESSFUL;
1388                 }
1389             }
1390         } else {
1391             if (mayDelete) {
1392                 if (KernelFlow) {
1393                     RemoveFlow(datapath, &KernelFlow);
1394                 }
1395             } else {
1396                 return STATUS_UNSUCCESSFUL;
1397             }
1398         }
1399     }
1400     return STATUS_SUCCESS;
1401 }
1402
1403 static NTSTATUS
1404 OvsPrepareFlow(OvsFlow **flow,
1405                const OvsFlowPut *put,
1406                UINT64 hash)
1407 {
1408     OvsFlow     *localFlow = *flow;
1409     NTSTATUS status = STATUS_SUCCESS;
1410
1411     do {
1412         *flow = localFlow =
1413             OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen);
1414         if (localFlow == NULL) {
1415             status = STATUS_NO_MEMORY;
1416             break;
1417         }
1418
1419         localFlow->key = put->key;
1420         localFlow->actionsLen = put->actionsLen;
1421         if (put->actionsLen) {
1422             NdisMoveMemory((PUCHAR)localFlow->actions, put->actions,
1423                                        put->actionsLen);
1424         }
1425         localFlow->userActionsLen = 0;  // 0 indicate no conversion is made
1426         localFlow->used = 0;
1427         localFlow->packetCount = 0;
1428         localFlow->byteCount = 0;
1429         localFlow->tcpFlags = 0;
1430         localFlow->hash = hash;
1431     } while(FALSE);
1432
1433     return status;
1434 }
1435
1436 NTSTATUS
1437 OvsGetFlowIoctl(PVOID inputBuffer,
1438                 UINT32 inputLength,
1439                 PVOID outputBuffer,
1440                 UINT32 outputLength,
1441                 UINT32 *replyLen)
1442 {
1443     NTSTATUS status = STATUS_SUCCESS;
1444     OVS_DATAPATH *datapath = NULL;
1445     OvsFlow *flow;
1446     UINT32 getFlags, getActionsLen;
1447     OvsFlowGetInput *getInput;
1448     OvsFlowGetOutput *getOutput;
1449     UINT64 hash;
1450     UINT32 dpNo;
1451     LOCK_STATE_EX dpLockState;
1452
1453     if (inputLength != sizeof(OvsFlowGetInput)
1454         || inputBuffer == NULL) {
1455         return STATUS_INFO_LENGTH_MISMATCH;
1456     }
1457
1458     getInput = (OvsFlowGetInput *) inputBuffer;
1459     getFlags = getInput->getFlags;
1460     getActionsLen = getInput->actionsLen;
1461     if (getInput->getFlags & FLOW_GET_KEY) {
1462         return STATUS_INVALID_PARAMETER;
1463     }
1464
1465     if (outputBuffer == NULL
1466         || outputLength != (sizeof *getOutput +
1467                             getInput->actionsLen)) {
1468         return STATUS_INFO_LENGTH_MISMATCH;
1469     }
1470
1471     dpNo = getInput->dpNo;
1472     NdisAcquireSpinLock(gOvsCtrlLock);
1473     if (gOvsSwitchContext == NULL ||
1474         gOvsSwitchContext->dpNo != dpNo) {
1475         status = STATUS_INVALID_PARAMETER;
1476         goto unlock;
1477     }
1478
1479     datapath = &gOvsSwitchContext->datapath;
1480     ASSERT(datapath);
1481     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
1482     flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE);
1483     if (!flow) {
1484         status = STATUS_INVALID_PARAMETER;
1485         goto dp_unlock;
1486     }
1487
1488     // XXX: can be optimized to return only how much is written out
1489     *replyLen = outputLength;
1490     getOutput = (OvsFlowGetOutput *)outputBuffer;
1491     ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info);
1492
1493 dp_unlock:
1494     OvsReleaseDatapath(datapath, &dpLockState);
1495 unlock:
1496     NdisReleaseSpinLock(gOvsCtrlLock);
1497     return status;
1498 }
1499
1500 NTSTATUS
1501 OvsFlushFlowIoctl(PVOID inputBuffer,
1502                   UINT32 inputLength)
1503 {
1504     NTSTATUS status = STATUS_SUCCESS;
1505     OVS_DATAPATH *datapath = NULL;
1506     UINT32 dpNo;
1507     LOCK_STATE_EX dpLockState;
1508
1509     if (inputLength != sizeof(UINT32) || inputBuffer == NULL) {
1510         return STATUS_INFO_LENGTH_MISMATCH;
1511     }
1512
1513     dpNo = *(UINT32 *)inputBuffer;
1514     NdisAcquireSpinLock(gOvsCtrlLock);
1515     if (gOvsSwitchContext == NULL ||
1516         gOvsSwitchContext->dpNo != dpNo) {
1517         status = STATUS_INVALID_PARAMETER;
1518         goto unlock;
1519     }
1520
1521     datapath = &gOvsSwitchContext->datapath;
1522     ASSERT(datapath);
1523     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
1524     DeleteAllFlows(datapath);
1525     OvsReleaseDatapath(datapath, &dpLockState);
1526
1527 unlock:
1528     NdisReleaseSpinLock(gOvsCtrlLock);
1529     return status;
1530 }
1531
1532 #pragma warning( pop )