dc00b041bb4c84bb6ff05a066f11aaf4f71657e0
[cascardo/ovs.git] / datapath-windows / ovsext / Flow.c
1 /*
2  * Copyright (c) 2014 VMware, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include "precomp.h"
18 #include "NetProto.h"
19 #include "Util.h"
20 #include "Jhash.h"
21 #include "Flow.h"
22 #include "PacketParser.h"
23
24 #ifdef OVS_DBG_MOD
25 #undef OVS_DBG_MOD
26 #endif
27 #define OVS_DBG_MOD OVS_DBG_FLOW
28 #include "Debug.h"
29
30 #pragma warning( push )
31 #pragma warning( disable:4127 )
32
33 extern PNDIS_SPIN_LOCK gOvsCtrlLock;
34 extern POVS_SWITCH_CONTEXT gOvsSwitchContext;
35 extern UINT64 ovsTimeIncrementPerTick;
36
37 static NTSTATUS ReportFlowInfo(OvsFlow *flow, UINT32 getFlags,
38                                UINT32 getActionsLen, OvsFlowInfo *info);
39 static NTSTATUS HandleFlowPut(OvsFlowPut *put,
40                                   OVS_DATAPATH *datapath,
41                                   struct OvsFlowStats *stats);
42 static NTSTATUS OvsPrepareFlow(OvsFlow **flow, const OvsFlowPut *put,
43                                UINT64 hash);
44 static VOID RemoveFlow(OVS_DATAPATH *datapath, OvsFlow **flow);
45 static VOID DeleteAllFlows(OVS_DATAPATH *datapath);
46 static NTSTATUS AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow);
47 static VOID FreeFlow(OvsFlow *flow);
48 static VOID __inline *GetStartAddrNBL(const NET_BUFFER_LIST *_pNB);
49 static NTSTATUS _MapNlToFlowPut(POVS_MESSAGE msgIn, PNL_ATTR keyAttr,
50                                 PNL_ATTR actionAttr,
51                                 PNL_ATTR flowAttrClear,
52                                 OvsFlowPut *mappedFlow);
53 static VOID _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
54                                  PNL_ATTR *tunnelAttrs,
55                                  OvsFlowPut *mappedFlow);
56
57 static VOID _MapTunAttrToFlowPut(PNL_ATTR *keyAttrs,
58                                  PNL_ATTR *tunnelAttrs,
59                                  OvsFlowKey *destKey);
60 static VOID _MapNlToFlowPutFlags(PGENL_MSG_HDR genlMsgHdr,
61                                  PNL_ATTR flowAttrClear,
62                                  OvsFlowPut *mappedFlow);
63
64 #define OVS_FLOW_TABLE_SIZE 2048
65 #define OVS_FLOW_TABLE_MASK (OVS_FLOW_TABLE_SIZE -1)
66 #define HASH_BUCKET(hash) ((hash) & OVS_FLOW_TABLE_MASK)
67
68 /* Flow family related netlink policies */
69
70 /* For Parsing attributes in FLOW_* commands */
71 static const NL_POLICY nlFlowPolicy[] = {
72     [OVS_FLOW_ATTR_KEY] = {.type = NL_A_NESTED, .optional = FALSE},
73     [OVS_FLOW_ATTR_MASK] = {.type = NL_A_NESTED, .optional = TRUE},
74     [OVS_FLOW_ATTR_ACTIONS] = {.type = NL_A_NESTED, .optional = TRUE},
75     [OVS_FLOW_ATTR_STATS] = {.type = NL_A_UNSPEC,
76                              .minLen = sizeof(struct ovs_flow_stats),
77                              .maxLen = sizeof(struct ovs_flow_stats),
78                              .optional = TRUE},
79     [OVS_FLOW_ATTR_TCP_FLAGS] = {NL_A_U8, .optional = TRUE},
80     [OVS_FLOW_ATTR_USED] = {NL_A_U64, .optional = TRUE}
81 };
82
83 /* For Parsing nested OVS_FLOW_ATTR_KEY attributes.
84  * Some of the attributes like OVS_KEY_ATTR_RECIRC_ID
85  * & OVS_KEY_ATTR_MPLS are not supported yet. */
86
87 static const NL_POLICY nlFlowKeyPolicy[] = {
88     [OVS_KEY_ATTR_ENCAP] = {.type = NL_A_VAR_LEN, .optional = TRUE},
89     [OVS_KEY_ATTR_PRIORITY] = {.type = NL_A_UNSPEC, .minLen = 4,
90                                .maxLen = 4, .optional = TRUE},
91     [OVS_KEY_ATTR_IN_PORT] = {.type = NL_A_UNSPEC, .minLen = 4,
92                               .maxLen = 4, .optional = FALSE},
93     [OVS_KEY_ATTR_ETHERNET] = {.type = NL_A_UNSPEC,
94                                .minLen = sizeof(struct ovs_key_ethernet),
95                                .maxLen = sizeof(struct ovs_key_ethernet),
96                                .optional = FALSE},
97     [OVS_KEY_ATTR_VLAN] = {.type = NL_A_UNSPEC, .minLen = 2,
98                            .maxLen = 2, .optional = TRUE},
99     [OVS_KEY_ATTR_ETHERTYPE] = {.type = NL_A_UNSPEC, .minLen = 2,
100                                 .maxLen = 2, .optional = TRUE},
101     [OVS_KEY_ATTR_IPV4] = {.type = NL_A_UNSPEC,
102                            .minLen = sizeof(struct ovs_key_ipv4),
103                            .maxLen = sizeof(struct ovs_key_ipv4),
104                            .optional = TRUE},
105     [OVS_KEY_ATTR_IPV6] = {.type = NL_A_UNSPEC,
106                            .minLen = sizeof(struct ovs_key_ipv6),
107                            .maxLen = sizeof(struct ovs_key_ipv6),
108                            .optional = TRUE},
109     [OVS_KEY_ATTR_TCP] = {.type = NL_A_UNSPEC,
110                           .minLen = sizeof(struct ovs_key_tcp),
111                           .maxLen = sizeof(struct ovs_key_tcp),
112                           .optional = TRUE},
113     [OVS_KEY_ATTR_UDP] = {.type = NL_A_UNSPEC,
114                           .minLen = sizeof(struct ovs_key_udp),
115                           .maxLen = sizeof(struct ovs_key_udp),
116                           .optional = TRUE},
117     [OVS_KEY_ATTR_ICMP] = {.type = NL_A_UNSPEC,
118                            .minLen = sizeof(struct ovs_key_icmp),
119                            .maxLen = sizeof(struct ovs_key_icmp),
120                            .optional = TRUE},
121     [OVS_KEY_ATTR_ICMPV6] = {.type = NL_A_UNSPEC,
122                              .minLen = sizeof(struct ovs_key_icmpv6),
123                              .maxLen = sizeof(struct ovs_key_icmpv6),
124                              .optional = TRUE},
125     [OVS_KEY_ATTR_ARP] = {.type = NL_A_UNSPEC,
126                           .minLen = sizeof(struct ovs_key_arp),
127                           .maxLen = sizeof(struct ovs_key_arp),
128                           .optional = TRUE},
129     [OVS_KEY_ATTR_ND] = {.type = NL_A_UNSPEC,
130                          .minLen = sizeof(struct ovs_key_nd),
131                          .maxLen = sizeof(struct ovs_key_nd),
132                          .optional = TRUE},
133     [OVS_KEY_ATTR_SKB_MARK] = {.type = NL_A_UNSPEC, .minLen = 4,
134                                .maxLen = 4, .optional = TRUE},
135     [OVS_KEY_ATTR_TUNNEL] = {.type = NL_A_VAR_LEN, .optional = TRUE},
136     [OVS_KEY_ATTR_SCTP] = {.type = NL_A_UNSPEC,
137                            .minLen = sizeof(struct ovs_key_sctp),
138                            .maxLen = sizeof(struct ovs_key_sctp),
139                            .optional = TRUE},
140     [OVS_KEY_ATTR_TCP_FLAGS] = {.type = NL_A_UNSPEC,
141                                 .minLen = 2, .maxLen = 2,
142                                 .optional = TRUE},
143     [OVS_KEY_ATTR_DP_HASH] = {.type = NL_A_UNSPEC, .minLen = 4,
144                               .maxLen = 4, .optional = TRUE},
145     [OVS_KEY_ATTR_RECIRC_ID] = {.type = NL_A_UNSPEC, .minLen = 4,
146                                 .maxLen = 4, .optional = TRUE},
147     [OVS_KEY_ATTR_MPLS] = {.type = NL_A_VAR_LEN, .optional = TRUE}
148 };
149
150 /* For Parsing nested OVS_KEY_ATTR_TUNNEL attributes */
151 static const NL_POLICY nlFlowTunnelKeyPolicy[] = {
152     [OVS_TUNNEL_KEY_ATTR_ID] = {.type = NL_A_UNSPEC, .minLen = 8,
153                                 .maxLen = 8, .optional = TRUE},
154     [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = {.type = NL_A_UNSPEC, .minLen = 4,
155                                       .maxLen = 4, .optional = TRUE},
156     [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = {.type = NL_A_UNSPEC, .minLen = 4 ,
157                                       .maxLen = 4, .optional = FALSE},
158     [OVS_TUNNEL_KEY_ATTR_TOS] = {.type = NL_A_UNSPEC, .minLen = 1,
159                                  .maxLen = 1, .optional = TRUE},
160     [OVS_TUNNEL_KEY_ATTR_TTL] = {.type = NL_A_UNSPEC, .minLen = 1,
161                                  .maxLen = 1, .optional = TRUE},
162     [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = {.type = NL_A_UNSPEC, .minLen = 0,
163                                            .maxLen = 0, .optional = TRUE},
164     [OVS_TUNNEL_KEY_ATTR_CSUM] = {.type = NL_A_UNSPEC, .minLen = 0,
165                                   .maxLen = 0, .optional = TRUE},
166     [OVS_TUNNEL_KEY_ATTR_OAM] = {.type = NL_A_UNSPEC, .minLen = 0,
167                                  .maxLen = 0, .optional = TRUE},
168     [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = {.type = NL_A_VAR_LEN,
169                                          .optional = TRUE}
170 };
171
172 /* For Parsing nested OVS_FLOW_ATTR_ACTIONS attributes */
173 static const NL_POLICY nlFlowActionPolicy[] = {
174     [OVS_ACTION_ATTR_OUTPUT] = {.type = NL_A_UNSPEC, .minLen = sizeof(UINT32),
175                                 .maxLen = sizeof(UINT32), .optional = TRUE},
176     [OVS_ACTION_ATTR_USERSPACE] = {.type = NL_A_VAR_LEN, .optional = TRUE},
177     [OVS_ACTION_ATTR_PUSH_VLAN] = {.type = NL_A_UNSPEC,
178                                    .minLen =
179                                    sizeof(struct ovs_action_push_vlan),
180                                    .maxLen =
181                                    sizeof(struct ovs_action_push_vlan),
182                                    .optional = TRUE},
183     [OVS_ACTION_ATTR_POP_VLAN] = {.type = NL_A_UNSPEC, .optional = TRUE},
184     [OVS_ACTION_ATTR_PUSH_MPLS] = {.type = NL_A_UNSPEC,
185                                    .minLen =
186                                    sizeof(struct ovs_action_push_mpls),
187                                    .maxLen =
188                                    sizeof(struct ovs_action_push_mpls),
189                                    .optional = TRUE},
190     [OVS_ACTION_ATTR_POP_MPLS] = {.type = NL_A_UNSPEC,
191                                   .minLen = sizeof(UINT16),
192                                   .maxLen = sizeof(UINT16),
193                                   .optional = TRUE},
194     [OVS_ACTION_ATTR_RECIRC] = {.type = NL_A_UNSPEC,
195                                 .minLen = sizeof(UINT32),
196                                 .maxLen = sizeof(UINT32),
197                                 .optional = TRUE},
198     [OVS_ACTION_ATTR_HASH] = {.type = NL_A_UNSPEC,
199                               .minLen = sizeof(struct ovs_action_hash),
200                               .maxLen = sizeof(struct ovs_action_hash),
201                               .optional = TRUE},
202     [OVS_ACTION_ATTR_SET] = {.type = NL_A_VAR_LEN, .optional = TRUE},
203     [OVS_ACTION_ATTR_SAMPLE] = {.type = NL_A_VAR_LEN, .optional = TRUE}
204 };
205
206 /*
207  *----------------------------------------------------------------------------
208  * Netlink interface for flow commands.
209  *----------------------------------------------------------------------------
210  */
211
212 /*
213  *----------------------------------------------------------------------------
214  *  OvsFlowNlNewCmdHandler --
215  *    Handler for OVS_FLOW_CMD_NEW/SET/DEL command.
216  *    It also handles FLUSH case (DEL w/o any key in input)
217  *----------------------------------------------------------------------------
218  */
219 NTSTATUS
220 OvsFlowNlNewCmdHandler(POVS_USER_PARAMS_CONTEXT usrParamsCtx,
221                        UINT32 *replyLen)
222 {
223     NTSTATUS rc = STATUS_SUCCESS;
224     POVS_MESSAGE msgIn = (POVS_MESSAGE)usrParamsCtx->inputBuffer;
225     POVS_MESSAGE msgOut = (POVS_MESSAGE)usrParamsCtx->outputBuffer;
226     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
227     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
228     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
229     PNL_ATTR nlAttrs[__OVS_FLOW_ATTR_MAX];
230     UINT32 attrOffset = NLMSG_HDRLEN + GENL_HDRLEN + OVS_HDRLEN;
231     OvsFlowPut mappedFlow;
232     OvsFlowStats stats;
233     struct ovs_flow_stats replyStats;
234
235     NL_BUFFER nlBuf;
236
237     RtlZeroMemory(&mappedFlow, sizeof(OvsFlowPut));
238     RtlZeroMemory(&stats, sizeof(stats));
239     RtlZeroMemory(&replyStats, sizeof(replyStats));
240
241     *replyLen = 0;
242
243     /* Get all the top level Flow attributes */
244     if ((NlAttrParse(nlMsgHdr, attrOffset, NlMsgAttrsLen(nlMsgHdr),
245                      nlFlowPolicy, nlAttrs, ARRAY_SIZE(nlAttrs)))
246                      != TRUE) {
247         OVS_LOG_ERROR("Attr Parsing failed for msg: %p",
248                        nlMsgHdr);
249         rc = STATUS_UNSUCCESSFUL;
250         goto done;
251     }
252
253     /* FLOW_DEL command w/o any key input is a flush case. */
254     if ((genlMsgHdr->cmd == OVS_FLOW_CMD_DEL) && 
255         (!(nlAttrs[OVS_FLOW_ATTR_KEY]))) {
256         rc = OvsFlushFlowIoctl(ovsHdr->dp_ifindex);
257         goto done;
258     }
259
260     if ((_MapNlToFlowPut(msgIn, nlAttrs[OVS_FLOW_ATTR_KEY],
261          nlAttrs[OVS_FLOW_ATTR_ACTIONS], nlAttrs[OVS_FLOW_ATTR_CLEAR],
262          &mappedFlow))
263         != STATUS_SUCCESS) {
264         OVS_LOG_ERROR("Conversion to OvsFlowPut failed");
265         goto done;
266     }
267
268     rc = OvsPutFlowIoctl(&mappedFlow, sizeof (struct OvsFlowPut),
269                          &stats);
270     if (rc != STATUS_SUCCESS) {
271         OVS_LOG_ERROR("OvsFlowPut failed.");
272         goto done;
273     }
274
275     if (!(usrParamsCtx->outputBuffer)) {
276         /* No output buffer */
277         OVS_LOG_ERROR("outputBuffer NULL.");
278         goto done;
279     }
280
281     replyStats.n_packets = stats.packetCount;
282     replyStats.n_bytes = stats.byteCount;
283
284     /* So far so good. Prepare the reply for userspace */
285     NlBufInit(&nlBuf, usrParamsCtx->outputBuffer,
286               usrParamsCtx->outputLength);
287
288     /* Prepare nl Msg headers */
289     rc = NlFillOvsMsg(&nlBuf, nlMsgHdr->nlmsgType, 0,
290                       nlMsgHdr->nlmsgSeq, nlMsgHdr->nlmsgPid,
291                       genlMsgHdr->cmd, OVS_FLOW_VERSION,
292                       ovsHdr->dp_ifindex);
293     ASSERT(rc);
294
295     /* Append OVS_FLOW_ATTR_STATS attribute */
296     if (!NlMsgPutTailUnspec(&nlBuf, OVS_FLOW_ATTR_STATS,
297         (PCHAR)(&replyStats), sizeof(replyStats))) {
298         OVS_LOG_ERROR("Adding OVS_FLOW_ATTR_STATS attribute failed.");
299         rc = STATUS_UNSUCCESSFUL;
300     }
301
302     *replyLen = msgOut->nlMsg.nlmsgLen;
303
304 done:
305     return rc;
306 }
307
308 /*
309  *----------------------------------------------------------------------------
310  *  _MapNlToFlowPut --
311  *    Maps input netlink message to OvsFlowPut.
312  *----------------------------------------------------------------------------
313  */
314 static NTSTATUS
315 _MapNlToFlowPut(POVS_MESSAGE msgIn, PNL_ATTR keyAttr,
316                 PNL_ATTR actionAttr, PNL_ATTR flowAttrClear,
317                 OvsFlowPut *mappedFlow)
318 {
319     NTSTATUS rc = STATUS_SUCCESS;
320     PNL_MSG_HDR nlMsgHdr = &(msgIn->nlMsg);
321     PGENL_MSG_HDR genlMsgHdr = &(msgIn->genlMsg);
322     POVS_HDR ovsHdr = &(msgIn->ovsHdr);
323
324     UINT32 keyAttrOffset = (UINT32)((PCHAR)keyAttr - (PCHAR)nlMsgHdr);
325     UINT32 tunnelKeyAttrOffset;
326
327     PNL_ATTR keyAttrs[__OVS_KEY_ATTR_MAX] = {NULL};
328     PNL_ATTR tunnelAttrs[__OVS_TUNNEL_KEY_ATTR_MAX] = {NULL};
329
330     /* Get flow keys attributes */
331     if ((NlAttrParseNested(nlMsgHdr, keyAttrOffset, NlAttrLen(keyAttr),
332                            nlFlowKeyPolicy, keyAttrs, ARRAY_SIZE(keyAttrs)))
333                            != TRUE) {
334         OVS_LOG_ERROR("Key Attr Parsing failed for msg: %p",
335                        nlMsgHdr);
336         rc = STATUS_UNSUCCESSFUL;
337         goto done;
338     }
339
340     if (keyAttrs[OVS_KEY_ATTR_TUNNEL]) {
341         tunnelKeyAttrOffset = (UINT32)((PCHAR)
342                               (keyAttrs[OVS_KEY_ATTR_TUNNEL])
343                               - (PCHAR)nlMsgHdr);
344
345         OVS_LOG_ERROR("Parse Flow Tunnel Key Policy");
346
347         /* Get tunnel keys attributes */
348         if ((NlAttrParseNested(nlMsgHdr, tunnelKeyAttrOffset,
349                                NlAttrLen(keyAttr), nlFlowTunnelKeyPolicy,
350                                tunnelAttrs, ARRAY_SIZE(tunnelAttrs)))
351                                != TRUE) {
352             OVS_LOG_ERROR("Tunnel key Attr Parsing failed for msg: %p",
353                            nlMsgHdr);
354             rc = STATUS_UNSUCCESSFUL;
355             goto done;
356         }
357     }
358
359     _MapKeyAttrToFlowPut(keyAttrs, tunnelAttrs,
360                          mappedFlow);
361
362     /* Map the action */
363     if (actionAttr) {
364         mappedFlow->actionsLen = NlAttrGetSize(actionAttr);
365         mappedFlow->actions = NlAttrGet(actionAttr);
366     }
367
368     mappedFlow->dpNo = ovsHdr->dp_ifindex;
369
370     _MapNlToFlowPutFlags(genlMsgHdr, flowAttrClear,
371                                 mappedFlow);
372
373 done:
374     return rc;
375 }
376
377 /*
378  *----------------------------------------------------------------------------
379  *  _MapNlToFlowPutFlags --
380  *    Maps netlink message to OvsFlowPut->flags.
381  *----------------------------------------------------------------------------
382  */
383 static VOID
384 _MapNlToFlowPutFlags(PGENL_MSG_HDR genlMsgHdr,
385                      PNL_ATTR flowAttrClear, OvsFlowPut *mappedFlow)
386 {
387     uint32_t flags = 0;
388
389     switch (genlMsgHdr->cmd) {
390     case OVS_FLOW_CMD_NEW:
391          flags |= OVSWIN_FLOW_PUT_CREATE;
392          break;
393     case OVS_FLOW_CMD_DEL:
394          flags |= OVSWIN_FLOW_PUT_DELETE;
395          break;
396     case OVS_FLOW_CMD_SET:
397          flags |= OVSWIN_FLOW_PUT_MODIFY;
398          break;
399     default:
400          ASSERT(0);
401     }
402
403     if (flowAttrClear) {
404         flags |= OVSWIN_FLOW_PUT_CLEAR;
405     }
406
407     mappedFlow->flags = flags;
408 }
409
410 /*
411  *----------------------------------------------------------------------------
412  *  _MapKeyAttrToFlowPut --
413  *    Converts FLOW_KEY attribute to OvsFlowPut->key.
414  *----------------------------------------------------------------------------
415  */
416 static VOID
417 _MapKeyAttrToFlowPut(PNL_ATTR *keyAttrs,
418                      PNL_ATTR *tunnelAttrs,
419                      OvsFlowPut *mappedFlow)
420 {
421     const struct ovs_key_ethernet *eth_key;
422     OvsFlowKey *destKey = &(mappedFlow->key);
423
424     _MapTunAttrToFlowPut(keyAttrs, tunnelAttrs, destKey);
425
426     /* ===== L2 headers ===== */
427     destKey->l2.inPort = NlAttrGetU32(keyAttrs[OVS_KEY_ATTR_IN_PORT]);
428     eth_key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ETHERNET]);
429     RtlCopyMemory(destKey->l2.dlSrc, eth_key->eth_src, ETH_ADDR_LEN);
430     RtlCopyMemory(destKey->l2.dlDst, eth_key->eth_dst, ETH_ADDR_LEN);
431
432     destKey->l2.dlType = ntohs((NlAttrGetU16(keyAttrs
433                           [OVS_KEY_ATTR_ETHERTYPE])));
434
435     if (keyAttrs[OVS_KEY_ATTR_VLAN]) {
436         destKey->l2.vlanTci = NlAttrGetU16(keyAttrs
437                               [OVS_KEY_ATTR_VLAN]);
438     }
439
440     /* ==== L3 + L4. ==== */
441     destKey->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE
442                          - destKey->l2.offset;
443
444     switch (destKey->l2.dlType) {
445     case ETH_TYPE_IPV4: {
446
447         if (keyAttrs[OVS_KEY_ATTR_IPV4]) {
448             const struct ovs_key_ipv4 *ipv4Key;
449
450             ipv4Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_IPV4]);
451             IpKey *ipv4FlowPutKey = &(destKey->ipKey);
452             ipv4FlowPutKey->nwSrc = ipv4Key->ipv4_src;
453             ipv4FlowPutKey->nwDst = ipv4Key->ipv4_dst;
454             ipv4FlowPutKey->nwProto = ipv4Key->ipv4_proto;
455             ipv4FlowPutKey->nwTos = ipv4Key->ipv4_tos;
456             ipv4FlowPutKey->nwTtl = ipv4Key->ipv4_ttl;
457             ipv4FlowPutKey->nwFrag = ipv4Key->ipv4_frag;
458
459             if (keyAttrs[OVS_KEY_ATTR_TCP]) {
460                 const struct ovs_key_tcp *tcpKey;
461                 tcpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_TCP]);
462                 ipv4FlowPutKey->l4.tpSrc = tcpKey->tcp_src;
463                 ipv4FlowPutKey->l4.tpDst = tcpKey->tcp_dst;
464             }
465
466             if (keyAttrs[OVS_KEY_ATTR_UDP]) {
467                 const struct ovs_key_udp *udpKey;
468                 udpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_UDP]);
469                 ipv4FlowPutKey->l4.tpSrc = udpKey->udp_src;
470                 ipv4FlowPutKey->l4.tpDst = udpKey->udp_dst;
471             }
472
473             if (keyAttrs[OVS_KEY_ATTR_SCTP]) {
474                 const struct ovs_key_sctp *sctpKey;
475                 sctpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_SCTP]);
476                 ipv4FlowPutKey->l4.tpSrc = sctpKey->sctp_src;
477                 ipv4FlowPutKey->l4.tpDst = sctpKey->sctp_dst;
478             }
479
480             destKey->l2.keyLen += OVS_IP_KEY_SIZE;
481         }
482         break;
483     }
484     case ETH_TYPE_IPV6: {
485
486         if (keyAttrs[OVS_KEY_ATTR_IPV6]) {
487             const struct ovs_key_ipv6 *ipv6Key;
488
489             ipv6Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_IPV6]);
490             Ipv6Key *ipv6FlowPutKey = &(destKey->ipv6Key);
491
492             RtlCopyMemory(&ipv6FlowPutKey->ipv6Src, ipv6Key->ipv6_src,
493                           sizeof ipv6Key->ipv6_src);
494             RtlCopyMemory(&ipv6FlowPutKey->ipv6Dst, ipv6Key->ipv6_dst,
495                           sizeof ipv6Key->ipv6_dst);
496
497             ipv6FlowPutKey->ipv6Label = ipv6Key->ipv6_label;
498             ipv6FlowPutKey->nwProto  = ipv6Key->ipv6_proto;
499             ipv6FlowPutKey->nwTos = ipv6Key->ipv6_tclass;
500             ipv6FlowPutKey->nwTtl = ipv6Key->ipv6_hlimit;
501             ipv6FlowPutKey->nwFrag = ipv6Key->ipv6_frag;
502
503             if (keyAttrs[OVS_KEY_ATTR_TCP]) {
504                 const struct ovs_key_tcp *tcpKey;
505                 tcpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_TCP]);
506                 ipv6FlowPutKey->l4.tpSrc = tcpKey->tcp_src;
507                 ipv6FlowPutKey->l4.tpDst = tcpKey->tcp_dst;
508             }
509
510             if (keyAttrs[OVS_KEY_ATTR_UDP]) {
511                 const struct ovs_key_udp *udpKey;
512                 udpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_UDP]);
513                 ipv6FlowPutKey->l4.tpSrc = udpKey->udp_src;
514                 ipv6FlowPutKey->l4.tpDst = udpKey->udp_dst;
515             }
516
517             if (keyAttrs[OVS_KEY_ATTR_SCTP]) {
518                 const struct ovs_key_sctp *sctpKey;
519                 sctpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_SCTP]);
520                 ipv6FlowPutKey->l4.tpSrc = sctpKey->sctp_src;
521                 ipv6FlowPutKey->l4.tpDst = sctpKey->sctp_dst;
522             }
523
524             if (keyAttrs[OVS_KEY_ATTR_ICMPV6]) {
525                 const struct ovs_key_icmpv6 *icmpv6Key;
526
527                 Icmp6Key *icmp6FlowPutKey= &(destKey->icmp6Key);
528
529                 icmpv6Key = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ICMPV6]);
530
531                 icmp6FlowPutKey->l4.tpSrc = icmpv6Key->icmpv6_type;
532                 icmp6FlowPutKey->l4.tpDst = icmpv6Key->icmpv6_code;
533
534                 if (keyAttrs[OVS_KEY_ATTR_ND]) {
535                     const struct ovs_key_nd *ndKey;
536
537                     ndKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ND]);
538                     RtlCopyMemory(&icmp6FlowPutKey->ndTarget,
539                                   ndKey->nd_target, sizeof (icmp6FlowPutKey->ndTarget));
540                     RtlCopyMemory(icmp6FlowPutKey->arpSha,
541                                   ndKey->nd_sll, ETH_ADDR_LEN);
542                     RtlCopyMemory(icmp6FlowPutKey->arpTha,
543                                   ndKey->nd_tll, ETH_ADDR_LEN);
544                 }
545
546                 destKey->l2.keyLen += OVS_ICMPV6_KEY_SIZE;
547
548             } else {
549
550                 destKey->l2.keyLen += OVS_IPV6_KEY_SIZE;
551             }
552
553             ipv6FlowPutKey->pad = 0;
554         }
555         break;
556     }
557     case ETH_TYPE_ARP:
558     case ETH_TYPE_RARP: {
559         ArpKey *arpFlowPutKey = &destKey->arpKey;
560         const struct ovs_key_arp *arpKey;
561
562         arpKey = NlAttrGet(keyAttrs[OVS_KEY_ATTR_ARP]);
563
564         arpFlowPutKey->nwSrc = arpKey->arp_sip;
565         arpFlowPutKey->nwDst = arpKey->arp_tip;
566
567         RtlCopyMemory(arpFlowPutKey->arpSha, arpKey->arp_sha, ETH_ADDR_LEN);
568         RtlCopyMemory(arpFlowPutKey->arpTha, arpKey->arp_tha, ETH_ADDR_LEN);
569         arpFlowPutKey->nwProto = (UINT8)(arpKey->arp_op);
570         arpFlowPutKey->pad[0] = 0;
571         arpFlowPutKey->pad[1] = 0;
572         arpFlowPutKey->pad[2] = 0;
573         destKey->l2.keyLen += OVS_ARP_KEY_SIZE;
574         break;
575     }
576     }
577 }
578
579 /*
580  *----------------------------------------------------------------------------
581  *  _MapTunAttrToFlowPut --
582  *    Converts FLOW_TUNNEL_KEY attribute to OvsFlowKey->tunKey.
583  *----------------------------------------------------------------------------
584  */
585 static VOID
586 _MapTunAttrToFlowPut(PNL_ATTR *keyAttrs,
587                      PNL_ATTR *tunAttrs,
588                      OvsFlowKey *destKey)
589 {
590     if (keyAttrs[OVS_KEY_ATTR_TUNNEL]) {
591
592         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_ID]) {
593             destKey->tunKey.tunnelId = NlAttrGetU64
594                                        (tunAttrs[OVS_TUNNEL_KEY_ATTR_ID]);
595             destKey->tunKey.flags |= OVS_TNL_F_KEY;
596         }
597
598         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]) {
599         destKey->tunKey.dst = NlAttrGetU32
600                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_DST]);
601         }
602
603         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]) {
604         destKey->tunKey.src = NlAttrGetU32
605                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_IPV4_SRC]);
606         }
607
608         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT]) {
609             destKey->tunKey.flags |= OVS_TNL_F_DONT_FRAGMENT;
610         }
611
612         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_CSUM]) {
613             destKey->tunKey.flags |= OVS_TNL_F_CSUM;
614         }
615
616         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_TOS]) {
617         destKey->tunKey.tos = NlAttrGetU8
618                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_TOS]);
619         }
620
621         if (tunAttrs[OVS_TUNNEL_KEY_ATTR_TTL]) {
622         destKey->tunKey.ttl = NlAttrGetU8
623                               (tunAttrs[OVS_TUNNEL_KEY_ATTR_TTL]);
624         }
625
626         destKey->tunKey.pad = 0;
627         destKey->l2.offset = 0;
628     } else {
629         destKey->tunKey.attr[0] = 0;
630         destKey->tunKey.attr[1] = 0;
631         destKey->tunKey.attr[2] = 0;
632         destKey->l2.offset = sizeof destKey->tunKey;
633     }
634 }
635
636 /*
637  *----------------------------------------------------------------------------
638  * OvsDeleteFlowTable --
639  * Results:
640  *    NDIS_STATUS_SUCCESS always.
641  *----------------------------------------------------------------------------
642  */
643 NDIS_STATUS
644 OvsDeleteFlowTable(OVS_DATAPATH *datapath)
645 {
646     if (datapath == NULL || datapath->flowTable == NULL) {
647         return NDIS_STATUS_SUCCESS;
648     }
649
650     DeleteAllFlows(datapath);
651     OvsFreeMemory(datapath->flowTable);
652     datapath->flowTable = NULL;
653     NdisFreeRWLock(datapath->lock);
654
655     return NDIS_STATUS_SUCCESS;
656 }
657
658 /*
659  *----------------------------------------------------------------------------
660  * OvsAllocateFlowTable --
661  * Results:
662  *    NDIS_STATUS_SUCCESS on success.
663  *    NDIS_STATUS_RESOURCES if memory couldn't be allocated
664  *----------------------------------------------------------------------------
665  */
666 NDIS_STATUS
667 OvsAllocateFlowTable(OVS_DATAPATH *datapath,
668                      POVS_SWITCH_CONTEXT switchContext)
669 {
670     PLIST_ENTRY bucket;
671     int i;
672
673     datapath->flowTable = OvsAllocateMemory(OVS_FLOW_TABLE_SIZE *
674                                             sizeof (LIST_ENTRY));
675     if (!datapath->flowTable) {
676         return NDIS_STATUS_RESOURCES;
677     }
678     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
679         bucket = &(datapath->flowTable[i]);
680         InitializeListHead(bucket);
681     }
682     datapath->lock = NdisAllocateRWLock(switchContext->NdisFilterHandle);
683
684     return NDIS_STATUS_SUCCESS;
685 }
686
687
688 /*
689  *----------------------------------------------------------------------------
690  *  GetStartAddrNBL --
691  *    Get the virtual address of the frame.
692  *
693  *  Results:
694  *    Virtual address of the frame.
695  *----------------------------------------------------------------------------
696  */
697 static __inline VOID *
698 GetStartAddrNBL(const NET_BUFFER_LIST *_pNB)
699 {
700     PMDL curMdl;
701     PUINT8 curBuffer;
702     PEthHdr curHeader;
703
704     ASSERT(_pNB);
705
706     // Ethernet Header is a guaranteed safe access.
707     curMdl = (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdl;
708     curBuffer =  MmGetSystemAddressForMdlSafe(curMdl, LowPagePriority);
709     if (!curBuffer) {
710         return NULL;
711     }
712
713     curHeader = (PEthHdr)
714     (curBuffer + (NET_BUFFER_LIST_FIRST_NB(_pNB))->CurrentMdlOffset);
715
716     return (VOID *) curHeader;
717 }
718
719 VOID
720 OvsFlowUsed(OvsFlow *flow,
721             const NET_BUFFER_LIST *packet,
722             const POVS_PACKET_HDR_INFO layers)
723 {
724     LARGE_INTEGER tickCount;
725
726     KeQueryTickCount(&tickCount);
727     flow->used = tickCount.QuadPart * ovsTimeIncrementPerTick;
728     flow->packetCount++;
729     flow->byteCount += OvsPacketLenNBL(packet);
730     flow->tcpFlags |= OvsGetTcpFlags(packet, &flow->key, layers);
731 }
732
733
734 VOID
735 DeleteAllFlows(OVS_DATAPATH *datapath)
736 {
737     INT i;
738     PLIST_ENTRY bucket;
739
740     for (i = 0; i < OVS_FLOW_TABLE_SIZE; i++) {
741         PLIST_ENTRY next;
742         bucket = &(datapath->flowTable[i]);
743         while (!IsListEmpty(bucket)) {
744             OvsFlow *flow;
745             next = bucket->Flink;
746             flow = CONTAINING_RECORD(next, OvsFlow, ListEntry);
747             RemoveFlow(datapath, &flow);
748         }
749     }
750 }
751
752 /*
753  *----------------------------------------------------------------------------
754  * Initializes 'flow' members from 'packet', 'skb_priority', 'tun_id', and
755  * 'ofp_in_port'.
756  *
757  * Initializes 'packet' header pointers as follows:
758  *
759  *    - packet->l2 to the start of the Ethernet header.
760  *
761  *    - packet->l3 to just past the Ethernet header, or just past the
762  *      vlan_header if one is present, to the first byte of the payload of the
763  *      Ethernet frame.
764  *
765  *    - packet->l4 to just past the IPv4 header, if one is present and has a
766  *      correct length, and otherwise NULL.
767  *
768  *    - packet->l7 to just past the TCP or UDP or ICMP header, if one is
769  *      present and has a correct length, and otherwise NULL.
770  *
771  * Returns NDIS_STATUS_SUCCESS normally.  Fails only if packet data cannot be accessed
772  * (e.g. if Pkt_CopyBytesOut() returns an error).
773  *----------------------------------------------------------------------------
774  */
775 NDIS_STATUS
776 OvsExtractFlow(const NET_BUFFER_LIST *packet,
777                UINT32 inPort,
778                OvsFlowKey *flow,
779                POVS_PACKET_HDR_INFO layers,
780                OvsIPv4TunnelKey *tunKey)
781 {
782     struct Eth_Header *eth;
783     UINT8 offset = 0;
784     PVOID vlanTagValue;
785
786     layers->value = 0;
787
788     if (tunKey) {
789         ASSERT(tunKey->dst != 0);
790         RtlMoveMemory(&flow->tunKey, tunKey, sizeof flow->tunKey);
791         flow->l2.offset = 0;
792     } else {
793         flow->tunKey.dst = 0;
794         flow->l2.offset = OVS_WIN_TUNNEL_KEY_SIZE;
795     }
796
797     flow->l2.inPort = inPort;
798
799     if ( OvsPacketLenNBL(packet) < ETH_HEADER_LEN_DIX) {
800         flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + 8 - flow->l2.offset;
801         return NDIS_STATUS_SUCCESS;
802     }
803
804     /* Link layer. */
805     eth = (Eth_Header *)GetStartAddrNBL((NET_BUFFER_LIST *)packet);
806     memcpy(flow->l2.dlSrc, eth->src, ETH_ADDR_LENGTH);
807     memcpy(flow->l2.dlDst, eth->dst, ETH_ADDR_LENGTH);
808
809     /*
810      * vlan_tci.
811      */
812     vlanTagValue = NET_BUFFER_LIST_INFO(packet, Ieee8021QNetBufferListInfo);
813     if (vlanTagValue) {
814         PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanTag =
815             (PNDIS_NET_BUFFER_LIST_8021Q_INFO)(PVOID *)&vlanTagValue;
816         flow->l2.vlanTci = htons(vlanTag->TagHeader.VlanId | OVSWIN_VLAN_CFI |
817                                  (vlanTag->TagHeader.UserPriority << 13));
818     } else {
819         if (eth->dix.typeNBO == ETH_TYPE_802_1PQ_NBO) {
820             Eth_802_1pq_Tag *tag= (Eth_802_1pq_Tag *)&eth->dix.typeNBO;
821             flow->l2.vlanTci = ((UINT16)tag->priority << 13) |
822                                OVSWIN_VLAN_CFI |
823                                ((UINT16)tag->vidHi << 8)  | tag->vidLo;
824             offset = sizeof (Eth_802_1pq_Tag);
825         } else {
826             flow->l2.vlanTci = 0;
827         }
828         /*
829         * XXX
830         * Please note after this point, src mac and dst mac should
831         * not be accessed through eth
832         */
833         eth = (Eth_Header *)((UINT8 *)eth + offset);
834     }
835
836     /*
837      * dl_type.
838      *
839      * XXX assume that at least the first
840      * 12 bytes of received packets are mapped.  This code has the stronger
841      * assumption that at least the first 22 bytes of 'packet' is mapped (if my
842      * arithmetic is right).
843      */
844     if (ETH_TYPENOT8023(eth->dix.typeNBO)) {
845         flow->l2.dlType = eth->dix.typeNBO;
846         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
847     } else if (OvsPacketLenNBL(packet)  >= ETH_HEADER_LEN_802_3 &&
848               eth->e802_3.llc.dsap == 0xaa &&
849               eth->e802_3.llc.ssap == 0xaa &&
850               eth->e802_3.llc.control == ETH_LLC_CONTROL_UFRAME &&
851               eth->e802_3.snap.snapOrg[0] == 0x00 &&
852               eth->e802_3.snap.snapOrg[1] == 0x00 &&
853               eth->e802_3.snap.snapOrg[2] == 0x00) {
854         flow->l2.dlType = eth->e802_3.snap.snapType.typeNBO;
855         layers->l3Offset = ETH_HEADER_LEN_802_3 + offset;
856     } else {
857         flow->l2.dlType = htons(OVSWIN_DL_TYPE_NONE);
858         layers->l3Offset = ETH_HEADER_LEN_DIX + offset;
859     }
860
861     flow->l2.keyLen = OVS_WIN_TUNNEL_KEY_SIZE + OVS_L2_KEY_SIZE - flow->l2.offset;
862     /* Network layer. */
863     if (flow->l2.dlType == htons(ETH_TYPE_IPV4)) {
864         struct IPHdr ip_storage;
865         const struct IPHdr *nh;
866         IpKey *ipKey = &flow->ipKey;
867
868         flow->l2.keyLen += OVS_IP_KEY_SIZE;
869         layers->isIPv4 = 1;
870         nh = OvsGetIp(packet, layers->l3Offset, &ip_storage);
871         if (nh) {
872             layers->l4Offset = layers->l3Offset + nh->ihl * 4;
873
874             ipKey->nwSrc = nh->saddr;
875             ipKey->nwDst = nh->daddr;
876             ipKey->nwProto = nh->protocol;
877
878             ipKey->nwTos = nh->tos;
879             if (nh->frag_off & htons(IP_MF | IP_OFFSET)) {
880                 ipKey->nwFrag = OVSWIN_NW_FRAG_ANY;
881                 if (nh->frag_off & htons(IP_OFFSET)) {
882                     ipKey->nwFrag |= OVSWIN_NW_FRAG_LATER;
883                 }
884             } else {
885                 ipKey->nwFrag = 0;
886             }
887
888             ipKey->nwTtl = nh->ttl;
889             ipKey->l4.tpSrc = 0;
890             ipKey->l4.tpDst = 0;
891
892             if (!(nh->frag_off & htons(IP_OFFSET))) {
893                 if (ipKey->nwProto == SOCKET_IPPROTO_TCP) {
894                     OvsParseTcp(packet, &ipKey->l4, layers);
895                 } else if (ipKey->nwProto == SOCKET_IPPROTO_UDP) {
896                     OvsParseUdp(packet, &ipKey->l4, layers);
897                 } else if (ipKey->nwProto == SOCKET_IPPROTO_ICMP) {
898                     ICMPHdr icmpStorage;
899                     const ICMPHdr *icmp;
900
901                     icmp = OvsGetIcmp(packet, layers->l4Offset, &icmpStorage);
902                     if (icmp) {
903                         ipKey->l4.tpSrc = htons(icmp->type);
904                         ipKey->l4.tpDst = htons(icmp->code);
905                         layers->l7Offset = layers->l4Offset + sizeof *icmp;
906                     }
907                 }
908             }
909         } else {
910             ((UINT64 *)ipKey)[0] = 0;
911             ((UINT64 *)ipKey)[1] = 0;
912         }
913     } else if (flow->l2.dlType == htons(ETH_TYPE_IPV6)) {
914         NDIS_STATUS status;
915         flow->l2.keyLen += OVS_IPV6_KEY_SIZE;
916         status = OvsParseIPv6(packet, flow, layers);
917         if (status != NDIS_STATUS_SUCCESS) {
918             memset(&flow->ipv6Key, 0, sizeof (Ipv6Key));
919             return status;
920         }
921         layers->isIPv6 = 1;
922         flow->ipv6Key.l4.tpSrc = 0;
923         flow->ipv6Key.l4.tpDst = 0;
924         flow->ipv6Key.pad = 0;
925
926         if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_TCP) {
927             OvsParseTcp(packet, &(flow->ipv6Key.l4), layers);
928         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_UDP) {
929             OvsParseUdp(packet, &(flow->ipv6Key.l4), layers);
930         } else if (flow->ipv6Key.nwProto == SOCKET_IPPROTO_ICMPV6) {
931             OvsParseIcmpV6(packet, flow, layers);
932             flow->l2.keyLen += (OVS_ICMPV6_KEY_SIZE - OVS_IPV6_KEY_SIZE);
933         }
934     } else if (flow->l2.dlType == htons(ETH_TYPE_ARP)) {
935         EtherArp arpStorage;
936         const EtherArp *arp;
937         ArpKey *arpKey = &flow->arpKey;
938         ((UINT64 *)arpKey)[0] = 0;
939         ((UINT64 *)arpKey)[1] = 0;
940         ((UINT64 *)arpKey)[2] = 0;
941         flow->l2.keyLen += OVS_ARP_KEY_SIZE;
942         arp = OvsGetArp(packet, layers->l3Offset, &arpStorage);
943         if (arp && arp->ea_hdr.ar_hrd == htons(1) &&
944             arp->ea_hdr.ar_pro == htons(ETH_TYPE_IPV4) &&
945             arp->ea_hdr.ar_hln == ETH_ADDR_LENGTH &&
946             arp->ea_hdr.ar_pln == 4) {
947             /* We only match on the lower 8 bits of the opcode. */
948             if (ntohs(arp->ea_hdr.ar_op) <= 0xff) {
949                 arpKey->nwProto = (UINT8)ntohs(arp->ea_hdr.ar_op);
950             }
951             if (arpKey->nwProto == ARPOP_REQUEST
952                 || arpKey->nwProto == ARPOP_REPLY) {
953                 memcpy(&arpKey->nwSrc, arp->arp_spa, 4);
954                 memcpy(&arpKey->nwDst, arp->arp_tpa, 4);
955                 memcpy(arpKey->arpSha, arp->arp_sha, ETH_ADDR_LENGTH);
956                 memcpy(arpKey->arpTha, arp->arp_tha, ETH_ADDR_LENGTH);
957             }
958         }
959     }
960
961     return NDIS_STATUS_SUCCESS;
962 }
963
964 __inline BOOLEAN
965 FlowEqual(UINT64 *src, UINT64 *dst, UINT32 size)
966 {
967     UINT32 i;
968     ASSERT((size & 0x7) == 0);
969     ASSERT(((UINT64)src & 0x7) == 0);
970     ASSERT(((UINT64)dst & 0x7) == 0);
971     for (i = 0; i < (size >> 3); i++) {
972         if (src[i] != dst[i]) {
973             return FALSE;
974         }
975     }
976     return TRUE;
977 }
978
979
980 /*
981  * ----------------------------------------------------------------------------
982  * AddFlow --
983  *    Add a flow to flow table.
984  *
985  * Results:
986  *   NDIS_STATUS_SUCCESS if no same flow in the flow table.
987  * ----------------------------------------------------------------------------
988  */
989 NTSTATUS
990 AddFlow(OVS_DATAPATH *datapath, OvsFlow *flow)
991 {
992     PLIST_ENTRY head;
993
994     if (OvsLookupFlow(datapath, &flow->key, &flow->hash, TRUE) != NULL) {
995         return STATUS_INVALID_HANDLE;
996     }
997
998     head = &(datapath->flowTable[HASH_BUCKET(flow->hash)]);
999     /*
1000      * We need fence here to make sure flow's nextPtr is updated before
1001      * head->nextPtr is updated.
1002      */
1003     KeMemoryBarrier();
1004
1005     //KeAcquireSpinLock(&FilterDeviceExtension->NblQueueLock, &oldIrql);
1006     InsertTailList(head, &flow->ListEntry);
1007     //KeReleaseSpinLock(&FilterDeviceExtension->NblQueueLock, oldIrql);
1008
1009     datapath->nFlows++;
1010
1011     return STATUS_SUCCESS;
1012 }
1013
1014
1015 /* ----------------------------------------------------------------------------
1016  * RemoveFlow --
1017  *   Remove a flow from flow table, and added to wait list
1018  * ----------------------------------------------------------------------------
1019  */
1020 VOID
1021 RemoveFlow(OVS_DATAPATH *datapath,
1022            OvsFlow **flow)
1023 {
1024     OvsFlow *f = *flow;
1025     *flow = NULL;
1026     UNREFERENCED_PARAMETER(datapath);
1027
1028     ASSERT(datapath->nFlows);
1029     datapath->nFlows--;
1030     // Remove the flow  from queue
1031     RemoveEntryList(&f->ListEntry);
1032     FreeFlow(f);
1033 }
1034
1035
1036 /*
1037  * ----------------------------------------------------------------------------
1038  * OvsLookupFlow --
1039  *
1040  *    Find flow from flow table based on flow key.
1041  *    Caller should either hold portset handle or should
1042  *    have a flowRef in datapath or Acquired datapath.
1043  *
1044  * Results:
1045  *    Flow pointer if lookup successful.
1046  *    NULL if not exists.
1047  * ----------------------------------------------------------------------------
1048  */
1049 OvsFlow *
1050 OvsLookupFlow(OVS_DATAPATH *datapath,
1051               const OvsFlowKey *key,
1052               UINT64 *hash,
1053               BOOLEAN hashValid)
1054 {
1055     PLIST_ENTRY link, head;
1056     UINT16 offset = key->l2.offset;
1057     UINT16 size = key->l2.keyLen;
1058     UINT8 *start;
1059
1060     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
1061     ASSERT(!key->tunKey.dst || offset == 0);
1062
1063     start = (UINT8 *)key + offset;
1064
1065     if (!hashValid) {
1066         *hash = OvsJhashBytes(start, size, 0);
1067     }
1068
1069     head = &datapath->flowTable[HASH_BUCKET(*hash)];
1070     link  = head->Flink;
1071     while (link != head) {
1072         OvsFlow *flow = CONTAINING_RECORD(link, OvsFlow, ListEntry);
1073
1074         if (flow->hash == *hash &&
1075             flow->key.l2.val == key->l2.val &&
1076             FlowEqual((UINT64 *)((uint8 *)&flow->key + offset),
1077                          (UINT64 *)start, size)) {
1078             return flow;
1079         }
1080         link = link->Flink;
1081     }
1082     return NULL;
1083 }
1084
1085
1086 /*
1087  * ----------------------------------------------------------------------------
1088  * OvsHashFlow --
1089  *    Calculate the hash for the given flow key.
1090  * ----------------------------------------------------------------------------
1091  */
1092 UINT64
1093 OvsHashFlow(const OvsFlowKey *key)
1094 {
1095     UINT16 offset = key->l2.offset;
1096     UINT16 size = key->l2.keyLen;
1097     UINT8 *start;
1098
1099     ASSERT(key->tunKey.dst || offset == sizeof (OvsIPv4TunnelKey));
1100     ASSERT(!key->tunKey.dst || offset == 0);
1101     start = (UINT8 *)key + offset;
1102     return OvsJhashBytes(start, size, 0);
1103 }
1104
1105
1106 /*
1107  * ----------------------------------------------------------------------------
1108  * FreeFlow --
1109  *    Free a flow and its actions.
1110  * ----------------------------------------------------------------------------
1111  */
1112 VOID
1113 FreeFlow(OvsFlow *flow)
1114 {
1115     ASSERT(flow);
1116     OvsFreeMemory(flow);
1117 }
1118
1119 NTSTATUS
1120 OvsDoDumpFlows(OvsFlowDumpInput *dumpInput,
1121                OvsFlowDumpOutput *dumpOutput,
1122                UINT32 *replyLen)
1123 {
1124     UINT32 dpNo;
1125     OVS_DATAPATH *datapath = NULL;
1126     OvsFlow *flow;
1127     PLIST_ENTRY node, head;
1128     UINT32 column = 0;
1129     UINT32 rowIndex, columnIndex;
1130     LOCK_STATE_EX dpLockState;
1131     NTSTATUS status = STATUS_SUCCESS;
1132     BOOLEAN findNextNonEmpty = FALSE;
1133
1134     dpNo = dumpInput->dpNo;
1135     NdisAcquireSpinLock(gOvsCtrlLock);
1136     if (gOvsSwitchContext == NULL ||
1137         gOvsSwitchContext->dpNo != dpNo) {
1138         status = STATUS_INVALID_PARAMETER;
1139         goto unlock;
1140     }
1141
1142     rowIndex = dumpInput->position[0];
1143     if (rowIndex >= OVS_FLOW_TABLE_SIZE) {
1144         dumpOutput->n = 0;
1145         *replyLen = sizeof(*dumpOutput);
1146         goto unlock;
1147     }
1148
1149     columnIndex = dumpInput->position[1];
1150
1151     datapath = &gOvsSwitchContext->datapath;
1152     ASSERT(datapath);
1153     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
1154
1155     head = &datapath->flowTable[rowIndex];
1156     node = head->Flink;
1157
1158     while (column < columnIndex) {
1159         if (node == head) {
1160             break;
1161         }
1162         node = node->Flink;
1163         column++;
1164     }
1165
1166     if (node == head) {
1167         findNextNonEmpty = TRUE;
1168         columnIndex = 0;
1169     }
1170
1171     if (findNextNonEmpty) {
1172         while (head == node) {
1173             if (++rowIndex >= OVS_FLOW_TABLE_SIZE) {
1174                 dumpOutput->n = 0;
1175                 goto dp_unlock;
1176             }
1177             head = &datapath->flowTable[rowIndex];
1178             node = head->Flink;
1179         }
1180     }
1181
1182     ASSERT(node != head);
1183     ASSERT(rowIndex < OVS_FLOW_TABLE_SIZE);
1184
1185     flow = CONTAINING_RECORD(node, OvsFlow, ListEntry);
1186     status = ReportFlowInfo(flow, dumpInput->getFlags, dumpInput->actionsLen,
1187                                                             &dumpOutput->flow);
1188
1189     if (status == STATUS_BUFFER_TOO_SMALL) {
1190         dumpOutput->n = sizeof(OvsFlowDumpOutput) + flow->actionsLen;
1191         *replyLen = sizeof(*dumpOutput);
1192     } else {
1193         dumpOutput->n = 1; //one flow reported.
1194         *replyLen = sizeof(*dumpOutput) + dumpOutput->flow.actionsLen;
1195     }
1196
1197     dumpOutput->position[0] = rowIndex;
1198     dumpOutput->position[1] = ++columnIndex;
1199
1200 dp_unlock:
1201     OvsReleaseDatapath(datapath, &dpLockState);
1202
1203 unlock:
1204     NdisReleaseSpinLock(gOvsCtrlLock);
1205     return status;
1206 }
1207
1208 NTSTATUS
1209 OvsDumpFlowIoctl(PVOID inputBuffer,
1210                  UINT32 inputLength,
1211                  PVOID outputBuffer,
1212                  UINT32 outputLength,
1213                  UINT32 *replyLen)
1214 {
1215     OvsFlowDumpOutput *dumpOutput = (OvsFlowDumpOutput *)outputBuffer;
1216     OvsFlowDumpInput *dumpInput = (OvsFlowDumpInput *)inputBuffer;
1217
1218     if (inputBuffer == NULL || outputBuffer == NULL) {
1219         return STATUS_INVALID_PARAMETER;
1220     }
1221
1222     if ((inputLength != sizeof(OvsFlowDumpInput))
1223         || (outputLength != sizeof *dumpOutput + dumpInput->actionsLen)) {
1224         return STATUS_INFO_LENGTH_MISMATCH;
1225     }
1226
1227     return OvsDoDumpFlows(dumpInput, dumpOutput, replyLen);
1228 }
1229
1230 static NTSTATUS
1231 ReportFlowInfo(OvsFlow *flow,
1232                UINT32 getFlags,
1233                UINT32 getActionsLen,
1234                OvsFlowInfo *info)
1235 {
1236     NTSTATUS status = STATUS_SUCCESS;
1237
1238     if (getFlags & FLOW_GET_KEY) {
1239         // always copy the tunnel key part
1240         RtlCopyMemory(&info->key, &flow->key,
1241                             flow->key.l2.keyLen + flow->key.l2.offset);
1242     }
1243
1244     if (getFlags & FLOW_GET_STATS) {
1245         OvsFlowStats *stats = &info->stats;
1246         stats->packetCount = flow->packetCount;
1247         stats->byteCount = flow->byteCount;
1248         stats->used = (UINT32)flow->used;
1249         stats->tcpFlags = flow->tcpFlags;
1250     }
1251
1252     if (getFlags & FLOW_GET_ACTIONS) {
1253         if (flow->actionsLen == 0) {
1254             info->actionsLen = 0;
1255         } else if (flow->actionsLen > getActionsLen) {
1256             info->actionsLen = 0;
1257             status = STATUS_BUFFER_TOO_SMALL;
1258         } else {
1259             RtlCopyMemory(info->actions, flow->actions, flow->actionsLen);
1260             info->actionsLen = flow->actionsLen;
1261         }
1262     }
1263
1264     return status;
1265 }
1266
1267 NTSTATUS
1268 OvsPutFlowIoctl(PVOID inputBuffer,
1269                 UINT32 inputLength,
1270                 struct OvsFlowStats *stats)
1271 {
1272     NTSTATUS status = STATUS_SUCCESS;
1273     OVS_DATAPATH *datapath = NULL;
1274     ULONG actionsLen;
1275     OvsFlowPut *put;
1276     UINT32 dpNo;
1277     LOCK_STATE_EX dpLockState;
1278
1279     if ((inputLength < sizeof(OvsFlowPut)) || (inputBuffer == NULL)) {
1280         return STATUS_INFO_LENGTH_MISMATCH;
1281     }
1282
1283     put = (OvsFlowPut *)inputBuffer;
1284     if (put->actionsLen > 0) {
1285         actionsLen = put->actionsLen;
1286     } else {
1287         actionsLen = 0;
1288     }
1289
1290     dpNo = put->dpNo;
1291     NdisAcquireSpinLock(gOvsCtrlLock);
1292     if (gOvsSwitchContext == NULL ||
1293         gOvsSwitchContext->dpNo != dpNo) {
1294         status = STATUS_INVALID_PARAMETER;
1295         goto unlock;
1296     }
1297
1298     datapath = &gOvsSwitchContext->datapath;
1299     ASSERT(datapath);
1300     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
1301     status = HandleFlowPut(put, datapath, stats);
1302     OvsReleaseDatapath(datapath, &dpLockState);
1303
1304 unlock:
1305     NdisReleaseSpinLock(gOvsCtrlLock);
1306     return status;
1307 }
1308
1309
1310 /* Handles flow add, modify as well as delete */
1311 static NTSTATUS
1312 HandleFlowPut(OvsFlowPut *put,
1313               OVS_DATAPATH *datapath,
1314               struct OvsFlowStats *stats)
1315 {
1316     BOOLEAN   mayCreate, mayModify, mayDelete;
1317     OvsFlow   *KernelFlow;
1318     UINT64    hash;
1319     NTSTATUS  status = STATUS_SUCCESS;
1320
1321     mayCreate = (put->flags & OVSWIN_FLOW_PUT_CREATE) != 0;
1322     mayModify = (put->flags & OVSWIN_FLOW_PUT_MODIFY) != 0;
1323     mayDelete = (put->flags & OVSWIN_FLOW_PUT_DELETE) != 0;
1324
1325     if ((mayCreate || mayModify) == mayDelete) {
1326         return STATUS_INVALID_PARAMETER;
1327     }
1328
1329     KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, FALSE);
1330     if (!KernelFlow) {
1331         if (!mayCreate) {
1332             return STATUS_INVALID_PARAMETER;
1333         }
1334
1335         status = OvsPrepareFlow(&KernelFlow, put, hash);
1336         if (status != STATUS_SUCCESS) {
1337             FreeFlow(KernelFlow);
1338             return STATUS_UNSUCCESSFUL;
1339         }
1340
1341         status = AddFlow(datapath, KernelFlow);
1342         if (status != STATUS_SUCCESS) {
1343             FreeFlow(KernelFlow);
1344             return STATUS_UNSUCCESSFUL;
1345         }
1346
1347         /* Validate the flow addition */
1348         {
1349             UINT64 newHash;
1350             OvsFlow *flow = OvsLookupFlow(datapath, &put->key, &newHash,
1351                                                                     FALSE);
1352             ASSERT(flow);
1353             ASSERT(newHash == hash);
1354             if (!flow || newHash != hash) {
1355                 return STATUS_UNSUCCESSFUL;
1356             }
1357         }
1358     } else {
1359         stats->packetCount = KernelFlow->packetCount;
1360         stats->byteCount = KernelFlow->byteCount;
1361         stats->tcpFlags = KernelFlow->tcpFlags;
1362         stats->used = (UINT32)KernelFlow->used;
1363
1364         if (mayModify) {
1365             OvsFlow *newFlow;
1366             status = OvsPrepareFlow(&newFlow, put, hash);
1367             if (status != STATUS_SUCCESS) {
1368                 return STATUS_UNSUCCESSFUL;
1369             }
1370
1371             KernelFlow = OvsLookupFlow(datapath, &put->key, &hash, TRUE);
1372             if (KernelFlow)  {
1373                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0) {
1374                     newFlow->packetCount = KernelFlow->packetCount;
1375                     newFlow->byteCount = KernelFlow->byteCount;
1376                     newFlow->tcpFlags = KernelFlow->tcpFlags;
1377                 }
1378                 RemoveFlow(datapath, &KernelFlow);
1379             }  else  {
1380                 if ((put->flags & OVSWIN_FLOW_PUT_CLEAR) == 0)  {
1381                     newFlow->packetCount = stats->packetCount;
1382                     newFlow->byteCount = stats->byteCount;
1383                     newFlow->tcpFlags = stats->tcpFlags;
1384                 }
1385             }
1386             status = AddFlow(datapath, newFlow);
1387             ASSERT(status == STATUS_SUCCESS);
1388
1389             /* Validate the flow addition */
1390             {
1391                 UINT64 newHash;
1392                 OvsFlow *testflow = OvsLookupFlow(datapath, &put->key,
1393                                                             &newHash, FALSE);
1394                 ASSERT(testflow);
1395                 ASSERT(newHash == hash);
1396                 if (!testflow || newHash != hash) {
1397                     FreeFlow(newFlow);
1398                     return STATUS_UNSUCCESSFUL;
1399                 }
1400             }
1401         } else {
1402             if (mayDelete) {
1403                 if (KernelFlow) {
1404                     RemoveFlow(datapath, &KernelFlow);
1405                 }
1406             } else {
1407                 return STATUS_UNSUCCESSFUL;
1408             }
1409         }
1410     }
1411     return STATUS_SUCCESS;
1412 }
1413
1414 static NTSTATUS
1415 OvsPrepareFlow(OvsFlow **flow,
1416                const OvsFlowPut *put,
1417                UINT64 hash)
1418 {
1419     OvsFlow     *localFlow = *flow;
1420     NTSTATUS status = STATUS_SUCCESS;
1421
1422     do {
1423         *flow = localFlow =
1424             OvsAllocateMemory(sizeof(OvsFlow) + put->actionsLen);
1425         if (localFlow == NULL) {
1426             status = STATUS_NO_MEMORY;
1427             break;
1428         }
1429
1430         localFlow->key = put->key;
1431         localFlow->actionsLen = put->actionsLen;
1432         if (put->actionsLen) {
1433             NdisMoveMemory((PUCHAR)localFlow->actions, put->actions,
1434                                        put->actionsLen);
1435         }
1436         localFlow->userActionsLen = 0;  // 0 indicate no conversion is made
1437         localFlow->used = 0;
1438         localFlow->packetCount = 0;
1439         localFlow->byteCount = 0;
1440         localFlow->tcpFlags = 0;
1441         localFlow->hash = hash;
1442     } while(FALSE);
1443
1444     return status;
1445 }
1446
1447 NTSTATUS
1448 OvsGetFlowIoctl(PVOID inputBuffer,
1449                 UINT32 inputLength,
1450                 PVOID outputBuffer,
1451                 UINT32 outputLength,
1452                 UINT32 *replyLen)
1453 {
1454     NTSTATUS status = STATUS_SUCCESS;
1455     OVS_DATAPATH *datapath = NULL;
1456     OvsFlow *flow;
1457     UINT32 getFlags, getActionsLen;
1458     OvsFlowGetInput *getInput;
1459     OvsFlowGetOutput *getOutput;
1460     UINT64 hash;
1461     UINT32 dpNo;
1462     LOCK_STATE_EX dpLockState;
1463
1464     if (inputLength != sizeof(OvsFlowGetInput)
1465         || inputBuffer == NULL) {
1466         return STATUS_INFO_LENGTH_MISMATCH;
1467     }
1468
1469     getInput = (OvsFlowGetInput *) inputBuffer;
1470     getFlags = getInput->getFlags;
1471     getActionsLen = getInput->actionsLen;
1472     if (getInput->getFlags & FLOW_GET_KEY) {
1473         return STATUS_INVALID_PARAMETER;
1474     }
1475
1476     if (outputBuffer == NULL
1477         || outputLength != (sizeof *getOutput +
1478                             getInput->actionsLen)) {
1479         return STATUS_INFO_LENGTH_MISMATCH;
1480     }
1481
1482     dpNo = getInput->dpNo;
1483     NdisAcquireSpinLock(gOvsCtrlLock);
1484     if (gOvsSwitchContext == NULL ||
1485         gOvsSwitchContext->dpNo != dpNo) {
1486         status = STATUS_INVALID_PARAMETER;
1487         goto unlock;
1488     }
1489
1490     datapath = &gOvsSwitchContext->datapath;
1491     ASSERT(datapath);
1492     OvsAcquireDatapathRead(datapath, &dpLockState, FALSE);
1493     flow = OvsLookupFlow(datapath, &getInput->key, &hash, FALSE);
1494     if (!flow) {
1495         status = STATUS_INVALID_PARAMETER;
1496         goto dp_unlock;
1497     }
1498
1499     // XXX: can be optimized to return only how much is written out
1500     *replyLen = outputLength;
1501     getOutput = (OvsFlowGetOutput *)outputBuffer;
1502     ReportFlowInfo(flow, getFlags, getActionsLen, &getOutput->info);
1503
1504 dp_unlock:
1505     OvsReleaseDatapath(datapath, &dpLockState);
1506 unlock:
1507     NdisReleaseSpinLock(gOvsCtrlLock);
1508     return status;
1509 }
1510
1511 NTSTATUS
1512 OvsFlushFlowIoctl(UINT32 dpNo)
1513 {
1514     NTSTATUS status = STATUS_SUCCESS;
1515     OVS_DATAPATH *datapath = NULL;
1516     LOCK_STATE_EX dpLockState;
1517
1518     NdisAcquireSpinLock(gOvsCtrlLock);
1519     if (gOvsSwitchContext == NULL ||
1520         gOvsSwitchContext->dpNo != dpNo) {
1521         status = STATUS_INVALID_PARAMETER;
1522         goto unlock;
1523     }
1524
1525     datapath = &gOvsSwitchContext->datapath;
1526     ASSERT(datapath);
1527     OvsAcquireDatapathWrite(datapath, &dpLockState, FALSE);
1528     DeleteAllFlows(datapath);
1529     OvsReleaseDatapath(datapath, &dpLockState);
1530
1531 unlock:
1532     NdisReleaseSpinLock(gOvsCtrlLock);
1533     return status;
1534 }
1535
1536 #pragma warning( pop )