2 * Copyright (c) 2015 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "netlink-conntrack.h"
21 #include <linux/netfilter/nfnetlink.h>
22 #include <linux/netfilter/nfnetlink_conntrack.h>
23 #include <linux/netfilter/nf_conntrack_common.h>
24 #include <linux/netfilter/nf_conntrack_tcp.h>
25 #include <linux/netfilter/nf_conntrack_ftp.h>
26 #include <linux/netfilter/nf_conntrack_sctp.h>
28 #include "byte-order.h"
30 #include "openvswitch/dynamic-string.h"
32 #include "netlink-socket.h"
33 #include "openvswitch/ofpbuf.h"
34 #include "openvswitch/vlog.h"
35 #include "poll-loop.h"
40 VLOG_DEFINE_THIS_MODULE(netlink_conntrack);
41 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
43 /* This module works only if conntrack modules and features are enabled in the
44 * Linux kernel. This can be done from a root shell like this:
46 * $ modprobe ip_conntrack
47 * $ sysctl -w net.netfilter.nf_conntrack_acct=1
48 * $ sysctl -w net.netfilter.nf_conntrack_timestamp=1
50 * Also, if testing conntrack label feature without conntrack-aware OVS kernel
51 * module, there must be a connlabel rule in iptables for space to be reserved
52 * for the labels (see kernel source connlabel_mt_check()). Such a rule can be
53 * inserted from a root shell like this:
55 * $ iptables -A INPUT -m conntrack -m connlabel \
56 * --ctstate NEW,ESTABLISHED,RELATED --label 127 -j ACCEPT
59 /* Some attributes were introduced in later kernels: with these definitions
60 * we should be able to compile userspace against Linux 2.6.32+. */
/* Top-level conntrack attribute types, numbered relative to CTA_SECMARK. */
62 #define CTA_ZONE (CTA_SECMARK + 1)
63 #define CTA_SECCTX (CTA_SECMARK + 2)
64 #define CTA_TIMESTAMP (CTA_SECMARK + 3)
65 #define CTA_MARK_MASK (CTA_SECMARK + 4)
66 #define CTA_LABELS (CTA_SECMARK + 5)
67 #define CTA_LABELS_MASK (CTA_SECMARK + 6)
/* Attribute types nested inside CTA_TIMESTAMP; they index the policy in
 * nl_ct_parse_timestamp(). */
69 #define CTA_TIMESTAMP_START 1
70 #define CTA_TIMESTAMP_STOP 2
/* Connection status bits 11 and 12 with their single-bit masks.
 * NOTE(review): presumably consumed through the IPS_##FLAG expansion in
 * ips_status_to_dpif_flags() -- confirm against the flag list. */
72 #define IPS_TEMPLATE_BIT 11
73 #define IPS_TEMPLATE (1 << IPS_TEMPLATE_BIT)
75 #define IPS_UNTRACKED_BIT 12
76 #define IPS_UNTRACKED (1 << IPS_UNTRACKED_BIT)
/* Parse policy for the top-level attributes of a conntrack netlink message,
 * applied by nl_ct_parse_header_policy().  The original and reply tuples,
 * the status and the ID are mandatory; every other attribute is optional. */
78 static const struct nl_policy nfnlgrp_conntrack_policy[] = {
79 [CTA_TUPLE_ORIG] = { .type = NL_A_NESTED, .optional = false },
80 [CTA_TUPLE_REPLY] = { .type = NL_A_NESTED, .optional = false },
81 [CTA_ZONE] = { .type = NL_A_BE16, .optional = true },
82 [CTA_STATUS] = { .type = NL_A_BE32, .optional = false },
83 [CTA_TIMESTAMP] = { .type = NL_A_NESTED, .optional = true },
84 [CTA_TIMEOUT] = { .type = NL_A_BE32, .optional = true },
85 [CTA_COUNTERS_ORIG] = { .type = NL_A_NESTED, .optional = true },
86 [CTA_COUNTERS_REPLY] = { .type = NL_A_NESTED, .optional = true },
87 [CTA_PROTOINFO] = { .type = NL_A_NESTED, .optional = true },
88 [CTA_HELP] = { .type = NL_A_NESTED, .optional = true },
89 [CTA_MARK] = { .type = NL_A_BE32, .optional = true },
90 [CTA_SECCTX] = { .type = NL_A_NESTED, .optional = true },
91 [CTA_ID] = { .type = NL_A_BE32, .optional = false },
92 [CTA_USE] = { .type = NL_A_BE32, .optional = true },
93 [CTA_TUPLE_MASTER] = { .type = NL_A_NESTED, .optional = true },
94 [CTA_NAT_SEQ_ADJ_ORIG] = { .type = NL_A_NESTED, .optional = true },
95 [CTA_NAT_SEQ_ADJ_REPLY] = { .type = NL_A_NESTED, .optional = true },
96 [CTA_LABELS] = { .type = NL_A_UNSPEC, .optional = true },
97 /* CTA_NAT_SRC, CTA_NAT_DST, CTA_TIMESTAMP, CTA_MARK_MASK, and
98 * CTA_LABELS_MASK are not received from kernel. */
/* NOTE(review): CTA_TIMESTAMP is named in the list above, yet this table has
 * a CTA_TIMESTAMP entry and nl_ct_attrs_to_ct_dpif_entry() parses it when
 * present -- confirm which of the two is right. */
101 /* Declarations for conntrack netlink dumping. */
/* nl_msg_put_nfgenmsg() composes the nlmsghdr + nfgenmsg request header. */
102 static void nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
103 int family, uint8_t subsystem, uint8_t cmd,
/* nl_ct_parse_header_policy() validates a received message and splits it
 * into 'attrs', indexed like nfnlgrp_conntrack_policy. */
106 static bool nl_ct_parse_header_policy(struct ofpbuf *buf,
107 enum nl_ct_event_type *event_type,
108 uint8_t *nfgen_family,
109 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)]);
/* nl_ct_attrs_to_ct_dpif_entry() converts those attributes into a
 * struct ct_dpif_entry. */
111 static bool nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
112 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
113 uint8_t nfgen_family);
/* Per-dump state created by nl_ct_dump_start(), consumed by
 * nl_ct_dump_next() and released by nl_ct_dump_done().  Those functions use
 * its 'dump', 'buf', 'filter_zone' and 'zone' members. */
115 struct nl_ct_dump_state {
122 /* Conntrack netlink dumping. */
124 /* Initialize a conntrack netlink dump. */
/* Stores a newly allocated, zero-filled dump state in '*statep', composes an
 * IPCTNL_MSG_CT_GET request with family AF_UNSPEC in the state's buffer and
 * starts the dump on NETLINK_NETFILTER with it. */
126 nl_ct_dump_start(struct nl_ct_dump_state **statep, const uint16_t *zone)
128 struct nl_ct_dump_state *state;
130 *statep = state = xzalloc(sizeof *state);
131 ofpbuf_init(&state->buf, NL_DUMP_BUFSIZE);
/* NOTE(review): presumably zone filtering is enabled only when 'zone' is
 * non-NULL -- confirm against the surrounding condition. */
134 state->filter_zone = true;
138 nl_msg_put_nfgenmsg(&state->buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
139 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
140 nl_dump_start(&state->dump, NETLINK_NETFILTER, &state->buf);
/* The request has been handed to nl_dump_start(); empty the buffer, which
 * nl_ct_dump_next() later passes to nl_dump_next(). */
141 ofpbuf_clear(&state->buf);
146 /* Receive the next 'entry' from the conntrack netlink dump with 'state'.
147 * Returns 'EOF' when no more entries are available, 0 otherwise. 'entry' may
148 * be uninitialized memory on entry, and must be uninitialized with
149 * ct_dpif_entry_uninit() afterwards by the caller. In case the same 'entry' is
150 * passed to this function again, the entry must also be uninitialized before
153 nl_ct_dump_next(struct nl_ct_dump_state *state, struct ct_dpif_entry *entry)
157 memset(entry, 0, sizeof *entry);
159 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
160 enum nl_ct_event_type type;
161 uint8_t nfgen_family;
163 if (!nl_dump_next(&state->dump, &buf, &state->buf)) {
167 if (!nl_ct_parse_header_policy(&buf, &type, &nfgen_family, attrs)) {
171 if (state->filter_zone) {
172 uint16_t entry_zone = attrs[CTA_ZONE]
173 ? ntohs(nl_attr_get_be16(attrs[CTA_ZONE]))
175 if (entry_zone != state->zone) {
180 if (nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
184 ct_dpif_entry_uninit(entry);
185 memset(entry, 0, sizeof *entry);
186 /* Ignore the failed entry and get the next one. */
193 /* End a conntrack netlink dump. */
/* Finishes the netlink dump held in 'state', keeping nl_dump_done()'s result
 * in 'error', and releases the buffer set up by nl_ct_dump_start(). */
195 nl_ct_dump_done(struct nl_ct_dump_state *state)
197 int error = nl_dump_done(&state->dump);
199 ofpbuf_uninit(&state->buf);
204 /* Format conntrack event 'entry' of 'type' to 'ds'. */
/* The output starts with the event name ("NEW", "UPDATE" or "DELETE" for the
 * corresponding NL_CT_EVENT_* value) followed by a space, then the entry as
 * rendered by ct_dpif_format_entry(), to which 'verbose' and 'print_stats'
 * are forwarded unchanged. */
206 nl_ct_format_event_entry(const struct ct_dpif_entry *entry,
207 enum nl_ct_event_type type, struct ds *ds,
208 bool verbose, bool print_stats)
210 ds_put_format(ds, "%s ",
211 type == NL_CT_EVENT_NEW ? "NEW"
212 : type == NL_CT_EVENT_UPDATE ? "UPDATE"
213 : type == NL_CT_EVENT_DELETE ? "DELETE"
215 ct_dpif_format_entry(entry, ds, verbose, print_stats);
224 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
/* Compose an IPCTNL_MSG_CT_DELETE request with family AF_UNSPEC and send it
 * as a single transaction; no reply buffer is requested (NULL). */
226 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
227 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
229 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
232 /* Expectations are flushed automatically, because they do not
233 * have a master connection anymore. */
/* Flushes the conntrack entries in zone 'flush_zone' with one
 * IPCTNL_MSG_CT_DELETE request that carries the zone as a CTA_ZONE
 * attribute.  'err' receives the result of the netlink transaction. */
240 nl_ct_flush_zone(uint16_t flush_zone)
242 /* Windows can flush a specific zone */
246 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
248 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
249 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
/* NOTE(review): 'flush_zone' is passed to nl_msg_put_be16() as is, whereas
 * the other nl_ct_flush_zone() variant in this file wraps the zone in
 * htons() and every reader of CTA_ZONE applies ntohs().  Confirm which byte
 * order the receiving side expects before changing this. */
250 nl_msg_put_be16(&buf, CTA_ZONE, flush_zone);
252 err = nl_transact(NETLINK_NETFILTER, &buf, NULL);
/* Flushes zone 'flush_zone' by dumping all conntrack entries and sending one
 * IPCTNL_MSG_CT_DELETE request for each entry whose zone matches. */
259 nl_ct_flush_zone(uint16_t flush_zone)
261 /* Apparently, there's no netlink interface to flush a specific zone.
262 * This code dumps every connection, checks the zone and eventually
265 * This is race-prone, but it is better than using shell scripts. */
268 struct ofpbuf buf, reply, delete;
270 ofpbuf_init(&buf, NL_DUMP_BUFSIZE);
271 ofpbuf_init(&delete, NL_DUMP_BUFSIZE);
/* 'buf' first carries the dump request and is then passed to
 * nl_dump_next() on every iteration; 'delete' is reused for each delete
 * request. */
273 nl_msg_put_nfgenmsg(&buf, 0, AF_UNSPEC, NFNL_SUBSYS_CTNETLINK,
274 IPCTNL_MSG_CT_GET, NLM_F_REQUEST);
275 nl_dump_start(&dump, NETLINK_NETFILTER, &buf);
279 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
280 enum nl_ct_event_type event_type;
281 uint8_t nfgen_family;
284 if (!nl_dump_next(&dump, &reply, &buf)) {
288 if (!nl_ct_parse_header_policy(&reply, &event_type, &nfgen_family,
293 if (attrs[CTA_ZONE]) {
294 zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
297 if (zone != flush_zone) {
298 /* The entry is not in the zone we're flushing. */
/* The delete request reuses the address family reported for the dumped
 * entry. */
301 nl_msg_put_nfgenmsg(&delete, 0, nfgen_family, NFNL_SUBSYS_CTNETLINK,
302 IPCTNL_MSG_CT_DELETE, NLM_F_REQUEST);
/* Identify the entry by zone, original-direction tuple and ID.  The tuple
 * and ID payloads are copied verbatim from the dumped message: 'attr + 1'
 * points just past the attribute header and 'nla_len - NLA_HDRLEN' is the
 * payload length. */
304 nl_msg_put_be16(&delete, CTA_ZONE, htons(zone));
305 nl_msg_put_unspec(&delete, CTA_TUPLE_ORIG, attrs[CTA_TUPLE_ORIG] + 1,
306 attrs[CTA_TUPLE_ORIG]->nla_len - NLA_HDRLEN);
307 nl_msg_put_unspec(&delete, CTA_ID, attrs[CTA_ID] + 1,
308 attrs[CTA_ID]->nla_len - NLA_HDRLEN);
/* NOTE(review): the result of this transaction is not checked, so a failed
 * delete goes unreported. */
309 nl_transact(NETLINK_NETFILTER, &delete, NULL);
310 ofpbuf_clear(&delete);
315 ofpbuf_uninit(&delete);
318 /* Expectations are flushed automatically, because they do not
319 * have a master connection anymore. */
324 /* Conntrack netlink parsing. */
/* Parses the nested counters attribute 'nla' into 'counters'.  Both
 * CTA_COUNTERS_PACKETS and CTA_COUNTERS_BYTES are mandatory 64-bit
 * big-endian values and are converted to host byte order.  A rate-limited
 * error is logged when the nested attributes do not match the policy. */
327 nl_ct_parse_counters(struct nlattr *nla, struct ct_dpif_counters *counters)
329 static const struct nl_policy policy[] = {
330 [CTA_COUNTERS_PACKETS] = { .type = NL_A_BE64, .optional = false },
331 [CTA_COUNTERS_BYTES] = { .type = NL_A_BE64, .optional = false },
333 struct nlattr *attrs[ARRAY_SIZE(policy)];
336 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
340 = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_PACKETS]));
341 counters->bytes = ntohll(nl_attr_get_be64(attrs[CTA_COUNTERS_BYTES]));
343 VLOG_ERR_RL(&rl, "Could not parse nested counters. "
344 "Possibly incompatible Linux kernel version.");
/* Parses the nested timestamp attribute 'nla' into 'timestamp'.
 * CTA_TIMESTAMP_START is mandatory; CTA_TIMESTAMP_STOP is optional and only
 * read when present.  Both are 64-bit big-endian values converted to host
 * byte order.  A rate-limited error is logged on a policy mismatch. */
351 nl_ct_parse_timestamp(struct nlattr *nla, struct ct_dpif_timestamp *timestamp)
353 static const struct nl_policy policy[] = {
354 [CTA_TIMESTAMP_START] = { .type = NL_A_BE64, .optional = false },
355 [CTA_TIMESTAMP_STOP] = { .type = NL_A_BE64, .optional = true },
357 struct nlattr *attrs[ARRAY_SIZE(policy)];
360 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
364 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_START]));
365 if (attrs[CTA_TIMESTAMP_STOP]) {
367 = ntohll(nl_attr_get_be64(attrs[CTA_TIMESTAMP_STOP]));
370 VLOG_ERR_RL(&rl, "Could not parse nested timestamp. "
371 "Possibly incompatible Linux kernel version.");
/* Fills the source and destination addresses of 'tuple' from the nested
 * attribute 'nla'.  'tuple->l3_type' must already be set: AF_INET selects the
 * CTA_IP_V4_* attributes and AF_INET6 the CTA_IP_V6_* ones; any other value
 * is logged as unsupported.  All four address attributes are optional and an
 * absent one leaves the corresponding field untouched.  Addresses are copied
 * as received, without byte-order conversion. */
378 nl_ct_parse_tuple_ip(struct nlattr *nla, struct ct_dpif_tuple *tuple)
380 static const struct nl_policy policy[] = {
381 [CTA_IP_V4_SRC] = { .type = NL_A_BE32, .optional = true },
382 [CTA_IP_V4_DST] = { .type = NL_A_BE32, .optional = true },
383 [CTA_IP_V6_SRC] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
384 [CTA_IP_V6_DST] = { NL_POLICY_FOR(struct in6_addr), .optional = true },
386 struct nlattr *attrs[ARRAY_SIZE(policy)];
389 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
392 if (tuple->l3_type == AF_INET) {
393 if (attrs[CTA_IP_V4_SRC]) {
394 tuple->src.ip = nl_attr_get_be32(attrs[CTA_IP_V4_SRC]);
396 if (attrs[CTA_IP_V4_DST]) {
397 tuple->dst.ip = nl_attr_get_be32(attrs[CTA_IP_V4_DST]);
399 } else if (tuple->l3_type == AF_INET6) {
400 if (attrs[CTA_IP_V6_SRC]) {
401 memcpy(&tuple->src.in6, nl_attr_get(attrs[CTA_IP_V6_SRC]),
402 sizeof tuple->src.in6);
404 if (attrs[CTA_IP_V6_DST]) {
405 memcpy(&tuple->dst.in6, nl_attr_get(attrs[CTA_IP_V6_DST]),
406 sizeof tuple->dst.in6);
/* The value logged here is the L3 address family, not an IP protocol
 * number. */
409 VLOG_WARN_RL(&rl, "Unsupported IP protocol: %u.", tuple->l3_type);
413 VLOG_ERR_RL(&rl, "Could not parse nested tuple IP options. "
414 "Possibly incompatible Linux kernel version.");
/* Fills the L4 part of 'tuple' from the nested attribute 'nla'.
 *
 * CTA_PROTO_NUM is mandatory and always stored in 'tuple->ip_proto'.  Then,
 * depending on 'tuple->l3_type' (which must already be set) and the
 * protocol:
 *
 *   - AF_INET + IPPROTO_ICMP: the ICMP id, type and code attributes must all
 *     be present, otherwise "Tuple ICMP data missing." is logged.
 *   - AF_INET6 + IPPROTO_ICMPV6: likewise with the ICMPv6 attributes.
 *   - Otherwise, if both port attributes are present, they are stored.
 *   - Otherwise the protocol is logged as unsupported.
 *
 * 16-bit values are stored as received, without byte-order conversion. */
421 nl_ct_parse_tuple_proto(struct nlattr *nla, struct ct_dpif_tuple *tuple)
423 static const struct nl_policy policy[] = {
424 [CTA_PROTO_NUM] = { .type = NL_A_U8, .optional = false },
425 [CTA_PROTO_SRC_PORT] = { .type = NL_A_BE16, .optional = true },
426 [CTA_PROTO_DST_PORT] = { .type = NL_A_BE16, .optional = true },
427 [CTA_PROTO_ICMP_ID] = { .type = NL_A_BE16, .optional = true },
428 [CTA_PROTO_ICMP_TYPE] = { .type = NL_A_U8, .optional = true },
429 [CTA_PROTO_ICMP_CODE] = { .type = NL_A_U8, .optional = true },
430 [CTA_PROTO_ICMPV6_ID] = { .type = NL_A_BE16, .optional = true },
431 [CTA_PROTO_ICMPV6_TYPE] = { .type = NL_A_U8, .optional = true },
432 [CTA_PROTO_ICMPV6_CODE] = { .type = NL_A_U8, .optional = true },
434 struct nlattr *attrs[ARRAY_SIZE(policy)];
437 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
440 tuple->ip_proto = nl_attr_get_u8(attrs[CTA_PROTO_NUM]);
442 if (tuple->l3_type == AF_INET && tuple->ip_proto == IPPROTO_ICMP) {
443 if (!attrs[CTA_PROTO_ICMP_ID] || !attrs[CTA_PROTO_ICMP_TYPE]
444 || !attrs[CTA_PROTO_ICMP_CODE]) {
445 VLOG_ERR_RL(&rl, "Tuple ICMP data missing.");
448 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMP_ID]);
449 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_TYPE]);
450 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMP_CODE]);
451 } else if (tuple->l3_type == AF_INET6 &&
452 tuple->ip_proto == IPPROTO_ICMPV6) {
453 if (!attrs[CTA_PROTO_ICMPV6_ID] || !attrs[CTA_PROTO_ICMPV6_TYPE]
454 || !attrs[CTA_PROTO_ICMPV6_CODE]) {
455 VLOG_ERR_RL(&rl, "Tuple ICMPv6 data missing.");
/* ICMPv6 values share the icmp_id/icmp_type/icmp_code fields with ICMP. */
458 tuple->icmp_id = nl_attr_get_be16(attrs[CTA_PROTO_ICMPV6_ID]);
459 tuple->icmp_type = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_TYPE]);
460 tuple->icmp_code = nl_attr_get_u8(attrs[CTA_PROTO_ICMPV6_CODE]);
461 } else if (attrs[CTA_PROTO_SRC_PORT] && attrs[CTA_PROTO_DST_PORT]) {
462 tuple->src_port = nl_attr_get_be16(attrs[CTA_PROTO_SRC_PORT]);
463 tuple->dst_port = nl_attr_get_be16(attrs[CTA_PROTO_DST_PORT]);
465 /* Unsupported IPPROTO and no ports, leave them zeroed.
466 * We have parsed the ip_proto, so this is not a total failure. */
467 VLOG_INFO_RL(&rl, "Unsupported L4 protocol: %u.", tuple->ip_proto);
470 VLOG_ERR_RL(&rl, "Could not parse nested tuple protocol options. "
471 "Possibly incompatible Linux kernel version.");
/* Parses the nested tuple attribute 'nla' into 'tuple'.  Both CTA_TUPLE_IP
 * and CTA_TUPLE_PROTO are mandatory.  'tuple' is zeroed and its 'l3_type'
 * set before the IP and protocol parts are parsed, because both sub-parsers
 * dispatch on 'l3_type'.  If either sub-parser fails, the partially filled
 * tuple is logged and then zeroed again. */
478 nl_ct_parse_tuple(struct nlattr *nla, struct ct_dpif_tuple *tuple,
481 static const struct nl_policy policy[] = {
482 [CTA_TUPLE_IP] = { .type = NL_A_NESTED, .optional = false },
483 [CTA_TUPLE_PROTO] = { .type = NL_A_NESTED, .optional = false },
485 struct nlattr *attrs[ARRAY_SIZE(policy)];
488 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
490 memset(tuple, 0, sizeof *tuple);
493 tuple->l3_type = l3_type;
495 if (!nl_ct_parse_tuple_ip(attrs[CTA_TUPLE_IP], tuple)
496 || !nl_ct_parse_tuple_proto(attrs[CTA_TUPLE_PROTO], tuple)) {
500 ct_dpif_format_tuple(&ds, tuple, true);
502 VLOG_ERR_RL(&rl, "Failed to parse tuple: %s", ds_cstr(&ds));
505 memset(tuple, 0, sizeof *tuple);
509 VLOG_ERR_RL(&rl, "Could not parse nested tuple options. "
510 "Possibly incompatible Linux kernel version.");
516 /* Translate netlink TCP state to CT_DPIF_TCP state. */
/* The mapping is not one-to-one: TCP_CONNTRACK_NONE becomes CLOSED, both
 * SYN_SENT and SYN_SENT2 become SYN_SENT, FIN_WAIT becomes FIN_WAIT_1 and
 * TCP_CONNTRACK_CLOSE becomes CLOSING. */
518 nl_ct_tcp_state_to_dpif(uint8_t state)
521 /* Windows currently sends up CT_DPIF_TCP state */
525 case TCP_CONNTRACK_NONE:
526 return CT_DPIF_TCPS_CLOSED;
527 case TCP_CONNTRACK_SYN_SENT:
528 return CT_DPIF_TCPS_SYN_SENT;
529 case TCP_CONNTRACK_SYN_SENT2:
530 return CT_DPIF_TCPS_SYN_SENT;
531 case TCP_CONNTRACK_SYN_RECV:
532 return CT_DPIF_TCPS_SYN_RECV;
533 case TCP_CONNTRACK_ESTABLISHED:
534 return CT_DPIF_TCPS_ESTABLISHED;
535 case TCP_CONNTRACK_FIN_WAIT:
536 return CT_DPIF_TCPS_FIN_WAIT_1;
537 case TCP_CONNTRACK_CLOSE_WAIT:
538 return CT_DPIF_TCPS_CLOSE_WAIT;
539 case TCP_CONNTRACK_LAST_ACK:
540 return CT_DPIF_TCPS_LAST_ACK;
541 case TCP_CONNTRACK_TIME_WAIT:
542 return CT_DPIF_TCPS_TIME_WAIT;
543 case TCP_CONNTRACK_CLOSE:
544 return CT_DPIF_TCPS_CLOSING;
/* NOTE(review): presumably the fallback for states not listed above --
 * confirm. */
546 return CT_DPIF_TCPS_CLOSED;
/* Translate netlink TCP flags to CT_DPIF_TCP flags: each IP_CT_TCP_FLAG_<X>
 * bit set in 'flags' contributes CT_DPIF_TCPF_<X> to the result. */
552 ip_ct_tcp_flags_to_dpif(uint8_t flags)
555 /* Windows currently sends up CT_DPIF_TCP flags */
/* Per-flag translation step; it is local to this function, so it is
 * undefined again below. */
559 #define CT_DPIF_TCP_FLAG(FLAG) \
560 ret |= (flags & IP_CT_TCP_FLAG_##FLAG) ? CT_DPIF_TCPF_##FLAG : 0;
/* Undefine the helper defined above.  The name must match the #define,
 * otherwise CT_DPIF_TCP_FLAG stays defined for the rest of the file. */
562 #undef CT_DPIF_TCP_FLAG
/* Parses the nested TCP protocol information 'nla' into 'protoinfo'.
 * Sets 'protoinfo->proto' to IPPROTO_TCP and fills the 'tcp' member with the
 * connection state, both window scale factors and both flag sets.  The flag
 * attributes are read as raw 'struct nf_ct_tcp_flags' and only their
 * 'flags' member is translated.  A rate-limited error is logged on a policy
 * mismatch. */
568 nl_ct_parse_protoinfo_tcp(struct nlattr *nla,
569 struct ct_dpif_protoinfo *protoinfo)
571 static const struct nl_policy policy[] = {
572 [CTA_PROTOINFO_TCP_STATE] = { .type = NL_A_U8, .optional = false },
573 [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NL_A_U8,
575 [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NL_A_U8,
577 [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .type = NL_A_U16,
579 [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .type = NL_A_U16,
582 struct nlattr *attrs[ARRAY_SIZE(policy)];
585 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
588 const struct nf_ct_tcp_flags *flags_orig, *flags_reply;
590 protoinfo->proto = IPPROTO_TCP;
591 state = nl_ct_tcp_state_to_dpif(
592 nl_attr_get_u8(attrs[CTA_PROTOINFO_TCP_STATE]));
593 /* The connection tracker keeps only one tcp state for the
594 * connection, but our structures store a separate state for
595 * each endpoint. Here we duplicate the state. */
596 protoinfo->tcp.state_orig = protoinfo->tcp.state_reply = state;
597 protoinfo->tcp.wscale_orig = nl_attr_get_u8(
598 attrs[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
599 protoinfo->tcp.wscale_reply = nl_attr_get_u8(
600 attrs[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
602 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL],
604 protoinfo->tcp.flags_orig =
605 ip_ct_tcp_flags_to_dpif(flags_orig->flags);
607 nl_attr_get_unspec(attrs[CTA_PROTOINFO_TCP_FLAGS_REPLY],
608 sizeof *flags_reply);
609 protoinfo->tcp.flags_reply =
610 ip_ct_tcp_flags_to_dpif(flags_reply->flags);
612 VLOG_ERR_RL(&rl, "Could not parse nested TCP protoinfo options. "
613 "Possibly incompatible Linux kernel version.");
/* Parses the nested protocol information 'nla' into 'protoinfo', which is
 * zeroed first.  Only TCP is decoded, through nl_ct_parse_protoinfo_tcp().
 * An SCTP attribute only produces a "not yet supported" warning, and a
 * message carrying neither produces an "Empty protoinfo!" warning. */
620 nl_ct_parse_protoinfo(struct nlattr *nla, struct ct_dpif_protoinfo *protoinfo)
622 /* These are mutually exclusive. */
623 static const struct nl_policy policy[] = {
624 [CTA_PROTOINFO_TCP] = { .type = NL_A_NESTED, .optional = true },
625 [CTA_PROTOINFO_SCTP] = { .type = NL_A_NESTED, .optional = true },
627 struct nlattr *attrs[ARRAY_SIZE(policy)];
630 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
632 memset(protoinfo, 0, sizeof *protoinfo);
635 if (attrs[CTA_PROTOINFO_TCP]) {
/* 'parsed' now carries the result of the TCP sub-parser. */
636 parsed = nl_ct_parse_protoinfo_tcp(attrs[CTA_PROTOINFO_TCP],
638 } else if (attrs[CTA_PROTOINFO_SCTP]) {
639 VLOG_WARN_RL(&rl, "SCTP protoinfo not yet supported!");
641 VLOG_WARN_RL(&rl, "Empty protoinfo!");
644 VLOG_ERR_RL(&rl, "Could not parse nested protoinfo options. "
645 "Possibly incompatible Linux kernel version.");
/* Parses the nested helper attribute 'nla' into 'helper', which is zeroed
 * first.  CTA_HELP_NAME is a mandatory string; 'helper->name' receives a
 * heap-allocated copy of it made with xstrdup().
 * NOTE(review): presumably that copy is released by ct_dpif_entry_uninit()
 * -- confirm. */
652 nl_ct_parse_helper(struct nlattr *nla, struct ct_dpif_helper *helper)
654 static const struct nl_policy policy[] = {
655 [CTA_HELP_NAME] = { .type = NL_A_STRING, .optional = false },
657 struct nlattr *attrs[ARRAY_SIZE(policy)];
660 parsed = nl_parse_nested(nla, policy, attrs, ARRAY_SIZE(policy));
662 memset(helper, 0, sizeof *helper);
665 helper->name = xstrdup(nl_attr_get_string(attrs[CTA_HELP_NAME]));
667 VLOG_ERR_RL(&rl, "Could not parse nested helper options. "
668 "Possibly incompatible Linux kernel version.");
674 /* Translate netlink entry status flags to CT_DPIF_STATUS flags. */
/* Each IPS_<X> bit set in 'status' contributes CT_DPIF_STATUS_<X> to the
 * result. */
676 ips_status_to_dpif_flags(uint32_t status)
/* Per-flag translation step; it is undefined again once it has been used. */
679 #define CT_DPIF_STATUS_FLAG(FLAG) \
680 ret |= (status & IPS_##FLAG) ? CT_DPIF_STATUS_##FLAG : 0;
682 #undef CT_DPIF_STATUS_FLAG
/* Validates the netlink and nfnetlink headers at the start of 'buf' and
 * parses the attributes that follow them into 'attrs' according to
 * nfnlgrp_conntrack_policy.
 *
 * The message is rejected, with a rate-limited error, when the nfgenmsg
 * header is missing, when the subsystem is not NFNL_SUBSYS_CTNETLINK, when
 * the nfnetlink version is not NFNETLINK_V0, when the attributes do not match
 * the policy, or when the message type is neither IPCTNL_MSG_CT_NEW nor
 * IPCTNL_MSG_CT_DELETE.
 *
 * On success '*nfgen_family' receives the address family from the nfgenmsg
 * header and '*event_type' the decoded event: IPCTNL_MSG_CT_NEW maps to
 * NL_CT_EVENT_NEW when NLM_F_CREATE is set and to NL_CT_EVENT_UPDATE
 * otherwise; IPCTNL_MSG_CT_DELETE maps to NL_CT_EVENT_DELETE. */
687 nl_ct_parse_header_policy(struct ofpbuf *buf,
688 enum nl_ct_event_type *event_type,
689 uint8_t *nfgen_family,
690 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)])
692 struct nlmsghdr *nlh;
693 struct nfgenmsg *nfm;
/* The nfgenmsg header sits immediately after the netlink header. */
696 nlh = ofpbuf_at(buf, 0, NLMSG_HDRLEN);
697 nfm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *nfm);
699 VLOG_ERR_RL(&rl, "Received bad nfnl message (no nfgenmsg).");
702 if (NFNL_SUBSYS_ID(nlh->nlmsg_type) != NFNL_SUBSYS_CTNETLINK) {
703 VLOG_ERR_RL(&rl, "Received non-conntrack message (subsystem: %u).",
704 NFNL_SUBSYS_ID(nlh->nlmsg_type));
707 if (nfm->version != NFNETLINK_V0) {
/* Log the version field itself; NFNL_MSG_TYPE() is meant for 'nlmsg_type'
 * values, not for versions. */
708 VLOG_ERR_RL(&rl, "Received unsupported nfnetlink version (%u).",
709 nfm->version);
/* Attributes start right after the two headers. */
713 if (!nl_policy_parse(buf, NLMSG_HDRLEN + sizeof *nfm,
714 nfnlgrp_conntrack_policy, attrs,
715 ARRAY_SIZE(nfnlgrp_conntrack_policy))) {
716 VLOG_ERR_RL(&rl, "Received bad nfnl message (policy).");
720 type = NFNL_MSG_TYPE(nlh->nlmsg_type);
721 *nfgen_family = nfm->nfgen_family;
724 case IPCTNL_MSG_CT_NEW:
725 *event_type = nlh->nlmsg_flags & NLM_F_CREATE
726 ? NL_CT_EVENT_NEW : NL_CT_EVENT_UPDATE;
728 case IPCTNL_MSG_CT_DELETE:
729 *event_type = NL_CT_EVENT_DELETE;
732 VLOG_ERR_RL(&rl, "Can't parse conntrack event type.");
/* Converts the attributes in 'attrs' (as filled in by
 * nl_ct_parse_header_policy()) into 'entry'.
 *
 * The original and reply tuples are always parsed.  Counters, timestamp,
 * protocol information, helper and master tuple are parsed only when their
 * attribute is present.  32-bit and 16-bit scalar attributes are converted
 * to host byte order. */
740 nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
741 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)],
742 uint8_t nfgen_family)
744 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_ORIG], &entry->tuple_orig,
748 if (!nl_ct_parse_tuple(attrs[CTA_TUPLE_REPLY], &entry->tuple_reply,
752 if (attrs[CTA_COUNTERS_ORIG] &&
753 !nl_ct_parse_counters(attrs[CTA_COUNTERS_ORIG],
754 &entry->counters_orig)) {
757 if (attrs[CTA_COUNTERS_REPLY] &&
758 !nl_ct_parse_counters(attrs[CTA_COUNTERS_REPLY],
759 &entry->counters_reply)) {
762 if (attrs[CTA_TIMESTAMP] &&
763 !nl_ct_parse_timestamp(attrs[CTA_TIMESTAMP], &entry->timestamp)) {
/* CTA_ID is read without a presence check; nfnlgrp_conntrack_policy marks
 * it as mandatory. */
767 entry->id = ntohl(nl_attr_get_be32(attrs[CTA_ID]));
769 if (attrs[CTA_ZONE]) {
770 entry->zone = ntohs(nl_attr_get_be16(attrs[CTA_ZONE]));
772 if (attrs[CTA_STATUS]) {
773 entry->status = ips_status_to_dpif_flags(
774 ntohl(nl_attr_get_be32(attrs[CTA_STATUS])));
776 if (attrs[CTA_TIMEOUT]) {
777 entry->timeout = ntohl(nl_attr_get_be32(attrs[CTA_TIMEOUT]));
779 if (attrs[CTA_MARK]) {
780 entry->mark = ntohl(nl_attr_get_be32(attrs[CTA_MARK]));
782 if (attrs[CTA_LABELS]) {
/* Copy at most sizeof entry->labels bytes, so a longer attribute is
 * truncated and a shorter one fills only the leading bytes. */
783 memcpy(&entry->labels, nl_attr_get(attrs[CTA_LABELS]),
784 MIN(sizeof entry->labels, nl_attr_get_size(attrs[CTA_LABELS])));
786 if (attrs[CTA_PROTOINFO] &&
787 !nl_ct_parse_protoinfo(attrs[CTA_PROTOINFO], &entry->protoinfo)) {
790 if (attrs[CTA_HELP] &&
791 !nl_ct_parse_helper(attrs[CTA_HELP], &entry->helper)) {
794 if (attrs[CTA_TUPLE_MASTER] &&
795 !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master,
/* Parses the conntrack netlink message in 'buf' into 'entry' and stores the
 * event kind in '*event_type'.  'entry' is zeroed before parsing.  If
 * converting the attributes fails, the partially filled entry is released
 * with ct_dpif_entry_uninit() and zeroed again. */
803 nl_ct_parse_entry(struct ofpbuf *buf, struct ct_dpif_entry *entry,
804 enum nl_ct_event_type *event_type)
806 struct nlattr *attrs[ARRAY_SIZE(nfnlgrp_conntrack_policy)];
807 uint8_t nfgen_family;
809 memset(entry, 0, sizeof *entry);
810 if (!nl_ct_parse_header_policy(buf, event_type, &nfgen_family, attrs)) {
814 if (!nl_ct_attrs_to_ct_dpif_entry(entry, attrs, nfgen_family)) {
815 ct_dpif_entry_uninit(entry);
816 memset(entry, 0, sizeof *entry);
823 /* NetFilter utility functions. */
825 /* Puts a nlmsghdr and nfgenmsg at the beginning of 'msg', which must be
826 * initially empty. 'expected_payload' should be an estimate of the number of
827 * payload bytes to be supplied; if the size of the payload is unknown a value
828 * of 0 is acceptable.
830 * Non-zero 'family' is the address family of items to get (e.g. AF_INET).
832 * 'flags' is a bit-mask that indicates what kind of request is being made. It
833 * is often NLM_F_REQUEST indicating that a request is being made, commonly
834 * or'd with NLM_F_ACK to request an acknowledgement. NLM_F_DUMP flag requests
835 * a dump of the table.
837 * 'subsystem' is a netfilter subsystem id, e.g., NFNL_SUBSYS_CTNETLINK.
839 * 'cmd' is an enumerated value specific to the 'subsystem'.
841 * Sets the new nlmsghdr's nlmsg_pid field to 0 for now. nl_sock_send() will
842 * fill it in just before sending the message.
844 * nl_msg_put_nlmsghdr() should be used to compose Netlink messages that are
845 * not NetFilter Netlink messages. */
847 nl_msg_put_nfgenmsg(struct ofpbuf *msg, size_t expected_payload,
848 int family, uint8_t subsystem, uint8_t cmd,
851 struct nfgenmsg *nfm;
/* The netlink message type packs 'subsystem' into the high byte and 'cmd'
 * into the low byte. */
853 nl_msg_put_nlmsghdr(msg, sizeof *nfm + expected_payload,
854 subsystem << 8 | cmd, flags);
/* 'msg' was required to be empty, so only the netlink header is in it now. */
855 ovs_assert(msg->size == NLMSG_HDRLEN);
856 nfm = nl_msg_put_uninit(msg, sizeof *nfm);
857 nfm->nfgen_family = family;
858 nfm->version = NFNETLINK_V0;
861 /* nfgenmsg contains ovsHdr padding in windows */
862 nfm->ovsHdr.dp_ifindex = 0;