/*
 * Copyright (c) 2015 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
14 #include <linux/kconfig.h>
15 #include <linux/version.h>
17 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
19 #include <linux/module.h>
20 #include <linux/openvswitch.h>
22 #include <net/netfilter/nf_conntrack_core.h>
23 #include <net/netfilter/nf_conntrack_helper.h>
24 #include <net/netfilter/nf_conntrack_labels.h>
25 #include <net/netfilter/nf_conntrack_zones.h>
26 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29 #include "conntrack.h"
31 #include "flow_netlink.h"
/* Length bounds for one OVS_CT_ATTR_* netlink attribute.  The table
 * ovs_ct_attr_lens[] later in this file initialises the .minlen and .maxlen
 * members of this type.
 * NOTE(review): the member declarations and the closing brace are missing
 * from this listing.
 */
34 struct ovs_ct_len_tbl {
39 /* Metadata mark for masked write to conntrack mark */
/* NOTE(review): the struct this comment introduces (referred to elsewhere in
 * this file as 'struct md_mark', with 'value' and 'mask' read through
 * info->mark) is not visible in this listing.
 */
45 /* Metadata label for masked write to conntrack label. */
/* 'value' and 'mask' are passed together to ovs_ct_set_labels(); parse_ct()
 * rejects a mask whose bytes are all zero.
 * NOTE(review): the 'struct md_labels {' line and closing brace are missing
 * from this listing.
 */
47 struct ovs_key_ct_labels value;
48 struct ovs_key_ct_labels mask;
51 /* Conntrack action context for execution. */
/* NOTE(review): besides the members shown here, code in this file also uses
 * info->ct, info->family, info->commit and info->mark, so the full
 * definition has more fields than this listing shows.
 */
52 struct ovs_conntrack_info {
/* Set by ovs_ct_add_helper(); its module reference is released in
 * __ovs_ct_free_action().
 */
53 struct nf_conntrack_helper *helper;
/* Initialised to the default zone in ovs_ct_copy_action(); zone.id can be
 * overridden by OVS_CT_ATTR_ZONE in parse_ct().
 */
54 struct nf_conntrack_zone zone;
/* Copied from the OVS_CT_ATTR_LABELS payload in parse_ct(). */
59 struct md_labels labels;
/* Forward declaration: ovs_ct_copy_action() calls this function before its
 * definition further down in the file.
 */
62 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info);
/* Translate the flow key's Ethernet type (key->eth.type, converted with
 * ntohs()) into a netfilter protocol family.  NFPROTO_UNSPEC is one of the
 * results; ovs_ct_copy_action() treats it as an error.
 * NOTE(review): the case labels of the switch are missing from this listing.
 * Presumably IPv4 and IPv6 map to their NFPROTO_* values and everything else
 * to NFPROTO_UNSPEC - confirm against the full source.
 */
64 static u16 key_to_nfproto(const struct sw_flow_key *key)
66 switch (ntohs(key->eth.type)) {
72 return NFPROTO_UNSPEC;
76 /* Map SKB connection state into the values used by flow definition. */
/* The result always starts from OVS_CS_F_TRACKED.  The *_REPLY ctinfo values
 * add OVS_CS_F_REPLY_DIR; the ESTABLISHED values add OVS_CS_F_ESTABLISHED;
 * IP_CT_RELATED_REPLY adds OVS_CS_F_RELATED; OVS_CS_F_NEW is added on a
 * further case.
 * NOTE(review): the two 'switch (ctinfo)' headers, the break statements,
 * some case labels and the final return are missing from this listing.
 */
77 static u8 ovs_ct_get_state(enum ip_conntrack_info ctinfo)
79 u8 ct_state = OVS_CS_F_TRACKED;
82 case IP_CT_ESTABLISHED_REPLY:
83 case IP_CT_RELATED_REPLY:
84 ct_state |= OVS_CS_F_REPLY_DIR;
91 case IP_CT_ESTABLISHED:
92 case IP_CT_ESTABLISHED_REPLY:
93 ct_state |= OVS_CS_F_ESTABLISHED;
96 case IP_CT_RELATED_REPLY:
97 ct_state |= OVS_CS_F_RELATED;
100 ct_state |= OVS_CS_F_NEW;
/* Return the mark of connection 'ct', or 0 when 'ct' is NULL.  The read of
 * ct->mark is compiled only when CONFIG_NF_CONNTRACK_MARK is enabled.
 * NOTE(review): the #else/#endif branch is missing from this listing;
 * presumably it returns 0 - confirm.
 */
109 static u32 ovs_ct_get_mark(const struct nf_conn *ct)
111 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
112 return ct ? ct->mark : 0;
118 static void ovs_ct_get_labels(const struct nf_conn *ct,
119 struct ovs_key_ct_labels *labels)
121 struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
124 size_t len = cl->words * sizeof(long);
126 if (len > OVS_CT_LABELS_LEN)
127 len = OVS_CT_LABELS_LEN;
128 else if (len < OVS_CT_LABELS_LEN)
129 memset(labels, 0, OVS_CT_LABELS_LEN);
130 memcpy(labels, cl->bits, len);
132 memset(labels, 0, OVS_CT_LABELS_LEN);
136 static void __ovs_ct_update_key(struct sw_flow_key *key, u8 state,
137 const struct nf_conntrack_zone *zone,
138 const struct nf_conn *ct)
140 key->ct.state = state;
141 key->ct.zone = zone->id;
142 key->ct.mark = ovs_ct_get_mark(ct);
143 ovs_ct_get_labels(ct, &key->ct.labels);
/* Update 'key' based on skb->nfct. If 'post_ct' is true, then OVS has
 * previously sent the packet to conntrack via the ct action.
 */
/* Derive the conntrack state and zone for 'skb' and store them in 'key'
 * through __ovs_ct_update_key().
 *
 * The zone starts out as nf_ct_zone_dflt.  On the path that uses the entry
 * returned by nf_ct_get(), the state comes from ovs_ct_get_state(),
 * OVS_CS_F_NEW is added while the entry is unconfirmed, OVS_CS_F_RELATED may
 * be added, and the zone is taken from the entry.  On the 'post_ct' path the
 * state is OVS_CS_F_TRACKED | OVS_CS_F_INVALID.
 *
 * NOTE(review): this listing is missing the declarations of 'ct' and
 * 'state', the condition opening the first branch, the end of the
 * "OVS persists the related flag" comment together with the condition that
 * guards the OVS_CS_F_RELATED update, and any use of 'info'.
 */
149 static void ovs_ct_update_key(const struct sk_buff *skb,
150 const struct ovs_conntrack_info *info,
151 struct sw_flow_key *key, bool post_ct)
153 const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
154 enum ip_conntrack_info ctinfo;
158 ct = nf_ct_get(skb, &ctinfo);
160 state = ovs_ct_get_state(ctinfo);
161 /* All unconfirmed entries are NEW connections. */
162 if (!nf_ct_is_confirmed(ct))
163 state |= OVS_CS_F_NEW;
164 /* OVS persists the related flag for the duration of the
168 state |= OVS_CS_F_RELATED;
169 zone = nf_ct_zone(ct);
170 } else if (post_ct) {
171 state = OVS_CS_F_TRACKED | OVS_CS_F_INVALID;
175 __ovs_ct_update_key(key, state, zone, ct);
/* Initialise the conntrack fields of 'key' from whatever conntrack state is
 * already attached to 'skb'.  No ct action context is supplied, and the
 * packet is treated as not having been through the OVS ct action.
 */
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
{
	const struct ovs_conntrack_info *no_info = NULL;
	const bool post_ct = false;

	ovs_ct_update_key(skb, no_info, key, post_ct);
}
/* Emit the conntrack fields of 'key' as netlink attributes in 'skb':
 * OVS_KEY_ATTR_CT_STATE unconditionally, and the zone, mark and labels
 * attributes only when the matching CONFIG_NF_CONNTRACK_* option is enabled.
 * NOTE(review): the return statements and the last argument of the final
 * nla_put() are missing from this listing, so the value returned when a put
 * fails cannot be confirmed from here.
 */
186 int ovs_ct_put_key(const struct sw_flow_key *key, struct sk_buff *skb)
188 if (nla_put_u32(skb, OVS_KEY_ATTR_CT_STATE, key->ct.state))
191 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
192 nla_put_u16(skb, OVS_KEY_ATTR_CT_ZONE, key->ct.zone))
195 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
196 nla_put_u32(skb, OVS_KEY_ATTR_CT_MARK, key->ct.mark))
199 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
200 nla_put(skb, OVS_KEY_ATTR_CT_LABELS, sizeof(key->ct.labels),
/* Masked write of the conntrack mark of the connection attached to 'skb'.
 *
 * The new value keeps the bits of ct->mark that lie outside 'mask' and ORs
 * in 'ct_mark': new_mark = ct_mark | (ct->mark & ~mask).  When that differs
 * from the current mark, an IPCT_MARK event is cached and key->ct.mark is
 * set to the new value.  The body is compiled only when
 * CONFIG_NF_CONNTRACK_MARK is enabled.
 *
 * NOTE(review): 'ct_mark' is ORed in without being masked, so this relies on
 * the caller never passing value bits outside 'mask' - confirm.
 * NOTE(review): the local declarations, the handling of a missing
 * connection, the store to ct->mark and all return statements are missing
 * from this listing.
 */
207 static int ovs_ct_set_mark(struct sk_buff *skb, struct sw_flow_key *key,
208 u32 ct_mark, u32 mask)
210 #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
211 enum ip_conntrack_info ctinfo;
215 /* The connection could be invalid, in which case set_mark is no-op. */
216 ct = nf_ct_get(skb, &ctinfo);
220 new_mark = ct_mark | (ct->mark & ~(mask));
221 if (ct->mark != new_mark) {
223 nf_conntrack_event_cache(IPCT_MARK, ct);
224 key->ct.mark = new_mark;
/* Masked write of the connection labels of the connection attached to 'skb'.
 *
 * The label extension is looked up with nf_ct_labels_find(); the visible
 * code also calls nf_ct_labels_ext_add() and repeats the lookup.  A missing
 * extension, or one holding fewer than OVS_CT_LABELS_LEN bytes, is tested
 * for explicitly.  nf_connlabels_replace() then applies 'labels' under
 * 'mask' as OVS_CT_LABELS_LEN / sizeof(u32) 32-bit words, and key->ct.labels
 * is re-read from the connection with ovs_ct_get_labels().
 *
 * NOTE(review): the (u32 *) casts drop the const qualifier of 'labels' and
 * 'mask'.
 * NOTE(review): the local declarations, the conditions around the two
 * lookups and every return statement are missing from this listing, so the
 * error codes cannot be confirmed from here.
 */
233 static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
234 const struct ovs_key_ct_labels *labels,
235 const struct ovs_key_ct_labels *mask)
237 enum ip_conntrack_info ctinfo;
238 struct nf_conn_labels *cl;
242 /* The connection could be invalid, in which case set_label is no-op.*/
243 ct = nf_ct_get(skb, &ctinfo);
247 cl = nf_ct_labels_find(ct);
249 nf_ct_labels_ext_add(ct);
250 cl = nf_ct_labels_find(ct);
252 if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
255 err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
256 OVS_CT_LABELS_LEN / sizeof(u32));
260 ovs_ct_get_labels(ct, &key->ct.labels);
264 /* 'skb' should already be pulled to nh_ofs. */
/* Run the conntrack helper attached to the packet's connection.
 *
 * The connection comes from nf_ct_get(); a missing connection and
 * IP_CT_RELATED_REPLY are tested for up front.  The helper is read from the
 * connection's help extension with rcu_dereference().  'protoff' is
 * ip_hdrlen(skb) on one path; on the IPv6 path the extension headers are
 * skipped with ipv6_skip_exthdr(), and a failed skip or a non-zero fragment
 * offset is reported with pr_debug().  A further path warns once that the
 * helper was invoked on a non-IP family.  The function's result on the main
 * path is whatever helper->help() returns.
 *
 * NOTE(review): the switch on 'proto', several local declarations, the NULL
 * checks on 'help'/'helper' and the values returned on the early-exit paths
 * are missing from this listing.
 */
265 static int ovs_ct_helper(struct sk_buff *skb, u16 proto)
267 const struct nf_conntrack_helper *helper;
268 const struct nf_conn_help *help;
269 enum ip_conntrack_info ctinfo;
270 unsigned int protoff;
273 ct = nf_ct_get(skb, &ctinfo);
274 if (!ct || ctinfo == IP_CT_RELATED_REPLY)
277 help = nfct_help(ct);
281 helper = rcu_dereference(help->helper);
287 protoff = ip_hdrlen(skb);
290 u8 nexthdr = ipv6_hdr(skb)->nexthdr;
294 ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
296 if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
297 pr_debug("proto header not found\n");
304 WARN_ONCE(1, "helper invoked on non-IP family!");
308 return helper->help(skb, protoff, ct, ctinfo);
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 * value if 'skb' is freed.
 */
/* Defragment 'skb' before it is handed to conntrack; ovs_ct_execute() calls
 * this when key->ip.frag is not OVS_FRAG_TYPE_NONE.
 *
 * The skb's OVS_GSO_CB() contents are saved on entry and written back at the
 * end, with dp_cb.mru taken from the frag_max_size recorded by the
 * reassembly code.  IPv4 uses ip_defrag() with user
 * IP_DEFRAG_CONNTRACK_IN + zone.  IPv6, only when CONFIG_NF_DEFRAG_IPV6 is
 * enabled, uses nf_ct_frag6_gather() with IP6_DEFRAG_CONNTRACK_IN + zone and
 * refreshes key->ip.proto from the IPv6 header.  The IPCB()/IP6CB() area is
 * zeroed before each call.  Afterwards key->ip.frag is reset to
 * OVS_FRAG_TYPE_NONE.
 *
 * NOTE(review): this listing is missing the 'skb->dev' test that guards the
 * "skb has no dev" message, the declarations of 'err', the error checks
 * after both defrag calls, the branch that returns -EPFNOSUPPORT
 * (presumably any other ethertype) and the final return.
 */
314 static int handle_fragments(struct net *net, struct sw_flow_key *key,
315 u16 zone, struct sk_buff *skb)
317 struct ovs_gso_cb ovs_cb = *OVS_GSO_CB(skb);
321 OVS_NLERR(true, "%s: skb has no dev; dropping", __func__);
325 if (key->eth.type == htons(ETH_P_IP)) {
326 enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
328 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
329 err = ip_defrag(net, skb, user);
333 ovs_cb.dp_cb.mru = IPCB(skb)->frag_max_size;
334 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
335 } else if (key->eth.type == htons(ETH_P_IPV6)) {
336 enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
339 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
340 err = nf_ct_frag6_gather(net, skb, user);
344 key->ip.proto = ipv6_hdr(skb)->nexthdr;
345 ovs_cb.dp_cb.mru = IP6CB(skb)->frag_max_size;
346 #endif /* IP frag support */
349 return -EPFNOSUPPORT;
352 key->ip.frag = OVS_FRAG_TYPE_NONE;
355 *OVS_GSO_CB(skb) = ovs_cb;
360 static struct nf_conntrack_expect *
361 ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
362 u16 proto, const struct sk_buff *skb)
364 struct nf_conntrack_tuple tuple;
366 if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
368 return __nf_ct_expect_find(net, zone, &tuple);
371 /* Determine whether skb->nfct is equal to the result of conntrack lookup. */
/* The visible checks compare the connection attached to 'skb' with the
 * lookup parameters: its network namespace must equal 'net', its zone must
 * match the zone of the template info->ct (nf_ct_zone_equal_any()), and a
 * helper found in the connection's NF_CT_EXT_HELPER extension must be
 * info->helper.
 * NOTE(review): the declaration of 'ct', the NULL check on it, the condition
 * that opens the helper comparison and all return statements are missing
 * from this listing.
 */
372 static bool skb_nfct_cached(const struct net *net, const struct sk_buff *skb,
373 const struct ovs_conntrack_info *info)
375 enum ip_conntrack_info ctinfo;
378 ct = nf_ct_get(skb, &ctinfo);
381 if (!net_eq(net, read_pnet(&ct->ct_net)))
383 if (!nf_ct_zone_equal_any(info->ct, nf_ct_zone(ct)))
386 struct nf_conn_help *help;
388 help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
389 if (help && rcu_access_pointer(help->helper) != info->helper)
/* Pass 'skb' through conntrack in 'net', using zone configured in 'info', if
 * not done already. Update key with new CT state after passing the packet
 * through conntrack.
 *
 * Note that if the packet is deemed invalid by conntrack, skb->nfct will be
 * set to NULL and 0 will be returned.
 */
/* Run the conntrack lookup for 'skb' unless skb_nfct_cached() reports that
 * the connection already attached to it is usable.
 *
 * On the lookup path the existing skb->nfct reference is dropped, the
 * template info->ct is attached with a new reference and ctinfo IP_CT_NEW,
 * and nf_conntrack_in() is called for info->family at the NF_INET_FORWARD
 * hook.  'key' is then refreshed with ovs_ct_update_key(..., true), and
 * ovs_ct_helper() is run; a helper verdict other than NF_ACCEPT triggers a
 * one-time warning.
 *
 * NOTE(review): this listing is missing the final parameter line (the code
 * uses an 'skb' argument), the guard around the template attachment, the
 * remaining nf_conntrack_in() arguments and all return statements.
 */
402 static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
403 const struct ovs_conntrack_info *info,
406 /* If we are recirculating packets to match on conntrack fields and
407 * committing with a separate conntrack action, then we don't need to
408 * actually run the packet through conntrack twice unless it's for a
411 if (!skb_nfct_cached(net, skb, info)) {
412 struct nf_conn *tmpl = info->ct;
414 /* Associate skb with specified zone. */
417 nf_conntrack_put(skb->nfct);
418 nf_conntrack_get(&tmpl->ct_general);
419 skb->nfct = &tmpl->ct_general;
420 skb->nfctinfo = IP_CT_NEW;
423 if (nf_conntrack_in(net, info->family, NF_INET_FORWARD,
427 ovs_ct_update_key(skb, info, key, true);
429 if (ovs_ct_helper(skb, info->family) != NF_ACCEPT) {
430 WARN_ONCE(1, "helper rejected packet");
438 /* Lookup connection and read fields into key. */
/* An expectation lookup (ovs_ct_expect_find()) for info->zone and
 * info->family is done first.  On the expectation path 'key' is filled in
 * with OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED, info->zone and
 * the expectation's master connection.  The other visible path calls
 * __ovs_ct_lookup().
 * NOTE(review): this listing is missing the final parameter line, the local
 * declarations, the test on 'exp', the end of the explanatory comment and
 * all return statements.
 */
439 static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
440 const struct ovs_conntrack_info *info,
443 struct nf_conntrack_expect *exp;
445 /* If we pass an expected packet through nf_conntrack_in() the
446 * expectation is typically removed, but the packet could still be
447 * lost in upcall processing. To prevent this from happening we
448 * perform an explicit expectation lookup. Expected connections are
449 * always new, and will be passed through conntrack only when they are
450 * committed, as it is OK to remove the expectation at that time.
452 exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
456 state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
457 __ovs_ct_update_key(key, state, &info->zone, exp->master);
461 err = __ovs_ct_lookup(net, key, info, skb);
469 /* Lookup connection and confirm if unconfirmed. */
/* A fast path is taken when 'key' already shows, for zone info->zone.id, a
 * tracked connection that is not NEW.  Otherwise the packet goes through
 * __ovs_ct_lookup() and then nf_conntrack_confirm(), whose verdict is
 * compared against NF_ACCEPT.
 * NOTE(review): this listing is missing the final parameter line, the local
 * declarations, the end of the "Skip committing" comment and all return
 * statements, so the result of each path cannot be confirmed from here.
 */
470 static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
471 const struct ovs_conntrack_info *info,
477 state = key->ct.state;
478 if (key->ct.zone == info->zone.id &&
479 ((state & OVS_CS_F_TRACKED) && !(state & OVS_CS_F_NEW))) {
480 /* Previous lookup has shown that this connection is already
481 * tracked and committed. Skip committing.
486 err = __ovs_ct_lookup(net, key, info, skb);
489 /* This is a no-op if the connection has already been confirmed. */
490 if (nf_conntrack_confirm(skb) != NF_ACCEPT)
496 static bool labels_nonzero(const struct ovs_key_ct_labels *labels)
500 for (i = 0; i < sizeof(*labels); i++)
501 if (labels->ct_labels[i])
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
 * value if 'skb' is freed.
 */
/* Execute the conntrack action described by 'info' on 'skb'.
 *
 * The packet is pulled to its network header offset for the duration of the
 * call and pushed back by the same amount afterwards.  Fragments
 * (key->ip.frag != OVS_FRAG_TYPE_NONE) first go through handle_fragments()
 * with info->zone.id.  The packet is then handed to ovs_ct_commit() or
 * ovs_ct_lookup().  After that the mark is written when info->mark.mask is
 * nonzero, and the labels when info->labels.mask has any nonzero byte.
 *
 * NOTE(review): this listing is missing the local declarations, the
 * condition that selects commit versus lookup, the error checks and jump
 * labels, the trailing arguments of the set_mark/set_labels calls and the
 * return statements.
 */
510 int ovs_ct_execute(struct net *net, struct sk_buff *skb,
511 struct sw_flow_key *key,
512 const struct ovs_conntrack_info *info)
517 /* The conntrack module expects to be working at L3. */
518 nh_ofs = skb_network_offset(skb);
519 skb_pull(skb, nh_ofs);
521 if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
522 err = handle_fragments(net, key, info->zone.id, skb);
528 err = ovs_ct_commit(net, key, info, skb);
530 err = ovs_ct_lookup(net, key, info, skb);
534 if (info->mark.mask) {
535 err = ovs_ct_set_mark(skb, key, info->mark.value,
540 if (labels_nonzero(&info->labels.mask))
541 err = ovs_ct_set_labels(skb, key, &info->labels.value,
544 skb_push(skb, nh_ofs);
/* Attach the conntrack helper called 'name' to the action's template.
 *
 * The helper is obtained with nf_conntrack_helper_try_module_get() for
 * info->family; an unknown name is logged.  A help extension is then added
 * to the template info->ct with GFP_KERNEL.  The visible module_put()
 * releases the helper's module again, presumably when that allocation
 * fails.  Finally the helper is published in help->helper with
 * rcu_assign_pointer() and remembered in info->helper.
 *
 * NOTE(review): this listing is missing the remaining arguments of the
 * try_module_get call, both failure conditions and every return statement,
 * so the error codes cannot be confirmed from here.
 */
550 static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
551 const struct sw_flow_key *key, bool log)
553 struct nf_conntrack_helper *helper;
554 struct nf_conn_help *help;
556 helper = nf_conntrack_helper_try_module_get(name, info->family,
559 OVS_NLERR(log, "Unknown helper \"%s\"", name);
563 help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL);
565 module_put(helper->me);
569 rcu_assign_pointer(help->helper, helper);
570 info->helper = helper;
574 static const struct ovs_ct_len_tbl ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = {
575 [OVS_CT_ATTR_COMMIT] = { .minlen = 0, .maxlen = 0 },
576 [OVS_CT_ATTR_ZONE] = { .minlen = sizeof(u16),
577 .maxlen = sizeof(u16) },
578 [OVS_CT_ATTR_MARK] = { .minlen = sizeof(struct md_mark),
579 .maxlen = sizeof(struct md_mark) },
580 [OVS_CT_ATTR_LABELS] = { .minlen = sizeof(struct md_labels),
581 .maxlen = sizeof(struct md_labels) },
582 [OVS_CT_ATTR_HELPER] = { .minlen = 1,
583 .maxlen = NF_CT_HELPER_NAME_LEN }
586 static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
587 const char **helper, bool log)
592 nla_for_each_nested(a, attr, rem) {
593 int type = nla_type(a);
594 int maxlen = ovs_ct_attr_lens[type].maxlen;
595 int minlen = ovs_ct_attr_lens[type].minlen;
597 if (type > OVS_CT_ATTR_MAX) {
599 "Unknown conntrack attr (type=%d, max=%d)",
600 type, OVS_CT_ATTR_MAX);
603 if (nla_len(a) < minlen || nla_len(a) > maxlen) {
605 "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
606 type, nla_len(a), maxlen);
611 case OVS_CT_ATTR_COMMIT:
614 #ifdef CONFIG_NF_CONNTRACK_ZONES
615 case OVS_CT_ATTR_ZONE:
616 info->zone.id = nla_get_u16(a);
619 #ifdef CONFIG_NF_CONNTRACK_MARK
620 case OVS_CT_ATTR_MARK: {
621 struct md_mark *mark = nla_data(a);
624 OVS_NLERR(log, "ct_mark mask cannot be 0");
631 #ifdef CONFIG_NF_CONNTRACK_LABELS
632 case OVS_CT_ATTR_LABELS: {
633 struct md_labels *labels = nla_data(a);
635 if (!labels_nonzero(&labels->mask)) {
636 OVS_NLERR(log, "ct_labels mask cannot be 0");
639 info->labels = *labels;
643 case OVS_CT_ATTR_HELPER:
644 *helper = nla_data(a);
645 if (!memchr(*helper, '\0', nla_len(a))) {
646 OVS_NLERR(log, "Invalid conntrack helper");
651 OVS_NLERR(log, "Unknown conntrack attr (%d)",
658 OVS_NLERR(log, "Conntrack attr has %d unknown bytes", rem);
665 bool ovs_ct_verify(struct net *net, enum ovs_key_attr attr)
667 if (attr == OVS_KEY_ATTR_CT_STATE)
669 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
670 attr == OVS_KEY_ATTR_CT_ZONE)
672 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) &&
673 attr == OVS_KEY_ATTR_CT_MARK)
675 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
676 attr == OVS_KEY_ATTR_CT_LABELS) {
677 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
679 return ovs_net->xt_label;
/* Validate a userspace conntrack action attribute and append the resulting
 * OVS_ACTION_ATTR_CT action to '*sfa'.
 *
 * Steps visible here: derive the netfilter family from 'key' (an
 * NFPROTO_UNSPEC result is logged as an error); zero a local
 * ovs_conntrack_info, record the family and initialise the default zone;
 * parse the nested attributes with parse_ct(); allocate a conntrack template
 * for the zone with GFP_KERNEL, set IPS_CONFIRMED_BIT on it and take a
 * reference; attach a helper through ovs_ct_add_helper(); copy the info into
 * the action list with ovs_nla_add_action().  __ovs_ct_free_action() is
 * called on the local info, presumably on the error path.
 *
 * NOTE(review): this listing is missing the declarations of 'family' and
 * 'err', the error checks after each step, the condition that guards the
 * helper attachment, the jump label and all return statements.
 */
685 int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
686 const struct sw_flow_key *key,
687 struct sw_flow_actions **sfa, bool log)
689 struct ovs_conntrack_info ct_info;
690 const char *helper = NULL;
694 family = key_to_nfproto(key);
695 if (family == NFPROTO_UNSPEC) {
696 OVS_NLERR(log, "ct family unspecified");
700 memset(&ct_info, 0, sizeof(ct_info));
701 ct_info.family = family;
703 nf_ct_zone_init(&ct_info.zone, NF_CT_DEFAULT_ZONE_ID,
704 NF_CT_DEFAULT_ZONE_DIR, 0);
706 err = parse_ct(attr, &ct_info, &helper, log);
710 /* Set up template for tracking connections in specific zones. */
711 ct_info.ct = nf_ct_tmpl_alloc(net, &ct_info.zone, GFP_KERNEL);
713 OVS_NLERR(log, "Failed to allocate conntrack template");
717 __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
718 nf_conntrack_get(&ct_info.ct->ct_general);
721 err = ovs_ct_add_helper(&ct_info, helper, key, log);
726 err = ovs_nla_add_action(sfa, OVS_ACTION_ATTR_CT, &ct_info,
727 sizeof(ct_info), log);
733 __ovs_ct_free_action(&ct_info);
/* Serialise 'ct_info' into a nested OVS_ACTION_ATTR_CT attribute in 'skb':
 * the commit flag when set, the zone id, the mark when its mask is nonzero,
 * the labels when their mask has a nonzero byte, and the helper name when a
 * helper is attached.  Zone, mark and labels are emitted only when the
 * matching CONFIG_NF_CONNTRACK_* option is enabled.
 * NOTE(review): this listing is missing the second parameter line (the
 * 'skb' used below), the check of nla_nest_start()'s result, the last
 * argument of two nla_put() calls and every return statement.
 */
737 int ovs_ct_action_to_attr(const struct ovs_conntrack_info *ct_info,
740 struct nlattr *start;
742 start = nla_nest_start(skb, OVS_ACTION_ATTR_CT);
746 if (ct_info->commit && nla_put_flag(skb, OVS_CT_ATTR_COMMIT))
748 if (IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES) &&
749 nla_put_u16(skb, OVS_CT_ATTR_ZONE, ct_info->zone.id))
751 if (IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) && ct_info->mark.mask &&
752 nla_put(skb, OVS_CT_ATTR_MARK, sizeof(ct_info->mark),
755 if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
756 labels_nonzero(&ct_info->labels.mask) &&
757 nla_put(skb, OVS_CT_ATTR_LABELS, sizeof(ct_info->labels),
760 if (ct_info->helper) {
761 if (nla_put_string(skb, OVS_CT_ATTR_HELPER,
762 ct_info->helper->name))
766 nla_nest_end(skb, start);
/* Release the resources held by the conntrack action stored in netlink
 * attribute 'a'.  The attribute payload is the action's
 * struct ovs_conntrack_info.
 */
void ovs_ct_free_action(const struct nlattr *a)
{
	__ovs_ct_free_action(nla_data(a));
}
/* Drop the helper's module reference (module_put()) and free the conntrack
 * template (nf_ct_tmpl_free()) held by 'ct_info'.
 * NOTE(review): the lines between these statements are missing from this
 * listing.  Presumably each call is guarded by a NULL check on
 * ct_info->helper / ct_info->ct, since an action may have no helper -
 * confirm against the full source.
 */
778 static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info)
781 module_put(ct_info->helper->me);
783 nf_ct_tmpl_free(ct_info->ct);
786 void ovs_ct_init(struct net *net)
788 unsigned int n_bits = sizeof(struct ovs_key_ct_labels) * BITS_PER_BYTE;
789 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
791 if (nf_connlabels_get(net, n_bits)) {
792 ovs_net->xt_label = false;
793 OVS_NLERR(true, "Failed to set connlabel length");
795 ovs_net->xt_label = true;
799 void ovs_ct_exit(struct net *net)
801 struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
803 if (ovs_net->xt_label)
804 nf_connlabels_put(net);
807 #endif /* CONFIG_NF_CONNTRACK */