From 78aab811be52cecb6dea3dc937bb9bc4e558e422 Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Thu, 15 Oct 2015 21:14:40 -0700 Subject: [PATCH] ovn: Add stateful ACL support. Add support for the "allow-related" ACL action. This is dependent on the OVS conntrack functionality, which is not available on all platforms or kernel versions. Here is a sample policy that will allow all tenants in logical switch "ls0" to SSH to each other. Anyone can make an HTTP request to "lp0". All other IP traffic is dropped: ovn-nbctl acl-add ls0 from-lport 100 ip allow-related ovn-nbctl acl-add ls0 to-lport 100 tcp.dst==22 allow-related ovn-nbctl acl-add ls0 to-lport 100 "outport == \"lp0\" \ && tcp.dst==80" allow-related ovn-nbctl acl-add ls0 to-lport 1 ip drop Note: Kernel conntrack support is checked into the mainline Linux kernel, but hasn't been backported to the main OVS repo yet. Signed-off-by: Justin Pettit Acked-by: Ben Pfaff --- ovn/TODO | 8 ++ ovn/controller/binding.c | 51 +++++++- ovn/controller/binding.h | 4 +- ovn/controller/lflow.c | 14 +- ovn/controller/lflow.h | 8 +- ovn/controller/ovn-controller.8.xml | 5 + ovn/controller/ovn-controller.c | 33 ++++- ovn/controller/ovn-controller.h | 4 + ovn/controller/physical.c | 16 ++- ovn/controller/physical.h | 3 +- ovn/lib/actions.c | 58 +++++++- ovn/lib/actions.h | 7 +- ovn/northd/ovn-northd.8.xml | 72 ++++++++-- ovn/northd/ovn-northd.c | 196 +++++++++++++++++++++------- ovn/ovn-architecture.7.xml | 8 ++ ovn/ovn-sb.xml | 60 ++++++++- tests/ovn.at | 2 + tests/test-ovn.c | 7 +- 18 files changed, 466 insertions(+), 90 deletions(-) diff --git a/ovn/TODO b/ovn/TODO index a48251fb2..8d22c2cc8 100644 --- a/ovn/TODO +++ b/ovn/TODO @@ -137,3 +137,11 @@ Both ovn-controller and ovn-contorller-vtep should use BFD to monitor the tunnel liveness. Both ovs-vswitchd schema and VTEP schema supports BFD. + +* ACL + +** Support FTP ALGs. + +** Support reject action. + +** Support log option. diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c index b2f2a191a..7f31b310f 100644 --- a/ovn/controller/binding.c +++ b/ovn/controller/binding.c @@ -16,6 +16,7 @@ #include #include "binding.h" +#include "lib/bitmap.h" #include "lib/sset.h" #include "lib/util.h" #include "lib/vswitch-idl.h" @@ -71,9 +72,55 @@ get_local_iface_ids(const struct ovsrec_bridge *br_int, struct sset *lports) } } +static void +update_ct_zones(struct sset *lports, struct simap *ct_zones, + unsigned long *ct_zone_bitmap) +{ + struct simap_node *ct_zone, *ct_zone_next; + const char *iface_id; + int scan_start = 1; + + /* xxx This is wasteful to assign a zone to each port--even if no + * xxx security policy is applied. */ + + /* Delete any zones that are associated with removed ports. */ + SIMAP_FOR_EACH_SAFE(ct_zone, ct_zone_next, ct_zones) { + if (!sset_contains(lports, ct_zone->name)) { + bitmap_set0(ct_zone_bitmap, ct_zone->data); + simap_delete(ct_zones, ct_zone); + } + } + + /* Assign a unique zone id for each logical port. */ + SSET_FOR_EACH(iface_id, lports) { + size_t zone; + + if (simap_contains(ct_zones, iface_id)) { + continue; + } + + /* We assume that there are 64K zones and that we own them all. */ + zone = bitmap_scan(ct_zone_bitmap, 0, scan_start, MAX_CT_ZONES + 1); + if (zone == MAX_CT_ZONES + 1) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1); + VLOG_WARN_RL(&rl, "exhausted all ct zones"); + return; + } + scan_start = zone + 1; + + bitmap_set1(ct_zone_bitmap, zone); + simap_put(ct_zones, iface_id, zone); + + /* xxx We should erase any old entries for this + * xxx zone, but we need a generic interface to the conntrack + * xxx table. */ + } +} + void binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, - const char *chassis_id) + const char *chassis_id, struct simap *ct_zones, + unsigned long *ct_zone_bitmap) { const struct sbrec_chassis *chassis_rec; const struct sbrec_port_binding *binding_rec; @@ -97,6 +144,7 @@ binding_run(struct controller_ctx *ctx, const struct ovsrec_bridge *br_int, /* We have no integration bridge, therefore no local logical ports. * We'll remove our chassis from all port binding records below. */ } + update_ct_zones(&lports, ct_zones, ct_zone_bitmap); sset_clone(&all_lports, &lports); ovsdb_idl_txn_add_comment( @@ -141,6 +189,7 @@ binding_cleanup(struct controller_ctx *ctx, const char *chassis_id) if (!chassis_id) { return true; } + const struct sbrec_chassis *chassis_rec = get_chassis(ctx->ovnsb_idl, chassis_id); if (!chassis_rec) { diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h index 708a6e684..3d4a91eab 100644 --- a/ovn/controller/binding.h +++ b/ovn/controller/binding.h @@ -22,10 +22,12 @@ struct controller_ctx; struct ovsdb_idl; struct ovsrec_bridge; +struct simap; void binding_register_ovs_idl(struct ovsdb_idl *); void binding_run(struct controller_ctx *, const struct ovsrec_bridge *br_int, - const char *chassis_id); + const char *chassis_id, struct simap *ct_zones, + unsigned long *ct_zone_bitmap); bool binding_cleanup(struct controller_ctx *, const char *chassis_id); #endif /* ovn/binding.h */ diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c index 70687eab6..37dc48d00 100644 --- a/ovn/controller/lflow.c +++ b/ovn/controller/lflow.c @@ -58,6 +58,15 @@ symtab_init(void) MFF_LOG_REGS; #undef MFF_LOG_REG + /* Connection tracking state. */ + expr_symtab_add_field(&symtab, "ct_state", MFF_CT_STATE, NULL, false); + expr_symtab_add_predicate(&symtab, "ct.trk", "ct_state[7]"); + expr_symtab_add_subfield(&symtab, "ct.new", "ct.trk", "ct_state[0]"); + expr_symtab_add_subfield(&symtab, "ct.est", "ct.trk", "ct_state[1]"); + expr_symtab_add_subfield(&symtab, "ct.rel", "ct.trk", "ct_state[2]"); + expr_symtab_add_subfield(&symtab, "ct.inv", "ct.trk", "ct_state[5]"); + expr_symtab_add_subfield(&symtab, "ct.rpl", "ct.trk", "ct_state[6]"); + /* Data fields. */ expr_symtab_add_field(&symtab, "eth.src", MFF_ETH_SRC, NULL, false); expr_symtab_add_field(&symtab, "eth.dst", MFF_ETH_DST, NULL, false); @@ -245,7 +254,8 @@ lflow_init(void) /* Translates logical flows in the Logical_Flow table in the OVN_SB database * into OpenFlow flows. See ovn-architecture(7) for more information. */ void -lflow_run(struct controller_ctx *ctx, struct hmap *flow_table) +lflow_run(struct controller_ctx *ctx, struct hmap *flow_table, + const struct simap *ct_zones) { struct hmap flows = HMAP_INITIALIZER(&flows); uint32_t conj_id_ofs = 1; @@ -284,7 +294,7 @@ lflow_run(struct controller_ctx *ctx, struct hmap *flow_table) ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); error = actions_parse_string(lflow->actions, &symtab, &ldp->ports, - first_ptable, LOG_PIPELINE_LEN, + ct_zones, first_ptable, LOG_PIPELINE_LEN, lflow->table_id, output_ptable, &ofpacts, &prereqs); if (error) { diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h index 5cac76c90..c3a92f60e 100644 --- a/ovn/controller/lflow.h +++ b/ovn/controller/lflow.h @@ -36,6 +36,7 @@ struct controller_ctx; struct hmap; +struct simap; struct uuid; /* OpenFlow table numbers. @@ -58,6 +59,7 @@ struct uuid; * These values are documented in ovn-architecture(7), please update the * documentation if you change any of them. */ #define MFF_LOG_DATAPATH MFF_METADATA /* Logical datapath (64 bits). */ +#define MFF_LOG_CT_ZONE MFF_REG5 /* Logical conntrack zone (32 bits). */ #define MFF_LOG_INPORT MFF_REG6 /* Logical input port (32 bits). */ #define MFF_LOG_OUTPORT MFF_REG7 /* Logical output port (32 bits). */ @@ -69,11 +71,11 @@ struct uuid; MFF_LOG_REG(MFF_REG1) \ MFF_LOG_REG(MFF_REG2) \ MFF_LOG_REG(MFF_REG3) \ - MFF_LOG_REG(MFF_REG4) \ - MFF_LOG_REG(MFF_REG5) + MFF_LOG_REG(MFF_REG4) void lflow_init(void); -void lflow_run(struct controller_ctx *, struct hmap *flow_table); +void lflow_run(struct controller_ctx *, struct hmap *flow_table, + const struct simap *ct_zones); void lflow_destroy(void); #endif /* ovn/lflow.h */ diff --git a/ovn/controller/ovn-controller.8.xml b/ovn/controller/ovn-controller.8.xml index 508ad26f5..0def2b43a 100644 --- a/ovn/controller/ovn-controller.8.xml +++ b/ovn/controller/ovn-controller.8.xml @@ -173,6 +173,11 @@
Causes ovn-controller to gracefully terminate.
+ +
ct-zone-list
+
+ Lists each local logical port and its connection tracking zone. +

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c index 0371c3826..85df099e1 100644 --- a/ovn/controller/ovn-controller.c +++ b/ovn/controller/ovn-controller.c @@ -27,6 +27,7 @@ #include "compiler.h" #include "daemon.h" #include "dirs.h" +#include "dynamic-string.h" #include "openvswitch/vconn.h" #include "openvswitch/vlog.h" #include "ovn/lib/ovn-sb-idl.h" @@ -49,6 +50,7 @@ VLOG_DEFINE_THIS_MODULE(main); static unixctl_cb_func ovn_controller_exit; +static unixctl_cb_func ct_zone_list; #define DEFAULT_BRIDGE_NAME "br-int" @@ -449,6 +451,13 @@ main(int argc, char *argv[]) ovsdb_idl_create(ovnsb_remote, &sbrec_idl_class, true, true)); ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl); + /* Initialize connection tracking zones. */ + struct simap ct_zones = SIMAP_INITIALIZER(&ct_zones); + unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)]; + bitmap_set1(ct_zone_bitmap, 0); /* Zone 0 is reserved. */ + unixctl_command_register("ct-zone-list", "", 0, 0, + ct_zone_list, &ct_zones); + /* Main loop. */ exiting = false; while (!exiting) { @@ -470,17 +479,17 @@ main(int argc, char *argv[]) if (chassis_id) { chassis_run(&ctx, chassis_id); encaps_run(&ctx, br_int, chassis_id); - binding_run(&ctx, br_int, chassis_id); + binding_run(&ctx, br_int, chassis_id, &ct_zones, ct_zone_bitmap); } if (br_int) { enum mf_field_id mff_ovn_geneve = ofctrl_run(br_int); struct hmap flow_table = HMAP_INITIALIZER(&flow_table); - lflow_run(&ctx, &flow_table); + lflow_run(&ctx, &flow_table, &ct_zones); if (chassis_id) { physical_run(&ctx, mff_ovn_geneve, - br_int, chassis_id, &flow_table); + br_int, chassis_id, &ct_zones, &flow_table); } ofctrl_put(&flow_table); hmap_destroy(&flow_table); @@ -536,6 +545,8 @@ main(int argc, char *argv[]) lflow_destroy(); ofctrl_destroy(); + simap_destroy(&ct_zones); + ovsdb_idl_loop_destroy(&ovs_idl_loop); ovsdb_idl_loop_destroy(&ovnsb_idl_loop); @@ -643,3 +654,19 @@ ovn_controller_exit(struct unixctl_conn *conn, int argc OVS_UNUSED, unixctl_command_reply(conn, NULL); } + +static void +ct_zone_list(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *ct_zones_) +{ + struct simap *ct_zones = ct_zones_; + struct ds ds = DS_EMPTY_INITIALIZER; + struct simap_node *zone; + + SIMAP_FOR_EACH(zone, ct_zones) { + ds_put_format(&ds, "%s %d\n", zone->name, zone->data); + } + + unixctl_command_reply(conn, ds_cstr(&ds)); + ds_destroy(&ds); +} diff --git a/ovn/controller/ovn-controller.h b/ovn/controller/ovn-controller.h index 9766bc3d2..85e13a658 100644 --- a/ovn/controller/ovn-controller.h +++ b/ovn/controller/ovn-controller.h @@ -17,8 +17,12 @@ #ifndef OVN_CONTROLLER_H #define OVN_CONTROLLER_H 1 +#include "simap.h" #include "ovn/lib/ovn-sb-idl.h" +/* Linux supports a maximum of 64K zones, which seems like a fine default. */ +#define MAX_CT_ZONES 65535 + struct controller_ctx { struct ovsdb_idl *ovnsb_idl; struct ovsdb_idl_txn *ovnsb_idl_txn; diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c index 0c239dffb..630d420c2 100644 --- a/ovn/controller/physical.c +++ b/ovn/controller/physical.c @@ -136,7 +136,7 @@ put_stack(enum mf_field_id field, struct ofpact_stack *stack) void physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, const struct ovsrec_bridge *br_int, const char *this_chassis_id, - struct hmap *flow_table) + const struct simap *ct_zones, struct hmap *flow_table) { struct simap localvif_to_ofport = SIMAP_INITIALIZER(&localvif_to_ofport); struct hmap tunnels = HMAP_INITIALIZER(&tunnels); @@ -293,6 +293,7 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, struct match match; if (!tun) { + int zone_id = simap_get(ct_zones, binding->logical_port); /* Packets that arrive from a vif can belong to a VM or * to a container located inside that VM. Packets that * arrive from containers have a tag (vlan) associated with them. @@ -360,6 +361,10 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, match_set_dl_vlan(&match, htons(tag)); } + if (zone_id) { + put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts); + } + /* Set MFF_LOG_DATAPATH and MFF_LOG_INPORT. */ put_load(binding->datapath->tunnel_key, MFF_LOG_DATAPATH, 0, 64, &ofpacts); @@ -393,6 +398,10 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, binding->tunnel_key); + if (zone_id) { + put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts); + } + /* Resubmit to table 34. */ put_resubmit(OFTABLE_DROP_LOOPBACK, &ofpacts); ofctrl_add_flow(flow_table, OFTABLE_LOCAL_OUTPUT, 100, &match, @@ -500,6 +509,11 @@ physical_run(struct controller_ctx *ctx, enum mf_field_id mff_ovn_geneve, continue; } + int zone_id = simap_get(ct_zones, port->logical_port); + if (zone_id) { + put_load(zone_id, MFF_LOG_CT_ZONE, 0, 32, &ofpacts); + } + if (simap_contains(&localvif_to_ofport, port->parent_port ? port->parent_port : port->logical_port)) { diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h index c79e97bca..a06f759a5 100644 --- a/ovn/controller/physical.h +++ b/ovn/controller/physical.h @@ -31,6 +31,7 @@ struct controller_ctx; struct hmap; struct ovsdb_idl; struct ovsrec_bridge; +struct simap; /* OVN Geneve option information. * @@ -44,6 +45,6 @@ struct ovsrec_bridge; void physical_register_ovs_idl(struct ovsdb_idl *); void physical_run(struct controller_ctx *, enum mf_field_id mff_ovn_geneve, const struct ovsrec_bridge *br_int, const char *chassis_id, - struct hmap *flow_table); + const struct simap *ct_zones, struct hmap *flow_table); #endif /* ovn/physical.h */ diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c index 395666fcd..aebe5ce77 100644 --- a/ovn/lib/actions.c +++ b/ovn/lib/actions.c @@ -24,6 +24,7 @@ #include "lex.h" #include "ofp-actions.h" #include "ofpbuf.h" +#include "simap.h" /* Context maintained during actions_parse(). */ struct action_context { @@ -40,6 +41,7 @@ struct action_context { uint8_t first_ptable; /* First OpenFlow table. */ uint8_t cur_ltable; /* 0 <= cur_ltable < n_tables. */ uint8_t output_ptable; /* OpenFlow table for 'output' to resubmit. */ + const struct simap *ct_zones; /* Map from port name to conntrack zone. */ /* State. */ char *error; /* Error, if any, otherwise NULL. */ @@ -180,6 +182,41 @@ parse_next_action(struct action_context *ctx) } } +static void +emit_ct(struct action_context *ctx, bool recirc_next, bool commit) +{ + struct ofpact_conntrack *ct = ofpact_put_CT(ctx->ofpacts); + ct->flags |= commit ? NX_CT_F_COMMIT : 0; + + /* If "recirc" is set, we automatically go to the next table. */ + if (recirc_next) { + if (ctx->cur_ltable < ctx->n_tables) { + ct->recirc_table = ctx->first_ptable + ctx->cur_ltable + 1; + } else { + action_error(ctx, "\"ct_next\" action not allowed in last table."); + return; + } + } else { + ct->recirc_table = NX_CT_RECIRC_NONE; + } + + /* xxx Should remove hard-coding reg5 if we refactor library. */ + ct->zone_src.field = mf_from_id(MFF_REG5); + ct->zone_src.ofs = 0; + ct->zone_src.n_bits = 16; + + /* We do not support ALGs yet. */ + ct->alg = 0; + + /* CT only works with IP, so set up a prerequisite. */ + struct expr *expr; + char *error; + + expr = expr_parse_string("ip", ctx->symtab, &error); + ovs_assert(!error); + ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs, expr); +} + static void parse_actions(struct action_context *ctx) { @@ -219,6 +256,10 @@ parse_actions(struct action_context *ctx) } else { action_syntax_error(ctx, "expecting `--'"); } + } else if (lexer_match_id(ctx->lexer, "ct_next")) { + emit_ct(ctx, true, false); + } else if (lexer_match_id(ctx->lexer, "ct_commit")) { + emit_ct(ctx, false, true); } else { action_syntax_error(ctx, "expecting action"); } @@ -242,6 +283,8 @@ parse_actions(struct action_context *ctx) * (as one would provide to expr_to_matches()). Strings used in the actions * that are not in 'ports' are translated to zero. * + * 'ct_zones' provides a map from a port name to its connection tracking zone. + * * OVN maps each logical flow table (ltable), one-to-one, onto a physical * OpenFlow flow table (ptable). A number of parameters describe this mapping * and data related to flow tables: @@ -257,6 +300,9 @@ parse_actions(struct action_context *ctx) * cur_ltable + 1 < n_tables, then this defines the default table that * "next" will jump to. * + * 'next_table_id' should be the OpenFlow table to which the "next" action will + * resubmit, or 0 to disable "next". + * * - 'output_ptable' should be the OpenFlow table to which the logical * "output" action will resubmit * @@ -272,7 +318,7 @@ parse_actions(struct action_context *ctx) */ char * OVS_WARN_UNUSED_RESULT actions_parse(struct lexer *lexer, const struct shash *symtab, - const struct simap *ports, + const struct simap *ports, const struct simap *ct_zones, uint8_t first_ptable, uint8_t n_tables, uint8_t cur_ltable, uint8_t output_ptable, struct ofpbuf *ofpacts, struct expr **prereqsp) @@ -283,6 +329,7 @@ actions_parse(struct lexer *lexer, const struct shash *symtab, ctx.lexer = lexer; ctx.symtab = symtab; ctx.ports = ports; + ctx.ct_zones = ct_zones; ctx.first_ptable = first_ptable; ctx.n_tables = n_tables; ctx.cur_ltable = cur_ltable; @@ -307,8 +354,8 @@ actions_parse(struct lexer *lexer, const struct shash *symtab, /* Like actions_parse(), but the actions are taken from 's'. */ char * OVS_WARN_UNUSED_RESULT actions_parse_string(const char *s, const struct shash *symtab, - const struct simap *ports, uint8_t first_table, - uint8_t n_tables, uint8_t cur_table, + const struct simap *ports, const struct simap *ct_zones, + uint8_t first_table, uint8_t n_tables, uint8_t cur_table, uint8_t output_table, struct ofpbuf *ofpacts, struct expr **prereqsp) { @@ -317,8 +364,9 @@ actions_parse_string(const char *s, const struct shash *symtab, lexer_init(&lexer, s); lexer_get(&lexer); - error = actions_parse(&lexer, symtab, ports, first_table, n_tables, - cur_table, output_table, ofpacts, prereqsp); + error = actions_parse(&lexer, symtab, ports, ct_zones, first_table, + n_tables, cur_table, output_table, ofpacts, + prereqsp); lexer_destroy(&lexer); return error; diff --git a/ovn/lib/actions.h b/ovn/lib/actions.h index a84c05b06..92f71ded8 100644 --- a/ovn/lib/actions.h +++ b/ovn/lib/actions.h @@ -27,13 +27,14 @@ struct shash; struct simap; char *actions_parse(struct lexer *, const struct shash *symtab, - const struct simap *ports, uint8_t first_ptable, - uint8_t n_tables, uint8_t cur_ltable, + const struct simap *ports, const struct simap *ct_zones, + uint8_t first_ptable, uint8_t n_tables, uint8_t cur_ltable, uint8_t output_ptable, struct ofpbuf *ofpacts, struct expr **prereqsp) OVS_WARN_UNUSED_RESULT; char *actions_parse_string(const char *s, const struct shash *symtab, - const struct simap *ports, uint8_t first_ptable, + const struct simap *ports, + const struct simap *ct_zones, uint8_t first_ptable, uint8_t n_tables, uint8_t cur_ltable, uint8_t output_ptable, struct ofpbuf *ofpacts, struct expr **prereqsp) diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml index 3c5d362fb..f51852e48 100644 --- a/ovn/northd/ovn-northd.8.xml +++ b/ovn/northd/ovn-northd.8.xml @@ -137,24 +137,63 @@ be dropped.

-

Ingress table 1: from-lport ACLs

+

Ingress Table 1: from-lport Pre-ACLs

+ +

+ Ingress table 1 prepares flows for possible stateful ACL processing + in table 2. It contains a priority-0 flow that simply moves + traffic to table 2. If stateful ACLs are used in the logical + datapath, a priority-100 flow is added that sends IP packets to + the connection tracker before advancing to table 2. +

+ +

Ingress table 2: from-lport ACLs

Logical flows in this table closely reproduce those in the - ACL table in the OVN_Northbound database for - the from-lport direction. allow and - allow-related ACLs translate into logical flows with the - next; action, others to drop;. The - priority values from the ACL table are used - directly. + ACL table in the OVN_Northbound database + for the from-lport direction. allow + ACLs translate into logical flows with the next; + action, allow-related ACLs translate into logical + flows with the ct_next; action, other ACLs translate + to drop;. The priority values from the + ACL table are used directly.

- Ingress table 1 also contains a priority 0 flow with action - next;, so that ACLs allow packets by default. + Ingress table 2 also contains a priority 0 flow with action + next;, so that ACLs allow packets by default. If the + logical datapath has a statetful ACL, the following flows will + also be added:

-

Ingress Table 2: Destination Lookup

+
    +
  • + A priority-1 flow to commit IP traffic to the connection + tracker. This is needed for the default allow policy because, + while the initiater's direction may not have any stateful rules, + the server's may and then its return traffic would not be known + and marked as invalid. +
  • + +
  • + A priority-65535 flow that allows any traffic that has been + committed to the connection tracker (i.e., established flows). +
  • + +
  • + A priority-65535 flow that allows any traffic that is considered + related to a committed flow in the connection tracker (e.g., an + ICMP Port Unreachable from a non-listening UDP port). +
  • + +
  • + A priority-65535 flow that drops all traffic marked by the + connection tracker as invalid. +
  • +
+ +

Ingress Table 3: Destination Lookup

This table implements switching behavior. It contains these logical @@ -185,13 +224,20 @@ -

Egress Table 0: to-lport ACLs

+

Egress Table 0: to-lport Pre-ACLs

+ +

+ This is similar to ingress table 1 except for to-lport + traffic. +

+ +

Egress Table 1: to-lport ACLs

- This is similar to ingress table 1 except for to-lport ACLs. + This is similar to ingress table 2 except for to-lport ACLs.

-

Egress Table 1: Egress Port Security

+

Egress Table 2: Egress Port Security

This is similar to the ingress port security logic in ingress table 0, diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index e698907ef..ca2eca694 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -60,6 +60,7 @@ static const char *default_db(void); * These must be listed in the order that the stages will be executed. */ #define INGRESS_STAGES \ INGRESS_STAGE(PORT_SEC, port_sec) \ + INGRESS_STAGE(PRE_ACL, pre_acl) \ INGRESS_STAGE(ACL, acl) \ INGRESS_STAGE(L2_LKUP, l2_lkup) @@ -73,8 +74,9 @@ enum ingress_stage { /* Egress pipeline stages. * * These must be listed in the order that the stages will be executed. */ -#define EGRESS_STAGES \ - EGRESS_STAGE(ACL, acl) \ +#define EGRESS_STAGES \ + EGRESS_STAGE(PRE_ACL, pre_acl) \ + EGRESS_STAGE(ACL, acl) \ EGRESS_STAGE(PORT_SEC, port_sec) enum egress_stage { @@ -722,6 +724,140 @@ lport_is_enabled(const struct nbrec_logical_port *lport) return !lport->enabled || *lport->enabled; } +static bool +has_stateful_acl(struct ovn_datapath *od) +{ + for (size_t i = 0; i < od->nb->n_acls; i++) { + struct nbrec_acl *acl = od->nb->acls[i]; + if (!strcmp(acl->action, "allow-related")) { + return true; + } + } + + return false; +} + +static void +build_acls(struct ovn_datapath *od, struct hmap *lflows) +{ + bool has_stateful = has_stateful_acl(od); + + /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are + * allowed by default. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 0, "1", "next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 0, "1", "next;"); + + /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by + * default. A related rule at priority 1 is added below if there + * are any stateful ACLs in this datapath. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 0, "1", "next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 0, "1", "next;"); + + /* If there are any stateful ACL rules in this dapapath, we must + * send all IP packets through the conntrack action, which handles + * defragmentation, in order to match L4 headers. */ + if (has_stateful) { + /* Ingress and Egress Pre-ACL Table (Priority 100). + * + * Regardless of whether the ACL is "from-lport" or "to-lport", + * we need rules in both the ingress and egress table, because + * the return traffic needs to be followed. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 100, + "ip", "ct_next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 100, + "ip", "ct_next;"); + + /* Ingress and Egress ACL Table (Priority 1). + * + * By default, traffic is allowed. This is partially handled by + * the Priority 0 ACL flows added earlier, but we also need to + * commit IP flows. This is because, while the initiater's + * direction may not have any stateful rules, the server's may + * and then its return traffic would not have an associated + * conntrack entry and would return "+invalid". */ + ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 1, "ip", + "ct_commit; next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 1, "ip", + "ct_commit; next;"); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Always drop traffic that's in an invalid state. This is + * enforced at a higher priority than ACLs can be defined. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX, + "ct.inv", "drop;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX, + "ct.inv", "drop;"); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Always allow traffic that is established to a committed + * conntrack entry. This is enforced at a higher priority than + * ACLs can be defined. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX, + "ct.est && !ct.rel && !ct.new && !ct.inv", + "next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX, + "ct.est && !ct.rel && !ct.new && !ct.inv", + "next;"); + + /* Ingress and Egress ACL Table (Priority 65535). + * + * Always allow traffic that is related to an existing conntrack + * entry. This is enforced at a higher priority than ACLs can + * be defined. + * + * NOTE: This does not support related data sessions (eg, + * a dynamically negotiated FTP data channel), but will allow + * related traffic such as an ICMP Port Unreachable through + * that's generated from a non-listening UDP port. */ + ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX, + "!ct.est && ct.rel && !ct.new && !ct.inv", + "next;"); + ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX, + "!ct.est && ct.rel && !ct.new && !ct.inv", + "next;"); + } + + /* Ingress or Egress ACL Table (Various priorities). */ + for (size_t i = 0; i < od->nb->n_acls; i++) { + struct nbrec_acl *acl = od->nb->acls[i]; + bool ingress = !strcmp(acl->direction, "from-lport") ? true :false; + enum ovn_pipeline pipeline = ingress ? P_IN : P_OUT; + uint8_t stage = ingress ? S_IN_ACL : S_OUT_ACL; + + if (!strcmp(acl->action, "allow")) { + /* If there are any stateful flows, we must even commit "allow" + * actions. This is because, while the initiater's + * direction may not have any stateful rules, the server's + * may and then its return traffic would not have an + * associated conntrack entry and would return "+invalid". */ + const char *actions = has_stateful ? "ct_commit; next;" : "next;"; + ovn_lflow_add(lflows, od, pipeline, stage, acl->priority, + acl->match, actions); + } else if (!strcmp(acl->action, "allow-related")) { + struct ds match = DS_EMPTY_INITIALIZER; + + /* Commit the connection tracking entry, which allows all + * other traffic related to this entry to flow due to the + * 65535 priority flow defined earlier. */ + ds_put_format(&match, "ct.new && (%s)", acl->match); + ovn_lflow_add(lflows, od, pipeline, stage, acl->priority, + ds_cstr(&match), "ct_commit; next;"); + + ds_destroy(&match); + } else if (!strcmp(acl->action, "drop")) { + ovn_lflow_add(lflows, od, pipeline, stage, acl->priority, + acl->match, "drop;"); + } else if (!strcmp(acl->action, "reject")) { + /* xxx Need to support "reject". */ + VLOG_INFO("reject is not a supported action"); + ovn_lflow_add(lflows, od, pipeline, stage, acl->priority, + acl->match, "drop;"); + } + } +} + /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database, * constructing their contents based on the OVN_NB database. */ static void @@ -769,27 +905,6 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, ds_destroy(&match); } - /* Ingress table 1: ACLs (any priority). */ - HMAP_FOR_EACH (od, key_node, datapaths) { - for (size_t i = 0; i < od->nb->n_acls; i++) { - const struct nbrec_acl *acl = od->nb->acls[i]; - const char *action; - - if (strcmp(acl->direction, "from-lport")) { - continue; - } - - action = (!strcmp(acl->action, "allow") || - !strcmp(acl->action, "allow-related")) - ? "next;" : "drop;"; - ovn_lflow_add(&lflows, od, P_IN, S_IN_ACL, acl->priority, - acl->match, action); - } - } - HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, P_IN, S_IN_ACL, 0, "1", "next;"); - } - /* Ingress table 2: Destination lookup, broadcast and multicast handling * (priority 100). */ HMAP_FOR_EACH (op, key_node, ports) { @@ -802,7 +917,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, "outport = \""MC_FLOOD"\"; output;"); } - /* Ingress table 2: Destination lookup, unicast handling (priority 50), */ + /* Ingress table 3: Destination lookup, unicast handling (priority 50), */ HMAP_FOR_EACH (op, key_node, ports) { for (size_t i = 0; i < op->nb->n_macs; i++) { struct eth_addr mac; @@ -835,7 +950,7 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, } } - /* Ingress table 2: Destination lookup for unknown MACs (priority 0). */ + /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */ HMAP_FOR_EACH (od, key_node, datapaths) { if (od->has_unknown) { ovn_lflow_add(&lflows, od, P_IN, S_IN_L2_LKUP, 0, "1", @@ -843,35 +958,14 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, } } - /* Egress table 0: ACLs (any priority). */ - HMAP_FOR_EACH (od, key_node, datapaths) { - for (size_t i = 0; i < od->nb->n_acls; i++) { - const struct nbrec_acl *acl = od->nb->acls[i]; - const char *action; - - if (strcmp(acl->direction, "to-lport")) { - continue; - } - - action = (!strcmp(acl->action, "allow") || - !strcmp(acl->action, "allow-related")) - ? "next;" : "drop;"; - ovn_lflow_add(&lflows, od, P_OUT, S_OUT_ACL, acl->priority, - acl->match, action); - } - } - HMAP_FOR_EACH (od, key_node, datapaths) { - ovn_lflow_add(&lflows, od, P_OUT, S_OUT_ACL, 0, "1", "next;"); - } - - /* Egress table 1: Egress port security multicast/broadcast (priority + /* Egress table 2: Egress port security multicast/broadcast (priority * 100). */ HMAP_FOR_EACH (od, key_node, datapaths) { ovn_lflow_add(&lflows, od, P_OUT, S_OUT_PORT_SEC, 100, "eth.dst[40]", "output;"); } - /* Egress table 1: Egress port security (priorities 50 and 150). + /* Egress table 2: Egress port security (priorities 50 and 150). * * Priority 50 rules implement port security for enabled logical port. * @@ -897,6 +991,12 @@ build_lflows(struct northd_context *ctx, struct hmap *datapaths, ds_destroy(&match); } + /* Build pre-ACL and ACL tables for both ingress and egress. + * Ingress tables 1 and 2. Egress tables 0 and 1. */ + HMAP_FOR_EACH (od, key_node, datapaths) { + build_acls(od, &lflows); + } + /* Push changes to the Logical_Flow table to database. */ const struct sbrec_logical_flow *sbflow, *next_sbflow; SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) { diff --git a/ovn/ovn-architecture.7.xml b/ovn/ovn-architecture.7.xml index 47dfc2a28..f5b84406b 100644 --- a/ovn/ovn-architecture.7.xml +++ b/ovn/ovn-architecture.7.xml @@ -654,6 +654,14 @@ tunnels as part of the tunnel key.) +

conntrack zone field
+
+ A field that denotes the connection tracking zone. The value only + has local significance and is not meaningful between chassis. + This is initialized to 0 at the beginning of the logical ingress + pipeline. OVN stores this in Nicira extension register number 5. +
+
VLAN ID
The VLAN ID is used as an interface between OVN and containers nested diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index 9892ff0aa..5e9604b0b 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -297,10 +297,11 @@ inport to outport; if they are equal, it treats the output as a no-op. In the common case, where they are different, the packet enters the egress pipeline. This transition to the - egress pipeline discards register data, e.g. reg0 - ... reg5, to achieve uniform behavior regardless of whether - the egress pipeline is on a different hypervisor (because registers - aren't preserve across tunnel encapsulation). + egress pipeline discards register data, e.g. reg0 ... + reg4 and connection tracking state, to achieve + uniform behavior regardless of whether the egress pipeline is on a + different hypervisor (because registers aren't preserve across + tunnel encapsulation).

@@ -684,7 +685,7 @@

    -
  • reg0...reg5
  • +
  • reg0...reg4
  • inport outport
  • eth.src eth.dst eth.type
  • vlan.tci vlan.vid vlan.pcp vlan.present
  • @@ -698,6 +699,22 @@
  • icmp4.type icmp4.code
  • icmp6.type icmp6.code
  • nd.target nd.sll nd.tll
  • +
  • +

    + ct_state, which has the following Boolean subfields: +

    +
      +
    • ct.new: True for a new flow
    • +
    • ct.est: True for an established flow
    • +
    • ct.rel: True for a related flow
    • +
    • ct.rpl: True for a reply flow
    • +
    • ct.inv: True for a connection entry in a bad state
    • +
    +

    + ct_state and its subfields are initialized by the + ct_next action, described below. +

    +

@@ -839,6 +856,37 @@ field1 and field2 must modifiable.

+ +
ct_next;
+
+

+ Apply connection tracking to the flow, initializing + ct_state for matching in later tables. + Automatically moves on to the next table, as if followed by + next. +

+ +

+ As a side effect, IP fragments will be reassembled for matching. + If a fragmented packet is output, then it will be sent with any + overlapping fragments squashed. The connection tracking state is + scoped by the logical port, so overlapping addresses may be used. + To allow traffic related to the matched flow, execute + ct_commit. +

+ +

+ It is possible to have actions follow ct_next, + but they will not have access to any of its side-effects and + is not generally useful. +

+
+ +
ct_commit;
+
+ Commit the flow to the connection tracking entry associated + with it by a previous call to ct_next. +

@@ -849,8 +897,6 @@

learn
-
conntrack
-
dec_ttl { action, ... } { action; ...};
decrement TTL; execute first set of actions if diff --git a/tests/ovn.at b/tests/ovn.at index 631dd56b2..6aa116ac6 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -442,6 +442,8 @@ drop; => actions=drop, prereqs=1 next; => actions=resubmit(,27), prereqs=1 next(0); => actions=resubmit(,16), prereqs=1 next(15); => actions=resubmit(,31), prereqs=1 +ct_next; => actions=ct(table=27,zone=NXM_NX_REG5[0..15]), prereqs=ip +ct_commit; => actions=ct(commit,zone=NXM_NX_REG5[0..15]), prereqs=ip output; => actions=resubmit(,64), prereqs=1 outport="eth0"; next; outport="LOCAL"; next; => actions=set_field:0x5->reg7,resubmit(,27),set_field:0xfffe->reg7,resubmit(,27), prereqs=1 tcp.dst=80; => actions=set_field:80->tcp_dst, prereqs=ip.proto == 0x6 && (eth.type == 0x800 || eth.type == 0x86dd) diff --git a/tests/test-ovn.c b/tests/test-ovn.c index 0e9d2d2fc..47a2b0a47 100644 --- a/tests/test-ovn.c +++ b/tests/test-ovn.c @@ -1208,7 +1208,7 @@ static void test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) { struct shash symtab; - struct simap ports; + struct simap ports, ct_zones; struct ds input; create_symtab(&symtab); @@ -1217,6 +1217,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) simap_put(&ports, "eth0", 5); simap_put(&ports, "eth1", 6); simap_put(&ports, "LOCAL", ofp_to_u16(OFPP_LOCAL)); + simap_init(&ct_zones); ds_init(&input); while (!ds_get_test_line(&input, stdin)) { @@ -1226,7 +1227,8 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) ofpbuf_init(&ofpacts, 0); error = actions_parse_string(ds_cstr(&input), &symtab, &ports, - 16, 16, 10, 64, &ofpacts, &prereqs); + &ct_zones, 16, 16, 10, 64, + &ofpacts, &prereqs); if (!error) { struct ds output; @@ -1252,6 +1254,7 @@ test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED) ds_destroy(&input); simap_destroy(&ports); + simap_destroy(&ct_zones); expr_symtab_destroy(&symtab); shash_destroy(&symtab); } -- 2.20.1