}
}
+struct ipv4_netaddr {
+ ovs_be32 addr;
+ unsigned int plen;
+};
+
+struct ipv6_netaddr {
+ struct in6_addr addr;
+ unsigned int plen;
+};
+
+struct lport_addresses {
+ struct eth_addr ea;
+ size_t n_ipv4_addrs;
+ struct ipv4_netaddr *ipv4_addrs;
+ size_t n_ipv6_addrs;
+ struct ipv6_netaddr *ipv6_addrs;
+};
+
+/*
+ * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
+ * which should be of the format 'MAC [IP1 IP2 ..]" where IPn should be
+ * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
+ * 'ipv6_addrs' fields of input param 'laddrs'.
+ * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
+ * If input param 'store_ipv6' is true only then extracted ipv6 addresses
+ * are stored in 'ipv6_addrs' fields.
+ * Return true if at least 'MAC' is found in 'address', false otherwise.
+ * Eg 1.
+ * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
+ * 30.0.0.3/23' and 'store_ipv6' = true
+ * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 1.
+ *
+ * Eg. 2
+ * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
+ * 30.0.0.3/23' and 'store_ipv6' = false
+ * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 0.
+ *
+ * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
+ * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
+ */
+static bool
+extract_lport_addresses(char *address, struct lport_addresses *laddrs,
+ bool store_ipv6)
+{
+ char *buf = address;
+ int buf_index = 0;
+ char *buf_end = buf + strlen(address);
+ if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
+ ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
+ return false;
+ }
+
+ ovs_be32 ip4;
+ struct in6_addr ip6;
+ unsigned int plen;
+ char *error;
+
+ laddrs->n_ipv4_addrs = 0;
+ laddrs->n_ipv6_addrs = 0;
+ laddrs->ipv4_addrs = NULL;
+ laddrs->ipv6_addrs = NULL;
+
+ /* Loop through the buffer and extract the IPv4/IPv6 addresses
+ * and store in the 'laddrs'. Break the loop if invalid data is found.
+ */
+ buf += buf_index;
+ while (buf < buf_end) {
+ buf_index = 0;
+ error = ip_parse_cidr_len(buf, &buf_index, &ip4, &plen);
+ if (!error) {
+ laddrs->n_ipv4_addrs++;
+ laddrs->ipv4_addrs = xrealloc(
+ laddrs->ipv4_addrs,
+ sizeof (struct ipv4_netaddr) * laddrs->n_ipv4_addrs);
+ laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].addr = ip4;
+ laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].plen = plen;
+ buf += buf_index;
+ continue;
+ }
+ free(error);
+ error = ipv6_parse_cidr_len(buf, &buf_index, &ip6, &plen);
+ if (!error && store_ipv6) {
+ laddrs->n_ipv6_addrs++;
+ laddrs->ipv6_addrs = xrealloc(
+ laddrs->ipv6_addrs,
+ sizeof(struct ipv6_netaddr) * laddrs->n_ipv6_addrs);
+ memcpy(&laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].addr, &ip6,
+ sizeof(struct in6_addr));
+ laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].plen = plen;
+ }
+
+ if (error) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+ VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
+ free(error);
+ break;
+ }
+ buf += buf_index;
+ }
+
+ return true;
+}
+
/* Appends port security constraints on L2 address field 'eth_addr_field'
* (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
* 'n_port_security' elements, is the collection of port_security constraints
return !lport->enabled || *lport->enabled;
}
+static bool
+lport_is_up(const struct nbrec_logical_port *lport)
+{
+ return !lport->up || *lport->up;
+}
+
static bool
has_stateful_acl(struct ovn_datapath *od)
{
}
static void
-build_acls(struct ovn_datapath *od, struct hmap *lflows)
+build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
{
bool has_stateful = has_stateful_acl(od);
+ struct ovn_port *op;
+ struct ds match_in, match_out;
/* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
* allowed by default. */
* send all IP packets through the conntrack action, which handles
* defragmentation, in order to match L4 headers. */
if (has_stateful) {
+ HMAP_FOR_EACH (op, key_node, ports) {
+ if (op->od == od && !strcmp(op->nbs->type, "router")) {
+ /* Can't use ct() for router ports. Consider the following configuration:
+ lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
+ For a ping from lp1 to lp2, First, the response will go through ct()
+ with a zone for lp2 in the ls2 ingress pipeline on hostB.
+ That ct zone knows about this connection. Next, it goes through ct()
+ with the zone for the router port in the egress pipeline of ls2 on hostB.
+ This zone does not know about the connection, as the icmp request
+ went through the logical router on hostA, not hostB. This would only work
+ with distributed conntrack state across all chassis. */
+
+ ds_init(&match_in);
+ ds_init(&match_out);
+ ds_put_format(&match_in, "ip && inport == %s", op->json_key);
+ ds_put_format(&match_out, "ip && outport == %s", op->json_key);
+ ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
+ ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");
+
+ ds_destroy(&match_in);
+ ds_destroy(&match_out);
+ }
+ }
+
/* Ingress and Egress Pre-ACL Table (Priority 100).
*
* Regardless of whether the ACL is "from-lport" or "to-lport",
continue;
}
- build_acls(od, lflows);
+ build_acls(od, lflows, ports);
}
/* Logical switch ingress table 0: Admission control framework (priority
ds_destroy(&match);
}
+ /* Ingress table 3: Destination lookup, ARP reply for known IPs.
+ * (priority 150). */
+ HMAP_FOR_EACH (op, key_node, ports) {
+ if (!op->nbs) {
+ continue;
+ }
+
+ /*
+ * Add ARP reply flows if either the
+ * - port is up or
+ * - port type is router
+ */
+ if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
+ continue;
+ }
+
+ for (size_t i = 0; i < op->nbs->n_addresses; i++) {
+ struct lport_addresses laddrs;
+ if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
+ false)) {
+ continue;
+ }
+ for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
+ char *match = xasprintf(
+ "arp.tpa == "IP_FMT" && arp.op == 1",
+ IP_ARGS(laddrs.ipv4_addrs[j].addr));
+ char *actions = xasprintf(
+ "eth.dst = eth.src; "
+ "eth.src = "ETH_ADDR_FMT"; "
+ "arp.op = 2; /* ARP reply */ "
+ "arp.tha = arp.sha; "
+ "arp.sha = "ETH_ADDR_FMT"; "
+ "arp.tpa = arp.spa; "
+ "arp.spa = "IP_FMT"; "
+ "outport = inport; "
+ "inport = \"\"; /* Allow sending out inport. */ "
+ "output;",
+ ETH_ADDR_ARGS(laddrs.ea),
+ ETH_ADDR_ARGS(laddrs.ea),
+ IP_ARGS(laddrs.ipv4_addrs[j].addr));
+ ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 150,
+ match, actions);
+ free(match);
+ free(actions);
+ }
+
+ free(laddrs.ipv4_addrs);
+ }
+ }
+
/* Ingress table 3: Destination lookup, broadcast and multicast handling
* (priority 100). */
HMAP_FOR_EACH (op, key_node, ports) {
/* XXX ARP for neighboring router */
} else if (op->od->n_router_ports) {
for (size_t i = 0; i < op->nbs->n_addresses; i++) {
- struct eth_addr ea;
- ovs_be32 ip;
+ struct lport_addresses laddrs;
+ if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
+ false)) {
+ continue;
+ }
- if (ovs_scan(op->nbs->addresses[i],
- ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
- ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
+ for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
+ ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
for (size_t j = 0; j < op->od->n_router_ports; j++) {
/* Get the Logical_Router_Port that the Logical_Port is
* connected to, as 'peer'. */
"outport = %s; "
"output;",
ETH_ADDR_ARGS(peer->mac),
- ETH_ADDR_ARGS(ea),
+ ETH_ADDR_ARGS(laddrs.ea),
peer->json_key);
ovn_lflow_add(lflows, peer->od,
S_ROUTER_IN_ARP, 200, match, actions);
break;
}
}
+
+ free(laddrs.ipv4_addrs);
}
}
}
}
\f
static void
-ovnnb_db_changed(struct northd_context *ctx)
+ovnnb_db_run(struct northd_context *ctx)
{
- VLOG_DBG("ovn-nb db contents have changed.");
-
+ if (!ctx->ovnsb_txn) {
+ return;
+ }
+ VLOG_DBG("ovn-nb db contents may have changed.");
struct hmap datapaths, ports;
build_datapaths(ctx, &datapaths);
build_ports(ctx, &datapaths, &ports);
* need to set the corresponding logical port as 'up' in the northbound DB.
*/
static void
-ovnsb_db_changed(struct northd_context *ctx)
+ovnsb_db_run(struct northd_context *ctx)
{
+ if (!ctx->ovnnb_txn) {
+ return;
+ }
struct hmap lports_hmap;
const struct sbrec_port_binding *sb;
const struct nbrec_logical_port *nb;
int
main(int argc, char *argv[])
{
- extern struct vlog_module VLM_reconnect;
- struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
- unsigned int ovnnb_seqno, ovn_seqno;
int res = EXIT_SUCCESS;
- struct northd_context ctx = {
- .ovnsb_txn = NULL,
- };
- bool ovnnb_changes_pending = false;
- bool ovn_changes_pending = false;
struct unixctl_server *unixctl;
int retval;
bool exiting;
fatal_ignore_sigpipe();
set_program_name(argv[0]);
service_start(&argc, &argv);
- vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
- vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
parse_options(argc, argv);
daemonize_start(false);
sbrec_init();
/* We want to detect all changes to the ovn-nb db. */
- ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
- &nbrec_idl_class, true, true);
-
- ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
- &sbrec_idl_class, false, true);
-
- ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
- add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
-
- ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
- add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
- add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
- add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
- add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
-
- ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
- add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
- add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
-
- ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
- add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
- ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
-
- /*
- * The loop here just runs the IDL in a loop waiting for the seqno to
- * change, which indicates that the contents of the db have changed.
- *
- * If the contents of the ovn-nb db change, the mappings to the ovn-sb
- * db must be recalculated.
- *
- * If the contents of the ovn-sb db change, it means the 'up' state of
- * a port may have changed, as that's the only type of change ovn-northd is
- * watching for.
- */
-
- ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
- ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
+ struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+ ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
+
+ struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
+ ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
+
+ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_logical_flow_col_logical_datapath);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
+
+ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_multicast_group_col_datapath);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_multicast_group_col_tunnel_key);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
+
+ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_datapath_binding_col_tunnel_key);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_datapath_binding_col_external_ids);
+
+ ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_port_binding_col_logical_port);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_port_binding_col_tunnel_key);
+ add_column_noalert(ovnsb_idl_loop.idl,
+ &sbrec_port_binding_col_parent_port);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
+ add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
+ ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
+
+ /* Main loop. */
exiting = false;
while (!exiting) {
- ovsdb_idl_run(ovnnb_idl);
- ovsdb_idl_run(ovnsb_idl);
- unixctl_server_run(unixctl);
+ struct northd_context ctx = {
+ .ovnnb_idl = ovnnb_idl_loop.idl,
+ .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
+ .ovnsb_idl = ovnsb_idl_loop.idl,
+ .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
+ };
- if (!ovsdb_idl_is_alive(ovnnb_idl)) {
- int retval = ovsdb_idl_get_last_error(ovnnb_idl);
- VLOG_ERR("%s: database connection failed (%s)",
- ovnnb_db, ovs_retval_to_string(retval));
- res = EXIT_FAILURE;
- break;
- }
+ ovnnb_db_run(&ctx);
+ ovnsb_db_run(&ctx);
- if (!ovsdb_idl_is_alive(ovnsb_idl)) {
- int retval = ovsdb_idl_get_last_error(ovnsb_idl);
- VLOG_ERR("%s: database connection failed (%s)",
- ovnsb_db, ovs_retval_to_string(retval));
- res = EXIT_FAILURE;
- break;
- }
-
- if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
- ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
- ovnnb_changes_pending = true;
- }
-
- if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
- ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
- ovn_changes_pending = true;
- }
-
- /*
- * If there are any pending changes, we delay recalculating the
- * necessary updates until after an existing transaction finishes.
- * This avoids the possibility of rapid updates causing ovn-northd to
- * never be able to successfully make the corresponding updates to the
- * other db. Instead, pending changes are batched up until the next
- * time we get a chance to calculate the new state and apply it.
- */
-
- if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
- /*
- * The OVN-nb db contents have changed, so create a transaction for
- * updating the OVN-sb DB.
- */
- ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
- ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
- "ovn-northd: northbound db changed");
- ovnnb_db_changed(&ctx);
- ovnnb_changes_pending = false;
- }
-
- if (ovn_changes_pending && !ctx.ovnnb_txn) {
- /*
- * The OVN-sb db contents have changed, so create a transaction for
- * updating the northbound DB.
- */
- ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
- ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
- "ovn-northd: southbound db changed");
- ovnsb_db_changed(&ctx);
- ovn_changes_pending = false;
- }
-
- if (ctx.ovnnb_txn) {
- enum ovsdb_idl_txn_status txn_status;
- txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
- switch (txn_status) {
- case TXN_UNCOMMITTED:
- case TXN_INCOMPLETE:
- /* Come back around and try to commit this transaction again */
- break;
- case TXN_ABORTED:
- case TXN_TRY_AGAIN:
- case TXN_NOT_LOCKED:
- case TXN_ERROR:
- /* Something went wrong, so try creating a new transaction. */
- ovn_changes_pending = true;
- case TXN_UNCHANGED:
- case TXN_SUCCESS:
- ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
- ctx.ovnnb_txn = NULL;
- }
- }
-
- if (ctx.ovnsb_txn) {
- enum ovsdb_idl_txn_status txn_status;
- txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
- switch (txn_status) {
- case TXN_UNCOMMITTED:
- case TXN_INCOMPLETE:
- /* Come back around and try to commit this transaction again */
- break;
- case TXN_ABORTED:
- case TXN_TRY_AGAIN:
- case TXN_NOT_LOCKED:
- case TXN_ERROR:
- /* Something went wrong, so try creating a new transaction. */
- ovnnb_changes_pending = true;
- case TXN_UNCHANGED:
- case TXN_SUCCESS:
- ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
- ctx.ovnsb_txn = NULL;
- }
+ unixctl_server_run(unixctl);
+ unixctl_server_wait(unixctl);
+ if (exiting) {
+ poll_immediate_wake();
}
+ ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
+ ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
- if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
- ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
- ovsdb_idl_wait(ovnnb_idl);
- ovsdb_idl_wait(ovnsb_idl);
- if (ctx.ovnnb_txn) {
- ovsdb_idl_txn_wait(ctx.ovnnb_txn);
- }
- if (ctx.ovnsb_txn) {
- ovsdb_idl_txn_wait(ctx.ovnsb_txn);
- }
- unixctl_server_wait(unixctl);
- if (exiting) {
- poll_immediate_wake();
- }
- poll_block();
- }
+ poll_block();
if (should_service_stop()) {
exiting = true;
}
}
unixctl_server_destroy(unixctl);
- ovsdb_idl_destroy(ovnsb_idl);
- ovsdb_idl_destroy(ovnnb_idl);
+ ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
+ ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
service_stop();
free(default_db_);
-
exit(res);
}