2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
43 static unixctl_cb_func ovn_northd_exit;
45 struct northd_context {
46 struct ovsdb_idl *ovnnb_idl;
47 struct ovsdb_idl *ovnsb_idl;
48 struct ovsdb_idl_txn *ovnnb_txn;
49 struct ovsdb_idl_txn *ovnsb_txn;
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
55 static const char *default_db(void);
58 /* Ingress pipeline stages.
60 * These must be listed in the order that the stages will be executed. */
61 #define INGRESS_STAGES \
62 INGRESS_STAGE(PORT_SEC, port_sec) \
63 INGRESS_STAGE(PRE_ACL, pre_acl) \
64 INGRESS_STAGE(ACL, acl) \
65 INGRESS_STAGE(L2_LKUP, l2_lkup)
68 #define INGRESS_STAGE(NAME, STR) S_IN_##NAME,
74 /* Egress pipeline stages.
76 * These must be listed in the order that the stages will be executed. */
77 #define EGRESS_STAGES \
78 EGRESS_STAGE(PRE_ACL, pre_acl) \
79 EGRESS_STAGE(ACL, acl) \
80 EGRESS_STAGE(PORT_SEC, port_sec)
83 #define EGRESS_STAGE(NAME, STR) S_OUT_##NAME,
93 %s: OVN northbound management daemon\n\
94 usage: %s [OPTIONS]\n\
97 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
99 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
101 -h, --help display this help message\n\
102 -o, --options list available options\n\
103 -V, --version display version information\n\
104 ", program_name, program_name, default_db(), default_db());
107 stream_usage("database", true, true, false);
111 struct hmap_node hmap_node;
116 destroy_tnlids(struct hmap *tnlids)
118 struct tnlid_node *node, *next;
119 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
120 hmap_remove(tnlids, &node->hmap_node);
123 hmap_destroy(tnlids);
127 add_tnlid(struct hmap *set, uint32_t tnlid)
129 struct tnlid_node *node = xmalloc(sizeof *node);
130 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
135 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
137 const struct tnlid_node *node;
138 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
139 if (node->tnlid == tnlid) {
147 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
150 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
151 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
152 if (!tnlid_in_use(set, tnlid)) {
153 add_tnlid(set, tnlid);
159 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
160 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
164 /* The 'key' comes from nb->header_.uuid or sb->external_ids:logical-switch. */
165 struct ovn_datapath {
166 struct hmap_node key_node; /* Index on 'key'. */
167 struct uuid key; /* nb->header_.uuid. */
169 const struct nbrec_logical_switch *nb; /* May be NULL. */
170 const struct sbrec_datapath_binding *sb; /* May be NULL. */
172 struct ovs_list list; /* In list of similar records. */
174 struct hmap port_tnlids;
175 uint32_t port_key_hint;
180 static struct ovn_datapath *
181 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
182 const struct nbrec_logical_switch *nb,
183 const struct sbrec_datapath_binding *sb)
185 struct ovn_datapath *od = xzalloc(sizeof *od);
189 hmap_init(&od->port_tnlids);
190 od->port_key_hint = 0;
191 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
196 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
199 /* Don't remove od->list. It is used within build_datapaths() as a
200 * private list and once we've exited that function it is not safe to
202 hmap_remove(datapaths, &od->key_node);
203 destroy_tnlids(&od->port_tnlids);
208 static struct ovn_datapath *
209 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
211 struct ovn_datapath *od;
213 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
214 if (uuid_equals(uuid, &od->key)) {
221 static struct ovn_datapath *
222 ovn_datapath_from_sbrec(struct hmap *datapaths,
223 const struct sbrec_datapath_binding *sb)
227 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key)) {
230 return ovn_datapath_find(datapaths, &key);
234 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
235 struct ovs_list *sb_only, struct ovs_list *nb_only,
236 struct ovs_list *both)
238 hmap_init(datapaths);
243 const struct sbrec_datapath_binding *sb, *sb_next;
244 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
246 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key)) {
247 ovsdb_idl_txn_add_comment(ctx->ovnsb_txn,
248 "deleting Datapath_Binding "UUID_FMT" that "
249 "lacks external-ids:logical-switch",
250 UUID_ARGS(&sb->header_.uuid));
251 sbrec_datapath_binding_delete(sb);
255 if (ovn_datapath_find(datapaths, &key)) {
256 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
257 VLOG_INFO_RL(&rl, "deleting Datapath_Binding "UUID_FMT" with "
258 "duplicate external-ids:logical-switch "UUID_FMT,
259 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
260 sbrec_datapath_binding_delete(sb);
264 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
266 list_push_back(sb_only, &od->list);
269 const struct nbrec_logical_switch *nb;
270 NBREC_LOGICAL_SWITCH_FOR_EACH (nb, ctx->ovnnb_idl) {
271 struct ovn_datapath *od = ovn_datapath_find(datapaths,
275 list_remove(&od->list);
276 list_push_back(both, &od->list);
278 od = ovn_datapath_create(datapaths, &nb->header_.uuid, nb, NULL);
279 list_push_back(nb_only, &od->list);
285 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
287 static uint32_t hint;
288 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
292 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
294 struct ovs_list sb_only, nb_only, both;
296 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
298 if (!list_is_empty(&nb_only)) {
299 /* First index the in-use datapath tunnel IDs. */
300 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
301 struct ovn_datapath *od;
302 LIST_FOR_EACH (od, list, &both) {
303 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
306 /* Add southbound record for each unmatched northbound record. */
307 LIST_FOR_EACH (od, list, &nb_only) {
308 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
313 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
315 char uuid_s[UUID_LEN + 1];
316 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->nb->header_.uuid));
317 const struct smap id = SMAP_CONST1(&id, "logical-switch", uuid_s);
318 sbrec_datapath_binding_set_external_ids(od->sb, &id);
320 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
322 destroy_tnlids(&dp_tnlids);
325 /* Delete southbound records without northbound matches. */
326 struct ovn_datapath *od, *next;
327 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
328 list_remove(&od->list);
329 sbrec_datapath_binding_delete(od->sb);
330 ovn_datapath_destroy(datapaths, od);
335 struct hmap_node key_node; /* Index on 'key'. */
336 const char *key; /* nb->name and sb->logical_port */
338 const struct nbrec_logical_port *nb; /* May be NULL. */
339 const struct sbrec_port_binding *sb; /* May be NULL. */
341 struct ovn_datapath *od;
343 struct ovs_list list; /* In list of similar records. */
346 static struct ovn_port *
347 ovn_port_create(struct hmap *ports, const char *key,
348 const struct nbrec_logical_port *nb,
349 const struct sbrec_port_binding *sb)
351 struct ovn_port *op = xzalloc(sizeof *op);
355 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
360 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
363 /* Don't remove port->list. It is used within build_ports() as a
364 * private list and once we've exited that function it is not safe to
366 hmap_remove(ports, &port->key_node);
371 static struct ovn_port *
372 ovn_port_find(struct hmap *ports, const char *name)
376 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
377 if (!strcmp(op->key, name)) {
385 ovn_port_allocate_key(struct ovn_datapath *od)
387 return allocate_tnlid(&od->port_tnlids, "port",
388 (1u << 15) - 1, &od->port_key_hint);
392 join_logical_ports(struct northd_context *ctx,
393 struct hmap *datapaths, struct hmap *ports,
394 struct ovs_list *sb_only, struct ovs_list *nb_only,
395 struct ovs_list *both)
402 const struct sbrec_port_binding *sb;
403 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
404 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
406 list_push_back(sb_only, &op->list);
409 struct ovn_datapath *od;
410 HMAP_FOR_EACH (od, key_node, datapaths) {
411 for (size_t i = 0; i < od->nb->n_ports; i++) {
412 const struct nbrec_logical_port *nb = od->nb->ports[i];
413 struct ovn_port *op = ovn_port_find(ports, nb->name);
416 list_remove(&op->list);
417 list_push_back(both, &op->list);
419 op = ovn_port_create(ports, nb->name, nb, NULL);
420 list_push_back(nb_only, &op->list);
428 ovn_port_update_sbrec(const struct ovn_port *op)
430 sbrec_port_binding_set_type(op->sb, op->nb->type);
431 sbrec_port_binding_set_options(op->sb, &op->nb->options);
432 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
433 sbrec_port_binding_set_parent_port(op->sb, op->nb->parent_name);
434 sbrec_port_binding_set_tag(op->sb, op->nb->tag, op->nb->n_tag);
435 sbrec_port_binding_set_mac(op->sb, (const char **) op->nb->addresses,
436 op->nb->n_addresses);
440 build_ports(struct northd_context *ctx, struct hmap *datapaths,
443 struct ovs_list sb_only, nb_only, both;
445 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
447 /* For logical ports that are in both databases, update the southbound
448 * record based on northbound data. Also index the in-use tunnel_keys. */
449 struct ovn_port *op, *next;
450 LIST_FOR_EACH_SAFE (op, next, list, &both) {
451 ovn_port_update_sbrec(op);
453 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
454 if (op->sb->tunnel_key > op->od->port_key_hint) {
455 op->od->port_key_hint = op->sb->tunnel_key;
459 /* Add southbound record for each unmatched northbound record. */
460 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
461 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
466 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
467 ovn_port_update_sbrec(op);
469 sbrec_port_binding_set_logical_port(op->sb, op->key);
470 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
473 /* Delete southbound records without northbound matches. */
474 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
475 list_remove(&op->list);
476 sbrec_port_binding_delete(op->sb);
477 ovn_port_destroy(ports, op);
481 #define OVN_MIN_MULTICAST 32768
482 #define OVN_MAX_MULTICAST 65535
484 struct multicast_group {
486 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
489 #define MC_FLOOD "_MC_flood"
490 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
492 #define MC_UNKNOWN "_MC_unknown"
493 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
496 multicast_group_equal(const struct multicast_group *a,
497 const struct multicast_group *b)
499 return !strcmp(a->name, b->name) && a->key == b->key;
502 /* Multicast group entry. */
503 struct ovn_multicast {
504 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
505 struct ovn_datapath *datapath;
506 const struct multicast_group *group;
508 struct ovn_port **ports;
509 size_t n_ports, allocated_ports;
513 ovn_multicast_hash(const struct ovn_datapath *datapath,
514 const struct multicast_group *group)
516 return hash_pointer(datapath, group->key);
519 static struct ovn_multicast *
520 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
521 const struct multicast_group *group)
523 struct ovn_multicast *mc;
525 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
526 ovn_multicast_hash(datapath, group), mcgroups) {
527 if (mc->datapath == datapath
528 && multicast_group_equal(mc->group, group)) {
536 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
537 struct ovn_port *port)
539 struct ovn_datapath *od = port->od;
540 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
542 mc = xmalloc(sizeof *mc);
543 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
547 mc->allocated_ports = 4;
548 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
550 if (mc->n_ports >= mc->allocated_ports) {
551 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
554 mc->ports[mc->n_ports++] = port;
558 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
561 hmap_remove(mcgroups, &mc->hmap_node);
568 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
569 const struct sbrec_multicast_group *sb)
571 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
572 for (size_t i = 0; i < mc->n_ports; i++) {
573 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
575 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
579 /* Logical flow generation.
581 * This code generates the Logical_Flow table in the southbound database, as a
582 * function of most of the northbound database.
586 struct hmap_node hmap_node;
588 struct ovn_datapath *od;
589 enum ovn_pipeline { P_IN, P_OUT } pipeline;
597 ovn_lflow_hash(const struct ovn_lflow *lflow)
599 size_t hash = uuid_hash(&lflow->od->key);
600 hash = hash_2words((lflow->table_id << 16) | lflow->priority, hash);
601 hash = hash_string(lflow->match, hash);
602 return hash_string(lflow->actions, hash);
606 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
608 return (a->od == b->od
609 && a->pipeline == b->pipeline
610 && a->table_id == b->table_id
611 && a->priority == b->priority
612 && !strcmp(a->match, b->match)
613 && !strcmp(a->actions, b->actions));
617 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
618 enum ovn_pipeline pipeline, uint8_t table_id, uint16_t priority,
619 char *match, char *actions)
622 lflow->pipeline = pipeline;
623 lflow->table_id = table_id;
624 lflow->priority = priority;
625 lflow->match = match;
626 lflow->actions = actions;
630 ingress_stage_to_str(int stage) {
632 #define INGRESS_STAGE(NAME, STR) case S_IN_##NAME: return #STR;
635 default: return "<unknown>";
640 egress_stage_to_str(int stage) {
642 #define EGRESS_STAGE(NAME, STR) case S_OUT_##NAME: return #STR;
645 default: return "<unknown>";
649 /* Adds a row with the specified contents to the Logical_Flow table. */
651 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
652 enum ovn_pipeline pipeline, uint8_t table_id, uint16_t priority,
653 const char *match, const char *actions)
655 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
656 ovn_lflow_init(lflow, od, pipeline, table_id, priority,
657 xstrdup(match), xstrdup(actions));
658 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
661 static struct ovn_lflow *
662 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
663 enum ovn_pipeline pipeline, uint8_t table_id, uint16_t priority,
664 const char *match, const char *actions)
666 struct ovn_lflow target;
667 ovn_lflow_init(&target, od, pipeline, table_id, priority,
668 CONST_CAST(char *, match), CONST_CAST(char *, actions));
670 struct ovn_lflow *lflow;
671 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
673 if (ovn_lflow_equal(lflow, &target)) {
681 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
684 hmap_remove(lflows, &lflow->hmap_node);
686 free(lflow->actions);
691 /* Appends port security constraints on L2 address field 'eth_addr_field'
692 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
693 * 'n_port_security' elements, is the collection of port_security constraints
694 * from an OVN_NB Logical_Port row. */
696 build_port_security(const char *eth_addr_field,
697 char **port_security, size_t n_port_security,
700 size_t base_len = match->length;
701 ds_put_format(match, " && %s == {", eth_addr_field);
704 for (size_t i = 0; i < n_port_security; i++) {
707 if (eth_addr_from_string(port_security[i], &ea)) {
708 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
709 ds_put_char(match, ' ');
713 ds_chomp(match, ' ');
714 ds_put_cstr(match, "}");
717 match->length = base_len;
722 lport_is_enabled(const struct nbrec_logical_port *lport)
724 return !lport->enabled || *lport->enabled;
728 has_stateful_acl(struct ovn_datapath *od)
730 for (size_t i = 0; i < od->nb->n_acls; i++) {
731 struct nbrec_acl *acl = od->nb->acls[i];
732 if (!strcmp(acl->action, "allow-related")) {
741 build_acls(struct ovn_datapath *od, struct hmap *lflows)
743 bool has_stateful = has_stateful_acl(od);
745 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
746 * allowed by default. */
747 ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 0, "1", "next;");
748 ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 0, "1", "next;");
750 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
751 * default. A related rule at priority 1 is added below if there
752 * are any stateful ACLs in this datapath. */
753 ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 0, "1", "next;");
754 ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 0, "1", "next;");
756 /* If there are any stateful ACL rules in this datapath, we must
757 * send all IP packets through the conntrack action, which handles
758 * defragmentation, in order to match L4 headers. */
760 /* Ingress and Egress Pre-ACL Table (Priority 100).
762 * Regardless of whether the ACL is "from-lport" or "to-lport",
763 * we need rules in both the ingress and egress table, because
764 * the return traffic needs to be followed. */
765 ovn_lflow_add(lflows, od, P_IN, S_IN_PRE_ACL, 100,
767 ovn_lflow_add(lflows, od, P_OUT, S_OUT_PRE_ACL, 100,
770 /* Ingress and Egress ACL Table (Priority 1).
772 * By default, traffic is allowed. This is partially handled by
773 * the Priority 0 ACL flows added earlier, but we also need to
774 * commit IP flows. This is because, while the initiator's
775 * direction may not have any stateful rules, the server's may
776 * and then its return traffic would not have an associated
777 * conntrack entry and would return "+invalid". */
778 ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, 1, "ip",
780 ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, 1, "ip",
783 /* Ingress and Egress ACL Table (Priority 65535).
785 * Always drop traffic that's in an invalid state. This is
786 * enforced at a higher priority than ACLs can be defined. */
787 ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
789 ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
792 /* Ingress and Egress ACL Table (Priority 65535).
794 * Always allow traffic that is established to a committed
795 * conntrack entry. This is enforced at a higher priority than
796 * ACLs can be defined. */
797 ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
798 "ct.est && !ct.rel && !ct.new && !ct.inv",
800 ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
801 "ct.est && !ct.rel && !ct.new && !ct.inv",
804 /* Ingress and Egress ACL Table (Priority 65535).
806 * Always allow traffic that is related to an existing conntrack
807 * entry. This is enforced at a higher priority than ACLs can
810 * NOTE: This does not support related data sessions (eg,
811 * a dynamically negotiated FTP data channel), but will allow
812 * related traffic such as an ICMP Port Unreachable through
813 * that's generated from a non-listening UDP port. */
814 ovn_lflow_add(lflows, od, P_IN, S_IN_ACL, UINT16_MAX,
815 "!ct.est && ct.rel && !ct.new && !ct.inv",
817 ovn_lflow_add(lflows, od, P_OUT, S_OUT_ACL, UINT16_MAX,
818 "!ct.est && ct.rel && !ct.new && !ct.inv",
822 /* Ingress or Egress ACL Table (Various priorities). */
823 for (size_t i = 0; i < od->nb->n_acls; i++) {
824 struct nbrec_acl *acl = od->nb->acls[i];
825 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
826 enum ovn_pipeline pipeline = ingress ? P_IN : P_OUT;
827 uint8_t stage = ingress ? S_IN_ACL : S_OUT_ACL;
829 if (!strcmp(acl->action, "allow")) {
830 /* If there are any stateful flows, we must even commit "allow"
831 * actions. This is because, while the initiator's
832 * direction may not have any stateful rules, the server's
833 * may and then its return traffic would not have an
834 * associated conntrack entry and would return "+invalid". */
835 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
836 ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
837 acl->match, actions);
838 } else if (!strcmp(acl->action, "allow-related")) {
839 struct ds match = DS_EMPTY_INITIALIZER;
841 /* Commit the connection tracking entry, which allows all
842 * other traffic related to this entry to flow due to the
843 * 65535 priority flow defined earlier. */
844 ds_put_format(&match, "ct.new && (%s)", acl->match);
845 ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
846 ds_cstr(&match), "ct_commit; next;");
849 } else if (!strcmp(acl->action, "drop")) {
850 ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
851 acl->match, "drop;");
852 } else if (!strcmp(acl->action, "reject")) {
853 /* xxx Need to support "reject". */
854 VLOG_INFO("reject is not a supported action");
855 ovn_lflow_add(lflows, od, pipeline, stage, acl->priority,
856 acl->match, "drop;");
861 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
862 * constructing their contents based on the OVN_NB database. */
864 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
867 /* This flow table structure is documented in ovn-northd(8), so please
868 * update ovn-northd.8.xml if you change anything. */
870 struct hmap lflows = HMAP_INITIALIZER(&lflows);
871 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
873 /* Ingress table 0: Admission control framework (priorities 0 and 100). */
874 struct ovn_datapath *od;
875 HMAP_FOR_EACH (od, key_node, datapaths) {
876 /* Logical VLANs not supported. */
877 ovn_lflow_add(&lflows, od, P_IN, S_IN_PORT_SEC, 100, "vlan.present",
880 /* Broadcast/multicast source address is invalid. */
881 ovn_lflow_add(&lflows, od, P_IN, S_IN_PORT_SEC, 100, "eth.src[40]",
884 /* Port security flows have priority 50 (see below) and will continue
885 * to the next table if packet source is acceptable. */
888 /* Ingress table 0: Ingress port security (priority 50). */
890 HMAP_FOR_EACH (op, key_node, ports) {
891 if (!lport_is_enabled(op->nb)) {
892 /* Drop packets from disabled logical ports (since logical flow
893 * tables are default-drop). */
897 struct ds match = DS_EMPTY_INITIALIZER;
898 ds_put_cstr(&match, "inport == ");
899 json_string_escape(op->key, &match);
900 build_port_security("eth.src",
901 op->nb->port_security, op->nb->n_port_security,
903 ovn_lflow_add(&lflows, op->od, P_IN, S_IN_PORT_SEC, 50,
904 ds_cstr(&match), "next;");
908 /* Ingress table 3: Destination lookup, broadcast and multicast handling
910 HMAP_FOR_EACH (op, key_node, ports) {
911 if (lport_is_enabled(op->nb)) {
912 ovn_multicast_add(&mcgroups, &mc_flood, op);
915 HMAP_FOR_EACH (od, key_node, datapaths) {
916 ovn_lflow_add(&lflows, od, P_IN, S_IN_L2_LKUP, 100, "eth.mcast",
917 "outport = \""MC_FLOOD"\"; output;");
920 /* Ingress table 3: Destination lookup, unicast handling (priority 50). */
921 HMAP_FOR_EACH (op, key_node, ports) {
922 for (size_t i = 0; i < op->nb->n_addresses; i++) {
925 if (eth_addr_from_string(op->nb->addresses[i], &mac)) {
926 struct ds match, actions;
929 ds_put_format(&match, "eth.dst == %s", op->nb->addresses[i]);
932 ds_put_cstr(&actions, "outport = ");
933 json_string_escape(op->nb->name, &actions);
934 ds_put_cstr(&actions, "; output;");
935 ovn_lflow_add(&lflows, op->od, P_IN, S_IN_L2_LKUP, 50,
936 ds_cstr(&match), ds_cstr(&actions));
937 ds_destroy(&actions);
939 } else if (!strcmp(op->nb->addresses[i], "unknown")) {
940 if (lport_is_enabled(op->nb)) {
941 ovn_multicast_add(&mcgroups, &mc_unknown, op);
942 op->od->has_unknown = true;
945 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
948 "%s: invalid syntax '%s' in addresses column",
949 op->nb->name, op->nb->addresses[i]);
954 /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
955 HMAP_FOR_EACH (od, key_node, datapaths) {
956 if (od->has_unknown) {
957 ovn_lflow_add(&lflows, od, P_IN, S_IN_L2_LKUP, 0, "1",
958 "outport = \""MC_UNKNOWN"\"; output;");
962 /* Egress table 2: Egress port security multicast/broadcast (priority
964 HMAP_FOR_EACH (od, key_node, datapaths) {
965 ovn_lflow_add(&lflows, od, P_OUT, S_OUT_PORT_SEC, 100, "eth.mcast",
969 /* Egress table 2: Egress port security (priorities 50 and 150).
971 * Priority 50 rules implement port security for enabled logical port.
973 * Priority 150 rules drop packets to disabled logical ports, so that they
974 * don't even receive multicast or broadcast packets. */
975 HMAP_FOR_EACH (op, key_node, ports) {
979 ds_put_cstr(&match, "outport == ");
980 json_string_escape(op->key, &match);
981 if (lport_is_enabled(op->nb)) {
982 build_port_security("eth.dst",
983 op->nb->port_security, op->nb->n_port_security,
985 ovn_lflow_add(&lflows, op->od, P_OUT, S_OUT_PORT_SEC, 50,
986 ds_cstr(&match), "output;");
988 ovn_lflow_add(&lflows, op->od, P_OUT, S_OUT_PORT_SEC, 150,
989 ds_cstr(&match), "drop;");
995 /* Build pre-ACL and ACL tables for both ingress and egress.
996 * Ingress tables 1 and 2. Egress tables 0 and 1. */
997 HMAP_FOR_EACH (od, key_node, datapaths) {
998 build_acls(od, &lflows);
1001 /* Push changes to the Logical_Flow table to database. */
1002 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1003 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1004 struct ovn_datapath *od
1005 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1007 sbrec_logical_flow_delete(sbflow);
1011 struct ovn_lflow *lflow = ovn_lflow_find(
1012 &lflows, od, (!strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT),
1013 sbflow->table_id, sbflow->priority,
1014 sbflow->match, sbflow->actions);
1016 ovn_lflow_destroy(&lflows, lflow);
1018 sbrec_logical_flow_delete(sbflow);
1021 struct ovn_lflow *lflow, *next_lflow;
1022 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1023 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1024 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1025 sbrec_logical_flow_set_pipeline(
1026 sbflow, lflow->pipeline == P_IN ? "ingress" : "egress");
1027 sbrec_logical_flow_set_table_id(sbflow, lflow->table_id);
1028 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1029 sbrec_logical_flow_set_match(sbflow, lflow->match);
1030 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
1032 const struct smap ids = SMAP_CONST1(
1034 (lflow->pipeline == P_IN
1035 ? ingress_stage_to_str(lflow->table_id)
1036 : egress_stage_to_str(lflow->table_id)));
1037 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1039 ovn_lflow_destroy(&lflows, lflow);
1041 hmap_destroy(&lflows);
1043 /* Push changes to the Multicast_Group table to database. */
1044 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1045 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1046 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1049 sbrec_multicast_group_delete(sbmc);
1053 struct multicast_group group = { .name = sbmc->name,
1054 .key = sbmc->tunnel_key };
1055 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1057 ovn_multicast_update_sbrec(mc, sbmc);
1058 ovn_multicast_destroy(&mcgroups, mc);
1060 sbrec_multicast_group_delete(sbmc);
1063 struct ovn_multicast *mc, *next_mc;
1064 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1065 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1066 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1067 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1068 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1069 ovn_multicast_update_sbrec(mc, sbmc);
1070 ovn_multicast_destroy(&mcgroups, mc);
1072 hmap_destroy(&mcgroups);
1076 ovnnb_db_changed(struct northd_context *ctx)
1078 VLOG_DBG("ovn-nb db contents have changed.");
1080 struct hmap datapaths, ports;
1081 build_datapaths(ctx, &datapaths);
1082 build_ports(ctx, &datapaths, &ports);
1083 build_lflows(ctx, &datapaths, &ports);
1085 struct ovn_datapath *dp, *next_dp;
1086 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1087 ovn_datapath_destroy(&datapaths, dp);
1089 hmap_destroy(&datapaths);
1091 struct ovn_port *port, *next_port;
1092 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1093 ovn_port_destroy(&ports, port);
1095 hmap_destroy(&ports);
1099 * The only change we get notified about is if the 'chassis' column of the
1100 * 'Port_Binding' table changes. When this column is not empty, it means we
1101 * need to set the corresponding logical port as 'up' in the northbound DB.
1104 ovnsb_db_changed(struct northd_context *ctx)
1106 struct hmap lports_hmap;
1107 const struct sbrec_port_binding *sb;
1108 const struct nbrec_logical_port *nb;
1110 struct lport_hash_node {
1111 struct hmap_node node;
1112 const struct nbrec_logical_port *nb;
1113 } *hash_node, *hash_node_next;
1115 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1117 hmap_init(&lports_hmap);
1119 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1120 hash_node = xzalloc(sizeof *hash_node);
1122 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1125 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1127 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1128 hash_string(sb->logical_port, 0),
1130 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1137 /* The logical port doesn't exist for this port binding. This can
1138 * happen under normal circumstances when ovn-northd hasn't gotten
1139 * around to pruning the Port_Binding yet. */
1143 if (sb->chassis && (!nb->up || !*nb->up)) {
1145 nbrec_logical_port_set_up(nb, &up, 1);
1146 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1148 nbrec_logical_port_set_up(nb, &up, 1);
1152 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1153 hmap_remove(&lports_hmap, &hash_node->node);
1156 hmap_destroy(&lports_hmap);
1160 static char *default_db_;
1166 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
1172 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1175 DAEMON_OPTION_ENUMS,
1178 static const struct option long_options[] = {
1179 {"ovnsb-db", required_argument, NULL, 'd'},
1180 {"ovnnb-db", required_argument, NULL, 'D'},
1181 {"help", no_argument, NULL, 'h'},
1182 {"options", no_argument, NULL, 'o'},
1183 {"version", no_argument, NULL, 'V'},
1184 DAEMON_LONG_OPTIONS,
1186 STREAM_SSL_LONG_OPTIONS,
1189 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1194 c = getopt_long(argc, argv, short_options, long_options, NULL);
1200 DAEMON_OPTION_HANDLERS;
1201 VLOG_OPTION_HANDLERS;
1202 STREAM_SSL_OPTION_HANDLERS;
1217 ovs_cmdl_print_options(long_options);
1221 ovs_print_version(0, 0);
1230 ovnsb_db = default_db();
1234 ovnnb_db = default_db();
1237 free(short_options);
1241 add_column_noalert(struct ovsdb_idl *idl,
1242 const struct ovsdb_idl_column *column)
1244 ovsdb_idl_add_column(idl, column);
1245 ovsdb_idl_omit_alert(idl, column);
/* Entry point of the ovn-northd daemon.  As far as the visible lines show,
 * it:
 *
 *   - performs process setup (SIGPIPE handling, program name, service start,
 *     log levels, option parsing);
 *   - starts daemonization and a unixctl server offering an "exit" command;
 *   - creates one IDL for 'ovnnb_db' and one for 'ovnsb_db';
 *   - runs both IDLs, turns sequence-number changes on one database into a
 *     transaction against the other, commits those transactions and waits;
 *   - finally destroys the unixctl server and both IDLs.
 *
 * NOTE(review): several lines of this function (braces, the loop header,
 * some declarations and case labels) are not visible in this extract, so the
 * comments below describe only what the visible lines establish. */
1249 main(int argc, char *argv[])
1251 extern struct vlog_module VLM_reconnect;
1252 struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
/* 'ovn_seqno' (and 'ovn_changes_pending' further down) track the southbound
 * database: they are set from and compared against 'ovnsb_idl'. */
1253 unsigned int ovnnb_seqno, ovn_seqno;
1254 int res = EXIT_SUCCESS;
1255 struct northd_context ctx = {
1258 bool ovnnb_changes_pending = false;
1259 bool ovn_changes_pending = false;
1260 struct unixctl_server *unixctl;
/* Process-level setup, then command-line parsing. */
1264 fatal_ignore_sigpipe();
1265 set_program_name(argv[0]);
1266 service_start(&argc, &argv);
1267 vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1268 vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1269 parse_options(argc, argv);
/* Begin daemonizing, create the unixctl server and register its "exit"
 * command; ovn_northd_exit is passed &exiting.
 * NOTE(review): the handling of 'retval' is not visible here. */
1271 daemonize_start(false);
1273 retval = unixctl_server_create(NULL, &unixctl);
1277 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1279 daemonize_complete();
1284 /* We want to detect all changes to the ovn-nb db. */
1285 ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
1286 &nbrec_idl_class, true, true);
/* The southbound IDL passes 'false' where the northbound one passes 'true',
 * and the tables and columns of interest are then added one by one below.
 * NOTE(review): presumably that argument means "monitor everything by
 * default" -- confirm against ovsdb_idl_create(). */
1288 ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
1289 &sbrec_idl_class, false, true);
1291 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
1292 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
1293 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
1294 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
1295 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
1296 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
1297 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
1299 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
1300 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
1301 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
1302 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
1303 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
1305 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
1306 add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
1307 add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
1309 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
1310 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
1311 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
1312 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
1313 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
1314 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
1315 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
1316 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
1317 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
/* Unlike every southbound column above, 'chassis' is added with plain
 * ovsdb_idl_add_column(), i.e. without the ovsdb_idl_omit_alert() call that
 * add_column_noalert() makes. */
1318 ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
1321 * The loop here just runs the IDL in a loop waiting for the seqno to
1322 * change, which indicates that the contents of the db have changed.
1324 * If the contents of the ovn-nb db change, the mappings to the ovn-sb
1325 * db must be recalculated.
1327 * If the contents of the ovn-sb db change, it means the 'up' state of
1328 * a port may have changed, as that's the only type of change ovn-northd is
/* Record the sequence numbers seen so far so that later changes can be
 * detected by comparison. */
1332 ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1333 ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
/* Run both IDLs and the unixctl server. */
1336 ovsdb_idl_run(ovnnb_idl);
1337 ovsdb_idl_run(ovnsb_idl);
1338 unixctl_server_run(unixctl);
/* If either database connection is no longer alive, log its last error.
 * NOTE(review): what follows the log call (for example setting 'res' and
 * leaving the loop) is not visible in this extract. */
1340 if (!ovsdb_idl_is_alive(ovnnb_idl)) {
1341 int retval = ovsdb_idl_get_last_error(ovnnb_idl);
1342 VLOG_ERR("%s: database connection failed (%s)",
1343 ovnnb_db, ovs_retval_to_string(retval));
1348 if (!ovsdb_idl_is_alive(ovnsb_idl)) {
1349 int retval = ovsdb_idl_get_last_error(ovnsb_idl);
1350 VLOG_ERR("%s: database connection failed (%s)",
1351 ovnsb_db, ovs_retval_to_string(retval));
/* A changed seqno on either IDL is remembered and recorded as a pending
 * change for that database. */
1356 if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
1357 ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1358 ovnnb_changes_pending = true;
1361 if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
1362 ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1363 ovn_changes_pending = true;
1367 * If there are any pending changes, we delay recalculating the
1368 * necessary updates until after an existing transaction finishes.
1369 * This avoids the possibility of rapid updates causing ovn-northd to
1370 * never be able to successfully make the corresponding updates to the
1371 * other db. Instead, pending changes are batched up until the next
1372 * time we get a chance to calculate the new state and apply it.
/* Pending northbound changes are handled only while no southbound
 * transaction is outstanding. */
1375 if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
1377 * The OVN-nb db contents have changed, so create a transaction for
1378 * updating the OVN-sb DB.
1380 ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
1381 ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
1382 "ovn-northd: northbound db changed");
1383 ovnnb_db_changed(&ctx);
1384 ovnnb_changes_pending = false;
/* Likewise, pending southbound changes are handled only while no northbound
 * transaction is outstanding. */
1387 if (ovn_changes_pending && !ctx.ovnnb_txn) {
1389 * The OVN-sb db contents have changed, so create a transaction for
1390 * updating the northbound DB.
1392 ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
1393 ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
1394 "ovn-northd: southbound db changed");
1395 ovnsb_db_changed(&ctx);
1396 ovn_changes_pending = false;
/* Try to commit the outstanding northbound transaction, if any.  Because
 * that transaction is built in response to southbound changes, the failure
 * group below re-arms 'ovn_changes_pending'.
 * NOTE(review): some case labels are missing from this extract, so which
 * statuses reach the destroy-and-clear lines cannot be determined here. */
1399 if (ctx.ovnnb_txn) {
1400 enum ovsdb_idl_txn_status txn_status;
1401 txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
1402 switch (txn_status) {
1403 case TXN_UNCOMMITTED:
1404 case TXN_INCOMPLETE:
1405 /* Come back around and try to commit this transaction again */
1409 case TXN_NOT_LOCKED:
1411 /* Something went wrong, so try creating a new transaction. */
1412 ovn_changes_pending = true;
1415 ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
1416 ctx.ovnnb_txn = NULL;
/* Same commit handling for the southbound transaction; its failure group
 * re-arms 'ovnnb_changes_pending', since that transaction is built from
 * northbound changes. */
1420 if (ctx.ovnsb_txn) {
1421 enum ovsdb_idl_txn_status txn_status;
1422 txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
1423 switch (txn_status) {
1424 case TXN_UNCOMMITTED:
1425 case TXN_INCOMPLETE:
1426 /* Come back around and try to commit this transaction again */
1430 case TXN_NOT_LOCKED:
1432 /* Something went wrong, so try creating a new transaction. */
1433 ovnnb_changes_pending = true;
1436 ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
1437 ctx.ovnsb_txn = NULL;
/* When neither seqno has moved since it was last recorded, register wait
 * conditions for both IDLs, any outstanding transactions and the unixctl
 * server. */
1441 if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
1442 ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
1443 ovsdb_idl_wait(ovnnb_idl);
1444 ovsdb_idl_wait(ovnsb_idl);
1445 if (ctx.ovnnb_txn) {
1446 ovsdb_idl_txn_wait(ctx.ovnnb_txn);
1448 if (ctx.ovnsb_txn) {
1449 ovsdb_idl_txn_wait(ctx.ovnsb_txn);
1451 unixctl_server_wait(unixctl);
/* NOTE(review): the condition guarding this wake-up, and the call that
 * actually blocks, are not visible in this extract. */
1453 poll_immediate_wake();
/* NOTE(review): the body of this check is not visible; presumably it
 * requests that the loop end -- confirm. */
1457 if (should_service_stop()) {
/* Cleanup: release the unixctl server and both IDLs. */
1462 unixctl_server_destroy(unixctl);
1463 ovsdb_idl_destroy(ovnsb_idl);
1464 ovsdb_idl_destroy(ovnnb_idl);
/* unixctl callback that main() registers for the "exit" command.  'exiting_'
 * is the pointer supplied at registration time (main() passes &exiting) and
 * is converted back to 'bool *' here; the connection is then answered with
 * an empty (NULL) reply.
 *
 * NOTE(review): the line between the conversion and the reply is not visible
 * in this extract; presumably it sets '*exiting' to true -- confirm. */
1473 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
1474 const char *argv[] OVS_UNUSED, void *exiting_)
1476 bool *exiting = exiting_;
1479 unixctl_command_reply(conn, NULL);