2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
43 static unixctl_cb_func ovn_northd_exit;
45 struct northd_context {
46 struct ovsdb_idl *ovnnb_idl;
47 struct ovsdb_idl *ovnsb_idl;
48 struct ovsdb_idl_txn *ovnnb_txn;
49 struct ovsdb_idl_txn *ovnsb_txn;
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
55 static const char *default_db(void);
57 /* Pipeline stages. */
59 /* The two pipelines in an OVN logical flow table. */
61 P_IN, /* Ingress pipeline. */
62 P_OUT /* Egress pipeline. */
65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type {
67 DP_SWITCH, /* OVN logical switch. */
68 DP_ROUTER /* OVN logical router. */
71 /* Returns an "enum ovn_stage" built from the arguments.
73 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74 * functions can't be used in enums or switch cases.) */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
78 /* A stage within an OVN logical switch or router.
80 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81 * or router, whether the stage is part of the ingress or egress pipeline, and
82 * the table within that pipeline. The first three components are combined to
83 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84 * S_ROUTER_OUT_DELIVERY. */
86 #define PIPELINE_STAGES \
87 /* Logical switch ingress stages. */ \
88 PIPELINE_STAGE(SWITCH, IN, PORT_SEC, 0, "switch_in_port_sec") \
89 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 1, "switch_in_pre_acl") \
90 PIPELINE_STAGE(SWITCH, IN, ACL, 2, "switch_in_acl") \
91 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 3, "switch_in_l2_lkup") \
93 /* Logical switch egress stages. */ \
94 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "switch_out_pre_acl") \
95 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "switch_out_acl") \
96 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC, 2, "switch_out_port_sec") \
98 /* Logical router ingress stages. */ \
99 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "router_in_admission") \
100 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "router_in_ip_input") \
101 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "router_in_ip_routing") \
102 PIPELINE_STAGE(ROUTER, IN, ARP, 3, "router_in_arp") \
104 /* Logical router egress stages. */ \
105 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "router_out_delivery")
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
108 S_##DP_TYPE##_##PIPELINE##_##STAGE \
109 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
111 #undef PIPELINE_STAGE
114 /* Due to various hard-coded priorities need to implement ACLs, the
115 * northbound database supports a smaller range of ACL priorities than
116 * are available to logical flows. This value is added to an ACL
117 * priority to determine the ACL's logical flow priority. */
118 #define OVN_ACL_PRI_OFFSET 1000
120 /* Returns an "enum ovn_stage" built from the arguments. */
121 static enum ovn_stage
122 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
125 return OVN_STAGE_BUILD(dp_type, pipeline, table);
128 /* Returns the pipeline to which 'stage' belongs. */
129 static enum ovn_pipeline
130 ovn_stage_get_pipeline(enum ovn_stage stage)
132 return (stage >> 8) & 1;
135 /* Returns the table to which 'stage' belongs. */
137 ovn_stage_get_table(enum ovn_stage stage)
142 /* Returns a string name for 'stage'. */
144 ovn_stage_to_str(enum ovn_stage stage)
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
148 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
150 #undef PIPELINE_STAGE
151 default: return "<unknown>";
159 %s: OVN northbound management daemon\n\
160 usage: %s [OPTIONS]\n\
163 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
165 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
167 -h, --help display this help message\n\
168 -o, --options list available options\n\
169 -V, --version display version information\n\
170 ", program_name, program_name, default_db(), default_db());
173 stream_usage("database", true, true, false);
177 struct hmap_node hmap_node;
182 destroy_tnlids(struct hmap *tnlids)
184 struct tnlid_node *node, *next;
185 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
186 hmap_remove(tnlids, &node->hmap_node);
189 hmap_destroy(tnlids);
193 add_tnlid(struct hmap *set, uint32_t tnlid)
195 struct tnlid_node *node = xmalloc(sizeof *node);
196 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
201 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
203 const struct tnlid_node *node;
204 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
205 if (node->tnlid == tnlid) {
213 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
216 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
217 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
218 if (!tnlid_in_use(set, tnlid)) {
219 add_tnlid(set, tnlid);
225 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
226 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
230 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
231 * sb->external_ids:logical-switch. */
232 struct ovn_datapath {
233 struct hmap_node key_node; /* Index on 'key'. */
234 struct uuid key; /* (nbs/nbr)->header_.uuid. */
236 const struct nbrec_logical_switch *nbs; /* May be NULL. */
237 const struct nbrec_logical_router *nbr; /* May be NULL. */
238 const struct sbrec_datapath_binding *sb; /* May be NULL. */
240 struct ovs_list list; /* In list of similar records. */
242 /* Logical router data (digested from nbr). */
245 /* Logical switch data. */
246 struct ovn_port **router_ports;
247 size_t n_router_ports;
249 struct hmap port_tnlids;
250 uint32_t port_key_hint;
255 static struct ovn_datapath *
256 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
257 const struct nbrec_logical_switch *nbs,
258 const struct nbrec_logical_router *nbr,
259 const struct sbrec_datapath_binding *sb)
261 struct ovn_datapath *od = xzalloc(sizeof *od);
266 hmap_init(&od->port_tnlids);
267 od->port_key_hint = 0;
268 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
273 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
276 /* Don't remove od->list. It is used within build_datapaths() as a
277 * private list and once we've exited that function it is not safe to
279 hmap_remove(datapaths, &od->key_node);
280 destroy_tnlids(&od->port_tnlids);
281 free(od->router_ports);
286 static struct ovn_datapath *
287 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
289 struct ovn_datapath *od;
291 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
292 if (uuid_equals(uuid, &od->key)) {
299 static struct ovn_datapath *
300 ovn_datapath_from_sbrec(struct hmap *datapaths,
301 const struct sbrec_datapath_binding *sb)
305 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
306 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
309 return ovn_datapath_find(datapaths, &key);
313 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
314 struct ovs_list *sb_only, struct ovs_list *nb_only,
315 struct ovs_list *both)
317 hmap_init(datapaths);
322 const struct sbrec_datapath_binding *sb, *sb_next;
323 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
325 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327 ovsdb_idl_txn_add_comment(
329 "deleting Datapath_Binding "UUID_FMT" that lacks "
330 "external-ids:logical-switch and "
331 "external-ids:logical-router",
332 UUID_ARGS(&sb->header_.uuid));
333 sbrec_datapath_binding_delete(sb);
337 if (ovn_datapath_find(datapaths, &key)) {
338 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
340 &rl, "deleting Datapath_Binding "UUID_FMT" with "
341 "duplicate external-ids:logical-switch/router "UUID_FMT,
342 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
343 sbrec_datapath_binding_delete(sb);
347 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
349 list_push_back(sb_only, &od->list);
352 const struct nbrec_logical_switch *nbs;
353 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
354 struct ovn_datapath *od = ovn_datapath_find(datapaths,
358 list_remove(&od->list);
359 list_push_back(both, &od->list);
361 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
363 list_push_back(nb_only, &od->list);
367 const struct nbrec_logical_router *nbr;
368 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
369 struct ovn_datapath *od = ovn_datapath_find(datapaths,
374 list_remove(&od->list);
375 list_push_back(both, &od->list);
378 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
380 "duplicate UUID "UUID_FMT" in OVN_Northbound",
381 UUID_ARGS(&nbr->header_.uuid));
385 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
387 list_push_back(nb_only, &od->list);
391 if (nbr->default_gw) {
393 char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
394 if (error || !ip || mask != OVS_BE32_MAX) {
395 static struct vlog_rate_limit rl
396 = VLOG_RATE_LIMIT_INIT(5, 1);
397 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
407 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
409 static uint32_t hint;
410 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
416 struct ovs_list sb_only, nb_only, both;
418 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
420 if (!list_is_empty(&nb_only)) {
421 /* First index the in-use datapath tunnel IDs. */
422 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423 struct ovn_datapath *od;
424 LIST_FOR_EACH (od, list, &both) {
425 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
428 /* Add southbound record for each unmatched northbound record. */
429 LIST_FOR_EACH (od, list, &nb_only) {
430 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
435 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
437 char uuid_s[UUID_LEN + 1];
438 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439 const char *key = od->nbs ? "logical-switch" : "logical-router";
440 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441 sbrec_datapath_binding_set_external_ids(od->sb, &id);
443 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
445 destroy_tnlids(&dp_tnlids);
448 /* Delete southbound records without northbound matches. */
449 struct ovn_datapath *od, *next;
450 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451 list_remove(&od->list);
452 sbrec_datapath_binding_delete(od->sb);
453 ovn_datapath_destroy(datapaths, od);
458 struct hmap_node key_node; /* Index on 'key'. */
459 char *key; /* nbs->name, nbr->name, sb->logical_port. */
460 char *json_key; /* 'key', quoted for use in JSON. */
462 const struct nbrec_logical_port *nbs; /* May be NULL. */
463 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
464 const struct sbrec_port_binding *sb; /* May be NULL. */
466 /* Logical router port data. */
467 ovs_be32 ip, mask; /* 192.168.10.123/24. */
468 ovs_be32 network; /* 192.168.10.0. */
469 ovs_be32 bcast; /* 192.168.10.255. */
471 struct ovn_port *peer;
473 struct ovn_datapath *od;
475 struct ovs_list list; /* In list of similar records. */
478 static struct ovn_port *
479 ovn_port_create(struct hmap *ports, const char *key,
480 const struct nbrec_logical_port *nbs,
481 const struct nbrec_logical_router_port *nbr,
482 const struct sbrec_port_binding *sb)
484 struct ovn_port *op = xzalloc(sizeof *op);
486 struct ds json_key = DS_EMPTY_INITIALIZER;
487 json_string_escape(key, &json_key);
488 op->json_key = ds_steal_cstr(&json_key);
490 op->key = xstrdup(key);
494 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
499 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
502 /* Don't remove port->list. It is used within build_ports() as a
503 * private list and once we've exited that function it is not safe to
505 hmap_remove(ports, &port->key_node);
506 free(port->json_key);
512 static struct ovn_port *
513 ovn_port_find(struct hmap *ports, const char *name)
517 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
518 if (!strcmp(op->key, name)) {
526 ovn_port_allocate_key(struct ovn_datapath *od)
528 return allocate_tnlid(&od->port_tnlids, "port",
529 (1u << 15) - 1, &od->port_key_hint);
533 join_logical_ports(struct northd_context *ctx,
534 struct hmap *datapaths, struct hmap *ports,
535 struct ovs_list *sb_only, struct ovs_list *nb_only,
536 struct ovs_list *both)
543 const struct sbrec_port_binding *sb;
544 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
547 list_push_back(sb_only, &op->list);
550 struct ovn_datapath *od;
551 HMAP_FOR_EACH (od, key_node, datapaths) {
553 for (size_t i = 0; i < od->nbs->n_ports; i++) {
554 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555 struct ovn_port *op = ovn_port_find(ports, nbs->name);
557 if (op->nbs || op->nbr) {
558 static struct vlog_rate_limit rl
559 = VLOG_RATE_LIMIT_INIT(5, 1);
560 VLOG_WARN_RL(&rl, "duplicate logical port %s",
565 list_remove(&op->list);
566 list_push_back(both, &op->list);
568 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569 list_push_back(nb_only, &op->list);
575 for (size_t i = 0; i < od->nbr->n_ports; i++) {
576 const struct nbrec_logical_router_port *nbr
580 if (!eth_addr_from_string(nbr->mac, &mac)) {
581 static struct vlog_rate_limit rl
582 = VLOG_RATE_LIMIT_INIT(5, 1);
583 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
588 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590 static struct vlog_rate_limit rl
591 = VLOG_RATE_LIMIT_INIT(5, 1);
592 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
597 char name[UUID_LEN + 1];
598 snprintf(name, sizeof name, UUID_FMT,
599 UUID_ARGS(&nbr->header_.uuid));
600 struct ovn_port *op = ovn_port_find(ports, name);
602 if (op->nbs || op->nbr) {
603 static struct vlog_rate_limit rl
604 = VLOG_RATE_LIMIT_INIT(5, 1);
605 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
610 list_remove(&op->list);
611 list_push_back(both, &op->list);
613 op = ovn_port_create(ports, name, NULL, nbr, NULL);
614 list_push_back(nb_only, &op->list);
619 op->network = ip & mask;
620 op->bcast = ip | ~mask;
628 /* Connect logical router ports, and logical switch ports of type "router",
631 HMAP_FOR_EACH (op, key_node, ports) {
632 if (op->nbs && !strcmp(op->nbs->type, "router")) {
633 const char *peer_name = smap_get(&op->nbs->options, "router-port");
638 struct ovn_port *peer = ovn_port_find(ports, peer_name);
639 if (!peer || !peer->nbr) {
645 op->od->router_ports = xrealloc(
646 op->od->router_ports,
647 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
648 op->od->router_ports[op->od->n_router_ports++] = op;
649 } else if (op->nbr && op->nbr->peer) {
650 char peer_name[UUID_LEN + 1];
651 snprintf(peer_name, sizeof peer_name, UUID_FMT,
652 UUID_ARGS(&op->nbr->peer->header_.uuid));
653 op->peer = ovn_port_find(ports, peer_name);
659 ovn_port_update_sbrec(const struct ovn_port *op)
661 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
663 sbrec_port_binding_set_type(op->sb, "patch");
665 const char *peer = op->peer ? op->peer->key : "<error>";
666 const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
667 sbrec_port_binding_set_options(op->sb, &ids);
669 sbrec_port_binding_set_parent_port(op->sb, NULL);
670 sbrec_port_binding_set_tag(op->sb, NULL, 0);
671 sbrec_port_binding_set_mac(op->sb, NULL, 0);
673 if (strcmp(op->nbs->type, "router")) {
674 sbrec_port_binding_set_type(op->sb, op->nbs->type);
675 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
677 sbrec_port_binding_set_type(op->sb, "patch");
679 const char *router_port = smap_get(&op->nbs->options,
682 router_port = "<error>";
684 const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
685 sbrec_port_binding_set_options(op->sb, &ids);
687 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
688 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
689 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
690 op->nbs->n_addresses);
695 build_ports(struct northd_context *ctx, struct hmap *datapaths,
698 struct ovs_list sb_only, nb_only, both;
700 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
702 /* For logical ports that are in both databases, update the southbound
703 * record based on northbound data. Also index the in-use tunnel_keys. */
704 struct ovn_port *op, *next;
705 LIST_FOR_EACH_SAFE (op, next, list, &both) {
706 ovn_port_update_sbrec(op);
708 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
709 if (op->sb->tunnel_key > op->od->port_key_hint) {
710 op->od->port_key_hint = op->sb->tunnel_key;
714 /* Add southbound record for each unmatched northbound record. */
715 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
716 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
721 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
722 ovn_port_update_sbrec(op);
724 sbrec_port_binding_set_logical_port(op->sb, op->key);
725 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
728 /* Delete southbound records without northbound matches. */
729 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
730 list_remove(&op->list);
731 sbrec_port_binding_delete(op->sb);
732 ovn_port_destroy(ports, op);
736 #define OVN_MIN_MULTICAST 32768
737 #define OVN_MAX_MULTICAST 65535
739 struct multicast_group {
741 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
744 #define MC_FLOOD "_MC_flood"
745 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
747 #define MC_UNKNOWN "_MC_unknown"
748 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
751 multicast_group_equal(const struct multicast_group *a,
752 const struct multicast_group *b)
754 return !strcmp(a->name, b->name) && a->key == b->key;
757 /* Multicast group entry. */
758 struct ovn_multicast {
759 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
760 struct ovn_datapath *datapath;
761 const struct multicast_group *group;
763 struct ovn_port **ports;
764 size_t n_ports, allocated_ports;
768 ovn_multicast_hash(const struct ovn_datapath *datapath,
769 const struct multicast_group *group)
771 return hash_pointer(datapath, group->key);
774 static struct ovn_multicast *
775 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
776 const struct multicast_group *group)
778 struct ovn_multicast *mc;
780 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
781 ovn_multicast_hash(datapath, group), mcgroups) {
782 if (mc->datapath == datapath
783 && multicast_group_equal(mc->group, group)) {
791 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
792 struct ovn_port *port)
794 struct ovn_datapath *od = port->od;
795 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
797 mc = xmalloc(sizeof *mc);
798 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
802 mc->allocated_ports = 4;
803 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
805 if (mc->n_ports >= mc->allocated_ports) {
806 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
809 mc->ports[mc->n_ports++] = port;
813 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
816 hmap_remove(mcgroups, &mc->hmap_node);
823 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
824 const struct sbrec_multicast_group *sb)
826 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
827 for (size_t i = 0; i < mc->n_ports; i++) {
828 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
830 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
834 /* Logical flow generation.
836 * This code generates the Logical_Flow table in the southbound database, as a
837 * function of most of the northbound database.
841 struct hmap_node hmap_node;
843 struct ovn_datapath *od;
844 enum ovn_stage stage;
851 ovn_lflow_hash(const struct ovn_lflow *lflow)
853 size_t hash = uuid_hash(&lflow->od->key);
854 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
855 hash = hash_string(lflow->match, hash);
856 return hash_string(lflow->actions, hash);
860 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
862 return (a->od == b->od
863 && a->stage == b->stage
864 && a->priority == b->priority
865 && !strcmp(a->match, b->match)
866 && !strcmp(a->actions, b->actions));
870 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
871 enum ovn_stage stage, uint16_t priority,
872 char *match, char *actions)
875 lflow->stage = stage;
876 lflow->priority = priority;
877 lflow->match = match;
878 lflow->actions = actions;
881 /* Adds a row with the specified contents to the Logical_Flow table. */
883 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
884 enum ovn_stage stage, uint16_t priority,
885 const char *match, const char *actions)
887 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
888 ovn_lflow_init(lflow, od, stage, priority,
889 xstrdup(match), xstrdup(actions));
890 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
893 static struct ovn_lflow *
894 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
895 enum ovn_stage stage, uint16_t priority,
896 const char *match, const char *actions)
898 struct ovn_lflow target;
899 ovn_lflow_init(&target, od, stage, priority,
900 CONST_CAST(char *, match), CONST_CAST(char *, actions));
902 struct ovn_lflow *lflow;
903 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
905 if (ovn_lflow_equal(lflow, &target)) {
913 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
916 hmap_remove(lflows, &lflow->hmap_node);
918 free(lflow->actions);
923 /* Appends port security constraints on L2 address field 'eth_addr_field'
924 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
925 * 'n_port_security' elements, is the collection of port_security constraints
926 * from an OVN_NB Logical_Port row. */
928 build_port_security(const char *eth_addr_field,
929 char **port_security, size_t n_port_security,
932 size_t base_len = match->length;
933 ds_put_format(match, " && %s == {", eth_addr_field);
936 for (size_t i = 0; i < n_port_security; i++) {
939 if (eth_addr_from_string(port_security[i], &ea)) {
940 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
941 ds_put_char(match, ' ');
945 ds_chomp(match, ' ');
946 ds_put_cstr(match, "}");
949 match->length = base_len;
954 lport_is_enabled(const struct nbrec_logical_port *lport)
956 return !lport->enabled || *lport->enabled;
960 has_stateful_acl(struct ovn_datapath *od)
962 for (size_t i = 0; i < od->nbs->n_acls; i++) {
963 struct nbrec_acl *acl = od->nbs->acls[i];
964 if (!strcmp(acl->action, "allow-related")) {
973 build_acls(struct ovn_datapath *od, struct hmap *lflows)
975 bool has_stateful = has_stateful_acl(od);
977 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
978 * allowed by default. */
979 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
980 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
982 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
983 * default. A related rule at priority 1 is added below if there
984 * are any stateful ACLs in this datapath. */
985 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
986 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
988 /* If there are any stateful ACL rules in this dapapath, we must
989 * send all IP packets through the conntrack action, which handles
990 * defragmentation, in order to match L4 headers. */
992 /* Ingress and Egress Pre-ACL Table (Priority 100).
994 * Regardless of whether the ACL is "from-lport" or "to-lport",
995 * we need rules in both the ingress and egress table, because
996 * the return traffic needs to be followed. */
997 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
998 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
1000 /* Ingress and Egress ACL Table (Priority 1).
1002 * By default, traffic is allowed. This is partially handled by
1003 * the Priority 0 ACL flows added earlier, but we also need to
1004 * commit IP flows. This is because, while the initiater's
1005 * direction may not have any stateful rules, the server's may
1006 * and then its return traffic would not have an associated
1007 * conntrack entry and would return "+invalid". */
1008 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1009 "ct_commit; next;");
1010 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1011 "ct_commit; next;");
1013 /* Ingress and Egress ACL Table (Priority 65535).
1015 * Always drop traffic that's in an invalid state. This is
1016 * enforced at a higher priority than ACLs can be defined. */
1017 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1019 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1022 /* Ingress and Egress ACL Table (Priority 65535).
1024 * Always allow traffic that is established to a committed
1025 * conntrack entry. This is enforced at a higher priority than
1026 * ACLs can be defined. */
1027 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1028 "ct.est && !ct.rel && !ct.new && !ct.inv",
1030 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1031 "ct.est && !ct.rel && !ct.new && !ct.inv",
1034 /* Ingress and Egress ACL Table (Priority 65535).
1036 * Always allow traffic that is related to an existing conntrack
1037 * entry. This is enforced at a higher priority than ACLs can
1040 * NOTE: This does not support related data sessions (eg,
1041 * a dynamically negotiated FTP data channel), but will allow
1042 * related traffic such as an ICMP Port Unreachable through
1043 * that's generated from a non-listening UDP port. */
1044 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1045 "!ct.est && ct.rel && !ct.new && !ct.inv",
1047 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1048 "!ct.est && ct.rel && !ct.new && !ct.inv",
1052 /* Ingress or Egress ACL Table (Various priorities). */
1053 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1054 struct nbrec_acl *acl = od->nbs->acls[i];
1055 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1056 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1058 if (!strcmp(acl->action, "allow")) {
1059 /* If there are any stateful flows, we must even commit "allow"
1060 * actions. This is because, while the initiater's
1061 * direction may not have any stateful rules, the server's
1062 * may and then its return traffic would not have an
1063 * associated conntrack entry and would return "+invalid". */
1064 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1065 ovn_lflow_add(lflows, od, stage,
1066 acl->priority + OVN_ACL_PRI_OFFSET,
1067 acl->match, actions);
1068 } else if (!strcmp(acl->action, "allow-related")) {
1069 struct ds match = DS_EMPTY_INITIALIZER;
1071 /* Commit the connection tracking entry, which allows all
1072 * other traffic related to this entry to flow due to the
1073 * 65535 priority flow defined earlier. */
1074 ds_put_format(&match, "ct.new && (%s)", acl->match);
1075 ovn_lflow_add(lflows, od, stage,
1076 acl->priority + OVN_ACL_PRI_OFFSET,
1077 ds_cstr(&match), "ct_commit; next;");
1080 } else if (!strcmp(acl->action, "drop")) {
1081 ovn_lflow_add(lflows, od, stage,
1082 acl->priority + OVN_ACL_PRI_OFFSET,
1083 acl->match, "drop;");
1084 } else if (!strcmp(acl->action, "reject")) {
1085 /* xxx Need to support "reject". */
1086 VLOG_INFO("reject is not a supported action");
1087 ovn_lflow_add(lflows, od, stage,
1088 acl->priority + OVN_ACL_PRI_OFFSET,
1089 acl->match, "drop;");
1095 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1096 struct hmap *lflows, struct hmap *mcgroups)
1098 /* This flow table structure is documented in ovn-northd(8), so please
1099 * update ovn-northd.8.xml if you change anything. */
1101 /* Build pre-ACL and ACL tables for both ingress and egress.
1102 * Ingress tables 1 and 2. Egress tables 0 and 1. */
1103 struct ovn_datapath *od;
1104 HMAP_FOR_EACH (od, key_node, datapaths) {
1109 build_acls(od, lflows);
1112 /* Logical switch ingress table 0: Admission control framework (priority
1114 HMAP_FOR_EACH (od, key_node, datapaths) {
1119 /* Logical VLANs not supported. */
1120 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1123 /* Broadcast/multicast source address is invalid. */
1124 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1127 /* Port security flows have priority 50 (see below) and will continue
1128 * to the next table if packet source is acceptable. */
1131 /* Logical switch ingress table 0: Ingress port security (priority 50). */
1132 struct ovn_port *op;
1133 HMAP_FOR_EACH (op, key_node, ports) {
1138 if (!lport_is_enabled(op->nbs)) {
1139 /* Drop packets from disabled logical ports (since logical flow
1140 * tables are default-drop). */
1144 struct ds match = DS_EMPTY_INITIALIZER;
1145 ds_put_format(&match, "inport == %s", op->json_key);
1146 build_port_security("eth.src",
1147 op->nbs->port_security, op->nbs->n_port_security,
1149 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1150 ds_cstr(&match), "next;");
1154 /* Ingress table 3: Destination lookup, broadcast and multicast handling
1155 * (priority 100). */
1156 HMAP_FOR_EACH (op, key_node, ports) {
1161 if (lport_is_enabled(op->nbs)) {
1162 ovn_multicast_add(mcgroups, &mc_flood, op);
1165 HMAP_FOR_EACH (od, key_node, datapaths) {
1170 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1171 "outport = \""MC_FLOOD"\"; output;");
1174 /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
1175 HMAP_FOR_EACH (op, key_node, ports) {
1180 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1181 struct eth_addr mac;
1183 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1184 struct ds match, actions;
1187 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1188 ETH_ADDR_ARGS(mac));
1191 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1192 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1193 ds_cstr(&match), ds_cstr(&actions));
1194 ds_destroy(&actions);
1196 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1197 if (lport_is_enabled(op->nbs)) {
1198 ovn_multicast_add(mcgroups, &mc_unknown, op);
1199 op->od->has_unknown = true;
1202 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1205 "%s: invalid syntax '%s' in addresses column",
1206 op->nbs->name, op->nbs->addresses[i]);
1211 /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1212 HMAP_FOR_EACH (od, key_node, datapaths) {
1217 if (od->has_unknown) {
1218 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1219 "outport = \""MC_UNKNOWN"\"; output;");
1223 /* Egress table 2: Egress port security multicast/broadcast (priority
1225 HMAP_FOR_EACH (od, key_node, datapaths) {
1230 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1234 /* Egress table 2: Egress port security (priorities 50 and 150).
1236 * Priority 50 rules implement port security for enabled logical port.
1238 * Priority 150 rules drop packets to disabled logical ports, so that they
1239 * don't even receive multicast or broadcast packets. */
1240 HMAP_FOR_EACH (op, key_node, ports) {
1245 struct ds match = DS_EMPTY_INITIALIZER;
1246 ds_put_format(&match, "outport == %s", op->json_key);
1247 if (lport_is_enabled(op->nbs)) {
1248 build_port_security("eth.dst", op->nbs->port_security,
1249 op->nbs->n_port_security, &match);
1250 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1251 ds_cstr(&match), "output;");
1253 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1254 ds_cstr(&match), "drop;");
/* Returns true if logical router port 'lrport' is enabled: the northbound
 * 'enabled' column is either unset (defaults to enabled) or explicitly true.
 * NOTE(review): the listing is elided here (embedded line numbers skip), so
 * the return type and braces fall in omitted lines. */
1262 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1264 return !lrport->enabled || *lrport->enabled;
/* Adds to 'lflows' a logical-router IP routing flow for datapath 'od' that
 * matches ip4.dst against 'network'/'mask'.  The action decrements ip.ttl
 * and stores the next hop in reg0: 'gateway' when one is given, otherwise
 * ip4.dst itself (directly attached network).  The flow priority is the
 * prefix length (count of 1-bits in 'mask') so that longer prefixes win. */
1268 add_route(struct hmap *lflows, struct ovn_datapath *od,
1269 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1271 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1272 IP_ARGS(network), IP_ARGS(mask));
1274 struct ds actions = DS_EMPTY_INITIALIZER;
1275 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
/* NOTE(review): the if (gateway) / else around the next two lines is in
 * elided lines — only one branch runs per call. */
1277 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1279 ds_put_cstr(&actions, "ip4.dst");
1281 ds_put_cstr(&actions, "; next;");
1283 /* The priority here is calculated to implement longest-prefix-match
1285 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1286 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
/* NOTE(review): free(match) presumably follows in an elided line. */
1287 ds_destroy(&actions);
/* Builds the logical flows for every OVN logical router in 'datapaths' and
 * appends them to 'lflows'.  Router pipeline built here: ingress table 0
 * (admission control), table 1 (IP input: self-address drops, ARP reply,
 * TTL expiry), table 2 (IP routing into reg0), table 3 (ARP resolution of
 * reg0 into outport/eth.dst), and egress table 0 (delivery).
 * NOTE(review): this listing is elided (embedded line numbers skip), so
 * some closing braces, 'continue's, and free() calls are in omitted lines. */
1292 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1293 struct hmap *lflows)
1295 /* This flow table structure is documented in ovn-northd(8), so please
1296 * update ovn-northd.8.xml if you change anything. */
1298 /* XXX ICMP echo reply */
1300 /* Logical router ingress table 0: Admission control framework. */
1301 struct ovn_datapath *od;
1302 HMAP_FOR_EACH (od, key_node, datapaths) {
1307 /* Logical VLANs not supported.
1308 * Broadcast/multicast source address is invalid. */
1309 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1310 "vlan.present || eth.src[40]", "drop;");
1313 /* Logical router ingress table 0: match (priority 50). */
1314 struct ovn_port *op;
1315 HMAP_FOR_EACH (op, key_node, ports) {
1320 if (!lrport_is_enabled(op->nbr)) {
1321 /* Drop packets from disabled logical ports (since logical flow
1322 * tables are default-drop). */
/* Admit only unicast to this port's MAC, or multicast, arriving on the
 * port itself. */
1326 char *match = xasprintf(
1327 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1328 ETH_ADDR_ARGS(op->mac), op->json_key);
1329 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1334 /* Logical router ingress table 1: IP Input. */
1335 HMAP_FOR_EACH (od, key_node, datapaths) {
1340 /* L3 admission control: drop multicast and broadcast source, localhost
1341 * source or destination, and zero network source or destination
1342 * (priority 100). */
1343 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1345 "ip4.src == 255.255.255.255 || "
1346 "ip4.src == 127.0.0.0/8 || "
1347 "ip4.dst == 127.0.0.0/8 || "
1348 "ip4.src == 0.0.0.0/8 || "
1349 "ip4.dst == 0.0.0.0/8",
1352 /* Drop Ethernet local broadcast. By definition this traffic should
1353 * not be forwarded.*/
1354 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1355 "eth.bcast", "drop;");
1357 /* Drop IP multicast. */
1358 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1359 "ip4.mcast", "drop;");
/* TTL discard: drop packets whose TTL is already 0 or 1 (priority 30). */
1363 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1364 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1365 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1368 /* Pass other traffic not already handled to the next table for
1370 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1373 HMAP_FOR_EACH (op, key_node, ports) {
1378 /* L3 admission control: drop packets that originate from an IP address
1379 * owned by the router or a broadcast address known to the router
1380 * (priority 100). */
1381 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1382 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1383 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1387 /* ARP reply. These flows reply to ARP requests for the router's own
/* Match ARP requests (arp.op == 1) for this port's IP arriving on the
 * port; the action rewrites the packet in place into an ARP reply. */
1390 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1391 op->json_key, IP_ARGS(op->ip));
1392 char *actions = xasprintf(
1393 "eth.dst = eth.src; "
1394 "eth.src = "ETH_ADDR_FMT"; "
1395 "arp.op = 2; /* ARP reply */ "
1396 "arp.tha = arp.sha; "
1397 "arp.sha = "ETH_ADDR_FMT"; "
1398 "arp.tpa = arp.spa; "
1399 "arp.spa = "IP_FMT"; "
1401 "inport = \"\"; /* Allow sending out inport. */ "
1403 ETH_ADDR_ARGS(op->mac),
1404 ETH_ADDR_ARGS(op->mac),
1407 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1412 /* Drop IP traffic to this router. */
1413 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1414 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1419 /* Logical router ingress table 2: IP Routing.
1421 * A packet that arrives at this table is an IP packet that should be
1422 * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1423 * IP address (leaving ip4.dst, the packet’s final destination, unchanged)
1424 * and advances to the next table for ARP resolution. */
1425 HMAP_FOR_EACH (op, key_node, ports) {
/* Directly attached network: gateway 0 makes reg0 = ip4.dst. */
1430 add_route(lflows, op->od, op->network, op->mask, 0);
1432 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Default route (0.0.0.0/0) via the datapath's configured gateway;
 * presumably guarded by an elided "if (od->gateway)" — confirm. */
1438 add_route(lflows, od, 0, 0, od->gateway);
1441 /* XXX destination unreachable */
1443 /* Local router ingress table 3: ARP Resolution.
1445 * Any packet that reaches this table is an IP packet whose next-hop IP
1446 * address is in reg0. (ip4.dst is the final destination.) This table
1447 * resolves the IP address in reg0 into an output port in outport and an
1448 * Ethernet address in eth.dst. */
1449 HMAP_FOR_EACH (op, key_node, ports) {
1451 /* XXX ARP for neighboring router */
1452 } else if (op->od->n_router_ports) {
/* Logical-switch port attached to a router: parse each "MAC IP"
 * address pair and install a resolution flow on the peer router. */
1453 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1457 if (ovs_scan(op->nbs->addresses[i],
1458 ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1459 ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1460 for (size_t j = 0; j < op->od->n_router_ports; j++) {
1461 /* Get the Logical_Router_Port that the Logical_Port is
1462 * connected to, as 'peer'. */
1463 const char *peer_name = smap_get(
1464 &op->od->router_ports[j]->nbs->options,
1470 struct ovn_port *peer
1471 = ovn_port_find(ports, peer_name);
1472 if (!peer || !peer->nbr) {
1476 /* Make sure that 'ip' is in 'peer''s network. */
1477 if ((ip ^ peer->network) & peer->mask) {
1481 char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1482 char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1483 "eth.dst = "ETH_ADDR_FMT"; "
1486 ETH_ADDR_ARGS(peer->mac),
1489 ovn_lflow_add(lflows, peer->od,
1490 S_ROUTER_IN_ARP, 200, match, actions);
1500 /* Logical router egress table 0: Delivery (priority 100).
1502 * Priority 100 rules deliver packets to enabled logical ports. */
1503 HMAP_FOR_EACH (op, key_node, ports) {
1508 if (!lrport_is_enabled(op->nbr)) {
1509 /* Drop packets to disabled logical ports (since logical flow
1510 * tables are default-drop). */
1514 char *match = xasprintf("outport == %s", op->json_key);
1515 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1521 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1522 * constructing their contents based on the OVN_NB database. */
/* Strategy for both tables: compute the desired rows into an in-memory hmap,
 * then reconcile against the southbound IDL — rows that match a desired entry
 * are kept (and the entry removed from the hmap), stale rows are deleted, and
 * whatever remains in the hmap is inserted as new rows.
 * NOTE(review): listing is elided; some braces/continues are in omitted
 * lines. */
1524 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1527 struct hmap lflows = HMAP_INITIALIZER(&lflows);
1528 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1530 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1531 build_lrouter_flows(datapaths, ports, &lflows);
1533 /* Push changes to the Logical_Flow table to database. */
1534 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1535 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1536 struct ovn_datapath *od
1537 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* Flow references a datapath that no longer exists: delete it. */
1539 sbrec_logical_flow_delete(sbflow);
1543 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1544 enum ovn_pipeline pipeline
1545 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1546 struct ovn_lflow *lflow = ovn_lflow_find(
1547 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1548 sbflow->priority, sbflow->match, sbflow->actions);
/* Found in desired set: already in sync, drop from the pending hmap;
 * otherwise the row is stale and is deleted. */
1550 ovn_lflow_destroy(&lflows, lflow);
1552 sbrec_logical_flow_delete(sbflow);
/* Remaining desired flows were not in the database: insert them. */
1555 struct ovn_lflow *lflow, *next_lflow;
1556 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1557 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1558 uint8_t table = ovn_stage_get_table(lflow->stage);
1560 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1561 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1562 sbrec_logical_flow_set_pipeline(
1563 sbflow, pipeline == P_IN ? "ingress" : "egress");
1564 sbrec_logical_flow_set_table_id(sbflow, table);
1565 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1566 sbrec_logical_flow_set_match(sbflow, lflow->match);
1567 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name for debugging. */
1569 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1570 ovn_stage_to_str(lflow->stage));
1571 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1573 ovn_lflow_destroy(&lflows, lflow);
1575 hmap_destroy(&lflows);
1577 /* Push changes to the Multicast_Group table to database. */
1578 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1579 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1580 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1583 sbrec_multicast_group_delete(sbmc);
1587 struct multicast_group group = { .name = sbmc->name,
1588 .key = sbmc->tunnel_key };
1589 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
/* Existing group: refresh its port set in place, then remove it from
 * the pending hmap; unmatched rows are stale and deleted. */
1591 ovn_multicast_update_sbrec(mc, sbmc);
1592 ovn_multicast_destroy(&mcgroups, mc);
1594 sbrec_multicast_group_delete(sbmc);
/* Insert any desired multicast groups not yet in the database. */
1597 struct ovn_multicast *mc, *next_mc;
1598 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1599 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1600 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1601 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1602 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1603 ovn_multicast_update_sbrec(mc, sbmc);
1604 ovn_multicast_destroy(&mcgroups, mc);
1606 hmap_destroy(&mcgroups);
/* Handles a change in the OVN northbound database contents: rebuilds the
 * datapath and port indexes from OVN_NB and reconciles the southbound
 * Logical_Flow and Multicast_Group tables, then frees all temporary state. */
1610 ovnnb_db_changed(struct northd_context *ctx)
1612 VLOG_DBG("ovn-nb db contents have changed.");
1614 struct hmap datapaths, ports;
1615 build_datapaths(ctx, &datapaths);
1616 build_ports(ctx, &datapaths, &ports);
1617 build_lflows(ctx, &datapaths, &ports);
/* Tear down the temporary indexes now that the SB txn is populated. */
1619 struct ovn_datapath *dp, *next_dp;
1620 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1621 ovn_datapath_destroy(&datapaths, dp);
1623 hmap_destroy(&datapaths);
1625 struct ovn_port *port, *next_port;
1626 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1627 ovn_port_destroy(&ports, port);
1629 hmap_destroy(&ports);
1633 * The only change we get notified about is if the 'chassis' column of the
1634 * 'Port_Binding' table changes. When this column is not empty, it means we
1635 * need to set the corresponding logical port as 'up' in the northbound DB.
/* Handles a change in the OVN southbound database: mirrors each
 * Port_Binding's 'chassis' state into the corresponding northbound logical
 * port's 'up' column.  Builds a name-keyed hash of NB logical ports first so
 * each SB binding is matched in O(1) rather than by a nested scan. */
1638 ovnsb_db_changed(struct northd_context *ctx)
1640 struct hmap lports_hmap;
1641 const struct sbrec_port_binding *sb;
1642 const struct nbrec_logical_port *nb;
1644 struct lport_hash_node {
1645 struct hmap_node node;
1646 const struct nbrec_logical_port *nb;
1647 } *hash_node, *hash_node_next;
1649 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1651 hmap_init(&lports_hmap);
1653 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1654 hash_node = xzalloc(sizeof *hash_node);
1656 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1659 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1661 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1662 hash_string(sb->logical_port, 0),
/* Hash buckets can collide, so confirm the names really match. */
1664 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1671 /* The logical port doesn't exist for this port binding. This can
1672 * happen under normal circumstances when ovn-northd hasn't gotten
1673 * around to pruning the Port_Binding yet. */
/* Bound to a chassis -> mark up; unbound -> mark down.  The 'up' value
 * written is a bool declared in elided lines (presumably true in the
 * first branch, false in the second — confirm against upstream). */
1677 if (sb->chassis && (!nb->up || !*nb->up)) {
1679 nbrec_logical_port_set_up(nb, &up, 1);
1680 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1682 nbrec_logical_port_set_up(nb, &up, 1);
/* Free the temporary lookup table. */
1686 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1687 hmap_remove(&lports_hmap, &hash_node->node);
1690 hmap_destroy(&lports_hmap);
/* Lazily-computed default database socket path ("unix:<rundir>/db.sock"),
 * cached for the life of the process.
 * NOTE(review): the enclosing default_db() signature and its once-only guard
 * fall in elided lines. */
static char *default_db_;
1700 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
/* Parses ovn-northd's command-line options, filling in the file-scope
 * 'ovnnb_db' and 'ovnsb_db' connection strings (defaulting both to the local
 * unix socket from default_db()) and handling the shared daemon/vlog/SSL
 * option groups.
 * NOTE(review): listing is elided — the getopt loop body, the 'd'/'D'/'h'/
 * 'o'/'V' cases, and usage/exit handling are in omitted lines. */
1706 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1709 DAEMON_OPTION_ENUMS,
1712 static const struct option long_options[] = {
1713 {"ovnsb-db", required_argument, NULL, 'd'},
1714 {"ovnnb-db", required_argument, NULL, 'D'},
1715 {"help", no_argument, NULL, 'h'},
1716 {"options", no_argument, NULL, 'o'},
1717 {"version", no_argument, NULL, 'V'},
1718 DAEMON_LONG_OPTIONS,
1720 STREAM_SSL_LONG_OPTIONS,
1723 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1728 c = getopt_long(argc, argv, short_options, long_options, NULL);
1734 DAEMON_OPTION_HANDLERS;
1735 VLOG_OPTION_HANDLERS;
1736 STREAM_SSL_OPTION_HANDLERS;
1751 ovs_cmdl_print_options(long_options);
1755 ovs_print_version(0, 0);
/* Fall back to the local database socket when no -d/-D was given. */
1764 ovnsb_db = default_db();
1768 ovnnb_db = default_db();
1771 free(short_options);
/* Registers 'column' for replication in 'idl' but suppresses change alerts
 * for it: ovn-northd writes these columns itself and does not need to wake
 * up when its own writes are echoed back. */
1775 add_column_noalert(struct ovsdb_idl *idl,
1776 const struct ovsdb_idl_column *column)
1778 ovsdb_idl_add_column(idl, column);
1779 ovsdb_idl_omit_alert(idl, column);
/* ovn-northd entry point.  Initializes logging, daemonization, and the
 * unixctl server; creates IDL connections to the northbound (full
 * replication) and southbound (selected tables only) databases; then runs
 * the main loop that translates NB changes into SB contents and mirrors SB
 * port-binding status back into the NB 'up' column.
 * NOTE(review): listing is elided — error paths, several switch cases, loop
 * braces, and the function's return fall in omitted lines. */
1783 main(int argc, char *argv[])
1785 extern struct vlog_module VLM_reconnect;
1786 struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
1787 unsigned int ovnnb_seqno, ovn_seqno;
1788 int res = EXIT_SUCCESS;
1789 struct northd_context ctx = {
1792 bool ovnnb_changes_pending = false;
1793 bool ovn_changes_pending = false;
1794 struct unixctl_server *unixctl;
1798 fatal_ignore_sigpipe();
1799 set_program_name(argv[0]);
1800 service_start(&argc, &argv);
1801 vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1802 vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1803 parse_options(argc, argv);
1805 daemonize_start(false);
1807 retval = unixctl_server_create(NULL, &unixctl);
1811 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1813 daemonize_complete();
1818 /* We want to detect all changes to the ovn-nb db. */
1819 ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
1820 &nbrec_idl_class, true, true);
/* For the SB db, replicate only the tables/columns ovn-northd manages. */
1822 ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
1823 &sbrec_idl_class, false, true);
1825 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
1826 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
1827 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
1828 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
1829 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
1830 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
1831 add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
1833 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
1834 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
1835 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
1836 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
1837 add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
1839 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
1840 add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
1841 add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
1843 ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
1844 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
1845 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
1846 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
1847 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
1848 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
1849 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
1850 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
1851 add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
/* 'chassis' is the one SB column ovn-northd needs alerts for: it drives
 * the NB 'up' updates in ovnsb_db_changed(). */
1852 ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
1855 * The loop here just runs the IDL in a loop waiting for the seqno to
1856 * change, which indicates that the contents of the db have changed.
1858 * If the contents of the ovn-nb db change, the mappings to the ovn-sb
1859 * db must be recalculated.
1861 * If the contents of the ovn-sb db change, it means the 'up' state of
1862 * a port may have changed, as that's the only type of change ovn-northd is
1866 ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1867 ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1870 ovsdb_idl_run(ovnnb_idl);
1871 ovsdb_idl_run(ovnsb_idl);
1872 unixctl_server_run(unixctl);
1874 if (!ovsdb_idl_is_alive(ovnnb_idl)) {
1875 int retval = ovsdb_idl_get_last_error(ovnnb_idl);
1876 VLOG_ERR("%s: database connection failed (%s)",
1877 ovnnb_db, ovs_retval_to_string(retval));
1882 if (!ovsdb_idl_is_alive(ovnsb_idl)) {
1883 int retval = ovsdb_idl_get_last_error(ovnsb_idl);
1884 VLOG_ERR("%s: database connection failed (%s)",
1885 ovnsb_db, ovs_retval_to_string(retval));
/* Detect content changes in either database by seqno movement. */
1890 if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
1891 ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1892 ovnnb_changes_pending = true;
1895 if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
1896 ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1897 ovn_changes_pending = true;
1901 * If there are any pending changes, we delay recalculating the
1902 * necessary updates until after an existing transaction finishes.
1903 * This avoids the possibility of rapid updates causing ovn-northd to
1904 * never be able to successfully make the corresponding updates to the
1905 * other db. Instead, pending changes are batched up until the next
1906 * time we get a chance to calculate the new state and apply it.
1909 if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
1911 * The OVN-nb db contents have changed, so create a transaction for
1912 * updating the OVN-sb DB.
1914 ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
1915 ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
1916 "ovn-northd: northbound db changed");
1917 ovnnb_db_changed(&ctx);
1918 ovnnb_changes_pending = false;
1921 if (ovn_changes_pending && !ctx.ovnnb_txn) {
1923 * The OVN-sb db contents have changed, so create a transaction for
1924 * updating the northbound DB.
1926 ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
1927 ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
1928 "ovn-northd: southbound db changed");
1929 ovnsb_db_changed(&ctx);
1930 ovn_changes_pending = false;
/* Try to commit each outstanding transaction; on error, re-arm the
 * corresponding pending flag so the work is redone next iteration. */
1933 if (ctx.ovnnb_txn) {
1934 enum ovsdb_idl_txn_status txn_status;
1935 txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
1936 switch (txn_status) {
1937 case TXN_UNCOMMITTED:
1938 case TXN_INCOMPLETE:
1939 /* Come back around and try to commit this transaction again */
1943 case TXN_NOT_LOCKED:
1945 /* Something went wrong, so try creating a new transaction. */
1946 ovn_changes_pending = true;
1949 ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
1950 ctx.ovnnb_txn = NULL;
1954 if (ctx.ovnsb_txn) {
1955 enum ovsdb_idl_txn_status txn_status;
1956 txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
1957 switch (txn_status) {
1958 case TXN_UNCOMMITTED:
1959 case TXN_INCOMPLETE:
1960 /* Come back around and try to commit this transaction again */
1964 case TXN_NOT_LOCKED:
1966 /* Something went wrong, so try creating a new transaction. */
1967 ovnnb_changes_pending = true;
1970 ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
1971 ctx.ovnsb_txn = NULL;
/* Block until something changes; if seqnos already moved, wake
 * immediately instead of sleeping. */
1975 if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
1976 ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
1977 ovsdb_idl_wait(ovnnb_idl);
1978 ovsdb_idl_wait(ovnsb_idl);
1979 if (ctx.ovnnb_txn) {
1980 ovsdb_idl_txn_wait(ctx.ovnnb_txn);
1982 if (ctx.ovnsb_txn) {
1983 ovsdb_idl_txn_wait(ctx.ovnsb_txn);
1985 unixctl_server_wait(unixctl);
1987 poll_immediate_wake();
1991 if (should_service_stop()) {
1996 unixctl_server_destroy(unixctl);
1997 ovsdb_idl_destroy(ovnsb_idl);
1998 ovsdb_idl_destroy(ovnnb_idl);
2007 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2008 const char *argv[] OVS_UNUSED, void *exiting_)
2010 bool *exiting = exiting_;
2013 unixctl_command_reply(conn, NULL);