2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(ovn_northd);

static unixctl_cb_func ovn_northd_exit;

/* State carried through one northd processing pass: IDL connections and
 * (possibly NULL) open transactions for the two OVN databases. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;        /* OVN_Northbound connection. */
    struct ovsdb_idl *ovnsb_idl;        /* OVN_Southbound connection. */
    struct ovsdb_idl_txn *ovnnb_txn;    /* Current northbound transaction. */
    struct ovsdb_idl_txn *ovnsb_txn;    /* Current southbound transaction. */

/* Database connection strings, settable from the command line. */
static const char *ovnnb_db;
static const char *ovnsb_db;

static const char *default_db(void);

/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

/* A stage within an OVN logical switch or router.
 *
 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
 * or router, whether the stage is part of the ingress or egress pipeline, and
 * the table within that pipeline.  The first three components are combined to
 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
 * S_ROUTER_OUT_DELIVERY. */
#define PIPELINE_STAGES                                               \
    /* Logical switch ingress stages. */                              \
    PIPELINE_STAGE(SWITCH, IN,  PORT_SEC,    0, "ls_in_port_sec")     \
    PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,     1, "ls_in_pre_acl")      \
    PIPELINE_STAGE(SWITCH, IN,  ACL,         2, "ls_in_acl")          \
    PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,     3, "ls_in_l2_lkup")      \
    /* Logical switch egress stages. */                               \
    PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,     0, "ls_out_pre_acl")     \
    PIPELINE_STAGE(SWITCH, OUT, ACL,         1, "ls_out_acl")         \
    PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,    2, "ls_out_port_sec")    \
    /* Logical router ingress stages. */                              \
    PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
    PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
    PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
    PIPELINE_STAGE(ROUTER, IN,  ARP,         3, "lr_in_arp")          \
    /* Logical router egress stages. */                               \
    PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")

/* Expansion used to define the members of "enum ovn_stage". */
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
    S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
        = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
#undef PIPELINE_STAGE

/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
/* Returns an "enum ovn_stage" built from the arguments. */
static enum ovn_stage
ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
    return OVN_STAGE_BUILD(dp_type, pipeline, table);

/* Returns the pipeline to which 'stage' belongs. */
static enum ovn_pipeline
ovn_stage_get_pipeline(enum ovn_stage stage)
    /* Bit 8 of the OVN_STAGE_BUILD() encoding is the pipeline. */
    return (stage >> 8) & 1;

/* Returns the table to which 'stage' belongs.
 * (The table is the low 8 bits of the encoding.) */
ovn_stage_get_table(enum ovn_stage stage)

/* Returns a string name for 'stage', generated from PIPELINE_STAGES. */
ovn_stage_to_str(enum ovn_stage stage)
#define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
    case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
#undef PIPELINE_STAGE
    default: return "<unknown>";
%s: OVN northbound management daemon\n\
usage: %s [OPTIONS]\n\
  --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
  --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
  -h, --help                display this help message\n\
  -o, --options             list available options\n\
  -V, --version             display version information\n\
", program_name, program_name, default_db(), default_db());
    /* Also describe the generic database/stream (including SSL) options. */
    stream_usage("database", true, true, false);
    struct hmap_node hmap_node; /* In a hmap of in-use tunnel IDs. */

/* Frees every tnlid_node in 'tnlids' and destroys the map itself. */
destroy_tnlids(struct hmap *tnlids)
    struct tnlid_node *node, *next;
    HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
        hmap_remove(tnlids, &node->hmap_node);
    hmap_destroy(tnlids);

/* Records 'tnlid' as in use in 'set' (a map of tnlid_nodes, hashed on the
 * ID itself). */
add_tnlid(struct hmap *set, uint32_t tnlid)
    struct tnlid_node *node = xmalloc(sizeof *node);
    hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));

/* Returns true if 'tnlid' is already recorded in 'set', false otherwise. */
tnlid_in_use(const struct hmap *set, uint32_t tnlid)
    const struct tnlid_node *node;
    HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
        if (node->tnlid == tnlid) {

/* Allocates an unused tunnel ID in the range 1...'max' (0 is never
 * allocated), recording it in 'set'.  The search starts just past '*hint'
 * and wraps around to 1, so successive allocations walk the ID space
 * instead of clustering at the bottom.  Logs a rate-limited warning, using
 * 'name' to identify the ID space, when every ID is taken. */
allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
    for (uint32_t tnlid = *hint + 1; tnlid != *hint;
         tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
        if (!tnlid_in_use(set, tnlid)) {
            add_tnlid(set, tnlid);
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
    VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
/* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
 * sb->external_ids:logical-switch. */
struct ovn_datapath {
    struct hmap_node key_node;  /* Index on 'key'. */
    struct uuid key;            /* (nbs/nbr)->header_.uuid. */

    const struct nbrec_logical_switch *nbs;  /* May be NULL. */
    const struct nbrec_logical_router *nbr;  /* May be NULL. */
    const struct sbrec_datapath_binding *sb; /* May be NULL. */

    struct ovs_list list;       /* In list of similar records. */

    /* Logical router data (digested from nbr). */

    /* Logical switch data. */
    struct ovn_port **router_ports; /* Switch ports of type "router". */
    size_t n_router_ports;

    struct hmap port_tnlids;    /* Tunnel keys in use by this datapath's
                                 * ports. */
    uint32_t port_key_hint;     /* Search hint for allocate_tnlid(). */
/* Creates a new ovn_datapath keyed on 'key' and inserts it into 'datapaths'.
 * Any of 'nbs', 'nbr', and 'sb' may be NULL.  The caller owns the returned
 * datapath until ovn_datapath_destroy(). */
static struct ovn_datapath *
ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
                    const struct nbrec_logical_switch *nbs,
                    const struct nbrec_logical_router *nbr,
                    const struct sbrec_datapath_binding *sb)
    struct ovn_datapath *od = xzalloc(sizeof *od);
    hmap_init(&od->port_tnlids);
    od->port_key_hint = 0;
    hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));

/* Removes 'od' from 'datapaths' and frees everything it owns. */
ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
    /* Don't remove od->list.  It is used within build_datapaths() as a
     * private list and once we've exited that function it is not safe to
    hmap_remove(datapaths, &od->key_node);
    destroy_tnlids(&od->port_tnlids);
    free(od->router_ports);

/* Returns the ovn_datapath in 'datapaths' keyed on 'uuid', or NULL. */
static struct ovn_datapath *
ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
    struct ovn_datapath *od;

    HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
        if (uuid_equals(uuid, &od->key)) {

/* Resolves southbound Datapath_Binding 'sb' back to its ovn_datapath via
 * external-ids:logical-switch or external-ids:logical-router.  Returns
 * NULL if 'sb' carries neither key or the UUID is unknown. */
static struct ovn_datapath *
ovn_datapath_from_sbrec(struct hmap *datapaths,
                        const struct sbrec_datapath_binding *sb)
    if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
        !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
    return ovn_datapath_find(datapaths, &key);
/* Digests the northbound logical switches/routers and the southbound
 * Datapath_Binding table into 'datapaths', partitioning the records into
 * three lists: 'sb_only' (southbound rows with no northbound match, to be
 * deleted later), 'nb_only' (northbound rows that still need a southbound
 * row), and 'both' (rows present in both databases). */
join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
               struct ovs_list *sb_only, struct ovs_list *nb_only,
               struct ovs_list *both)
    hmap_init(datapaths);

    const struct sbrec_datapath_binding *sb, *sb_next;
    SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
        /* A binding with no back-pointer to the northbound database is
         * useless: delete it, leaving a comment in the transaction log. */
        if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
            !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
            ovsdb_idl_txn_add_comment(
                "deleting Datapath_Binding "UUID_FMT" that lacks "
                "external-ids:logical-switch and "
                "external-ids:logical-router",
                UUID_ARGS(&sb->header_.uuid));
            sbrec_datapath_binding_delete(sb);

        /* Two bindings claiming the same northbound row: keep the first
         * one encountered, drop the duplicate. */
        if (ovn_datapath_find(datapaths, &key)) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                &rl, "deleting Datapath_Binding "UUID_FMT" with "
                "duplicate external-ids:logical-switch/router "UUID_FMT,
                UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
            sbrec_datapath_binding_delete(sb);

        struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
        list_push_back(sb_only, &od->list);

    const struct nbrec_logical_switch *nbs;
    NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
            /* Already seen from the southbound side: move to 'both'. */
            list_remove(&od->list);
            list_push_back(both, &od->list);
            od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
            list_push_back(nb_only, &od->list);

    const struct nbrec_logical_router *nbr;
    NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
        struct ovn_datapath *od = ovn_datapath_find(datapaths,
            /* If 'od' was created for a *switch* with the same UUID, the
             * northbound database is corrupt (warned below). */
            list_remove(&od->list);
            list_push_back(both, &od->list);
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
                "duplicate UUID "UUID_FMT" in OVN_Northbound",
                UUID_ARGS(&nbr->header_.uuid));

        od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
        list_push_back(nb_only, &od->list);

        if (nbr->default_gw) {
            char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
            /* The default gateway must be a single host address (/32). */
            if (error || !ip || mask != OVS_BE32_MAX) {
                static struct vlog_rate_limit rl
                    = VLOG_RATE_LIMIT_INIT(5, 1);
                VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
                /* NOTE(review): 'error' is heap-allocated by
                 * ip_parse_masked(); confirm it is freed in the code that
                 * follows (not visible in this chunk). */
/* Allocates and returns a fresh datapath tunnel key.  The datapath key
 * space is 24 bits wide; the static 'hint' persists across calls so each
 * search resumes where the previous allocation left off. */
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
416 struct ovs_list sb_only, nb_only, both;
418 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
420 if (!list_is_empty(&nb_only)) {
421 /* First index the in-use datapath tunnel IDs. */
422 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423 struct ovn_datapath *od;
424 LIST_FOR_EACH (od, list, &both) {
425 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
428 /* Add southbound record for each unmatched northbound record. */
429 LIST_FOR_EACH (od, list, &nb_only) {
430 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
435 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
437 char uuid_s[UUID_LEN + 1];
438 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439 const char *key = od->nbs ? "logical-switch" : "logical-router";
440 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441 sbrec_datapath_binding_set_external_ids(od->sb, &id);
443 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
445 destroy_tnlids(&dp_tnlids);
448 /* Delete southbound records without northbound matches. */
449 struct ovn_datapath *od, *next;
450 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451 list_remove(&od->list);
452 sbrec_datapath_binding_delete(od->sb);
453 ovn_datapath_destroy(datapaths, od);
    struct hmap_node key_node;  /* Index on 'key'. */
    char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
    char *json_key;             /* 'key', quoted for use in JSON. */

    const struct nbrec_logical_port *nbs;        /* May be NULL. */
    const struct nbrec_logical_router_port *nbr; /* May be NULL. */
    const struct sbrec_port_binding *sb;         /* May be NULL. */

    /* Logical router port data. */
    ovs_be32 ip, mask;          /* 192.168.10.123/24. */
    ovs_be32 network;           /* 192.168.10.0. */
    ovs_be32 bcast;             /* 192.168.10.255. */

    struct ovn_port *peer;      /* Connected port on the other datapath,
                                 * if any. */

    struct ovn_datapath *od;    /* Datapath this port belongs to. */

    struct ovs_list list;       /* In list of similar records. */
/* Creates an ovn_port named 'key' and inserts it into 'ports', hashed on
 * the name.  Any of 'nbs', 'nbr', and 'sb' may be NULL. */
static struct ovn_port *
ovn_port_create(struct hmap *ports, const char *key,
                const struct nbrec_logical_port *nbs,
                const struct nbrec_logical_router_port *nbr,
                const struct sbrec_port_binding *sb)
    struct ovn_port *op = xzalloc(sizeof *op);

    /* Pre-compute the JSON-quoted form of the name, used when the name is
     * embedded in match expressions (e.g. inport == "name"). */
    struct ds json_key = DS_EMPTY_INITIALIZER;
    json_string_escape(key, &json_key);
    op->json_key = ds_steal_cstr(&json_key);

    op->key = xstrdup(key);
    hmap_insert(ports, &op->key_node, hash_string(op->key, 0));

/* Removes 'port' from 'ports' and frees the memory it owns. */
ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
    /* Don't remove port->list.  It is used within build_ports() as a
     * private list and once we've exited that function it is not safe to
    hmap_remove(ports, &port->key_node);
    free(port->json_key);

/* Returns the ovn_port in 'ports' named 'name', or NULL if none exists. */
static struct ovn_port *
ovn_port_find(struct hmap *ports, const char *name)
    HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
        if (!strcmp(op->key, name)) {

/* Allocates a tunnel key for a port within datapath 'od'.  Port keys are
 * at most 15 bits wide. */
ovn_port_allocate_key(struct ovn_datapath *od)
    return allocate_tnlid(&od->port_tnlids, "port",
                          (1u << 15) - 1, &od->port_key_hint);
533 join_logical_ports(struct northd_context *ctx,
534 struct hmap *datapaths, struct hmap *ports,
535 struct ovs_list *sb_only, struct ovs_list *nb_only,
536 struct ovs_list *both)
543 const struct sbrec_port_binding *sb;
544 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
547 list_push_back(sb_only, &op->list);
550 struct ovn_datapath *od;
551 HMAP_FOR_EACH (od, key_node, datapaths) {
553 for (size_t i = 0; i < od->nbs->n_ports; i++) {
554 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555 struct ovn_port *op = ovn_port_find(ports, nbs->name);
557 if (op->nbs || op->nbr) {
558 static struct vlog_rate_limit rl
559 = VLOG_RATE_LIMIT_INIT(5, 1);
560 VLOG_WARN_RL(&rl, "duplicate logical port %s",
565 list_remove(&op->list);
566 list_push_back(both, &op->list);
568 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569 list_push_back(nb_only, &op->list);
575 for (size_t i = 0; i < od->nbr->n_ports; i++) {
576 const struct nbrec_logical_router_port *nbr
580 if (!eth_addr_from_string(nbr->mac, &mac)) {
581 static struct vlog_rate_limit rl
582 = VLOG_RATE_LIMIT_INIT(5, 1);
583 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
588 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590 static struct vlog_rate_limit rl
591 = VLOG_RATE_LIMIT_INIT(5, 1);
592 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
597 struct ovn_port *op = ovn_port_find(ports, nbr->name);
599 if (op->nbs || op->nbr) {
600 static struct vlog_rate_limit rl
601 = VLOG_RATE_LIMIT_INIT(5, 1);
602 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
607 list_remove(&op->list);
608 list_push_back(both, &op->list);
610 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
611 list_push_back(nb_only, &op->list);
616 op->network = ip & mask;
617 op->bcast = ip | ~mask;
625 /* Connect logical router ports, and logical switch ports of type "router",
628 HMAP_FOR_EACH (op, key_node, ports) {
629 if (op->nbs && !strcmp(op->nbs->type, "router")) {
630 const char *peer_name = smap_get(&op->nbs->options, "router-port");
635 struct ovn_port *peer = ovn_port_find(ports, peer_name);
636 if (!peer || !peer->nbr) {
642 op->od->router_ports = xrealloc(
643 op->od->router_ports,
644 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
645 op->od->router_ports[op->od->n_router_ports++] = op;
646 } else if (op->nbr && op->nbr->peer) {
647 op->peer = ovn_port_find(ports, op->nbr->name);
/* Copies the northbound configuration of 'op' into its southbound
 * Port_Binding row, op->sb (which must be non-NULL). */
ovn_port_update_sbrec(const struct ovn_port *op)
    sbrec_port_binding_set_datapath(op->sb, op->od->sb);
        /* Router port branch: router ports appear in the southbound
         * database as "patch" ports whose "peer" option names the
         * connected port. */
        sbrec_port_binding_set_type(op->sb, "patch");

        const char *peer = op->peer ? op->peer->key : "<error>";
        const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
        sbrec_port_binding_set_options(op->sb, &ids);

        /* Router ports carry no VIF details. */
        sbrec_port_binding_set_parent_port(op->sb, NULL);
        sbrec_port_binding_set_tag(op->sb, NULL, 0);
        sbrec_port_binding_set_mac(op->sb, NULL, 0);
        if (strcmp(op->nbs->type, "router")) {
            /* Ordinary switch port: pass type and options straight
             * through from the northbound row. */
            sbrec_port_binding_set_type(op->sb, op->nbs->type);
            sbrec_port_binding_set_options(op->sb, &op->nbs->options);
            /* Switch port of type "router": becomes a "patch" port whose
             * "peer" option is the connected router port's name. */
            sbrec_port_binding_set_type(op->sb, "patch");

            const char *router_port = smap_get(&op->nbs->options,
                router_port = "<error>";
            const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
            sbrec_port_binding_set_options(op->sb, &ids);

        sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
        sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
        sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
                                   op->nbs->n_addresses);
/* Synchronizes the southbound Port_Binding table with the northbound
 * logical ports: updates rows in both databases, inserts missing rows
 * (allocating per-datapath tunnel keys), and deletes stale rows. */
build_ports(struct northd_context *ctx, struct hmap *datapaths,
    struct ovs_list sb_only, nb_only, both;

    join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);

    /* For logical ports that are in both databases, update the southbound
     * record based on northbound data.  Also index the in-use tunnel_keys. */
    struct ovn_port *op, *next;
    LIST_FOR_EACH_SAFE (op, next, list, &both) {
        ovn_port_update_sbrec(op);
        add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
        /* Advance the hint so new allocations start above existing keys. */
        if (op->sb->tunnel_key > op->od->port_key_hint) {
            op->od->port_key_hint = op->sb->tunnel_key;

    /* Add southbound record for each unmatched northbound record. */
    LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
        /* Port keys are at most 15 bits (see ovn_port_allocate_key()), so
         * uint16_t cannot truncate here. */
        uint16_t tunnel_key = ovn_port_allocate_key(op->od);
        op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
        ovn_port_update_sbrec(op);

        sbrec_port_binding_set_logical_port(op->sb, op->key);
        sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);

    /* Delete southbound records without northbound matches. */
    LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
        list_remove(&op->list);
        sbrec_port_binding_delete(op->sb);
        ovn_port_destroy(ports, op);
/* Tunnel keys in this range address multicast groups rather than ports. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with a fixed tunnel key. */
struct multicast_group {
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */

/* Built-in group that floods to every enabled port of a datapath. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Built-in group for ports with an "unknown" address. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' name the same group with the same key. */
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
    return !strcmp(a->name, b->name) && a->key == b->key;
/* Hashes a (datapath, multicast group) pair for the 'mcgroups' map. */
ovn_multicast_hash(const struct ovn_datapath *datapath,
                   const struct multicast_group *group)
    return hash_pointer(datapath, group->key);

/* Returns the ovn_multicast for 'group' on 'datapath', or NULL. */
static struct ovn_multicast *
ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
                   const struct multicast_group *group)
    struct ovn_multicast *mc;

    HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
                             ovn_multicast_hash(datapath, group), mcgroups) {
        if (mc->datapath == datapath
            && multicast_group_equal(mc->group, group)) {

/* Adds 'port' to multicast group 'group' within 'port''s datapath,
 * creating the group entry on first use.  The 'ports' array grows on
 * demand (starting at 4 slots). */
ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
                  struct ovn_port *port)
    struct ovn_datapath *od = port->od;
    struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
        mc = xmalloc(sizeof *mc);
        hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
        mc->allocated_ports = 4;
        mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
    if (mc->n_ports >= mc->allocated_ports) {
        mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
    mc->ports[mc->n_ports++] = port;

/* Removes 'mc' from 'mcgroups' and frees it. */
ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
    hmap_remove(mcgroups, &mc->hmap_node);

/* Writes 'mc''s membership into southbound Multicast_Group row 'sb'. */
ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
                           const struct sbrec_multicast_group *sb)
    struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
    for (size_t i = 0; i < mc->n_ports; i++) {
        ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
    sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
    /* NOTE(review): 'ports' is a temporary copy; confirm it is freed in
     * the code that follows (not visible in this chunk). */
/* Logical flow generation.
 *
 * This code generates the Logical_Flow table in the southbound database, as a
 * function of most of the northbound database.
    struct hmap_node hmap_node; /* In an lflow map, hashed on the full flow. */

    struct ovn_datapath *od;    /* Datapath the flow belongs to. */
    enum ovn_stage stage;       /* Pipeline stage (encodes pipeline+table). */

/* Hashes 'lflow' on every field that participates in equality (datapath,
 * stage, priority, match, actions). */
ovn_lflow_hash(const struct ovn_lflow *lflow)
    size_t hash = uuid_hash(&lflow->od->key);
    hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
    hash = hash_string(lflow->match, hash);
    return hash_string(lflow->actions, hash);

/* Returns true if 'a' and 'b' are identical logical flows. */
ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
    return (a->od == b->od
            && a->stage == b->stage
            && a->priority == b->priority
            && !strcmp(a->match, b->match)
            && !strcmp(a->actions, b->actions));
/* Initializes 'lflow' in place.  Takes ownership of 'match' and 'actions'
 * in the heap-allocated case (ovn_lflow_add()); ovn_lflow_find() passes
 * borrowed strings for a stack-scoped lookup key. */
ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               char *match, char *actions)
    lflow->stage = stage;
    lflow->priority = priority;
    lflow->match = match;
    lflow->actions = actions;

/* Adds a row with the specified contents to the Logical_Flow table. */
ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
              enum ovn_stage stage, uint16_t priority,
              const char *match, const char *actions)
    struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
    ovn_lflow_init(lflow, od, stage, priority,
                   xstrdup(match), xstrdup(actions));
    hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));

/* Returns the flow in 'lflows' matching all five fields, or NULL. */
static struct ovn_lflow *
ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
               enum ovn_stage stage, uint16_t priority,
               const char *match, const char *actions)
    /* Stack-allocated key; the CONST_CASTs are safe because the target is
     * only read by hashing/comparison and never freed. */
    struct ovn_lflow target;
    ovn_lflow_init(&target, od, stage, priority,
                   CONST_CAST(char *, match), CONST_CAST(char *, actions));

    struct ovn_lflow *lflow;
    HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
        if (ovn_lflow_equal(lflow, &target)) {

/* Removes 'lflow' from 'lflows' and frees the strings it owns. */
ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
    hmap_remove(lflows, &lflow->hmap_node);
    free(lflow->actions);
/* Appends port security constraints on L2 address field 'eth_addr_field'
 * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
 * 'n_port_security' elements, is the collection of port_security constraints
 * from an OVN_NB Logical_Port row. */
build_port_security(const char *eth_addr_field,
                    char **port_security, size_t n_port_security,
    /* Remember where we started so the whole clause can be rolled back
     * if no valid MAC constraint ends up being emitted. */
    size_t base_len = match->length;
    ds_put_format(match, " && %s == {", eth_addr_field);

    for (size_t i = 0; i < n_port_security; i++) {
        /* Only entries that parse as an Ethernet address constrain the
         * field; other entries are skipped here. */
        if (eth_addr_from_string(port_security[i], &ea)) {
            ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
            ds_put_char(match, ' ');
    ds_chomp(match, ' ');
    ds_put_cstr(match, "}");

    /* Roll back to 'base_len', discarding the clause built above
     * (presumably when no addresses matched -- the guarding condition is
     * not visible in this chunk). */
    match->length = base_len;

/* Returns true unless the port is explicitly disabled: an absent
 * 'enabled' column means enabled. */
lport_is_enabled(const struct nbrec_logical_port *lport)
    return !lport->enabled || *lport->enabled;

/* Returns true if any ACL on logical switch 'od' uses the stateful
 * "allow-related" action. */
has_stateful_acl(struct ovn_datapath *od)
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        if (!strcmp(acl->action, "allow-related")) {
/* Builds the ingress and egress pre-ACL and ACL logical flow tables for
 * logical switch 'od', from its northbound ACL rows. */
build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
    bool has_stateful = has_stateful_acl(od);
    struct ds match_in, match_out;

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
        HMAP_FOR_EACH (op, key_node, ports) {
            if (op->od == od && !strcmp(op->nbs->type, "router")) {
                /* Can't use ct() for router ports.  Consider the following
                 * configuration:
                 *   lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on
                 *   hostB.
                 * For a ping from lp1 to lp2, first, the response will go
                 * through ct() with a zone for lp2 in the ls2 ingress
                 * pipeline on hostB.  That ct zone knows about this
                 * connection.  Next, it goes through ct() with the zone for
                 * the router port in the egress pipeline of ls2 on hostB.
                 * This zone does not know about the connection, as the icmp
                 * request went through the logical router on hostA, not
                 * hostB.  This would only work with distributed conntrack
                 * state across all chassis. */
                ds_init(&match_out);
                ds_put_format(&match_in, "ip && inport == %s", op->json_key);
                ds_put_format(&match_out, "ip && outport == %s", op->json_key);
                /* Priority 110: bypass the conntrack send for router
                 * ports (higher than the ct_next flows below). */
                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
                ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");

                ds_destroy(&match_in);
                ds_destroy(&match_out);

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");

        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiator's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
                      "ct_commit; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
                      "ct_commit; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  This is
         * enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always allow traffic that is established to a committed
         * conntrack entry.  This is enforced at a higher priority than
         * ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always allow traffic that is related to an existing conntrack
         * entry.  This is enforced at a higher priority than ACLs can
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port.  */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv",
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv",

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiator's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            const char *actions = has_stateful ? "ct_commit; next;" : "next;";
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, actions);
        } else if (!strcmp(acl->action, "allow-related")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* Commit the connection tracking entry, which allows all
             * other traffic related to this entry to flow due to the
             * 65535 priority flow defined earlier. */
            ds_put_format(&match, "ct.new && (%s)", acl->match);
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          ds_cstr(&match), "ct_commit; next;");
        } else if (!strcmp(acl->action, "drop")) {
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
        } else if (!strcmp(acl->action, "reject")) {
            /* xxx Need to support "reject". */
            VLOG_INFO("reject is not a supported action");
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
/* Generates the logical flows and multicast groups for every logical
 * switch in 'datapaths', populating 'lflows' and 'mcgroups'. */
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 1 and 2.  Egress tables 0 and 1. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        build_acls(od, lflows, ports);

    /* Logical switch ingress table 0: Admission control framework (priority
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */

    /* Logical switch ingress table 0: Ingress port security (priority 50). */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!lport_is_enabled(op->nbs)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */

        struct ds match = DS_EMPTY_INITIALIZER;
        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security("eth.src",
                            op->nbs->port_security, op->nbs->n_port_security,
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
                      ds_cstr(&match), "next;");

    /* Ingress table 3: Destination lookup, ARP reply for known IPs.
     * (priority 150). */
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            /* Addresses of the form "MAC IP" get an ARP responder flow:
             * an ARP request for IP is turned around into a reply sourced
             * from MAC, back out the port it arrived on. */
            if (ovs_scan(op->nbs->addresses[i],
                         ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
                         ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
                char *match = xasprintf(
                    "arp.tpa == "IP_FMT" && arp.op == 1", IP_ARGS(ip));
                char *actions = xasprintf(
                    "eth.dst = eth.src; "
                    "eth.src = "ETH_ADDR_FMT"; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = "ETH_ADDR_FMT"; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = "IP_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 150,

    /* Ingress table 3: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (lport_is_enabled(op->nbs)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");

    /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct eth_addr mac;

            /* A plain MAC address gets a direct unicast forwarding flow;
             * the literal "unknown" routes unmatched MACs to this port via
             * the MC_UNKNOWN group; anything else is a config error. */
            if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
                struct ds match, actions;
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));
                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
                ds_destroy(&actions);
            } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
                if (lport_is_enabled(op->nbs)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                    "%s: invalid syntax '%s' in addresses column",
                    op->nbs->name, op->nbs->addresses[i]);

    /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");

    /* Egress table 2: Egress port security multicast/broadcast (priority
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",

    /* Egress table 2: Egress port security (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
1298 HMAP_FOR_EACH (op, key_node, ports) {
1303 struct ds match = DS_EMPTY_INITIALIZER;
1304 ds_put_format(&match, "outport == %s", op->json_key);
1305 if (lport_is_enabled(op->nbs)) {
1306 build_port_security("eth.dst", op->nbs->port_security,
1307 op->nbs->n_port_security, &match);
1308 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1309 ds_cstr(&match), "output;");
1311 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1312 ds_cstr(&match), "drop;");
/* Returns true if the logical router port is administratively enabled.
 * A missing (NULL) 'enabled' column defaults to enabled, matching the
 * switch-port counterpart lport_is_enabled(). */
1320 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1322 return !lrport->enabled || *lrport->enabled;
/* Adds one logical flow to router ingress stage S_ROUTER_IN_IP_ROUTING for
 * datapath 'od': packets whose ip4.dst matches 'network'/'mask' get their
 * TTL decremented and their next-hop IP stored in reg0 — 'gateway' when
 * nonzero (indirect route), otherwise ip4.dst itself (directly attached
 * network).  The flow priority is the prefix length, which implements
 * longest-prefix match across overlapping routes.
 * NOTE(review): this listing is missing interleaved lines (braces, the
 * if/else around the gateway choice, and presumably a free(match)) —
 * confirm against the full source. */
1326 add_route(struct hmap *lflows, struct ovn_datapath *od,
1327 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1329 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1330 IP_ARGS(network), IP_ARGS(mask));
1332 struct ds actions = DS_EMPTY_INITIALIZER;
1333 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
/* Next hop: an explicit gateway address for remote routes... */
1335 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
/* ...or the packet's own destination for directly attached networks. */
1337 ds_put_cstr(&actions, "ip4.dst");
1339 ds_put_cstr(&actions, "; next;");
1341 /* The priority here is calculated to implement longest-prefix-match
/* count_1bits(ntohl(mask)) == prefix length, so /24 beats /16, etc. */
1343 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1344 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1345 ds_destroy(&actions);
/* Builds the logical flows for every OVN logical router in 'datapaths' and
 * every router port in 'ports', accumulating them in 'lflows':
 *
 *   Ingress table 0: admission control (L2 sanity checks, per-port match).
 *   Ingress table 1: IP input (L3 sanity checks, ICMP echo and ARP replies
 *                    for the router's own addresses).
 *   Ingress table 2: IP routing (next hop into reg0, via add_route()).
 *   Ingress table 3: ARP resolution (reg0 -> outport + eth.dst).
 *   Egress table 0:  delivery to enabled ports.
 *
 * NOTE(review): this listing is missing interleaved lines (closing braces,
 * some conditionals such as the op->nbr checks, and free() calls for the
 * xasprintf() results); comments below describe the visible statements. */
1350 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1351 struct hmap *lflows)
1353 /* This flow table structure is documented in ovn-northd(8), so please
1354 * update ovn-northd.8.xml if you change anything. */
1356 /* Logical router ingress table 0: Admission control framework. */
1357 struct ovn_datapath *od;
1358 HMAP_FOR_EACH (od, key_node, datapaths) {
1363 /* Logical VLANs not supported.
1364 * Broadcast/multicast source address is invalid. */
1365 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1366 "vlan.present || eth.src[40]", "drop;");
1369 /* Logical router ingress table 0: match (priority 50). */
1370 struct ovn_port *op;
1371 HMAP_FOR_EACH (op, key_node, ports) {
1376 if (!lrport_is_enabled(op->nbr)) {
1377 /* Drop packets from disabled logical ports (since logical flow
1378 * tables are default-drop). */
/* Accept only unicast to this port's MAC, or L2 multicast/broadcast. */
1382 char *match = xasprintf(
1383 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1384 ETH_ADDR_ARGS(op->mac), op->json_key);
1385 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1390 /* Logical router ingress table 1: IP Input. */
1391 HMAP_FOR_EACH (od, key_node, datapaths) {
1396 /* L3 admission control: drop multicast and broadcast source, localhost
1397 * source or destination, and zero network source or destination
1398 * (priority 100). */
1399 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1401 "ip4.src == 255.255.255.255 || "
1402 "ip4.src == 127.0.0.0/8 || "
1403 "ip4.dst == 127.0.0.0/8 || "
1404 "ip4.src == 0.0.0.0/8 || "
1405 "ip4.dst == 0.0.0.0/8",
1408 /* Drop Ethernet local broadcast. By definition this traffic should
1409 * not be forwarded. */
1410 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1411 "eth.bcast", "drop;");
1413 /* Drop IP multicast. */
1414 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1415 "ip4.mcast", "drop;");
/* Drop expired packets (TTL 0 or 1) instead of routing them. */
1419 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1420 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1421 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1424 /* Pass other traffic not already handled to the next table for
1426 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
/* Per-router-port IP input flows. */
1429 HMAP_FOR_EACH (op, key_node, ports) {
1434 /* L3 admission control: drop packets that originate from an IP address
1435 * owned by the router or a broadcast address known to the router
1436 * (priority 100). */
1437 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1438 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1439 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1443 /* ICMP echo reply. These flows reply to ICMP echo requests
1444 * received for the router's IP address. */
1446 "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1447 "icmp4.type == 8 && icmp4.code == 0",
1448 op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
/* Build the reply in place: swap src/dst, then hairpin out the inport. */
1449 char *actions = xasprintf(
1450 "ip4.dst = ip4.src; "
1451 "ip4.src = "IP_FMT"; "
1454 "inport = \"\"; /* Allow sending out inport. */ "
1457 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1462 /* ARP reply. These flows reply to ARP requests for the router's own
1465 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1466 op->json_key, IP_ARGS(op->ip));
1467 actions = xasprintf(
1468 "eth.dst = eth.src; "
1469 "eth.src = "ETH_ADDR_FMT"; "
1470 "arp.op = 2; /* ARP reply */ "
1471 "arp.tha = arp.sha; "
1472 "arp.sha = "ETH_ADDR_FMT"; "
1473 "arp.tpa = arp.spa; "
1474 "arp.spa = "IP_FMT"; "
1476 "inport = \"\"; /* Allow sending out inport. */ "
1478 ETH_ADDR_ARGS(op->mac),
1479 ETH_ADDR_ARGS(op->mac),
1482 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1487 /* Drop IP traffic to this router. */
1488 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1489 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1494 /* Logical router ingress table 2: IP Routing.
1496 * A packet that arrives at this table is an IP packet that should be
1497 * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1498 * IP address (leaving ip4.dst, the packet's final destination, unchanged)
1499 * and advances to the next table for ARP resolution. */
1500 HMAP_FOR_EACH (op, key_node, ports) {
/* Directly attached network: gateway 0 means "reg0 = ip4.dst". */
1505 add_route(lflows, op->od, op->network, op->mask, 0);
1507 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Default route (0.0.0.0/0, priority 0) via the router's gateway. */
1513 add_route(lflows, od, 0, 0, od->gateway);
1516 /* XXX destination unreachable */
1518 /* Local router ingress table 3: ARP Resolution.
1520 * Any packet that reaches this table is an IP packet whose next-hop IP
1521 * address is in reg0. (ip4.dst is the final destination.) This table
1522 * resolves the IP address in reg0 into an output port in outport and an
1523 * Ethernet address in eth.dst. */
1524 HMAP_FOR_EACH (op, key_node, ports) {
1526 /* XXX ARP for neighboring router */
/* Switch port on a switch attached to one or more routers: use the
 * MAC/IP pairs from its 'addresses' column to pre-populate ARP. */
1527 } else if (op->od->n_router_ports) {
1528 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1532 if (ovs_scan(op->nbs->addresses[i],
1533 ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1534 ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1535 for (size_t j = 0; j < op->od->n_router_ports; j++) {
1536 /* Get the Logical_Router_Port that the Logical_Port is
1537 * connected to, as 'peer'. */
1538 const char *peer_name = smap_get(
1539 &op->od->router_ports[j]->nbs->options,
1545 struct ovn_port *peer
1546 = ovn_port_find(ports, peer_name);
1547 if (!peer || !peer->nbr) {
1551 /* Make sure that 'ip' is in 'peer''s network. */
1552 if ((ip ^ peer->network) & peer->mask) {
/* Install the resolution flow on the PEER router's datapath. */
1556 char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1557 char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1558 "eth.dst = "ETH_ADDR_FMT"; "
1561 ETH_ADDR_ARGS(peer->mac),
1564 ovn_lflow_add(lflows, peer->od,
1565 S_ROUTER_IN_ARP, 200, match, actions);
1575 /* Logical router egress table 0: Delivery (priority 100).
1577 * Priority 100 rules deliver packets to enabled logical ports. */
1578 HMAP_FOR_EACH (op, key_node, ports) {
1583 if (!lrport_is_enabled(op->nbr)) {
1584 /* Drop packets to disabled logical ports (since logical flow
1585 * tables are default-drop). */
1589 char *match = xasprintf("outport == %s", op->json_key);
1590 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1596 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1597 * constructing their contents based on the OVN_NB database. */
1599 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
/* Compute the full desired flow and multicast-group sets in memory... */
1602 struct hmap lflows = HMAP_INITIALIZER(&lflows);
1603 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1605 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1606 build_lrouter_flows(datapaths, ports, &lflows);
1608 /* Push changes to the Logical_Flow table to database. */
/* Reconcile pass 1: walk the existing SB rows.  A row whose datapath no
 * longer exists is deleted; a row that matches a freshly computed flow
 * is kept and the in-memory copy is discarded (nothing to do); a row
 * with no in-memory counterpart is stale and is deleted. */
1609 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1610 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1611 struct ovn_datapath *od
1612 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1614 sbrec_logical_flow_delete(sbflow);
/* Reconstruct the stage enum from the SB row's pipeline/table_id so it
 * can be looked up in the in-memory flow table. */
1618 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1619 enum ovn_pipeline pipeline
1620 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1621 struct ovn_lflow *lflow = ovn_lflow_find(
1622 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1623 sbflow->priority, sbflow->match, sbflow->actions);
1625 ovn_lflow_destroy(&lflows, lflow);
1627 sbrec_logical_flow_delete(sbflow);
/* Reconcile pass 2: whatever remains in 'lflows' is desired but absent
 * from the database; insert a new SB row for each. */
1630 struct ovn_lflow *lflow, *next_lflow;
1631 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1632 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1633 uint8_t table = ovn_stage_get_table(lflow->stage);
1635 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1636 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1637 sbrec_logical_flow_set_pipeline(
1638 sbflow, pipeline == P_IN ? "ingress" : "egress");
1639 sbrec_logical_flow_set_table_id(sbflow, table);
1640 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1641 sbrec_logical_flow_set_match(sbflow, lflow->match);
1642 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name for debugging (e.g. ovn-sbctl). */
1644 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1645 ovn_stage_to_str(lflow->stage));
1646 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1648 ovn_lflow_destroy(&lflows, lflow);
1650 hmap_destroy(&lflows);
1652 /* Push changes to the Multicast_Group table to database. */
/* Same two-pass reconciliation for multicast groups: delete stale SB
 * rows, update rows that still match, then insert the leftovers. */
1653 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1654 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1655 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1658 sbrec_multicast_group_delete(sbmc);
1662 struct multicast_group group = { .name = sbmc->name,
1663 .key = sbmc->tunnel_key };
1664 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1666 ovn_multicast_update_sbrec(mc, sbmc);
1667 ovn_multicast_destroy(&mcgroups, mc);
1669 sbrec_multicast_group_delete(sbmc);
1672 struct ovn_multicast *mc, *next_mc;
1673 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1674 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1675 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1676 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1677 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1678 ovn_multicast_update_sbrec(mc, sbmc);
1679 ovn_multicast_destroy(&mcgroups, mc);
1681 hmap_destroy(&mcgroups);
/* Handles northbound database changes: recomputes the datapath, port, and
 * logical-flow contents of the southbound database from the current NB
 * contents, then frees the intermediate in-memory structures. */
1685 ovnnb_db_run(struct northd_context *ctx)
/* Nothing to do unless a southbound transaction is open to write into. */
1687 if (!ctx->ovnsb_txn) {
1690 VLOG_DBG("ovn-nb db contents may have changed.");
1691 struct hmap datapaths, ports;
1692 build_datapaths(ctx, &datapaths);
1693 build_ports(ctx, &datapaths, &ports);
1694 build_lflows(ctx, &datapaths, &ports);
/* Tear down the temporary maps; the results now live in the SB txn. */
1696 struct ovn_datapath *dp, *next_dp;
1697 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1698 ovn_datapath_destroy(&datapaths, dp);
1700 hmap_destroy(&datapaths);
1702 struct ovn_port *port, *next_port;
1703 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1704 ovn_port_destroy(&ports, port);
1706 hmap_destroy(&ports);
1710 * The only change we get notified about is if the 'chassis' column of the
1711 * 'Port_Binding' table changes. When this column is not empty, it means we
1712 * need to set the corresponding logical port as 'up' in the northbound DB.
1715 ovnsb_db_run(struct northd_context *ctx)
/* Nothing to do unless a northbound transaction is open to write into. */
1717 if (!ctx->ovnnb_txn) {
1720 struct hmap lports_hmap;
1721 const struct sbrec_port_binding *sb;
1722 const struct nbrec_logical_port *nb;
/* Temporary index of NB logical ports keyed by hash of port name. */
1724 struct lport_hash_node {
1725 struct hmap_node node;
1726 const struct nbrec_logical_port *nb;
1727 } *hash_node, *hash_node_next;
1729 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1731 hmap_init(&lports_hmap);
1733 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1734 hash_node = xzalloc(sizeof *hash_node);
1736 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
/* For each SB port binding, find the NB port with the same name. */
1739 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1741 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1742 hash_string(sb->logical_port, 0),
/* hash_string can collide, so confirm with a full name compare. */
1744 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1751 /* The logical port doesn't exist for this port binding. This can
1752 * happen under normal circumstances when ovn-northd hasn't gotten
1753 * around to pruning the Port_Binding yet. */
/* Sync NB 'up': set it when a chassis has claimed the binding, clear it
 * when no chassis has; only write when the stored value differs.
 * NOTE(review): the local 'bool up' assignments are on elided lines —
 * presumably true in the first branch, false in the second; confirm. */
1757 if (sb->chassis && (!nb->up || !*nb->up)) {
1759 nbrec_logical_port_set_up(nb, &up, 1);
1760 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1762 nbrec_logical_port_set_up(nb, &up, 1);
/* Free the temporary index. */
1766 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1767 hmap_remove(&lports_hmap, &hash_node->node);
1770 hmap_destroy(&lports_hmap);
/* Lazily built default OVSDB connection target ("unix:<rundir>/db.sock"),
 * returned by default_db() for both the NB and SB connections when the
 * user does not override them on the command line. */
1774 static char *default_db_;
1780 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
/* Parses ovn-northd's command-line options, filling in the global
 * 'ovnnb_db' and 'ovnsb_db' connection strings and dispatching the shared
 * daemon/vlog/SSL option handlers.  Exits directly for --help/--version.
 * NOTE(review): the getopt loop structure and the switch cases for
 * 'd'/'D'/'h' are on elided lines in this listing. */
1786 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1789 DAEMON_OPTION_ENUMS,
1792 static const struct option long_options[] = {
1793 {"ovnsb-db", required_argument, NULL, 'd'},
1794 {"ovnnb-db", required_argument, NULL, 'D'},
1795 {"help", no_argument, NULL, 'h'},
1796 {"options", no_argument, NULL, 'o'},
1797 {"version", no_argument, NULL, 'V'},
1798 DAEMON_LONG_OPTIONS,
1800 STREAM_SSL_LONG_OPTIONS,
/* Derive the short-option string from the table so they stay in sync. */
1803 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1808 c = getopt_long(argc, argv, short_options, long_options, NULL);
/* Shared option groups expand to their own case labels here. */
1814 DAEMON_OPTION_HANDLERS;
1815 VLOG_OPTION_HANDLERS;
1816 STREAM_SSL_OPTION_HANDLERS;
1831 ovs_cmdl_print_options(long_options);
1835 ovs_print_version(0, 0);
/* Fall back to the local unix-socket database when not specified. */
1844 ovnsb_db = default_db();
1848 ovnnb_db = default_db();
1851 free(short_options);
/* Registers 'column' for replication in 'idl' but suppresses change alerts
 * for it: ovn-northd writes these columns itself, so it need not wake up
 * when its own updates are echoed back from the database. */
1855 add_column_noalert(struct ovsdb_idl *idl,
1856 const struct ovsdb_idl_column *column)
1858 ovsdb_idl_add_column(idl, column);
1859 ovsdb_idl_omit_alert(idl, column);
/* ovn-northd entry point: sets up logging, daemonization, and a unixctl
 * control socket, connects IDL loops to the northbound (monitor
 * everything) and southbound (monitor only the tables/columns northd
 * writes, without self-alerts) databases, then loops translating NB
 * contents into SB Logical_Flow/Multicast_Group/Binding rows.
 * NOTE(review): the main while loop header, the ovnnb_db_run/ovnsb_db_run
 * calls, and the 'exiting'/'retval' declarations are on elided lines. */
1863 main(int argc, char *argv[])
1865 extern struct vlog_module VLM_reconnect;
1866 unsigned int ovnnb_seqno, ovnsb_seqno;
1867 int res = EXIT_SUCCESS;
1868 struct unixctl_server *unixctl;
/* Process-level setup: signals, program name, Windows service support. */
1872 fatal_ignore_sigpipe();
1873 set_program_name(argv[0]);
1874 service_start(&argc, &argv);
1875 vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1876 vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1877 parse_options(argc, argv);
1879 daemonize_start(false);
/* Control socket: "ovs-appctl -t ovn-northd exit" triggers clean exit. */
1881 retval = unixctl_server_create(NULL, &unixctl);
1885 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1887 daemonize_complete();
1892 /* We want to detect all changes to the ovn-nb db. */
1893 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
1894 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* The SB IDL monitors nothing by default ('false'); only the tables and
 * columns registered below are replicated, and most of them use
 * add_column_noalert() because northd is their only writer. */
1896 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
1897 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
1899 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
1900 add_column_noalert(ovnsb_idl_loop.idl,
1901 &sbrec_logical_flow_col_logical_datapath);
1902 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
1903 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
1904 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
1905 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
1906 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
1908 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
1909 add_column_noalert(ovnsb_idl_loop.idl,
1910 &sbrec_multicast_group_col_datapath);
1911 add_column_noalert(ovnsb_idl_loop.idl,
1912 &sbrec_multicast_group_col_tunnel_key);
1913 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
1914 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
1916 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
1917 add_column_noalert(ovnsb_idl_loop.idl,
1918 &sbrec_datapath_binding_col_tunnel_key);
1919 add_column_noalert(ovnsb_idl_loop.idl,
1920 &sbrec_datapath_binding_col_external_ids);
1922 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
1923 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
1924 add_column_noalert(ovnsb_idl_loop.idl,
1925 &sbrec_port_binding_col_logical_port);
1926 add_column_noalert(ovnsb_idl_loop.idl,
1927 &sbrec_port_binding_col_tunnel_key);
1928 add_column_noalert(ovnsb_idl_loop.idl,
1929 &sbrec_port_binding_col_parent_port);
1930 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
1931 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
1932 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
1933 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
/* 'chassis' is written by ovn-controller, so alerts stay ON for it —
 * this is the one change ovnsb_db_run() reacts to. */
1934 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
1936 ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl_loop.idl);
1937 ovnsb_seqno = ovsdb_idl_get_seqno(ovnsb_idl_loop.idl);
/* Main loop body: run both IDL loops, opening a transaction on each. */
1942 struct northd_context ctx = {
1943 .ovnnb_idl = ovnnb_idl_loop.idl,
1944 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
1945 .ovnsb_idl = ovnsb_idl_loop.idl,
1946 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
/* Seqno changed => database contents changed => recompute. */
1949 if (ovnnb_seqno != ovsdb_idl_get_seqno(ctx.ovnnb_idl)) {
1950 ovnnb_seqno = ovsdb_idl_get_seqno(ctx.ovnnb_idl);
1953 if (ovnsb_seqno != ovsdb_idl_get_seqno(ctx.ovnsb_idl)) {
1954 ovnsb_seqno = ovsdb_idl_get_seqno(ctx.ovnsb_idl);
1958 unixctl_server_run(unixctl);
1959 unixctl_server_wait(unixctl);
/* When exiting, skip the poll-block so the loop winds down promptly. */
1961 poll_immediate_wake();
1963 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
1964 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
1967 if (should_service_stop()) {
/* Shutdown: release the control socket and both IDL connections. */
1972 unixctl_server_destroy(unixctl);
1973 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
1974 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
1982 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
1983 const char *argv[] OVS_UNUSED, void *exiting_)
1985 bool *exiting = exiting_;
1988 unixctl_command_reply(conn, NULL);