2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
/* NOTE(review): this file appears to be an elided excerpt -- the leading
 * integer on each line looks like a stale line-number artifact, and several
 * original lines (e.g. this struct's closing brace) are not visible. */

/* unixctl callback used to request daemon exit (defined later in the file,
 * not visible in this excerpt). */
43 static unixctl_cb_func ovn_northd_exit;

/* Bundles the IDL handles and the currently open transactions for both the
 * OVN northbound and southbound databases, so helpers take one argument. */
45 struct northd_context {
46 struct ovsdb_idl *ovnnb_idl;
47 struct ovsdb_idl *ovnsb_idl;
48 struct ovsdb_idl_txn *ovnnb_txn;
49 struct ovsdb_idl_txn *ovnsb_txn;

/* Connection targets for the two databases (set from command-line options,
 * presumably in parse_options() -- not visible here). */
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;

55 static const char *default_db(void);
/* NOTE(review): excerpt appears elided -- the "enum ovn_pipeline {" opener
 * and several closing braces are not visible. */
57 /* Pipeline stages. */

59 /* The two pipelines in an OVN logical flow table. */
61 P_IN, /* Ingress pipeline. */
62 P_OUT /* Egress pipeline. */

65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type {
67 DP_SWITCH, /* OVN logical switch. */
68 DP_ROUTER /* OVN logical router. */

71 /* Returns an "enum ovn_stage" built from the arguments.
73 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74 * functions can't be used in enums or switch cases.) */
/* Packs datapath type (1 bit, shifted 9), pipeline (1 bit, shifted 8) and
 * table number (low 8 bits) into a single stage value. */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))

78 /* A stage within an OVN logical switch or router.
80 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81 * or router, whether the stage is part of the ingress or egress pipeline, and
82 * the table within that pipeline. The first three components are combined to
83 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84 * S_ROUTER_OUT_DELIVERY. */
/* X-macro list: each entry expands via PIPELINE_STAGE(DP_TYPE, PIPELINE,
 * STAGE, TABLE, NAME), used below for the enum and for ovn_stage_to_str(). */
86 #define PIPELINE_STAGES \
87 /* Logical switch ingress stages. */ \
88 PIPELINE_STAGE(SWITCH, IN, PORT_SEC, 0, "ls_in_port_sec") \
89 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 1, "ls_in_pre_acl") \
90 PIPELINE_STAGE(SWITCH, IN, ACL, 2, "ls_in_acl") \
91 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 3, "ls_in_l2_lkup") \
93 /* Logical switch egress stages. */ \
94 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
95 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
96 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC, 2, "ls_out_port_sec") \
98 /* Logical router ingress stages. */ \
99 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
100 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
101 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
102 PIPELINE_STAGE(ROUTER, IN, ARP, 3, "lr_in_arp") \
104 /* Logical router egress stages. */ \
105 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")

/* Expansion used inside "enum ovn_stage" (enum opener not visible here):
 * each stage becomes an S_<DP>_<PIPELINE>_<STAGE> enumerator. */
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
108 S_##DP_TYPE##_##PIPELINE##_##STAGE \
109 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
111 #undef PIPELINE_STAGE

114 /* Due to various hard-coded priorities need to implement ACLs, the
115 * northbound database supports a smaller range of ACL priorities than
116 * are available to logical flows. This value is added to an ACL
117 * priority to determine the ACL's logical flow priority. */
118 #define OVN_ACL_PRI_OFFSET 1000
/* NOTE(review): the bodies below are fragmentary in this excerpt (braces and
 * some parameter/return-type lines appear elided). */
120 /* Returns an "enum ovn_stage" built from the arguments. */
121 static enum ovn_stage
122 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
125 return OVN_STAGE_BUILD(dp_type, pipeline, table);

128 /* Returns the pipeline to which 'stage' belongs. */
129 static enum ovn_pipeline
130 ovn_stage_get_pipeline(enum ovn_stage stage)
/* Bit 8 of the packed stage value is the pipeline (see OVN_STAGE_BUILD). */
132 return (stage >> 8) & 1;

135 /* Returns the table to which 'stage' belongs. */
/* Presumably masks the low 8 bits of 'stage' -- body elided in this view. */
137 ovn_stage_get_table(enum ovn_stage stage)

142 /* Returns a string name for 'stage'. */
/* Expands PIPELINE_STAGES into switch cases mapping each stage enumerator to
 * its external name (the switch statement itself is elided here). */
144 ovn_stage_to_str(enum ovn_stage stage)
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
148 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
150 #undef PIPELINE_STAGE
151 default: return "<unknown>";
/* NOTE(review): fragment of usage() -- the printf( opener for this string
 * literal is elided, so no comments are inserted between the continuation
 * lines below. */
159 %s: OVN northbound management daemon\n\
160 usage: %s [OPTIONS]\n\
163 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
165 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
167 -h, --help display this help message\n\
168 -o, --options list available options\n\
169 -V, --version display version information\n\
170 ", program_name, program_name, default_db(), default_db());
/* Also prints the standard stream/database connection option help. */
173 stream_usage("database", true, true, false);
/* Tunnel-ID allocation helpers.  A 'struct tnlid_node' (opening line elided)
 * is an hmap entry keyed on hash_int(tnlid, 0). */
177 struct hmap_node hmap_node;

/* Frees every node in 'tnlids' and destroys the map itself. */
182 destroy_tnlids(struct hmap *tnlids)
184 struct tnlid_node *node, *next;
185 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
186 hmap_remove(tnlids, &node->hmap_node);
189 hmap_destroy(tnlids);

/* Records 'tnlid' as in use in 'set'.  (The line assigning node->tnlid
 * appears elided in this excerpt.) */
193 add_tnlid(struct hmap *set, uint32_t tnlid)
195 struct tnlid_node *node = xmalloc(sizeof *node);
196 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));

/* Returns whether 'tnlid' has already been added to 'set'. */
201 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
203 const struct tnlid_node *node;
204 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
205 if (node->tnlid == tnlid) {

/* Allocates an unused tunnel ID in [1, max], scanning circularly starting
 * just past '*hint'; records and (presumably) returns the ID, updating
 * '*hint'.  Logs (rate-limited) and falls through when the space is
 * exhausted -- the return statements are elided in this excerpt. */
213 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
216 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
217 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
218 if (!tnlid_in_use(set, tnlid)) {
219 add_tnlid(set, tnlid);
225 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
226 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
230 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
231 * sb->external_ids:logical-switch. */
232 struct ovn_datapath {
233 struct hmap_node key_node; /* Index on 'key'. */
234 struct uuid key; /* (nbs/nbr)->header_.uuid. */

236 const struct nbrec_logical_switch *nbs; /* May be NULL. */
237 const struct nbrec_logical_router *nbr; /* May be NULL. */
238 const struct sbrec_datapath_binding *sb; /* May be NULL. */

240 struct ovs_list list; /* In list of similar records. */

242 /* Logical router data (digested from nbr). */

245 /* Logical switch data. */
246 struct ovn_port **router_ports;
247 size_t n_router_ports;

249 struct hmap port_tnlids;
250 uint32_t port_key_hint;

/* Allocates a new ovn_datapath for 'key' and indexes it in 'datapaths'.
 * (The assignments of od->key/nbs/nbr/sb appear elided in this excerpt.) */
255 static struct ovn_datapath *
256 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
257 const struct nbrec_logical_switch *nbs,
258 const struct nbrec_logical_router *nbr,
259 const struct sbrec_datapath_binding *sb)
261 struct ovn_datapath *od = xzalloc(sizeof *od);
266 hmap_init(&od->port_tnlids);
267 od->port_key_hint = 0;
268 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));

/* Removes 'od' from 'datapaths' and frees its owned state. */
273 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
276 /* Don't remove od->list. It is used within build_datapaths() as a
277 * private list and once we've exited that function it is not safe to
279 hmap_remove(datapaths, &od->key_node);
280 destroy_tnlids(&od->port_tnlids);
281 free(od->router_ports);

/* Looks up a datapath by its northbound UUID key; the fall-through return
 * paths are elided in this excerpt. */
286 static struct ovn_datapath *
287 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
289 struct ovn_datapath *od;
291 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
292 if (uuid_equals(uuid, &od->key)) {

/* Maps a southbound Datapath_Binding back to its ovn_datapath via the
 * external-ids:logical-switch / logical-router UUID. */
299 static struct ovn_datapath *
300 ovn_datapath_from_sbrec(struct hmap *datapaths,
301 const struct sbrec_datapath_binding *sb)
305 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
306 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
309 return ovn_datapath_find(datapaths, &key);
/* Correlates northbound logical switches/routers with southbound
 * Datapath_Bindings, partitioning the results into 'sb_only' (stale SB rows),
 * 'nb_only' (NB rows needing a new SB record) and 'both'.  Deletes SB rows
 * that lack a usable key or duplicate one already seen.  NOTE(review):
 * several lines (closing braces, second loop iterations) appear elided. */
313 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
314 struct ovs_list *sb_only, struct ovs_list *nb_only,
315 struct ovs_list *both)
317 hmap_init(datapaths);

/* Pass 1: index every southbound binding; everything starts as sb_only. */
322 const struct sbrec_datapath_binding *sb, *sb_next;
323 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
325 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327 ovsdb_idl_txn_add_comment(
329 "deleting Datapath_Binding "UUID_FMT" that lacks "
330 "external-ids:logical-switch and "
331 "external-ids:logical-router",
332 UUID_ARGS(&sb->header_.uuid));
333 sbrec_datapath_binding_delete(sb);
337 if (ovn_datapath_find(datapaths, &key)) {
338 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
340 &rl, "deleting Datapath_Binding "UUID_FMT" with "
341 "duplicate external-ids:logical-switch/router "UUID_FMT,
342 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
343 sbrec_datapath_binding_delete(sb);
347 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
349 list_push_back(sb_only, &od->list);

/* Pass 2: match northbound logical switches; moves matches to 'both'. */
352 const struct nbrec_logical_switch *nbs;
353 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
354 struct ovn_datapath *od = ovn_datapath_find(datapaths,
358 list_remove(&od->list);
359 list_push_back(both, &od->list);
361 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
363 list_push_back(nb_only, &od->list);

/* Pass 3: match northbound logical routers (warns on a UUID that collides
 * with a switch, which should be impossible in OVN_Northbound). */
367 const struct nbrec_logical_router *nbr;
368 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
369 struct ovn_datapath *od = ovn_datapath_find(datapaths,
374 list_remove(&od->list);
375 list_push_back(both, &od->list);
378 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
380 "duplicate UUID "UUID_FMT" in OVN_Northbound",
381 UUID_ARGS(&nbr->header_.uuid));
385 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
387 list_push_back(nb_only, &od->list);

/* Validate the router's default gateway: must parse as a plain host
 * address (full /32 mask).  What is stored on success is elided here. */
391 if (nbr->default_gw) {
393 char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
394 if (error || !ip || mask != OVS_BE32_MAX) {
395 static struct vlog_rate_limit rl
396 = VLOG_RATE_LIMIT_INIT(5, 1);
397 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
/* Allocates an unused datapath tunnel key in [1, 2^24 - 1]. */
407 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
409 static uint32_t hint;
410 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);

/* Reconciles the southbound Datapath_Binding table with the northbound
 * database: updates matches, inserts missing SB rows with fresh tunnel keys,
 * and deletes SB rows with no NB counterpart. */
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
416 struct ovs_list sb_only, nb_only, both;
418 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);

420 if (!list_is_empty(&nb_only)) {
421 /* First index the in-use datapath tunnel IDs. */
422 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423 struct ovn_datapath *od;
424 LIST_FOR_EACH (od, list, &both) {
425 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
428 /* Add southbound record for each unmatched northbound record. */
429 LIST_FOR_EACH (od, list, &nb_only) {
/* NOTE(review): uint16_t looks too narrow for a datapath key allocated
 * from a 24-bit space ((1u << 24) - 1 above) -- possible truncation;
 * verify against the complete source. */
430 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
435 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
437 char uuid_s[UUID_LEN + 1];
438 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439 const char *key = od->nbs ? "logical-switch" : "logical-router";
440 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441 sbrec_datapath_binding_set_external_ids(od->sb, &id);
443 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
445 destroy_tnlids(&dp_tnlids);

448 /* Delete southbound records without northbound matches. */
449 struct ovn_datapath *od, *next;
450 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451 list_remove(&od->list);
452 sbrec_datapath_binding_delete(od->sb);
453 ovn_datapath_destroy(datapaths, od);
/* NOTE(review): the "struct ovn_port {" opener is elided in this excerpt;
 * these are its members. */
458 struct hmap_node key_node; /* Index on 'key'. */
459 char *key; /* nbs->name, nbr->name, sb->logical_port. */
460 char *json_key; /* 'key', quoted for use in JSON. */

462 const struct nbrec_logical_port *nbs; /* May be NULL. */
463 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
464 const struct sbrec_port_binding *sb; /* May be NULL. */

466 /* Logical router port data. */
467 ovs_be32 ip, mask; /* 192.168.10.123/24. */
468 ovs_be32 network; /* 192.168.10.0. */
469 ovs_be32 bcast; /* 192.168.10.255. */

/* Peer router/switch port, when this port is one end of a patch pair. */
471 struct ovn_port *peer;

473 struct ovn_datapath *od;

475 struct ovs_list list; /* In list of similar records. */

/* Allocates an ovn_port named 'key', pre-computes its JSON-quoted form for
 * use in logical flow matches, and indexes it by name in 'ports'.  (The
 * nbs/nbr/sb assignments appear elided here.) */
478 static struct ovn_port *
479 ovn_port_create(struct hmap *ports, const char *key,
480 const struct nbrec_logical_port *nbs,
481 const struct nbrec_logical_router_port *nbr,
482 const struct sbrec_port_binding *sb)
484 struct ovn_port *op = xzalloc(sizeof *op);
486 struct ds json_key = DS_EMPTY_INITIALIZER;
487 json_string_escape(key, &json_key);
488 op->json_key = ds_steal_cstr(&json_key);
490 op->key = xstrdup(key);
494 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));

/* Removes 'port' from 'ports' and frees its owned strings. */
499 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
502 /* Don't remove port->list. It is used within build_ports() as a
503 * private list and once we've exited that function it is not safe to
505 hmap_remove(ports, &port->key_node);
506 free(port->json_key);

/* Looks up a port by name; fall-through return paths elided here. */
512 static struct ovn_port *
513 ovn_port_find(struct hmap *ports, const char *name)
517 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
518 if (!strcmp(op->key, name)) {

/* Allocates an unused per-datapath port tunnel key in [1, 2^15 - 1]. */
526 ovn_port_allocate_key(struct ovn_datapath *od)
528 return allocate_tnlid(&od->port_tnlids, "port",
529 (1u << 15) - 1, &od->port_key_hint);
/* Correlates northbound logical (router) ports with southbound Port_Binding
 * rows into 'sb_only' / 'nb_only' / 'both', and wires up router<->switch
 * patch-port peers.  NOTE(review): several lines (closing braces, lookups,
 * `continue;` statements) appear elided from this excerpt. */
533 join_logical_ports(struct northd_context *ctx,
534 struct hmap *datapaths, struct hmap *ports,
535 struct ovs_list *sb_only, struct ovs_list *nb_only,
536 struct ovs_list *both)

/* Index every existing southbound binding; all start as sb_only. */
543 const struct sbrec_port_binding *sb;
544 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
547 list_push_back(sb_only, &op->list);

/* Walk every datapath's northbound ports. */
550 struct ovn_datapath *od;
551 HMAP_FOR_EACH (od, key_node, datapaths) {
553 for (size_t i = 0; i < od->nbs->n_ports; i++) {
554 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555 struct ovn_port *op = ovn_port_find(ports, nbs->name);
557 if (op->nbs || op->nbr) {
558 static struct vlog_rate_limit rl
559 = VLOG_RATE_LIMIT_INIT(5, 1);
560 VLOG_WARN_RL(&rl, "duplicate logical port %s",
565 list_remove(&op->list);
566 list_push_back(both, &op->list);
568 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569 list_push_back(nb_only, &op->list);

/* Logical router ports: validate mac and network before use. */
575 for (size_t i = 0; i < od->nbr->n_ports; i++) {
576 const struct nbrec_logical_router_port *nbr
580 if (!eth_addr_from_string(nbr->mac, &mac)) {
581 static struct vlog_rate_limit rl
582 = VLOG_RATE_LIMIT_INIT(5, 1);
583 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
587 char *error = ip_parse_masked(nbr->network, &ip, &mask);
588 is elided; the line below parses nbr->network. */
588 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590 static struct vlog_rate_limit rl
591 = VLOG_RATE_LIMIT_INIT(5, 1);
592 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
597 struct ovn_port *op = ovn_port_find(ports, nbr->name);
599 if (op->nbs || op->nbr) {
600 static struct vlog_rate_limit rl
601 = VLOG_RATE_LIMIT_INIT(5, 1);
602 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
607 list_remove(&op->list);
608 list_push_back(both, &op->list);
610 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
611 list_push_back(nb_only, &op->list);
616 op->network = ip & mask;
617 op->bcast = ip | ~mask;

625 /* Connect logical router ports, and logical switch ports of type "router",
628 HMAP_FOR_EACH (op, key_node, ports) {
629 if (op->nbs && !strcmp(op->nbs->type, "router")) {
630 const char *peer_name = smap_get(&op->nbs->options, "router-port");
635 struct ovn_port *peer = ovn_port_find(ports, peer_name);
636 if (!peer || !peer->nbr) {
642 op->od->router_ports = xrealloc(
643 op->od->router_ports,
644 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
645 op->od->router_ports[op->od->n_router_ports++] = op;
646 } else if (op->nbr && op->nbr->peer) {
/* NOTE(review): this looks up op->nbr->name (the port itself) even though
 * the guard tests op->nbr->peer -- looks like it should be
 * ovn_port_find(ports, op->nbr->peer); verify against the full source. */
647 op->peer = ovn_port_find(ports, op->nbr->name);
/* Refreshes a southbound Port_Binding from the port's northbound data:
 * router ports become "patch" ports with a "peer" option; switch ports copy
 * type/options/parent/tag/mac, with ports of type "router" rewritten as
 * "patch" peered to their router-port.  NOTE(review): the if/else framing
 * lines around the fragments below are elided in this excerpt. */
653 ovn_port_update_sbrec(const struct ovn_port *op)
655 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
657 sbrec_port_binding_set_type(op->sb, "patch");
659 const char *peer = op->peer ? op->peer->key : "<error>";
660 const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
661 sbrec_port_binding_set_options(op->sb, &ids);
663 sbrec_port_binding_set_parent_port(op->sb, NULL);
664 sbrec_port_binding_set_tag(op->sb, NULL, 0);
665 sbrec_port_binding_set_mac(op->sb, NULL, 0);
667 if (strcmp(op->nbs->type, "router")) {
668 sbrec_port_binding_set_type(op->sb, op->nbs->type);
669 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
671 sbrec_port_binding_set_type(op->sb, "patch");
673 const char *router_port = smap_get(&op->nbs->options,
676 router_port = "<error>";
678 const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
679 sbrec_port_binding_set_options(op->sb, &ids);
681 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
682 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
683 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
684 op->nbs->n_addresses);
/* Reconciles the southbound Port_Binding table with the northbound database,
 * mirroring build_datapaths(): update matches (also indexing in-use tunnel
 * keys), insert SB rows for NB-only ports, delete SB-only rows. */
689 build_ports(struct northd_context *ctx, struct hmap *datapaths,
692 struct ovs_list sb_only, nb_only, both;
694 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);

696 /* For logical ports that are in both databases, update the southbound
697 * record based on northbound data. Also index the in-use tunnel_keys. */
698 struct ovn_port *op, *next;
699 LIST_FOR_EACH_SAFE (op, next, list, &both) {
700 ovn_port_update_sbrec(op);
702 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
703 if (op->sb->tunnel_key > op->od->port_key_hint) {
704 op->od->port_key_hint = op->sb->tunnel_key;

708 /* Add southbound record for each unmatched northbound record. */
709 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
710 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
715 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
716 ovn_port_update_sbrec(op);
718 sbrec_port_binding_set_logical_port(op->sb, op->key);
719 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);

722 /* Delete southbound records without northbound matches. */
723 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
724 list_remove(&op->list);
725 sbrec_port_binding_delete(op->sb);
726 ovn_port_destroy(ports, op);
/* Multicast group tunnel-key range reserved in the southbound database. */
730 #define OVN_MIN_MULTICAST 32768
731 #define OVN_MAX_MULTICAST 65535

/* Name + tunnel key of a logical multicast group ('name' member line is
 * elided in this excerpt). */
733 struct multicast_group {
735 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */

/* Well-known groups: flood (all enabled ports) and unknown-MAC delivery. */
738 #define MC_FLOOD "_MC_flood"
739 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

741 #define MC_UNKNOWN "_MC_unknown"
742 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Two groups are equal when both name and key match. */
745 multicast_group_equal(const struct multicast_group *a,
746 const struct multicast_group *b)
748 return !strcmp(a->name, b->name) && a->key == b->key;
751 /* Multicast group entry. */
752 struct ovn_multicast {
753 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
754 struct ovn_datapath *datapath;
755 const struct multicast_group *group;

756 /* Dynamic array of member ports. */
757 struct ovn_port **ports;
758 size_t n_ports, allocated_ports;

/* Hash combines the datapath pointer with the group's tunnel key. */
762 ovn_multicast_hash(const struct ovn_datapath *datapath,
763 const struct multicast_group *group)
765 return hash_pointer(datapath, group->key);

/* Finds the (datapath, group) entry, if any; fall-through returns elided. */
768 static struct ovn_multicast *
769 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
770 const struct multicast_group *group)
772 struct ovn_multicast *mc;
774 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
775 ovn_multicast_hash(datapath, group), mcgroups) {
776 if (mc->datapath == datapath
777 && multicast_group_equal(mc->group, group)) {

/* Appends 'port' to the (port->od, group) entry, creating it (with an
 * initial capacity of 4, doubled on demand) when absent.  NOTE(review):
 * some initializer lines of the new entry are elided in this excerpt. */
785 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
786 struct ovn_port *port)
788 struct ovn_datapath *od = port->od;
789 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
791 mc = xmalloc(sizeof *mc);
792 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
796 mc->allocated_ports = 4;
797 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
799 if (mc->n_ports >= mc->allocated_ports) {
800 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
803 mc->ports[mc->n_ports++] = port;

/* Removes 'mc' from 'mcgroups' (frees presumably elided below). */
807 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
810 hmap_remove(mcgroups, &mc->hmap_node);

/* Copies the member ports into the southbound Multicast_Group row.  The
 * temporary array is presumably freed after the set call (line elided). */
817 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
818 const struct sbrec_multicast_group *sb)
820 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
821 for (size_t i = 0; i < mc->n_ports; i++) {
822 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
824 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
828 /* Logical flow generation.
830 * This code generates the Logical_Flow table in the southbound database, as a
831 * function of most of the northbound database.

/* One pending Logical_Flow row ("struct ovn_lflow {" opener and the
 * priority/match/actions members are elided in this excerpt). */
835 struct hmap_node hmap_node;
837 struct ovn_datapath *od;
838 enum ovn_stage stage;

/* Hash over all identity fields: datapath key, stage+priority, match and
 * actions strings. */
845 ovn_lflow_hash(const struct ovn_lflow *lflow)
847 size_t hash = uuid_hash(&lflow->od->key);
848 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
849 hash = hash_string(lflow->match, hash);
850 return hash_string(lflow->actions, hash);

/* Full equality on the same identity fields used by ovn_lflow_hash(). */
854 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
856 return (a->od == b->od
857 && a->stage == b->stage
858 && a->priority == b->priority
859 && !strcmp(a->match, b->match)
860 && !strcmp(a->actions, b->actions));
/* Fills in 'lflow'; takes ownership of the 'match'/'actions' strings as
 * provided (callers pass xstrdup'd copies or CONST_CAST'd temporaries). */
864 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
865 enum ovn_stage stage, uint16_t priority,
866 char *match, char *actions)
869 lflow->stage = stage;
870 lflow->priority = priority;
871 lflow->match = match;
872 lflow->actions = actions;

875 /* Adds a row with the specified contents to the Logical_Flow table. */
877 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
878 enum ovn_stage stage, uint16_t priority,
879 const char *match, const char *actions)
881 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
882 ovn_lflow_init(lflow, od, stage, priority,
883 xstrdup(match), xstrdup(actions));
884 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));

/* Looks up an identical pending flow; builds a stack 'target' (whose match/
 * actions are borrowed, not owned) to reuse hash/equal. */
887 static struct ovn_lflow *
888 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
889 enum ovn_stage stage, uint16_t priority,
890 const char *match, const char *actions)
892 struct ovn_lflow target;
893 ovn_lflow_init(&target, od, stage, priority,
894 CONST_CAST(char *, match), CONST_CAST(char *, actions));
896 struct ovn_lflow *lflow;
897 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
899 if (ovn_lflow_equal(lflow, &target)) {

/* Removes 'lflow' from 'lflows' and frees its owned strings (free of
 * lflow->match and lflow itself presumably on elided lines). */
907 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
910 hmap_remove(lflows, &lflow->hmap_node);
912 free(lflow->actions);
917 /* Appends port security constraints on L2 address field 'eth_addr_field'
918 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
919 * 'n_port_security' elements, is the collection of port_security constraints
920 * from an OVN_NB Logical_Port row. */
922 build_port_security(const char *eth_addr_field,
923 char **port_security, size_t n_port_security,
/* Remember the length so the clause can be rolled back (below) when no
 * entry parsed as a valid Ethernet address. */
926 size_t base_len = match->length;
927 ds_put_format(match, " && %s == {", eth_addr_field);

930 for (size_t i = 0; i < n_port_security; i++) {
933 if (eth_addr_from_string(port_security[i], &ea)) {
934 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
935 ds_put_char(match, ' ');

939 ds_chomp(match, ' ');
940 ds_put_cstr(match, "}");

/* Roll back: truncate to the pre-clause length (condition elided here). */
943 match->length = base_len;

/* A logical port with no 'enabled' value defaults to enabled. */
948 lport_is_enabled(const struct nbrec_logical_port *lport)
950 return !lport->enabled || *lport->enabled;

/* A logical port with no 'up' value is treated as up. */
954 lport_is_up(const struct nbrec_logical_port *lport)
956 return !lport->up || *lport->up;

/* True if any ACL on the switch uses "allow-related" (i.e. needs conntrack);
 * return statements are elided in this excerpt. */
960 has_stateful_acl(struct ovn_datapath *od)
962 for (size_t i = 0; i < od->nbs->n_acls; i++) {
963 struct nbrec_acl *acl = od->nbs->acls[i];
964 if (!strcmp(acl->action, "allow-related")) {
/* Emits pre-ACL and ACL logical flows for switch datapath 'od' into
 * 'lflows': default-allow baselines, conntrack plumbing when any ACL is
 * stateful, fixed highest-priority ct-state flows, and one flow per
 * northbound ACL.  NOTE(review): several framing lines (if (has_stateful),
 * closing braces, some string arguments) are elided in this excerpt. */
973 build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
975 bool has_stateful = has_stateful_acl(od);
977 struct ds match_in, match_out;

979 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
980 * allowed by default. */
981 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
982 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

984 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
985 * default. A related rule at priority 1 is added below if there
986 * are any stateful ACLs in this datapath. */
987 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
988 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

990 /* If there are any stateful ACL rules in this dapapath, we must
991 * send all IP packets through the conntrack action, which handles
992 * defragmentation, in order to match L4 headers. */
/* Exempt router ports from ct() at priority 110 (rationale below). */
994 HMAP_FOR_EACH (op, key_node, ports) {
995 if (op->od == od && !strcmp(op->nbs->type, "router")) {
996 /* Can't use ct() for router ports. Consider the following configuration:
997 lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
998 For a ping from lp1 to lp2, First, the response will go through ct()
999 with a zone for lp2 in the ls2 ingress pipeline on hostB.
1000 That ct zone knows about this connection. Next, it goes through ct()
1001 with the zone for the router port in the egress pipeline of ls2 on hostB.
1002 This zone does not know about the connection, as the icmp request
1003 went through the logical router on hostA, not hostB. This would only work
1004 with distributed conntrack state across all chassis. */
1007 ds_init(&match_out);
1008 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1009 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1010 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
1011 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");
1013 ds_destroy(&match_in);
1014 ds_destroy(&match_out);

1018 /* Ingress and Egress Pre-ACL Table (Priority 100).
1020 * Regardless of whether the ACL is "from-lport" or "to-lport",
1021 * we need rules in both the ingress and egress table, because
1022 * the return traffic needs to be followed. */
1023 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1024 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");

1026 /* Ingress and Egress ACL Table (Priority 1).
1028 * By default, traffic is allowed. This is partially handled by
1029 * the Priority 0 ACL flows added earlier, but we also need to
1030 * commit IP flows. This is because, while the initiater's
1031 * direction may not have any stateful rules, the server's may
1032 * and then its return traffic would not have an associated
1033 * conntrack entry and would return "+invalid". */
1034 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1035 "ct_commit; next;");
1036 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1037 "ct_commit; next;");

1039 /* Ingress and Egress ACL Table (Priority 65535).
1041 * Always drop traffic that's in an invalid state. This is
1042 * enforced at a higher priority than ACLs can be defined. */
1043 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1045 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,

1048 /* Ingress and Egress ACL Table (Priority 65535).
1050 * Always allow traffic that is established to a committed
1051 * conntrack entry. This is enforced at a higher priority than
1052 * ACLs can be defined. */
1053 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1054 "ct.est && !ct.rel && !ct.new && !ct.inv",
1056 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1057 "ct.est && !ct.rel && !ct.new && !ct.inv",

1060 /* Ingress and Egress ACL Table (Priority 65535).
1062 * Always allow traffic that is related to an existing conntrack
1063 * entry. This is enforced at a higher priority than ACLs can
1066 * NOTE: This does not support related data sessions (eg,
1067 * a dynamically negotiated FTP data channel), but will allow
1068 * related traffic such as an ICMP Port Unreachable through
1069 * that's generated from a non-listening UDP port. */
1070 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1071 "!ct.est && ct.rel && !ct.new && !ct.inv",
1073 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1074 "!ct.est && ct.rel && !ct.new && !ct.inv",

1078 /* Ingress or Egress ACL Table (Various priorities). */
/* One logical flow per northbound ACL, at acl->priority + offset. */
1079 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1080 struct nbrec_acl *acl = od->nbs->acls[i];
1081 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1082 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

1084 if (!strcmp(acl->action, "allow")) {
1085 /* If there are any stateful flows, we must even commit "allow"
1086 * actions. This is because, while the initiater's
1087 * direction may not have any stateful rules, the server's
1088 * may and then its return traffic would not have an
1089 * associated conntrack entry and would return "+invalid". */
1090 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1091 ovn_lflow_add(lflows, od, stage,
1092 acl->priority + OVN_ACL_PRI_OFFSET,
1093 acl->match, actions);
1094 } else if (!strcmp(acl->action, "allow-related")) {
1095 struct ds match = DS_EMPTY_INITIALIZER;

1097 /* Commit the connection tracking entry, which allows all
1098 * other traffic related to this entry to flow due to the
1099 * 65535 priority flow defined earlier. */
1100 ds_put_format(&match, "ct.new && (%s)", acl->match);
1101 ovn_lflow_add(lflows, od, stage,
1102 acl->priority + OVN_ACL_PRI_OFFSET,
1103 ds_cstr(&match), "ct_commit; next;");
1106 } else if (!strcmp(acl->action, "drop")) {
1107 ovn_lflow_add(lflows, od, stage,
1108 acl->priority + OVN_ACL_PRI_OFFSET,
1109 acl->match, "drop;");
1110 } else if (!strcmp(acl->action, "reject")) {
1111 /* xxx Need to support "reject". */
1112 VLOG_INFO("reject is not a supported action");
1113 ovn_lflow_add(lflows, od, stage,
1114 acl->priority + OVN_ACL_PRI_OFFSET,
1115 acl->match, "drop;");
1121 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1122 struct hmap *lflows, struct hmap *mcgroups)
1124 /* This flow table structure is documented in ovn-northd(8), so please
1125 * update ovn-northd.8.xml if you change anything. */
/* NOTE(review): this span is the *tail* of build_lswitch_flows(); its
 * signature and opening lie above this excerpt, and the embedded original
 * line numbers jump (e.g. 1130 -> 1135), so interior lines (braces,
 * statement continuations) are elided here.  Comments below describe only
 * what the visible code demonstrates. */
1127 /* Build pre-ACL and ACL tables for both ingress and egress.
1128 * Ingress tables 1 and 2. Egress tables 0 and 1. */
1129 struct ovn_datapath *od;
1130 HMAP_FOR_EACH (od, key_node, datapaths) {
1135 build_acls(od, lflows, ports);
/* Ingress table 0 is built per-datapath: two priority-100 drop rules for
 * traffic the pipeline refuses outright. */
1138 /* Logical switch ingress table 0: Admission control framework (priority
1140 HMAP_FOR_EACH (od, key_node, datapaths) {
1145 /* Logical VLANs not supported. */
1146 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1149 /* Broadcast/multicast source address is invalid. */
1150 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1153 /* Port security flows have priority 50 (see below) and will continue
1154 * to the next table if packet source is acceptable. */
1157 /* Logical switch ingress table 0: Ingress port security (priority 50). */
1158 struct ovn_port *op;
1159 HMAP_FOR_EACH (op, key_node, ports) {
1164 if (!lport_is_enabled(op->nbs)) {
1165 /* Drop packets from disabled logical ports (since logical flow
1166 * tables are default-drop). */
/* Match "inport == <port>" plus the port-security constraints on eth.src;
 * conforming packets advance via "next;". */
1170 struct ds match = DS_EMPTY_INITIALIZER;
1171 ds_put_format(&match, "inport == %s", op->json_key);
1172 build_port_security("eth.src",
1173 op->nbs->port_security, op->nbs->n_port_security,
1175 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1176 ds_cstr(&match), "next;");
1180 /* Ingress table 3: Destination lookup, ARP reply for known IPs.
1181 * (priority 150). */
1182 HMAP_FOR_EACH (op, key_node, ports) {
1188 * Add ARP reply flows if either the
1190 * - port type is router
/* Skip ports that are down unless their type is "router" (strcmp != 0
 * means not a router port). */
1192 if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
1196 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
/* Each addresses[] entry that parses as "MAC IP" yields one flow that
 * turns an ARP request for that IP into an ARP reply sent back out the
 * ingress port (outport = inport; inport cleared to permit that). */
1200 if (ovs_scan(op->nbs->addresses[i],
1201 ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1202 ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1203 char *match = xasprintf(
1204 "arp.tpa == "IP_FMT" && arp.op == 1", IP_ARGS(ip));
1205 char *actions = xasprintf(
1206 "eth.dst = eth.src; "
1207 "eth.src = "ETH_ADDR_FMT"; "
1208 "arp.op = 2; /* ARP reply */ "
1209 "arp.tha = arp.sha; "
1210 "arp.sha = "ETH_ADDR_FMT"; "
1211 "arp.tpa = arp.spa; "
1212 "arp.spa = "IP_FMT"; "
1213 "outport = inport; "
1214 "inport = \"\"; /* Allow sending out inport. */ "
1219 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 150,
1227 /* Ingress table 3: Destination lookup, broadcast and multicast handling
1228 * (priority 100). */
1229 HMAP_FOR_EACH (op, key_node, ports) {
1234 if (lport_is_enabled(op->nbs)) {
1235 ovn_multicast_add(mcgroups, &mc_flood, op);
1238 HMAP_FOR_EACH (od, key_node, datapaths) {
1243 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1244 "outport = \""MC_FLOOD"\"; output;");
1247 /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
1248 HMAP_FOR_EACH (op, key_node, ports) {
1253 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1254 struct eth_addr mac;
/* Three cases per addresses[] entry: a plain MAC -> unicast forwarding
 * flow; the literal string "unknown" -> membership in the MC_UNKNOWN
 * group; anything else -> rate-limited warning. */
1256 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1257 struct ds match, actions;
1260 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1261 ETH_ADDR_ARGS(mac));
1264 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1265 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1266 ds_cstr(&match), ds_cstr(&actions));
1267 ds_destroy(&actions);
1269 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1270 if (lport_is_enabled(op->nbs)) {
1271 ovn_multicast_add(mcgroups, &mc_unknown, op);
1272 op->od->has_unknown = true;
1275 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1278 "%s: invalid syntax '%s' in addresses column",
1279 op->nbs->name, op->nbs->addresses[i]);
1284 /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1285 HMAP_FOR_EACH (od, key_node, datapaths) {
1290 if (od->has_unknown) {
1291 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1292 "outport = \""MC_UNKNOWN"\"; output;");
1296 /* Egress table 2: Egress port security multicast/broadcast (priority
1298 HMAP_FOR_EACH (od, key_node, datapaths) {
1303 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1307 /* Egress table 2: Egress port security (priorities 50 and 150).
1309 * Priority 50 rules implement port security for enabled logical port.
1311 * Priority 150 rules drop packets to disabled logical ports, so that they
1312 * don't even receive multicast or broadcast packets. */
1313 HMAP_FOR_EACH (op, key_node, ports) {
1318 struct ds match = DS_EMPTY_INITIALIZER;
1319 ds_put_format(&match, "outport == %s", op->json_key);
/* Enabled port: apply eth.dst port security then "output;".  The
 * priority-150 "drop;" branch (disabled port) shares the same match. */
1320 if (lport_is_enabled(op->nbs)) {
1321 build_port_security("eth.dst", op->nbs->port_security,
1322 op->nbs->n_port_security, &match);
1323 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1324 ds_cstr(&match), "output;");
1326 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1327 ds_cstr(&match), "drop;");
/* Returns true if logical router port 'lrport' is enabled.  An empty
 * 'enabled' column (null pointer) is treated as enabled; otherwise the
 * stored boolean decides.  (Return-type line and braces are elided from
 * this excerpt; presumably "static bool" — confirm in full source.) */
1335 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1337 return !lrport->enabled || *lrport->enabled;
/* Adds a logical-router routing flow for 'network'/'mask' to datapath 'od'.
 *
 * The flow matches "ip4.dst == network/mask", decrements the TTL, and sets
 * reg0 (the next-hop register) to 'gateway' when one is given, else to
 * ip4.dst itself (directly attached network).  The flow priority is the
 * prefix length (count_1bits of the mask) so longer prefixes win —
 * longest-prefix-match by construction.
 *
 * NOTE(review): lines are elided between 1345/1347 and after 1360; the
 * branch selecting gateway-vs-ip4.dst and the free(match) presumably live
 * there — confirm against the full source. */
1341 add_route(struct hmap *lflows, struct ovn_datapath *od,
1342 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1344 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1345 IP_ARGS(network), IP_ARGS(mask))
1347 struct ds actions = DS_EMPTY_INITIALIZER;
1348 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1350 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1352 ds_put_cstr(&actions, "ip4.dst");
1354 ds_put_cstr(&actions, "; next;");
1356 /* The priority here is calculated to implement longest-prefix-match
1358 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1359 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1360 ds_destroy(&actions);
/* Builds the logical flows for every OVN logical *router* datapath in
 * 'datapaths', inserting them into 'lflows'.  Stages produced here:
 * admission (table 0), IP input (table 1), IP routing (table 2), ARP
 * resolution (table 3), and egress delivery.
 *
 * NOTE(review): the embedded original line numbers skip repeatedly, so
 * braces, "continue;" statements, and some argument lists are elided from
 * this excerpt. */
1365 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1366 struct hmap *lflows)
1368 /* This flow table structure is documented in ovn-northd(8), so please
1369 * update ovn-northd.8.xml if you change anything. */
1371 /* Logical router ingress table 0: Admission control framework. */
1372 struct ovn_datapath *od;
1373 HMAP_FOR_EACH (od, key_node, datapaths) {
1378 /* Logical VLANs not supported.
1379 * Broadcast/multicast source address is invalid. */
1380 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1381 "vlan.present || eth.src[40]", "drop;");
1384 /* Logical router ingress table 0: match (priority 50). */
1385 struct ovn_port *op;
1386 HMAP_FOR_EACH (op, key_node, ports) {
1391 if (!lrport_is_enabled(op->nbr)) {
1392 /* Drop packets from disabled logical ports (since logical flow
1393 * tables are default-drop). */
/* Admit unicast to the router port's own MAC, plus multicast, arriving
 * on this port. */
1397 char *match = xasprintf(
1398 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1399 ETH_ADDR_ARGS(op->mac), op->json_key);
1400 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1405 /* Logical router ingress table 1: IP Input. */
1406 HMAP_FOR_EACH (od, key_node, datapaths) {
1411 /* L3 admission control: drop multicast and broadcast source, localhost
1412 * source or destination, and zero network source or destination
1413 * (priority 100). */
1414 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1416 "ip4.src == 255.255.255.255 || "
1417 "ip4.src == 127.0.0.0/8 || "
1418 "ip4.dst == 127.0.0.0/8 || "
1419 "ip4.src == 0.0.0.0/8 || "
1420 "ip4.dst == 0.0.0.0/8",
1423 /* Drop Ethernet local broadcast. By definition this traffic should
1424 * not be forwarded.*/
1425 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1426 "eth.bcast", "drop;");
1428 /* Drop IP multicast. */
1429 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1430 "ip4.mcast", "drop;");
/* TTL exhaustion: drop packets whose TTL is 0 or 1 (priority 30). */
1434 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1435 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1436 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1439 /* Pass other traffic not already handled to the next table for
1441 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
/* Per-router-port IP Input flows: anti-spoofing, ICMP echo reply, and
 * ARP reply for the port's own address. */
1444 HMAP_FOR_EACH (op, key_node, ports) {
1449 /* L3 admission control: drop packets that originate from an IP address
1450 * owned by the router or a broadcast address known to the router
1451 * (priority 100). */
1452 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1453 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1454 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1458 /* ICMP echo reply. These flows reply to ICMP echo requests
1459 * received for the router's IP address. */
1461 "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1462 "icmp4.type == 8 && icmp4.code == 0",
1463 op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast))
1464 char *actions = xasprintf(
1465 "ip4.dst = ip4.src; "
1466 "ip4.src = "IP_FMT"; "
1469 "inport = \"\"; /* Allow sending out inport. */ "
1472 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1477 /* ARP reply. These flows reply to ARP requests for the router's own
1480 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1481 op->json_key, IP_ARGS(op->ip));
1482 actions = xasprintf(
1483 "eth.dst = eth.src; "
1484 "eth.src = "ETH_ADDR_FMT"; "
1485 "arp.op = 2; /* ARP reply */ "
1486 "arp.tha = arp.sha; "
1487 "arp.sha = "ETH_ADDR_FMT"; "
1488 "arp.tpa = arp.spa; "
1489 "arp.spa = "IP_FMT"; "
1491 "inport = \"\"; /* Allow sending out inport. */ "
1493 ETH_ADDR_ARGS(op->mac),
1494 ETH_ADDR_ARGS(op->mac),
1497 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1502 /* Drop IP traffic to this router. */
1503 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1504 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1509 /* Logical router ingress table 2: IP Routing.
1511 * A packet that arrives at this table is an IP packet that should be
1512 * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1513 * IP address (leaving ip4.dst, the packet’s final destination, unchanged)
1514 * and advances to the next table for ARP resolution. */
1515 HMAP_FOR_EACH (op, key_node, ports) {
/* Directly attached network: gateway 0 makes add_route() use ip4.dst. */
1520 add_route(lflows, op->od, op->network, op->mask, 0);
1522 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Default route (0.0.0.0/0, priority 0) via the datapath's gateway. */
1528 add_route(lflows, od, 0, 0, od->gateway);
1531 /* XXX destination unreachable */
1533 /* Local router ingress table 3: ARP Resolution.
1535 * Any packet that reaches this table is an IP packet whose next-hop IP
1536 * address is in reg0. (ip4.dst is the final destination.) This table
1537 * resolves the IP address in reg0 into an output port in outport and an
1538 * Ethernet address in eth.dst. */
1539 HMAP_FOR_EACH (op, key_node, ports) {
1541 /* XXX ARP for neighboring router */
/* Switch port on a router-connected switch: pre-populate ARP resolution
 * on the *peer* router from the switch port's known MAC/IP bindings. */
1542 } else if (op->od->n_router_ports) {
1543 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1547 if (ovs_scan(op->nbs->addresses[i],
1548 ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1549 ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1550 for (size_t j = 0; j < op->od->n_router_ports; j++) {
1551 /* Get the Logical_Router_Port that the Logical_Port is
1552 * connected to, as 'peer'. */
1553 const char *peer_name = smap_get(
1554 &op->od->router_ports[j]->nbs->options,
1560 struct ovn_port *peer
1561 = ovn_port_find(ports, peer_name);
1562 if (!peer || !peer->nbr) {
1566 /* Make sure that 'ip' is in 'peer''s network. */
1567 if ((ip ^ peer->network) & peer->mask) {
1571 char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1572 char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1573 "eth.dst = "ETH_ADDR_FMT"; "
1576 ETH_ADDR_ARGS(peer->mac),
1579 ovn_lflow_add(lflows, peer->od,
1580 S_ROUTER_IN_ARP, 200, match, actions);
1590 /* Logical router egress table 0: Delivery (priority 100).
1592 * Priority 100 rules deliver packets to enabled logical ports. */
1593 HMAP_FOR_EACH (op, key_node, ports) {
1598 if (!lrport_is_enabled(op->nbr)) {
1599 /* Drop packets to disabled logical ports (since logical flow
1600 * tables are default-drop). */
1604 char *match = xasprintf("outport == %s", op->json_key);
1605 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1611 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1612 * constructing their contents based on the OVN_NB database. */
/* Strategy (visible below): build the desired flow/multicast sets in local
 * hmaps, then reconcile against what is already in the southbound DB —
 * matching rows are kept (and removed from the local set), stale rows are
 * deleted, and whatever remains in the local set is inserted.
 * NOTE(review): several interior lines (braces, if-conditions around the
 * delete/destroy calls) are elided from this excerpt. */
1614 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1617 struct hmap lflows = HMAP_INITIALIZER(&lflows);
1618 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1620 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1621 build_lrouter_flows(datapaths, ports, &lflows);
1623 /* Push changes to the Logical_Flow table to database. */
1624 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1625 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1626 struct ovn_datapath *od
1627 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* Row's datapath no longer exists (presumably the elided condition):
 * delete the stale southbound row. */
1629 sbrec_logical_flow_delete(sbflow);
1633 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1634 enum ovn_pipeline pipeline
1635 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1636 struct ovn_lflow *lflow = ovn_lflow_find(
1637 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1638 sbflow->priority, sbflow->match, sbflow->actions);
/* Found in the desired set: keep the DB row, drop the local copy so it
 * is not re-inserted below; otherwise delete the DB row. */
1640 ovn_lflow_destroy(&lflows, lflow);
1642 sbrec_logical_flow_delete(sbflow);
/* Whatever is left in 'lflows' is new: insert one southbound row each,
 * tagging external_ids with the human-readable stage name. */
1645 struct ovn_lflow *lflow, *next_lflow;
1646 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1647 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1648 uint8_t table = ovn_stage_get_table(lflow->stage);
1650 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1651 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1652 sbrec_logical_flow_set_pipeline(
1653 sbflow, pipeline == P_IN ? "ingress" : "egress");
1654 sbrec_logical_flow_set_table_id(sbflow, table);
1655 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1656 sbrec_logical_flow_set_match(sbflow, lflow->match);
1657 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
1659 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1660 ovn_stage_to_str(lflow->stage));
1661 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1663 ovn_lflow_destroy(&lflows, lflow);
1665 hmap_destroy(&lflows);
1667 /* Push changes to the Multicast_Group table to database. */
1668 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1669 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1670 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1673 sbrec_multicast_group_delete(sbmc);
1677 struct multicast_group group = { .name = sbmc->name,
1678 .key = sbmc->tunnel_key };
1679 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
/* Matching group: refresh its port set in place and drop the local copy;
 * otherwise the DB row is stale and is deleted. */
1681 ovn_multicast_update_sbrec(mc, sbmc);
1682 ovn_multicast_destroy(&mcgroups, mc);
1684 sbrec_multicast_group_delete(sbmc);
1687 struct ovn_multicast *mc, *next_mc;
1688 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1689 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1690 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1691 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1692 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1693 ovn_multicast_update_sbrec(mc, sbmc);
1694 ovn_multicast_destroy(&mcgroups, mc);
1696 hmap_destroy(&mcgroups);
/* One northbound-driven pass: recomputes datapaths, ports, and logical
 * flows from the OVN_NB contents and pushes the result into OVN_SB via
 * 'ctx'.  Bails out early when there is no southbound transaction to
 * write into.  All intermediate state is torn down before returning. */
1700 ovnnb_db_run(struct northd_context *ctx)
1702 if (!ctx->ovnsb_txn) {
1705 VLOG_DBG("ovn-nb db contents may have changed.");
1706 struct hmap datapaths, ports;
1707 build_datapaths(ctx, &datapaths);
1708 build_ports(ctx, &datapaths, &ports);
1709 build_lflows(ctx, &datapaths, &ports);
/* Cleanup: destroy every datapath and port, then the maps themselves. */
1711 struct ovn_datapath *dp, *next_dp;
1712 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1713 ovn_datapath_destroy(&datapaths, dp);
1715 hmap_destroy(&datapaths);
1717 struct ovn_port *port, *next_port;
1718 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1719 ovn_port_destroy(&ports, port);
1721 hmap_destroy(&ports);
1725 * The only change we get notified about is if the 'chassis' column of the
1726 * 'Port_Binding' table changes. When this column is not empty, it means we
1727 * need to set the corresponding logical port as 'up' in the northbound DB.
/* Southbound-driven pass: mirrors Port_Binding 'chassis' state back into
 * the northbound Logical_Port 'up' column.  Requires a northbound
 * transaction ('ctx->ovnnb_txn'); returns early without one.  A temporary
 * hash table maps logical port names to NB rows for O(1) lookup per
 * binding. */
1730 ovnsb_db_run(struct northd_context *ctx)
1732 if (!ctx->ovnnb_txn) {
1735 struct hmap lports_hmap;
1736 const struct sbrec_port_binding *sb;
1737 const struct nbrec_logical_port *nb;
/* Local node type: one entry per northbound Logical_Port, keyed by
 * hash_string(name). */
1739 struct lport_hash_node {
1740 struct hmap_node node;
1741 const struct nbrec_logical_port *nb;
1742 } *hash_node, *hash_node_next;
1744 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1746 hmap_init(&lports_hmap);
1748 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1749 hash_node = xzalloc(sizeof *hash_node);
1751 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1754 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
/* Resolve the binding's logical_port name to its NB row (hash lookup,
 * then exact string compare to handle collisions). */
1756 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1757 hash_string(sb->logical_port, 0),
1759 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1766 /* The logical port doesn't exist for this port binding. This can
1767 * happen under normal circumstances when ovn-northd hasn't gotten
1768 * around to pruning the Port_Binding yet. */
/* Flip 'up' only when it would actually change: bound but not marked up,
 * or unbound but still marked up.  (The lines setting the local 'up'
 * value are elided from this excerpt.) */
1772 if (sb->chassis && (!nb->up || !*nb->up)) {
1774 nbrec_logical_port_set_up(nb, &up, 1);
1775 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1777 nbrec_logical_port_set_up(nb, &up, 1);
/* Free the lookup table. */
1781 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1782 hmap_remove(&lports_hmap, &hash_node->node);
1785 hmap_destroy(&lports_hmap);
/* Cached default OVSDB connection target, "unix:<rundir>/db.sock",
 * built lazily (the surrounding default_db() function body is mostly
 * elided from this excerpt — presumably it returns 'default_db_' after
 * first-use initialization; confirm in full source). */
1789 static char *default_db_;
1795 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
/* Parses ovn-northd command-line options, setting the file-scope
 * 'ovnnb_db' / 'ovnsb_db' connection strings (falling back to
 * default_db() when not given) and handling the standard daemon, vlog,
 * and SSL option groups.  Option handling cases between the getopt_long
 * call and the defaults (e.g. 'd', 'D', 'h', 'o', 'V') are elided from
 * this excerpt. */
1801 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1804 DAEMON_OPTION_ENUMS,
1807 static const struct option long_options[] = {
1808 {"ovnsb-db", required_argument, NULL, 'd'},
1809 {"ovnnb-db", required_argument, NULL, 'D'},
1810 {"help", no_argument, NULL, 'h'},
1811 {"options", no_argument, NULL, 'o'},
1812 {"version", no_argument, NULL, 'V'},
1813 DAEMON_LONG_OPTIONS,
1815 STREAM_SSL_LONG_OPTIONS,
/* Derive the short-option string from the long-option table; freed at
 * the end of this function. */
1818 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1823 c = getopt_long(argc, argv, short_options, long_options, NULL);
1829 DAEMON_OPTION_HANDLERS;
1830 VLOG_OPTION_HANDLERS;
1831 STREAM_SSL_OPTION_HANDLERS;
1846 ovs_cmdl_print_options(long_options);
1850 ovs_print_version(0, 0);
/* Default both DB targets when the corresponding option was absent. */
1859 ovnsb_db = default_db();
1863 ovnnb_db = default_db();
1866 free(short_options);
/* Registers 'column' with the IDL 'idl' for replication but suppresses
 * change alerts for it — used for columns that ovn-northd itself writes,
 * so its own updates do not wake it up again. */
1870 add_column_noalert(struct ovsdb_idl *idl,
1871 const struct ovsdb_idl_column *column)
1873 ovsdb_idl_add_column(idl, column);
1874 ovsdb_idl_omit_alert(idl, column);
/* ovn-northd entry point: daemon setup, unixctl server, IDL configuration
 * for both databases, then the main loop (loop body partially elided in
 * this excerpt — presumably it calls ovnnb_db_run()/ovnsb_db_run();
 * confirm in full source). */
1878 main(int argc, char *argv[])
1880 int res = EXIT_SUCCESS;
1881 struct unixctl_server *unixctl;
1885 fatal_ignore_sigpipe();
1886 set_program_name(argv[0]);
1887 service_start(&argc, &argv);
1888 parse_options(argc, argv);
1890 daemonize_start(false);
1892 retval = unixctl_server_create(NULL, &unixctl);
1896 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1898 daemonize_complete();
1903 /* We want to detect all changes to the ovn-nb db. */
1904 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
1905 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* Southbound IDL is created with monitor_everything=false; only the
 * tables/columns registered below are replicated. */
1907 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
1908 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
/* Logical_Flow: all columns written by northd, alerts suppressed. */
1910 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
1911 add_column_noalert(ovnsb_idl_loop.idl,
1912 &sbrec_logical_flow_col_logical_datapath);
1913 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
1914 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
1915 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
1916 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
1917 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
1919 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
1920 add_column_noalert(ovnsb_idl_loop.idl,
1921 &sbrec_multicast_group_col_datapath);
1922 add_column_noalert(ovnsb_idl_loop.idl,
1923 &sbrec_multicast_group_col_tunnel_key);
1924 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
1925 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
1927 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
1928 add_column_noalert(ovnsb_idl_loop.idl,
1929 &sbrec_datapath_binding_col_tunnel_key);
1930 add_column_noalert(ovnsb_idl_loop.idl,
1931 &sbrec_datapath_binding_col_external_ids);
1933 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
1934 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
1935 add_column_noalert(ovnsb_idl_loop.idl,
1936 &sbrec_port_binding_col_logical_port);
1937 add_column_noalert(ovnsb_idl_loop.idl,
1938 &sbrec_port_binding_col_tunnel_key);
1939 add_column_noalert(ovnsb_idl_loop.idl,
1940 &sbrec_port_binding_col_parent_port);
1941 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
1942 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
1943 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
1944 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
/* 'chassis' is the one column we DO want alerts for — it drives
 * ovnsb_db_run()'s 'up' reconciliation. */
1945 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
/* Main loop: each iteration runs both IDL loops, yielding (possibly null)
 * transactions for this pass. */
1950 struct northd_context ctx = {
1951 .ovnnb_idl = ovnnb_idl_loop.idl,
1952 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
1953 .ovnsb_idl = ovnsb_idl_loop.idl,
1954 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
1960 unixctl_server_run(unixctl);
1961 unixctl_server_wait(unixctl);
/* On exit request, wake immediately instead of blocking in poll. */
1963 poll_immediate_wake();
1965 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
1966 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
1969 if (should_service_stop()) {
1974 unixctl_server_destroy(unixctl);
1975 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
1976 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
/* unixctl "exit" command handler.  'exiting_' is the &exiting flag that
 * main() registered; the line that sets it true is elided from this
 * excerpt (presumably "*exiting = true;" — confirm in full source).
 * Replies to the connection to acknowledge the command. */
1984 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
1985 const char *argv[] OVS_UNUSED, void *exiting_)
1987 bool *exiting = exiting_;
1990 unixctl_command_reply(conn, NULL);