2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
35 #include "stream-ssl.h"
39 #include "openvswitch/vlog.h"
/* Logging module name for this file; log messages appear as "ovn_northd". */
VLOG_DEFINE_THIS_MODULE(ovn_northd);

/* unixctl "exit" callback — presumably registered in main(); the registration
 * is not visible in this chunk. */
static unixctl_cb_func ovn_northd_exit;
/* State shared by one iteration of the main loop: IDL handles for the two
 * OVN databases plus the transactions (if any) currently open on them. */
struct northd_context {
    struct ovsdb_idl *ovnnb_idl;     /* OVN_Northbound database session. */
    struct ovsdb_idl *ovnsb_idl;     /* OVN_Southbound database session. */
    struct ovsdb_idl_txn *ovnnb_txn; /* Open NB transaction, or NULL. */
    struct ovsdb_idl_txn *ovnsb_txn; /* Open SB transaction, or NULL. */
};
/* Connection strings for the two databases; set from --ovnnb-db/--ovnsb-db
 * command-line options, falling back to default_db(). */
static const char *ovnnb_db;
static const char *ovnsb_db;

static const char *default_db(void);
/* Pipeline stages. */

/* The two pipelines in an OVN logical flow table. */
enum ovn_pipeline {
    P_IN,                       /* Ingress pipeline. */
    P_OUT                       /* Egress pipeline. */
};

/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH,                  /* OVN logical switch. */
    DP_ROUTER                   /* OVN logical router. */
};
/* Returns an "enum ovn_stage" built from the arguments.
 *
 * Bit layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
78 /* A stage within an OVN logical switch or router.
80 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81 * or router, whether the stage is part of the ingress or egress pipeline, and
82 * the table within that pipeline. The first three components are combined to
83 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84 * S_ROUTER_OUT_DELIVERY. */
86 #define PIPELINE_STAGES \
87 /* Logical switch ingress stages. */ \
88 PIPELINE_STAGE(SWITCH, IN, PORT_SEC, 0, "ls_in_port_sec") \
89 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 1, "ls_in_pre_acl") \
90 PIPELINE_STAGE(SWITCH, IN, ACL, 2, "ls_in_acl") \
91 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 3, "ls_in_l2_lkup") \
93 /* Logical switch egress stages. */ \
94 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
95 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
96 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC, 2, "ls_out_port_sec") \
98 /* Logical router ingress stages. */ \
99 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
100 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
101 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 2, "lr_in_ip_routing") \
102 PIPELINE_STAGE(ROUTER, IN, ARP, 3, "lr_in_arp") \
104 /* Logical router egress stages. */ \
105 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 0, "lr_out_delivery")
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
108 S_##DP_TYPE##_##PIPELINE##_##STAGE \
109 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
111 #undef PIPELINE_STAGE
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
#define OVN_ACL_PRI_OFFSET 1000
120 /* Returns an "enum ovn_stage" built from the arguments. */
121 static enum ovn_stage
122 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
125 return OVN_STAGE_BUILD(dp_type, pipeline, table);
128 /* Returns the pipeline to which 'stage' belongs. */
129 static enum ovn_pipeline
130 ovn_stage_get_pipeline(enum ovn_stage stage)
132 return (stage >> 8) & 1;
135 /* Returns the table to which 'stage' belongs. */
137 ovn_stage_get_table(enum ovn_stage stage)
142 /* Returns a string name for 'stage'. */
144 ovn_stage_to_str(enum ovn_stage stage)
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
148 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
150 #undef PIPELINE_STAGE
151 default: return "<unknown>";
159 %s: OVN northbound management daemon\n\
160 usage: %s [OPTIONS]\n\
163 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
165 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
167 -h, --help display this help message\n\
168 -o, --options list available options\n\
169 -V, --version display version information\n\
170 ", program_name, program_name, default_db(), default_db());
173 stream_usage("database", true, true, false);
177 struct hmap_node hmap_node;
182 destroy_tnlids(struct hmap *tnlids)
184 struct tnlid_node *node, *next;
185 HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
186 hmap_remove(tnlids, &node->hmap_node);
189 hmap_destroy(tnlids);
193 add_tnlid(struct hmap *set, uint32_t tnlid)
195 struct tnlid_node *node = xmalloc(sizeof *node);
196 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
201 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
203 const struct tnlid_node *node;
204 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
205 if (node->tnlid == tnlid) {
213 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
216 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
217 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
218 if (!tnlid_in_use(set, tnlid)) {
219 add_tnlid(set, tnlid);
225 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
226 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
230 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
231 * sb->external_ids:logical-switch. */
232 struct ovn_datapath {
233 struct hmap_node key_node; /* Index on 'key'. */
234 struct uuid key; /* (nbs/nbr)->header_.uuid. */
236 const struct nbrec_logical_switch *nbs; /* May be NULL. */
237 const struct nbrec_logical_router *nbr; /* May be NULL. */
238 const struct sbrec_datapath_binding *sb; /* May be NULL. */
240 struct ovs_list list; /* In list of similar records. */
242 /* Logical router data (digested from nbr). */
245 /* Logical switch data. */
246 struct ovn_port **router_ports;
247 size_t n_router_ports;
249 struct hmap port_tnlids;
250 uint32_t port_key_hint;
255 static struct ovn_datapath *
256 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
257 const struct nbrec_logical_switch *nbs,
258 const struct nbrec_logical_router *nbr,
259 const struct sbrec_datapath_binding *sb)
261 struct ovn_datapath *od = xzalloc(sizeof *od);
266 hmap_init(&od->port_tnlids);
267 od->port_key_hint = 0;
268 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
273 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
276 /* Don't remove od->list. It is used within build_datapaths() as a
277 * private list and once we've exited that function it is not safe to
279 hmap_remove(datapaths, &od->key_node);
280 destroy_tnlids(&od->port_tnlids);
281 free(od->router_ports);
286 static struct ovn_datapath *
287 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
289 struct ovn_datapath *od;
291 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
292 if (uuid_equals(uuid, &od->key)) {
299 static struct ovn_datapath *
300 ovn_datapath_from_sbrec(struct hmap *datapaths,
301 const struct sbrec_datapath_binding *sb)
305 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
306 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
309 return ovn_datapath_find(datapaths, &key);
313 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
314 struct ovs_list *sb_only, struct ovs_list *nb_only,
315 struct ovs_list *both)
317 hmap_init(datapaths);
322 const struct sbrec_datapath_binding *sb, *sb_next;
323 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
325 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327 ovsdb_idl_txn_add_comment(
329 "deleting Datapath_Binding "UUID_FMT" that lacks "
330 "external-ids:logical-switch and "
331 "external-ids:logical-router",
332 UUID_ARGS(&sb->header_.uuid));
333 sbrec_datapath_binding_delete(sb);
337 if (ovn_datapath_find(datapaths, &key)) {
338 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
340 &rl, "deleting Datapath_Binding "UUID_FMT" with "
341 "duplicate external-ids:logical-switch/router "UUID_FMT,
342 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
343 sbrec_datapath_binding_delete(sb);
347 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
349 list_push_back(sb_only, &od->list);
352 const struct nbrec_logical_switch *nbs;
353 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
354 struct ovn_datapath *od = ovn_datapath_find(datapaths,
358 list_remove(&od->list);
359 list_push_back(both, &od->list);
361 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
363 list_push_back(nb_only, &od->list);
367 const struct nbrec_logical_router *nbr;
368 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
369 struct ovn_datapath *od = ovn_datapath_find(datapaths,
374 list_remove(&od->list);
375 list_push_back(both, &od->list);
378 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
380 "duplicate UUID "UUID_FMT" in OVN_Northbound",
381 UUID_ARGS(&nbr->header_.uuid));
385 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
387 list_push_back(nb_only, &od->list);
391 if (nbr->default_gw) {
393 char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
394 if (error || !ip || mask != OVS_BE32_MAX) {
395 static struct vlog_rate_limit rl
396 = VLOG_RATE_LIMIT_INIT(5, 1);
397 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
/* Allocates a fresh 24-bit datapath tunnel key not present in 'dp_tnlids'.
 * Returns 0 if the space is exhausted. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t hint;
    return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
}
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
416 struct ovs_list sb_only, nb_only, both;
418 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
420 if (!list_is_empty(&nb_only)) {
421 /* First index the in-use datapath tunnel IDs. */
422 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423 struct ovn_datapath *od;
424 LIST_FOR_EACH (od, list, &both) {
425 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
428 /* Add southbound record for each unmatched northbound record. */
429 LIST_FOR_EACH (od, list, &nb_only) {
430 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
435 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
437 char uuid_s[UUID_LEN + 1];
438 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439 const char *key = od->nbs ? "logical-switch" : "logical-router";
440 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441 sbrec_datapath_binding_set_external_ids(od->sb, &id);
443 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
445 destroy_tnlids(&dp_tnlids);
448 /* Delete southbound records without northbound matches. */
449 struct ovn_datapath *od, *next;
450 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451 list_remove(&od->list);
452 sbrec_datapath_binding_delete(od->sb);
453 ovn_datapath_destroy(datapaths, od);
458 struct hmap_node key_node; /* Index on 'key'. */
459 char *key; /* nbs->name, nbr->name, sb->logical_port. */
460 char *json_key; /* 'key', quoted for use in JSON. */
462 const struct nbrec_logical_port *nbs; /* May be NULL. */
463 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
464 const struct sbrec_port_binding *sb; /* May be NULL. */
466 /* Logical router port data. */
467 ovs_be32 ip, mask; /* 192.168.10.123/24. */
468 ovs_be32 network; /* 192.168.10.0. */
469 ovs_be32 bcast; /* 192.168.10.255. */
471 struct ovn_port *peer;
473 struct ovn_datapath *od;
475 struct ovs_list list; /* In list of similar records. */
478 static struct ovn_port *
479 ovn_port_create(struct hmap *ports, const char *key,
480 const struct nbrec_logical_port *nbs,
481 const struct nbrec_logical_router_port *nbr,
482 const struct sbrec_port_binding *sb)
484 struct ovn_port *op = xzalloc(sizeof *op);
486 struct ds json_key = DS_EMPTY_INITIALIZER;
487 json_string_escape(key, &json_key);
488 op->json_key = ds_steal_cstr(&json_key);
490 op->key = xstrdup(key);
494 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
499 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
502 /* Don't remove port->list. It is used within build_ports() as a
503 * private list and once we've exited that function it is not safe to
505 hmap_remove(ports, &port->key_node);
506 free(port->json_key);
512 static struct ovn_port *
513 ovn_port_find(struct hmap *ports, const char *name)
517 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
518 if (!strcmp(op->key, name)) {
526 ovn_port_allocate_key(struct ovn_datapath *od)
528 return allocate_tnlid(&od->port_tnlids, "port",
529 (1u << 15) - 1, &od->port_key_hint);
533 join_logical_ports(struct northd_context *ctx,
534 struct hmap *datapaths, struct hmap *ports,
535 struct ovs_list *sb_only, struct ovs_list *nb_only,
536 struct ovs_list *both)
543 const struct sbrec_port_binding *sb;
544 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
547 list_push_back(sb_only, &op->list);
550 struct ovn_datapath *od;
551 HMAP_FOR_EACH (od, key_node, datapaths) {
553 for (size_t i = 0; i < od->nbs->n_ports; i++) {
554 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555 struct ovn_port *op = ovn_port_find(ports, nbs->name);
557 if (op->nbs || op->nbr) {
558 static struct vlog_rate_limit rl
559 = VLOG_RATE_LIMIT_INIT(5, 1);
560 VLOG_WARN_RL(&rl, "duplicate logical port %s",
565 list_remove(&op->list);
566 list_push_back(both, &op->list);
568 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569 list_push_back(nb_only, &op->list);
575 for (size_t i = 0; i < od->nbr->n_ports; i++) {
576 const struct nbrec_logical_router_port *nbr
580 if (!eth_addr_from_string(nbr->mac, &mac)) {
581 static struct vlog_rate_limit rl
582 = VLOG_RATE_LIMIT_INIT(5, 1);
583 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
588 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590 static struct vlog_rate_limit rl
591 = VLOG_RATE_LIMIT_INIT(5, 1);
592 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
597 struct ovn_port *op = ovn_port_find(ports, nbr->name);
599 if (op->nbs || op->nbr) {
600 static struct vlog_rate_limit rl
601 = VLOG_RATE_LIMIT_INIT(5, 1);
602 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
607 list_remove(&op->list);
608 list_push_back(both, &op->list);
610 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
611 list_push_back(nb_only, &op->list);
616 op->network = ip & mask;
617 op->bcast = ip | ~mask;
625 /* Connect logical router ports, and logical switch ports of type "router",
628 HMAP_FOR_EACH (op, key_node, ports) {
629 if (op->nbs && !strcmp(op->nbs->type, "router")) {
630 const char *peer_name = smap_get(&op->nbs->options, "router-port");
635 struct ovn_port *peer = ovn_port_find(ports, peer_name);
636 if (!peer || !peer->nbr) {
642 op->od->router_ports = xrealloc(
643 op->od->router_ports,
644 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
645 op->od->router_ports[op->od->n_router_ports++] = op;
646 } else if (op->nbr && op->nbr->peer) {
647 op->peer = ovn_port_find(ports, op->nbr->name);
653 ovn_port_update_sbrec(const struct ovn_port *op)
655 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
657 sbrec_port_binding_set_type(op->sb, "patch");
659 const char *peer = op->peer ? op->peer->key : "<error>";
660 const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
661 sbrec_port_binding_set_options(op->sb, &ids);
663 sbrec_port_binding_set_parent_port(op->sb, NULL);
664 sbrec_port_binding_set_tag(op->sb, NULL, 0);
665 sbrec_port_binding_set_mac(op->sb, NULL, 0);
667 if (strcmp(op->nbs->type, "router")) {
668 sbrec_port_binding_set_type(op->sb, op->nbs->type);
669 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
671 sbrec_port_binding_set_type(op->sb, "patch");
673 const char *router_port = smap_get(&op->nbs->options,
676 router_port = "<error>";
678 const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
679 sbrec_port_binding_set_options(op->sb, &ids);
681 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
682 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
683 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
684 op->nbs->n_addresses);
689 build_ports(struct northd_context *ctx, struct hmap *datapaths,
692 struct ovs_list sb_only, nb_only, both;
694 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
696 /* For logical ports that are in both databases, update the southbound
697 * record based on northbound data. Also index the in-use tunnel_keys. */
698 struct ovn_port *op, *next;
699 LIST_FOR_EACH_SAFE (op, next, list, &both) {
700 ovn_port_update_sbrec(op);
702 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
703 if (op->sb->tunnel_key > op->od->port_key_hint) {
704 op->od->port_key_hint = op->sb->tunnel_key;
708 /* Add southbound record for each unmatched northbound record. */
709 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
710 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
715 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
716 ovn_port_update_sbrec(op);
718 sbrec_port_binding_set_logical_port(op->sb, op->key);
719 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
722 /* Delete southbound records without northbound matches. */
723 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
724 list_remove(&op->list);
725 sbrec_port_binding_delete(op->sb);
726 ovn_port_destroy(ports, op);
/* Keys reserved for logical multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group with its well-known tunnel key. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

/* Flood group: all enabled ports on a datapath. */
#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };

/* Unknown group: ports that accept packets to unknown MACs. */
#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };

/* Returns true if 'a' and 'b' name the same group with the same key. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    return !strcmp(a->name, b->name) && a->key == b->key;
}
751 /* Multicast group entry. */
752 struct ovn_multicast {
753 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
754 struct ovn_datapath *datapath;
755 const struct multicast_group *group;
757 struct ovn_port **ports;
758 size_t n_ports, allocated_ports;
762 ovn_multicast_hash(const struct ovn_datapath *datapath,
763 const struct multicast_group *group)
765 return hash_pointer(datapath, group->key);
768 static struct ovn_multicast *
769 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
770 const struct multicast_group *group)
772 struct ovn_multicast *mc;
774 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
775 ovn_multicast_hash(datapath, group), mcgroups) {
776 if (mc->datapath == datapath
777 && multicast_group_equal(mc->group, group)) {
785 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
786 struct ovn_port *port)
788 struct ovn_datapath *od = port->od;
789 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
791 mc = xmalloc(sizeof *mc);
792 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
796 mc->allocated_ports = 4;
797 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
799 if (mc->n_ports >= mc->allocated_ports) {
800 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
803 mc->ports[mc->n_ports++] = port;
807 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
810 hmap_remove(mcgroups, &mc->hmap_node);
817 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
818 const struct sbrec_multicast_group *sb)
820 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
821 for (size_t i = 0; i < mc->n_ports; i++) {
822 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
824 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
828 /* Logical flow generation.
830 * This code generates the Logical_Flow table in the southbound database, as a
831 * function of most of the northbound database.
835 struct hmap_node hmap_node;
837 struct ovn_datapath *od;
838 enum ovn_stage stage;
845 ovn_lflow_hash(const struct ovn_lflow *lflow)
847 size_t hash = uuid_hash(&lflow->od->key);
848 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
849 hash = hash_string(lflow->match, hash);
850 return hash_string(lflow->actions, hash);
854 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
856 return (a->od == b->od
857 && a->stage == b->stage
858 && a->priority == b->priority
859 && !strcmp(a->match, b->match)
860 && !strcmp(a->actions, b->actions));
864 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
865 enum ovn_stage stage, uint16_t priority,
866 char *match, char *actions)
869 lflow->stage = stage;
870 lflow->priority = priority;
871 lflow->match = match;
872 lflow->actions = actions;
875 /* Adds a row with the specified contents to the Logical_Flow table. */
877 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
878 enum ovn_stage stage, uint16_t priority,
879 const char *match, const char *actions)
881 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
882 ovn_lflow_init(lflow, od, stage, priority,
883 xstrdup(match), xstrdup(actions));
884 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
887 static struct ovn_lflow *
888 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
889 enum ovn_stage stage, uint16_t priority,
890 const char *match, const char *actions)
892 struct ovn_lflow target;
893 ovn_lflow_init(&target, od, stage, priority,
894 CONST_CAST(char *, match), CONST_CAST(char *, actions));
896 struct ovn_lflow *lflow;
897 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
899 if (ovn_lflow_equal(lflow, &target)) {
907 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
910 hmap_remove(lflows, &lflow->hmap_node);
912 free(lflow->actions);
917 struct ipv4_netaddr {
922 struct ipv6_netaddr {
923 struct in6_addr addr;
927 struct lport_addresses {
930 struct ipv4_netaddr *ipv4_addrs;
932 struct ipv6_netaddr *ipv6_addrs;
936 * Extracts the mac, ipv4 and ipv6 addresses from the input param 'address'
937 * which should be of the format 'MAC [IP1 IP2 ..]" where IPn should be
938 * a valid IPv4 or IPv6 address and stores them in the 'ipv4_addrs' and
939 * 'ipv6_addrs' fields of input param 'laddrs'.
940 * The caller has to free the 'ipv4_addrs' and 'ipv6_addrs' fields.
941 * If input param 'store_ipv6' is true only then extracted ipv6 addresses
942 * are stored in 'ipv6_addrs' fields.
943 * Return true if at least 'MAC' is found in 'address', false otherwise.
945 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
946 * 30.0.0.3/23' and 'store_ipv6' = true
947 * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 1.
950 * If 'address' = '00:00:00:00:00:01 10.0.0.4 fe80::ea2a:eaff:fe28:3390/64
951 * 30.0.0.3/23' and 'store_ipv6' = false
952 * then returns true with laddrs->n_ipv4_addrs = 2, naddrs->n_ipv6_addrs = 0.
954 * Eg 3. If 'address' = '00:00:00:00:00:01 10.0.0.4 addr 30.0.0.4', then
955 * returns true with laddrs->n_ipv4_addrs = 1 and laddrs->n_ipv6_addrs = 0.
958 extract_lport_addresses(char *address, struct lport_addresses *laddrs,
963 char *buf_end = buf + strlen(address);
964 if (!ovs_scan_len(buf, &buf_index, ETH_ADDR_SCAN_FMT,
965 ETH_ADDR_SCAN_ARGS(laddrs->ea))) {
966 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
967 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address. No MAC address"
977 laddrs->n_ipv4_addrs = 0;
978 laddrs->n_ipv6_addrs = 0;
979 laddrs->ipv4_addrs = NULL;
980 laddrs->ipv6_addrs = NULL;
982 /* Loop through the buffer and extract the IPv4/IPv6 addresses
983 * and store in the 'laddrs'. Break the loop if invalid data is found.
986 while (buf < buf_end) {
988 error = ip_parse_cidr_len(buf, &buf_index, &ip4, &plen);
990 laddrs->n_ipv4_addrs++;
991 laddrs->ipv4_addrs = xrealloc(
993 sizeof (struct ipv4_netaddr) * laddrs->n_ipv4_addrs);
994 laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].addr = ip4;
995 laddrs->ipv4_addrs[laddrs->n_ipv4_addrs - 1].plen = plen;
1000 error = ipv6_parse_cidr_len(buf, &buf_index, &ip6, &plen);
1001 if (!error && store_ipv6) {
1002 laddrs->n_ipv6_addrs++;
1003 laddrs->ipv6_addrs = xrealloc(
1005 sizeof(struct ipv6_netaddr) * laddrs->n_ipv6_addrs);
1006 memcpy(&laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].addr, &ip6,
1007 sizeof(struct in6_addr));
1008 laddrs->ipv6_addrs[laddrs->n_ipv6_addrs - 1].plen = plen;
1012 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1013 VLOG_INFO_RL(&rl, "invalid syntax '%s' in address", address);
1023 /* Appends port security constraints on L2 address field 'eth_addr_field'
1024 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
1025 * 'n_port_security' elements, is the collection of port_security constraints
1026 * from an OVN_NB Logical_Port row. */
1028 build_port_security(const char *eth_addr_field,
1029 char **port_security, size_t n_port_security,
1032 size_t base_len = match->length;
1033 ds_put_format(match, " && %s == {", eth_addr_field);
1036 for (size_t i = 0; i < n_port_security; i++) {
1039 if (eth_addr_from_string(port_security[i], &ea)) {
1040 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1041 ds_put_char(match, ' ');
1045 ds_chomp(match, ' ');
1046 ds_put_cstr(match, "}");
1049 match->length = base_len;
1054 lport_is_enabled(const struct nbrec_logical_port *lport)
1056 return !lport->enabled || *lport->enabled;
1060 lport_is_up(const struct nbrec_logical_port *lport)
1062 return !lport->up || *lport->up;
1066 has_stateful_acl(struct ovn_datapath *od)
1068 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1069 struct nbrec_acl *acl = od->nbs->acls[i];
1070 if (!strcmp(acl->action, "allow-related")) {
1079 build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
1081 bool has_stateful = has_stateful_acl(od);
1082 struct ovn_port *op;
1083 struct ds match_in, match_out;
1085 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1086 * allowed by default. */
1087 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1088 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
1090 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1091 * default. A related rule at priority 1 is added below if there
1092 * are any stateful ACLs in this datapath. */
1093 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1094 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1096 /* If there are any stateful ACL rules in this dapapath, we must
1097 * send all IP packets through the conntrack action, which handles
1098 * defragmentation, in order to match L4 headers. */
1100 HMAP_FOR_EACH (op, key_node, ports) {
1101 if (op->od == od && !strcmp(op->nbs->type, "router")) {
1102 /* Can't use ct() for router ports. Consider the following configuration:
1103 lp1(10.0.0.2) on hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB,
1104 For a ping from lp1 to lp2, First, the response will go through ct()
1105 with a zone for lp2 in the ls2 ingress pipeline on hostB.
1106 That ct zone knows about this connection. Next, it goes through ct()
1107 with the zone for the router port in the egress pipeline of ls2 on hostB.
1108 This zone does not know about the connection, as the icmp request
1109 went through the logical router on hostA, not hostB. This would only work
1110 with distributed conntrack state across all chassis. */
1113 ds_init(&match_out);
1114 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1115 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1116 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, ds_cstr(&match_in), "next;");
1117 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, ds_cstr(&match_out), "next;");
1119 ds_destroy(&match_in);
1120 ds_destroy(&match_out);
1124 /* Ingress and Egress Pre-ACL Table (Priority 100).
1126 * Regardless of whether the ACL is "from-lport" or "to-lport",
1127 * we need rules in both the ingress and egress table, because
1128 * the return traffic needs to be followed. */
1129 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1130 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
1132 /* Ingress and Egress ACL Table (Priority 1).
1134 * By default, traffic is allowed. This is partially handled by
1135 * the Priority 0 ACL flows added earlier, but we also need to
1136 * commit IP flows. This is because, while the initiater's
1137 * direction may not have any stateful rules, the server's may
1138 * and then its return traffic would not have an associated
1139 * conntrack entry and would return "+invalid". */
1140 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1141 "ct_commit; next;");
1142 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1143 "ct_commit; next;");
1145 /* Ingress and Egress ACL Table (Priority 65535).
1147 * Always drop traffic that's in an invalid state. This is
1148 * enforced at a higher priority than ACLs can be defined. */
1149 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1151 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1154 /* Ingress and Egress ACL Table (Priority 65535).
1156 * Always allow traffic that is established to a committed
1157 * conntrack entry. This is enforced at a higher priority than
1158 * ACLs can be defined. */
1159 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1160 "ct.est && !ct.rel && !ct.new && !ct.inv",
1162 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1163 "ct.est && !ct.rel && !ct.new && !ct.inv",
1166 /* Ingress and Egress ACL Table (Priority 65535).
1168 * Always allow traffic that is related to an existing conntrack
1169 * entry. This is enforced at a higher priority than ACLs can
1172 * NOTE: This does not support related data sessions (eg,
1173 * a dynamically negotiated FTP data channel), but will allow
1174 * related traffic such as an ICMP Port Unreachable through
1175 * that's generated from a non-listening UDP port. */
1176 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1177 "!ct.est && ct.rel && !ct.new && !ct.inv",
1179 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1180 "!ct.est && ct.rel && !ct.new && !ct.inv",
1184 /* Ingress or Egress ACL Table (Various priorities). */
1185 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1186 struct nbrec_acl *acl = od->nbs->acls[i];
1187 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1188 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1190 if (!strcmp(acl->action, "allow")) {
1191 /* If there are any stateful flows, we must even commit "allow"
1192 * actions. This is because, while the initiater's
1193 * direction may not have any stateful rules, the server's
1194 * may and then its return traffic would not have an
1195 * associated conntrack entry and would return "+invalid". */
1196 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1197 ovn_lflow_add(lflows, od, stage,
1198 acl->priority + OVN_ACL_PRI_OFFSET,
1199 acl->match, actions);
1200 } else if (!strcmp(acl->action, "allow-related")) {
1201 struct ds match = DS_EMPTY_INITIALIZER;
1203 /* Commit the connection tracking entry, which allows all
1204 * other traffic related to this entry to flow due to the
1205 * 65535 priority flow defined earlier. */
1206 ds_put_format(&match, "ct.new && (%s)", acl->match);
1207 ovn_lflow_add(lflows, od, stage,
1208 acl->priority + OVN_ACL_PRI_OFFSET,
1209 ds_cstr(&match), "ct_commit; next;");
1212 } else if (!strcmp(acl->action, "drop")) {
1213 ovn_lflow_add(lflows, od, stage,
1214 acl->priority + OVN_ACL_PRI_OFFSET,
1215 acl->match, "drop;");
1216 } else if (!strcmp(acl->action, "reject")) {
1217 /* xxx Need to support "reject". */
1218 VLOG_INFO("reject is not a supported action");
1219 ovn_lflow_add(lflows, od, stage,
1220 acl->priority + OVN_ACL_PRI_OFFSET,
1221 acl->match, "drop;");
/* Builds the logical flows for every OVN logical switch in 'datapaths':
 * ingress admission control and port security, ACL stages (via build_acls()),
 * L2 destination lookup (ARP responder, broadcast/multicast flood, known
 * unicast, unknown MACs), and egress port security.  Generated flows are
 * accumulated in 'lflows' and multicast group memberships in 'mcgroups'.
 *
 * NOTE(review): this extract elides some original source lines (braces and
 * statements), so the visible text is not the complete function body. */
1227 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1228 struct hmap *lflows, struct hmap *mcgroups)
1230 /* This flow table structure is documented in ovn-northd(8), so please
1231 * update ovn-northd.8.xml if you change anything. */
1233 /* Build pre-ACL and ACL tables for both ingress and egress.
1234 * Ingress tables 1 and 2. Egress tables 0 and 1. */
1235 struct ovn_datapath *od;
1236 HMAP_FOR_EACH (od, key_node, datapaths) {
1241 build_acls(od, lflows, ports);
1244 /* Logical switch ingress table 0: Admission control framework (priority
1246 HMAP_FOR_EACH (od, key_node, datapaths) {
1251 /* Logical VLANs not supported. */
1252 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1255 /* Broadcast/multicast source address is invalid. */
1256 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1259 /* Port security flows have priority 50 (see below) and will continue
1260 * to the next table if packet source is acceptable. */
1263 /* Logical switch ingress table 0: Ingress port security (priority 50). */
1264 struct ovn_port *op;
1265 HMAP_FOR_EACH (op, key_node, ports) {
1270 if (!lport_is_enabled(op->nbs)) {
1271 /* Drop packets from disabled logical ports (since logical flow
1272 * tables are default-drop). */
1276 struct ds match = DS_EMPTY_INITIALIZER;
1277 ds_put_format(&match, "inport == %s", op->json_key);
/* Append "eth.src == {...}" constraints derived from the port's
 * port_security column to 'match'. */
1278 build_port_security("eth.src",
1279 op->nbs->port_security, op->nbs->n_port_security,
1281 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1282 ds_cstr(&match), "next;");
1286 /* Ingress table 3: Destination lookup, ARP reply for known IPs.
1287 * (priority 150). */
1288 HMAP_FOR_EACH (op, key_node, ports) {
1294 * Add ARP reply flows if either the
1296 * - port type is router
1298 if (!lport_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
/* For each IPv4 address configured on the port, install a flow that
 * turns an ARP request for that address into an ARP reply, swapping
 * Ethernet/ARP source and target fields and hairpinning the packet
 * back out the ingress port. */
1302 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1303 struct lport_addresses laddrs;
1304 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
1308 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1309 char *match = xasprintf(
1310 "arp.tpa == "IP_FMT" && arp.op == 1",
1311 IP_ARGS(laddrs.ipv4_addrs[j].addr));
1312 char *actions = xasprintf(
1313 "eth.dst = eth.src; "
1314 "eth.src = "ETH_ADDR_FMT"; "
1315 "arp.op = 2; /* ARP reply */ "
1316 "arp.tha = arp.sha; "
1317 "arp.sha = "ETH_ADDR_FMT"; "
1318 "arp.tpa = arp.spa; "
1319 "arp.spa = "IP_FMT"; "
1320 "outport = inport; "
1321 "inport = \"\"; /* Allow sending out inport. */ "
1323 ETH_ADDR_ARGS(laddrs.ea),
1324 ETH_ADDR_ARGS(laddrs.ea),
1325 IP_ARGS(laddrs.ipv4_addrs[j].addr));
1326 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 150,
1332 free(laddrs.ipv4_addrs);
1336 /* Ingress table 3: Destination lookup, broadcast and multicast handling
1337 * (priority 100). */
1338 HMAP_FOR_EACH (op, key_node, ports) {
1343 if (lport_is_enabled(op->nbs)) {
1344 ovn_multicast_add(mcgroups, &mc_flood, op);
1347 HMAP_FOR_EACH (od, key_node, datapaths) {
1352 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1353 "outport = \""MC_FLOOD"\"; output;");
1356 /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
1357 HMAP_FOR_EACH (op, key_node, ports) {
1362 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1363 struct eth_addr mac;
/* Each entry in the northbound "addresses" column is either a MAC
 * address (forward to this port), the literal "unknown" (flood
 * unknown-destination traffic to this port), or invalid. */
1365 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1366 struct ds match, actions;
1369 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1370 ETH_ADDR_ARGS(mac));
1373 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1374 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1375 ds_cstr(&match), ds_cstr(&actions));
1376 ds_destroy(&actions);
1378 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1379 if (lport_is_enabled(op->nbs)) {
1380 ovn_multicast_add(mcgroups, &mc_unknown, op);
1381 op->od->has_unknown = true;
1384 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1387 "%s: invalid syntax '%s' in addresses column",
1388 op->nbs->name, op->nbs->addresses[i]);
1393 /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1394 HMAP_FOR_EACH (od, key_node, datapaths) {
1399 if (od->has_unknown) {
1400 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1401 "outport = \""MC_UNKNOWN"\"; output;");
1405 /* Egress table 2: Egress port security multicast/broadcast (priority
1407 HMAP_FOR_EACH (od, key_node, datapaths) {
1412 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1416 /* Egress table 2: Egress port security (priorities 50 and 150).
1418 * Priority 50 rules implement port security for enabled logical port.
1420 * Priority 150 rules drop packets to disabled logical ports, so that they
1421 * don't even receive multicast or broadcast packets. */
1422 HMAP_FOR_EACH (op, key_node, ports) {
1427 struct ds match = DS_EMPTY_INITIALIZER;
1428 ds_put_format(&match, "outport == %s", op->json_key);
1429 if (lport_is_enabled(op->nbs)) {
1430 build_port_security("eth.dst", op->nbs->port_security,
1431 op->nbs->n_port_security, &match);
1432 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1433 ds_cstr(&match), "output;");
1435 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1436 ds_cstr(&match), "drop;");
/* Returns true if logical router port 'lrport' is administratively enabled.
 * An absent (NULL) 'enabled' column means enabled by default; otherwise the
 * column's boolean value decides. */
1444 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1446 return !lrport->enabled || *lrport->enabled;
/* Adds one IP routing flow to 'lflows' for logical router datapath 'od':
 * packets with ip4.dst inside 'network'/'mask' get their TTL decremented and
 * reg0 set to the next hop ('gateway' if nonzero, otherwise ip4.dst itself
 * for directly-connected routes), then advance to the ARP-resolution table.
 * All arguments are in network byte order. */
1450 add_route(struct hmap *lflows, struct ovn_datapath *od,
1451 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1453 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1454 IP_ARGS(network), IP_ARGS(mask));
1456 struct ds actions = DS_EMPTY_INITIALIZER;
1457 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1459 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1461 ds_put_cstr(&actions, "ip4.dst");
1463 ds_put_cstr(&actions, "; next;");
1465 /* The priority here is calculated to implement longest-prefix-match
1467 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
/* Priority == number of 1-bits in the mask, so a longer prefix wins. */
1468 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
/* NOTE(review): a free(match) presumably follows in elided lines — confirm
 * against the full source. */
1469 ds_destroy(&actions);
/* Builds the logical flows for every OVN logical router in 'datapaths':
 * ingress admission control, IP input processing (L3 sanity checks, ICMP
 * echo reply, ARP reply, TTL handling), IP routing, ARP resolution, and
 * egress delivery.  Generated flows are accumulated in 'lflows'.
 *
 * NOTE(review): this extract elides some original source lines (braces and
 * statements), so the visible text is not the complete function body. */
1474 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1475 struct hmap *lflows)
1477 /* This flow table structure is documented in ovn-northd(8), so please
1478 * update ovn-northd.8.xml if you change anything. */
1480 /* Logical router ingress table 0: Admission control framework. */
1481 struct ovn_datapath *od;
1482 HMAP_FOR_EACH (od, key_node, datapaths) {
1487 /* Logical VLANs not supported.
1488 * Broadcast/multicast source address is invalid. */
1489 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1490 "vlan.present || eth.src[40]", "drop;");
1493 /* Logical router ingress table 0: match (priority 50). */
1494 struct ovn_port *op;
1495 HMAP_FOR_EACH (op, key_node, ports) {
1500 if (!lrport_is_enabled(op->nbr)) {
1501 /* Drop packets from disabled logical ports (since logical flow
1502 * tables are default-drop). */
/* Accept only multicast or packets addressed to this router port's
 * own MAC arriving on this port. */
1506 char *match = xasprintf(
1507 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1508 ETH_ADDR_ARGS(op->mac), op->json_key);
1509 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1514 /* Logical router ingress table 1: IP Input. */
1515 HMAP_FOR_EACH (od, key_node, datapaths) {
1520 /* L3 admission control: drop multicast and broadcast source, localhost
1521 * source or destination, and zero network source or destination
1522 * (priority 100). */
1523 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1525 "ip4.src == 255.255.255.255 || "
1526 "ip4.src == 127.0.0.0/8 || "
1527 "ip4.dst == 127.0.0.0/8 || "
1528 "ip4.src == 0.0.0.0/8 || "
1529 "ip4.dst == 0.0.0.0/8",
1532 /* Drop Ethernet local broadcast. By definition this traffic should
1533 * not be forwarded. */
1534 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1535 "eth.bcast", "drop;");
1537 /* Drop IP multicast. */
1538 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1539 "ip4.mcast", "drop;");
1543 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1544 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1545 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1548 /* Pass other traffic not already handled to the next table for
1550 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1553 HMAP_FOR_EACH (op, key_node, ports) {
1558 /* L3 admission control: drop packets that originate from an IP address
1559 * owned by the router or a broadcast address known to the router
1560 * (priority 100). */
1561 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1562 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1563 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1567 /* ICMP echo reply. These flows reply to ICMP echo requests
1568 * received for the router's IP address. */
1570 "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1571 "icmp4.type == 8 && icmp4.code == 0",
1572 op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
1573 char *actions = xasprintf(
/* Swap source/destination IPs and hairpin the reply back out the
 * ingress port. */
1574 "ip4.dst = ip4.src; "
1575 "ip4.src = "IP_FMT"; "
1578 "inport = \"\"; /* Allow sending out inport. */ "
1581 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1586 /* ARP reply. These flows reply to ARP requests for the router's own
1589 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1590 op->json_key, IP_ARGS(op->ip));
1591 actions = xasprintf(
1592 "eth.dst = eth.src; "
1593 "eth.src = "ETH_ADDR_FMT"; "
1594 "arp.op = 2; /* ARP reply */ "
1595 "arp.tha = arp.sha; "
1596 "arp.sha = "ETH_ADDR_FMT"; "
1597 "arp.tpa = arp.spa; "
1598 "arp.spa = "IP_FMT"; "
1600 "inport = \"\"; /* Allow sending out inport. */ "
1602 ETH_ADDR_ARGS(op->mac),
1603 ETH_ADDR_ARGS(op->mac),
1606 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1611 /* Drop IP traffic to this router. */
1612 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1613 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1618 /* Logical router ingress table 2: IP Routing.
1620 * A packet that arrives at this table is an IP packet that should be
1621 * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1622 * IP address (leaving ip4.dst, the packet's final destination, unchanged)
1623 * and advances to the next table for ARP resolution. */
1624 HMAP_FOR_EACH (op, key_node, ports) {
/* Directly-connected route: gateway 0 makes add_route() use ip4.dst
 * itself as the next hop. */
1629 add_route(lflows, op->od, op->network, op->mask, 0);
1631 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Default route (0.0.0.0/0) through the router's configured gateway. */
1637 add_route(lflows, od, 0, 0, od->gateway);
1640 /* XXX destination unreachable */
1642 /* Local router ingress table 3: ARP Resolution.
1644 * Any packet that reaches this table is an IP packet whose next-hop IP
1645 * address is in reg0. (ip4.dst is the final destination.) This table
1646 * resolves the IP address in reg0 into an output port in outport and an
1647 * Ethernet address in eth.dst. */
1648 HMAP_FOR_EACH (op, key_node, ports) {
1650 /* XXX ARP for neighboring router */
1651 } else if (op->od->n_router_ports) {
/* For a switch port's known IP/MAC bindings, pre-populate ARP
 * resolution on the attached router(s) so no dynamic ARP is needed. */
1652 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1653 struct lport_addresses laddrs;
1654 if (!extract_lport_addresses(op->nbs->addresses[i], &laddrs,
1659 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
1660 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
1661 for (size_t j = 0; j < op->od->n_router_ports; j++) {
1662 /* Get the Logical_Router_Port that the Logical_Port is
1663 * connected to, as 'peer'. */
1664 const char *peer_name = smap_get(
1665 &op->od->router_ports[j]->nbs->options,
1671 struct ovn_port *peer
1672 = ovn_port_find(ports, peer_name);
1673 if (!peer || !peer->nbr) {
1677 /* Make sure that 'ip' is in 'peer''s network. */
1678 if ((ip ^ peer->network) & peer->mask) {
1682 char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1683 char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1684 "eth.dst = "ETH_ADDR_FMT"; "
1687 ETH_ADDR_ARGS(peer->mac),
1688 ETH_ADDR_ARGS(laddrs.ea),
1690 ovn_lflow_add(lflows, peer->od,
1691 S_ROUTER_IN_ARP, 200, match, actions);
1698 free(laddrs.ipv4_addrs);
1703 /* Logical router egress table 0: Delivery (priority 100).
1705 * Priority 100 rules deliver packets to enabled logical ports. */
1706 HMAP_FOR_EACH (op, key_node, ports) {
1711 if (!lrport_is_enabled(op->nbr)) {
1712 /* Drop packets to disabled logical ports (since logical flow
1713 * tables are default-drop). */
1717 char *match = xasprintf("outport == %s", op->json_key);
1718 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1724 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1725 * constructing their contents based on the OVN_NB database.
 *
 * Strategy: compute the desired flow/multicast sets in memory, then diff them
 * against what is already in the southbound DB — matching rows are kept (and
 * removed from the in-memory set), stale rows are deleted, and whatever
 * remains in the in-memory set is inserted as new rows.
 *
 * NOTE(review): this extract elides some original source lines, so a few
 * braces and statements are not visible here. */
1727 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1730 struct hmap lflows = HMAP_INITIALIZER(&lflows);
1731 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
/* Phase 1: compute the full desired set of logical flows. */
1733 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1734 build_lrouter_flows(datapaths, ports, &lflows);
1736 /* Push changes to the Logical_Flow table to database. */
1737 const struct sbrec_logical_flow *sbflow, *next_sbflow;
1738 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1739 struct ovn_datapath *od
1740 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* Flow references a datapath we no longer know about: delete it. */
1742 sbrec_logical_flow_delete(sbflow);
1746 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1747 enum ovn_pipeline pipeline
1748 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1749 struct ovn_lflow *lflow = ovn_lflow_find(
1750 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1751 sbflow->priority, sbflow->match, sbflow->actions);
/* Existing row matches a desired flow: keep it, drop the desired
 * entry so it is not re-inserted below. */
1753 ovn_lflow_destroy(&lflows, lflow);
1755 sbrec_logical_flow_delete(sbflow);
/* Phase 2: insert the desired flows that had no existing row. */
1758 struct ovn_lflow *lflow, *next_lflow;
1759 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1760 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1761 uint8_t table = ovn_stage_get_table(lflow->stage);
1763 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1764 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1765 sbrec_logical_flow_set_pipeline(
1766 sbflow, pipeline == P_IN ? "ingress" : "egress");
1767 sbrec_logical_flow_set_table_id(sbflow, table);
1768 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1769 sbrec_logical_flow_set_match(sbflow, lflow->match);
1770 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name for debugging. */
1772 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1773 ovn_stage_to_str(lflow->stage));
1774 sbrec_logical_flow_set_external_ids(sbflow, &ids);
1776 ovn_lflow_destroy(&lflows, lflow);
1778 hmap_destroy(&lflows);
1780 /* Push changes to the Multicast_Group table to database. */
1781 const struct sbrec_multicast_group *sbmc, *next_sbmc;
1782 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1783 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1786 sbrec_multicast_group_delete(sbmc);
1790 struct multicast_group group = { .name = sbmc->name,
1791 .key = sbmc->tunnel_key };
1792 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
/* Existing group matches: refresh its port set, consume the entry. */
1794 ovn_multicast_update_sbrec(mc, sbmc);
1795 ovn_multicast_destroy(&mcgroups, mc);
1797 sbrec_multicast_group_delete(sbmc);
/* Insert multicast groups that had no existing row. */
1800 struct ovn_multicast *mc, *next_mc;
1801 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1802 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1803 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1804 sbrec_multicast_group_set_name(sbmc, mc->group->name);
1805 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1806 ovn_multicast_update_sbrec(mc, sbmc);
1807 ovn_multicast_destroy(&mcgroups, mc);
1809 hmap_destroy(&mcgroups);
/* Handles northbound-database changes: rebuilds the southbound Datapath,
 * Port_Binding, Logical_Flow, and Multicast_Group contents from the current
 * OVN_NB contents, then frees the in-memory datapath/port indexes.
 * Requires an open southbound transaction; returns early without one. */
1813 ovnnb_db_run(struct northd_context *ctx)
1815 if (!ctx->ovnsb_txn) {
1818 VLOG_DBG("ovn-nb db contents may have changed.");
1819 struct hmap datapaths, ports;
1820 build_datapaths(ctx, &datapaths);
1821 build_ports(ctx, &datapaths, &ports);
1822 build_lflows(ctx, &datapaths, &ports);
/* Tear down the temporary indexes built above. */
1824 struct ovn_datapath *dp, *next_dp;
1825 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1826 ovn_datapath_destroy(&datapaths, dp);
1828 hmap_destroy(&datapaths);
1830 struct ovn_port *port, *next_port;
1831 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1832 ovn_port_destroy(&ports, port);
1834 hmap_destroy(&ports);
1838 * The only change we get notified about is if the 'chassis' column of the
1839 * 'Port_Binding' table changes. When this column is not empty, it means we
1840 * need to set the corresponding logical port as 'up' in the northbound DB.
/* Requires an open northbound transaction; returns early without one.
 * NOTE(review): this extract elides some original source lines, so a few
 * braces and statements (e.g. the local 'up' booleans written below) are
 * not visible here. */
1843 ovnsb_db_run(struct northd_context *ctx)
1845 if (!ctx->ovnnb_txn) {
1848 struct hmap lports_hmap;
1849 const struct sbrec_port_binding *sb;
1850 const struct nbrec_logical_port *nb;
/* Temporary name -> northbound Logical_Port index, so each Port_Binding
 * lookup below is O(1) instead of a scan. */
1852 struct lport_hash_node {
1853 struct hmap_node node;
1854 const struct nbrec_logical_port *nb;
1855 } *hash_node, *hash_node_next;
1857 VLOG_DBG("Recalculating port up states for ovn-nb db.");
1859 hmap_init(&lports_hmap);
1861 NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1862 hash_node = xzalloc(sizeof *hash_node);
1864 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1867 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1869 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1870 hash_string(sb->logical_port, 0),
/* Hash collision check: confirm an actual name match. */
1872 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1879 /* The logical port doesn't exist for this port binding. This can
1880 * happen under normal circumstances when ovn-northd hasn't gotten
1881 * around to pruning the Port_Binding yet. */
/* Reflect binding state in the northbound 'up' column, writing only
 * when the stored value disagrees with the chassis binding. */
1885 if (sb->chassis && (!nb->up || !*nb->up)) {
1887 nbrec_logical_port_set_up(nb, &up, 1);
1888 } else if (!sb->chassis && (!nb->up || *nb->up)) {
1890 nbrec_logical_port_set_up(nb, &up, 1);
/* Free the temporary index. */
1894 HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1895 hmap_remove(&lports_hmap, &hash_node->node);
1898 hmap_destroy(&lports_hmap);
/* Lazily-computed default OVSDB connection target ("unix:<rundir>/db.sock").
 * NOTE(review): the surrounding default_db() function lines are elided in
 * this extract; 'default_db_' is presumably computed once and cached. */
1902 static char *default_db_;
1908 default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
/* Parses ovn-northd command-line options, setting the file-scope 'ovnnb_db'
 * and 'ovnsb_db' connection strings (falling back to default_db()) and
 * handling the shared daemon/vlog/SSL option groups.
 * NOTE(review): this extract elides some original source lines (the getopt
 * loop structure, help/version exits, and several case labels). */
1914 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1917 DAEMON_OPTION_ENUMS,
1920 static const struct option long_options[] = {
1921 {"ovnsb-db", required_argument, NULL, 'd'},
1922 {"ovnnb-db", required_argument, NULL, 'D'},
1923 {"help", no_argument, NULL, 'h'},
1924 {"options", no_argument, NULL, 'o'},
1925 {"version", no_argument, NULL, 'V'},
1926 DAEMON_LONG_OPTIONS,
1928 STREAM_SSL_LONG_OPTIONS,
/* Derive the short-option string from the table above so the two can
 * never fall out of sync. */
1931 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1936 c = getopt_long(argc, argv, short_options, long_options, NULL);
1942 DAEMON_OPTION_HANDLERS;
1943 VLOG_OPTION_HANDLERS;
1944 STREAM_SSL_OPTION_HANDLERS;
1959 ovs_cmdl_print_options(long_options);
1963 ovs_print_version(0, 0);
/* Databases not named on the command line fall back to the local
 * unix socket returned by default_db(). */
1972 ovnsb_db = default_db();
1976 ovnnb_db = default_db();
1979 free(short_options);
/* Registers 'column' with IDL 'idl' for replication but suppresses change
 * alerts for it: used for columns that ovn-northd writes but does not need
 * to be woken up about when they change. */
1983 add_column_noalert(struct ovsdb_idl *idl,
1984 const struct ovsdb_idl_column *column)
1986 ovsdb_idl_add_column(idl, column);
1987 ovsdb_idl_omit_alert(idl, column);
/* ovn-northd entry point: daemonizes, creates IDL connections to the OVN
 * northbound (monitor everything) and southbound (monitor selected tables
 * only) databases, then loops running ovnnb_db_run()/ovnsb_db_run() until
 * asked to exit via the "exit" unixctl command or service stop.
 * NOTE(review): this extract elides some original source lines (error
 * handling, the main loop structure, and cleanup/exit paths). */
1991 main(int argc, char *argv[])
1993 int res = EXIT_SUCCESS;
1994 struct unixctl_server *unixctl;
1998 fatal_ignore_sigpipe();
1999 set_program_name(argv[0]);
2000 service_start(&argc, &argv);
2001 parse_options(argc, argv);
2003 daemonize_start(false);
2005 retval = unixctl_server_create(NULL, &unixctl);
2009 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
2011 daemonize_complete();
2016 /* We want to detect all changes to the ovn-nb db. */
2017 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2018 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* Southbound: monitor nothing by default ('false'); only the tables and
 * columns registered below are replicated, and most are no-alert because
 * ovn-northd itself is their only writer. */
2020 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2021 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
2023 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
2024 add_column_noalert(ovnsb_idl_loop.idl,
2025 &sbrec_logical_flow_col_logical_datapath);
2026 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
2027 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
2028 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
2029 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
2030 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
2032 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
2033 add_column_noalert(ovnsb_idl_loop.idl,
2034 &sbrec_multicast_group_col_datapath);
2035 add_column_noalert(ovnsb_idl_loop.idl,
2036 &sbrec_multicast_group_col_tunnel_key);
2037 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
2038 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
2040 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
2041 add_column_noalert(ovnsb_idl_loop.idl,
2042 &sbrec_datapath_binding_col_tunnel_key);
2043 add_column_noalert(ovnsb_idl_loop.idl,
2044 &sbrec_datapath_binding_col_external_ids);
2046 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
2047 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
2048 add_column_noalert(ovnsb_idl_loop.idl,
2049 &sbrec_port_binding_col_logical_port);
2050 add_column_noalert(ovnsb_idl_loop.idl,
2051 &sbrec_port_binding_col_tunnel_key);
2052 add_column_noalert(ovnsb_idl_loop.idl,
2053 &sbrec_port_binding_col_parent_port);
2054 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
2055 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
2056 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
2057 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
/* 'chassis' is the one southbound column we DO want alerts for: a change
 * there drives the northbound 'up' state (see ovnsb_db_run()). */
2058 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
/* One iteration of the main loop: open a transaction on each database. */
2063 struct northd_context ctx = {
2064 .ovnnb_idl = ovnnb_idl_loop.idl,
2065 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
2066 .ovnsb_idl = ovnsb_idl_loop.idl,
2067 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
2073 unixctl_server_run(unixctl);
2074 unixctl_server_wait(unixctl);
/* When exiting, wake immediately rather than blocking in poll. */
2076 poll_immediate_wake();
2078 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
2079 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
2082 if (should_service_stop()) {
2087 unixctl_server_destroy(unixctl);
2088 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
2089 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
2097 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2098 const char *argv[] OVS_UNUSED, void *exiting_)
2100 bool *exiting = exiting_;
2103 unixctl_command_reply(conn, NULL);