2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "ovn/lib/ovn-util.h"
34 #include "poll-loop.h"
37 #include "stream-ssl.h"
41 #include "openvswitch/vlog.h"
43 VLOG_DEFINE_THIS_MODULE(ovn_northd);
45 static unixctl_cb_func ovn_northd_exit;
/* Database handles shared by the functions in this file: one IDL and one
 * transaction per database.  'ovnnb_idl' is what the NBREC_*_FOR_EACH loops
 * iterate, 'ovnsb_idl' is what the SBREC_*_FOR_EACH loops iterate, and new
 * southbound rows are inserted through 'ovnsb_txn'. */
47 struct northd_context {
48 struct ovsdb_idl *ovnnb_idl;
49 struct ovsdb_idl *ovnsb_idl;
50 struct ovsdb_idl_txn *ovnnb_txn;
51 struct ovsdb_idl_txn *ovnsb_txn;
54 static const char *ovnnb_db;
55 static const char *ovnsb_db;
57 static const char *default_nb_db(void);
58 static const char *default_sb_db(void);
60 /* Pipeline stages. */
62 /* The two pipelines in an OVN logical flow table. */
/* OVN_STAGE_BUILD shifts this value left by 8 when composing an
 * "enum ovn_stage", and ovn_stage_get_pipeline() reads it back from there. */
64 P_IN, /* Ingress pipeline. */
65 P_OUT /* Egress pipeline. */
68 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
/* OVN_STAGE_BUILD shifts this value left by 9 when composing an
 * "enum ovn_stage". */
69 enum ovn_datapath_type {
70 DP_SWITCH, /* OVN logical switch. */
71 DP_ROUTER /* OVN logical router. */
74 /* Returns an "enum ovn_stage" built from the arguments.
76 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
77 * functions can't be used in enums or switch cases.) */
/* Layout of the result: TABLE is OR'ed in unshifted, PIPELINE is shifted left
 * by 8 and DP_TYPE by 9.  Each argument is parenthesized in the expansion. */
78 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
79 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
81 /* A stage within an OVN logical switch or router.
83 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
84 * or router, whether the stage is part of the ingress or egress pipeline, and
85 * the table within that pipeline. The first three components are combined to
86 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
87 * S_ROUTER_OUT_DELIVERY. */
/* X-macro list of all stages.  Each entry is
 * PIPELINE_STAGE(datapath type, pipeline, stage, table number, name string);
 * users define PIPELINE_STAGE before expanding PIPELINE_STAGES and #undef it
 * afterwards. */
89 #define PIPELINE_STAGES \
90 /* Logical switch ingress stages. */ \
91 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
92 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
93 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
94 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
95 PIPELINE_STAGE(SWITCH, IN, ACL, 4, "ls_in_acl") \
96 PIPELINE_STAGE(SWITCH, IN, ARP_RSP, 5, "ls_in_arp_rsp") \
97 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 6, "ls_in_l2_lkup") \
99 /* Logical switch egress stages. */ \
100 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 0, "ls_out_pre_acl") \
101 PIPELINE_STAGE(SWITCH, OUT, ACL, 1, "ls_out_acl") \
102 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 2, "ls_out_port_sec_ip") \
103 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 3, "ls_out_port_sec_l2") \
105 /* Logical router ingress stages. */ \
106 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
107 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
108 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \
109 PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \
110 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \
111 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \
112 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \
114 /* Logical router egress stages. */ \
115 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
116 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
/* Enumerator generator: an entry becomes S_<DP_TYPE>_<PIPELINE>_<STAGE>, with
 * the value OVN_STAGE_BUILD(DP_<DP_TYPE>, P_<PIPELINE>, TABLE).  NAME is not
 * used by this definition of PIPELINE_STAGE. */
118 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
119 S_##DP_TYPE##_##PIPELINE##_##STAGE \
120 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
122 #undef PIPELINE_STAGE
125 /* Due to various hard-coded priorities needed to implement ACLs, the
126 * northbound database supports a smaller range of ACL priorities than
127 * are available to logical flows. This value is added to an ACL
128 * priority to determine the ACL's logical flow priority. */
129 #define OVN_ACL_PRI_OFFSET 1000
131 /* Returns an "enum ovn_stage" built from the arguments. */
/* Function form of OVN_STAGE_BUILD: 'dp_type', 'pipeline' and 'table' are
 * passed to the macro unchanged. */
132 static enum ovn_stage
133 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
136 return OVN_STAGE_BUILD(dp_type, pipeline, table);
139 /* Returns the pipeline to which 'stage' belongs. */
/* The pipeline is the single bit at position 8 of the stage value, which is
 * where OVN_STAGE_BUILD places it. */
140 static enum ovn_pipeline
141 ovn_stage_get_pipeline(enum ovn_stage stage)
143 return (stage >> 8) & 1;
146 /* Returns the table to which 'stage' belongs. */
148 ovn_stage_get_table(enum ovn_stage stage)
153 /* Returns a string name for 'stage'. */
/* PIPELINE_STAGE is defined here so that an entry of PIPELINE_STAGES yields a
 * 'case' label for its S_* enumerator that returns the entry's NAME string.
 * A value that matches no case returns "<unknown>". */
155 ovn_stage_to_str(enum ovn_stage stage)
158 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
159 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
161 #undef PIPELINE_STAGE
162 default: return "<unknown>";
170 %s: OVN northbound management daemon\n\
171 usage: %s [OPTIONS]\n\
174 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
176 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
178 -h, --help display this help message\n\
179 -o, --options list available options\n\
180 -V, --version display version information\n\
181 ", program_name, program_name, default_nb_db(), default_sb_db());
184 stream_usage("database", true, true, false);
188 struct hmap_node hmap_node;
/* Empties 'tnlids' by popping its nodes one at a time, then destroys the hmap
 * itself. */
193 destroy_tnlids(struct hmap *tnlids)
195 struct tnlid_node *node;
196 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
199 hmap_destroy(tnlids);
/* Inserts a newly allocated "struct tnlid_node" into 'set', using
 * hash_int(tnlid, 0) as its hash. */
203 add_tnlid(struct hmap *set, uint32_t tnlid)
205 struct tnlid_node *node = xmalloc(sizeof *node);
206 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
/* Looks for 'tnlid' in 'set': walks the bucket selected by hash_int(tnlid, 0)
 * and compares each node's 'tnlid' member against the argument. */
211 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
213 const struct tnlid_node *node;
214 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
215 if (node->tnlid == tnlid) {
223 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
226 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
227 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
228 if (!tnlid_in_use(set, tnlid)) {
229 add_tnlid(set, tnlid);
235 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
236 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
240 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
241 * sb->external_ids:logical-switch or sb->external_ids:logical-router. */
242 struct ovn_datapath {
243 struct hmap_node key_node; /* Index on 'key'. */
244 struct uuid key; /* (nbs/nbr)->header_.uuid. */
246 const struct nbrec_logical_switch *nbs; /* May be NULL. */
247 const struct nbrec_logical_router *nbr; /* May be NULL. */
248 const struct sbrec_datapath_binding *sb; /* May be NULL. */
250 struct ovs_list list; /* In list of similar records. */
252 /* Logical router data (digested from nbr). */
/* Reset to NULL by join_datapaths() and chosen in join_logical_ports(). */
253 const struct ovn_port *gateway_port;
256 /* Logical switch data. */
/* Array of 'n_router_ports' pointers, grown with xrealloc() in
 * join_logical_ports() and freed in ovn_datapath_destroy(). */
257 struct ovn_port **router_ports;
258 size_t n_router_ports;
/* Port tunnel keys in use on this datapath, and the hint handed to
 * allocate_tnlid() when a new port key is needed. */
260 struct hmap port_tnlids;
261 uint32_t port_key_hint;
/* Allocates a zero-filled "struct ovn_datapath", initializes its port
 * tunnel-id set and key hint, and inserts it into 'datapaths' hashed by
 * uuid_hash() of its key. */
266 static struct ovn_datapath *
267 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
268 const struct nbrec_logical_switch *nbs,
269 const struct nbrec_logical_router *nbr,
270 const struct sbrec_datapath_binding *sb)
272 struct ovn_datapath *od = xzalloc(sizeof *od);
277 hmap_init(&od->port_tnlids);
278 od->port_key_hint = 0;
279 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
/* Removes 'od' from 'datapaths' and releases its port tunnel-id set and its
 * 'router_ports' array. */
284 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
287 /* Don't remove od->list. It is used within build_datapaths() as a
288 * private list and once we've exited that function it is not safe to
290 hmap_remove(datapaths, &od->key_node);
291 destroy_tnlids(&od->port_tnlids);
292 free(od->router_ports);
/* Searches 'datapaths' for the entry whose key equals 'uuid', visiting only
 * entries whose hash is uuid_hash(uuid). */
297 static struct ovn_datapath *
298 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
300 struct ovn_datapath *od;
302 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
303 if (uuid_equals(uuid, &od->key)) {
/* Looks up the datapath for southbound record 'sb'.  The lookup key is the
 * UUID stored in sb's external_ids under "logical-switch" or, if that is
 * absent, under "logical-router". */
310 static struct ovn_datapath *
311 ovn_datapath_from_sbrec(struct hmap *datapaths,
312 const struct sbrec_datapath_binding *sb)
316 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
317 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
320 return ovn_datapath_find(datapaths, &key);
/* A router counts as enabled when its 'enabled' pointer is null or points to
 * a true value. */
324 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
326 return !lrouter->enabled || *lrouter->enabled;
/* Initializes 'datapaths' and joins the southbound Datapath_Binding rows with
 * the northbound logical switches and routers.
 *
 * Every accepted southbound row first gets a record on 'sb_only'.  A
 * northbound switch or router whose UUID matches an existing record moves
 * that record to 'both'; one with no match gets a new record on 'nb_only'.
 *
 * Southbound rows that lack both the "logical-switch" and "logical-router"
 * external-ids, or that repeat a UUID already seen, are deleted.  Routers are
 * checked with lrouter_is_enabled() before being considered. */
330 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
331 struct ovs_list *sb_only, struct ovs_list *nb_only,
332 struct ovs_list *both)
334 hmap_init(datapaths);
335 ovs_list_init(sb_only);
336 ovs_list_init(nb_only);
339 const struct sbrec_datapath_binding *sb, *sb_next;
340 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
342 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
343 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
344 ovsdb_idl_txn_add_comment(
346 "deleting Datapath_Binding "UUID_FMT" that lacks "
347 "external-ids:logical-switch and "
348 "external-ids:logical-router",
349 UUID_ARGS(&sb->header_.uuid));
350 sbrec_datapath_binding_delete(sb);
354 if (ovn_datapath_find(datapaths, &key)) {
355 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
357 &rl, "deleting Datapath_Binding "UUID_FMT" with "
358 "duplicate external-ids:logical-switch/router "UUID_FMT,
359 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
360 sbrec_datapath_binding_delete(sb);
364 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
366 ovs_list_push_back(sb_only, &od->list);
369 const struct nbrec_logical_switch *nbs;
370 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
371 struct ovn_datapath *od = ovn_datapath_find(datapaths,
375 ovs_list_remove(&od->list);
376 ovs_list_push_back(both, &od->list);
378 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
380 ovs_list_push_back(nb_only, &od->list);
384 const struct nbrec_logical_router *nbr;
385 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
386 if (!lrouter_is_enabled(nbr)) {
390 struct ovn_datapath *od = ovn_datapath_find(datapaths,
395 ovs_list_remove(&od->list);
396 ovs_list_push_back(both, &od->list);
399 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
401 "duplicate UUID "UUID_FMT" in OVN_Northbound",
402 UUID_ARGS(&nbr->header_.uuid));
406 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
408 ovs_list_push_back(nb_only, &od->list);
412 if (nbr->default_gw) {
414 if (!ip_parse(nbr->default_gw, &ip) || !ip) {
415 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
416 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
422 /* Set the gateway port to NULL. If there is a gateway, it will get
423 * filled in as we go through the ports later. */
424 od->gateway_port = NULL;
/* Allocates a datapath tunnel key from 'dp_tnlids' through allocate_tnlid(),
 * with an upper bound of (1u << 24) - 1.  The hint is a function-local
 * static, so it carries over from one call to the next. */
429 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
431 static uint32_t hint;
432 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
435 /* Updates the southbound Datapath_Binding table so that it contains the
436 * logical switches and routers specified by the northbound database.
438 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
439 * switch and router. */
441 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
443 struct ovs_list sb_only, nb_only, both;
445 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
447 if (!ovs_list_is_empty(&nb_only)) {
448 /* First index the in-use datapath tunnel IDs. */
449 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
450 struct ovn_datapath *od;
451 LIST_FOR_EACH (od, list, &both) {
452 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
455 /* Add southbound record for each unmatched northbound record. */
456 LIST_FOR_EACH (od, list, &nb_only) {
457 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
462 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
464 char uuid_s[UUID_LEN + 1];
465 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
466 const char *key = od->nbs ? "logical-switch" : "logical-router";
467 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
468 sbrec_datapath_binding_set_external_ids(od->sb, &id);
470 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
472 destroy_tnlids(&dp_tnlids);
475 /* Delete southbound records without northbound matches. */
476 struct ovn_datapath *od, *next;
477 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
478 ovs_list_remove(&od->list);
479 sbrec_datapath_binding_delete(od->sb);
480 ovn_datapath_destroy(datapaths, od);
485 struct hmap_node key_node; /* Index on 'key'. */
486 char *key; /* nbs->name, nbr->name, sb->logical_port. */
487 char *json_key; /* 'key', quoted for use in JSON. */
489 const struct nbrec_logical_switch_port *nbs; /* May be NULL. */
490 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
491 const struct sbrec_port_binding *sb; /* May be NULL. */
493 /* Logical router port data. */
494 ovs_be32 ip, mask; /* 192.168.10.123/24. */
495 ovs_be32 network; /* 192.168.10.0. */
496 ovs_be32 bcast; /* 192.168.10.255. */
/* Peer port.  For a router port with a 'peer' name, join_logical_ports()
 * sets this from a lookup of that name. */
498 struct ovn_port *peer;
/* Datapath whose southbound row is written to this port's Port_Binding by
 * ovn_port_update_sbrec(). */
500 struct ovn_datapath *od;
502 struct ovs_list list; /* In list of similar records. */
/* Allocates a zero-filled "struct ovn_port" whose 'key' is a copy of the
 * 'key' argument and whose 'json_key' is the JSON-escaped form of it, then
 * inserts the port into 'ports' hashed by hash_string() of the key. */
505 static struct ovn_port *
506 ovn_port_create(struct hmap *ports, const char *key,
507 const struct nbrec_logical_switch_port *nbs,
508 const struct nbrec_logical_router_port *nbr,
509 const struct sbrec_port_binding *sb)
511 struct ovn_port *op = xzalloc(sizeof *op);
513 struct ds json_key = DS_EMPTY_INITIALIZER;
514 json_string_escape(key, &json_key);
515 op->json_key = ds_steal_cstr(&json_key);
517 op->key = xstrdup(key);
521 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
/* Removes 'port' from 'ports' and frees its 'json_key' string. */
526 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
529 /* Don't remove port->list. It is used within build_ports() as a
530 * private list and once we've exited that function it is not safe to
532 hmap_remove(ports, &port->key_node);
533 free(port->json_key);
/* Searches 'ports' for the port whose key string equals 'name', visiting only
 * entries whose hash is hash_string(name, 0). */
539 static struct ovn_port *
540 ovn_port_find(struct hmap *ports, const char *name)
544 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
545 if (!strcmp(op->key, name)) {
/* Allocates a port tunnel key for a port on 'od' through allocate_tnlid(),
 * using od's own id set and hint and an upper bound of (1u << 15) - 1. */
553 ovn_port_allocate_key(struct ovn_datapath *od)
555 return allocate_tnlid(&od->port_tnlids, "port",
556 (1u << 15) - 1, &od->port_key_hint);
/* Joins the southbound Port_Binding rows with the logical switch ports and
 * logical router ports of every datapath in 'datapaths', filling 'ports'.
 *
 * Every southbound row first gets a record on 'sb_only'.  A northbound port
 * whose name matches an existing record moves that record to 'both'; one with
 * no match gets a new record on 'nb_only'.  A name that matches a record
 * already holding a northbound port is reported as a duplicate.
 *
 * Router ports whose 'mac' or 'network' does not parse are reported with a
 * warning.  For the others, the network and broadcast addresses are derived
 * from the parsed address and mask, and the datapath's gateway port is
 * updated when this port is a longer-prefix match for the gateway address.
 *
 * Finally peers are connected: a switch port of type "router" is looked up
 * through its "router-port" option and appended to its datapath's
 * 'router_ports'; a router port with a 'peer' is looked up by that name. */
560 join_logical_ports(struct northd_context *ctx,
561 struct hmap *datapaths, struct hmap *ports,
562 struct ovs_list *sb_only, struct ovs_list *nb_only,
563 struct ovs_list *both)
566 ovs_list_init(sb_only);
567 ovs_list_init(nb_only);
570 const struct sbrec_port_binding *sb;
571 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
572 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
574 ovs_list_push_back(sb_only, &op->list);
577 struct ovn_datapath *od;
578 HMAP_FOR_EACH (od, key_node, datapaths) {
580 for (size_t i = 0; i < od->nbs->n_ports; i++) {
581 const struct nbrec_logical_switch_port *nbs = od->nbs->ports[i];
582 struct ovn_port *op = ovn_port_find(ports, nbs->name);
584 if (op->nbs || op->nbr) {
585 static struct vlog_rate_limit rl
586 = VLOG_RATE_LIMIT_INIT(5, 1);
587 VLOG_WARN_RL(&rl, "duplicate logical port %s",
592 ovs_list_remove(&op->list);
593 ovs_list_push_back(both, &op->list);
595 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
596 ovs_list_push_back(nb_only, &op->list);
602 for (size_t i = 0; i < od->nbr->n_ports; i++) {
603 const struct nbrec_logical_router_port *nbr
607 if (!eth_addr_from_string(nbr->mac, &mac)) {
608 static struct vlog_rate_limit rl
609 = VLOG_RATE_LIMIT_INIT(5, 1);
610 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
615 char *error = ip_parse_masked(nbr->network, &ip, &mask);
616 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
617 static struct vlog_rate_limit rl
618 = VLOG_RATE_LIMIT_INIT(5, 1);
619 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
624 struct ovn_port *op = ovn_port_find(ports, nbr->name);
626 if (op->nbs || op->nbr) {
627 static struct vlog_rate_limit rl
628 = VLOG_RATE_LIMIT_INIT(5, 1);
629 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
634 ovs_list_remove(&op->list);
635 ovs_list_push_back(both, &op->list);
637 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
638 ovs_list_push_back(nb_only, &op->list);
643 op->network = ip & mask;
644 op->bcast = ip | ~mask;
649 /* If 'od' has a gateway and 'op' routes to it... */
650 if (od->gateway && !((op->network ^ od->gateway) & op->mask)) {
651 /* ...and if 'op' is a longer match than the current
653 const struct ovn_port *gw = od->gateway_port;
654 int len = gw ? ip_count_cidr_bits(gw->mask) : 0;
655 if (ip_count_cidr_bits(op->mask) > len) {
656 /* ...then it's the default gateway port. */
657 od->gateway_port = op;
664 /* Connect logical router ports, and logical switch ports of type "router",
667 HMAP_FOR_EACH (op, key_node, ports) {
668 if (op->nbs && !strcmp(op->nbs->type, "router")) {
669 const char *peer_name = smap_get(&op->nbs->options, "router-port");
674 struct ovn_port *peer = ovn_port_find(ports, peer_name);
675 if (!peer || !peer->nbr) {
681 op->od->router_ports = xrealloc(
682 op->od->router_ports,
683 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
684 op->od->router_ports[op->od->n_router_ports++] = op;
685 } else if (op->nbr && op->nbr->peer) {
686 op->peer = ovn_port_find(ports, op->nbr->peer);
/* Copies northbound-derived state for 'op' into its southbound Port_Binding
 * row 'op->sb'.
 *
 * The datapath column is always set.  For a router port, and for a switch
 * port of type "router", the type is set to "gateway" or "patch" (the choice
 * appears to depend on whether a "chassis" option was found), and the options
 * become a new map holding "peer" and, in the gateway case,
 * "gateway-chassis".  Any other switch port copies its northbound type and
 * options.  Router ports clear parent port, tag and MAC; switch ports copy
 * parent name, tag and addresses from the northbound row. */
692 ovn_port_update_sbrec(const struct ovn_port *op)
694 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
696 /* If the router is for l3 gateway, it resides on a chassis
697 * and its port type is "gateway". */
698 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
700 sbrec_port_binding_set_type(op->sb, "gateway");
702 sbrec_port_binding_set_type(op->sb, "patch");
705 const char *peer = op->peer ? op->peer->key : "<error>";
708 smap_add(&new, "peer", peer);
710 smap_add(&new, "gateway-chassis", chassis);
712 sbrec_port_binding_set_options(op->sb, &new);
715 sbrec_port_binding_set_parent_port(op->sb, NULL);
716 sbrec_port_binding_set_tag(op->sb, NULL, 0);
717 sbrec_port_binding_set_mac(op->sb, NULL, 0);
719 if (strcmp(op->nbs->type, "router")) {
720 sbrec_port_binding_set_type(op->sb, op->nbs->type);
721 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
723 const char *chassis = NULL;
724 if (op->peer && op->peer->od && op->peer->od->nbr) {
725 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
728 /* A switch port connected to a gateway router is also of
731 sbrec_port_binding_set_type(op->sb, "gateway");
733 sbrec_port_binding_set_type(op->sb, "patch");
736 const char *router_port = smap_get(&op->nbs->options,
739 router_port = "<error>";
743 smap_add(&new, "peer", router_port);
745 smap_add(&new, "gateway-chassis", chassis);
747 sbrec_port_binding_set_options(op->sb, &new);
750 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
751 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
752 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
753 op->nbs->n_addresses);
757 /* Updates the southbound Port_Binding table so that it contains the logical
758 * switch ports specified by the northbound database.
760 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
761 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
764 build_ports(struct northd_context *ctx, struct hmap *datapaths,
767 struct ovs_list sb_only, nb_only, both;
769 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
771 /* For logical ports that are in both databases, update the southbound
772 * record based on northbound data. Also index the in-use tunnel_keys. */
773 struct ovn_port *op, *next;
774 LIST_FOR_EACH_SAFE (op, next, list, &both) {
775 ovn_port_update_sbrec(op);
777 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
/* Keep the datapath's hint at the largest port key seen so far. */
778 if (op->sb->tunnel_key > op->od->port_key_hint) {
779 op->od->port_key_hint = op->sb->tunnel_key;
783 /* Add southbound record for each unmatched northbound record. */
784 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
/* Port keys are bounded by (1u << 15) - 1 in ovn_port_allocate_key(), so a
 * 16-bit variable holds every value that can be returned. */
785 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
790 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
791 ovn_port_update_sbrec(op);
793 sbrec_port_binding_set_logical_port(op->sb, op->key);
794 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
797 /* Delete southbound records without northbound matches. */
798 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
799 ovs_list_remove(&op->list);
800 sbrec_port_binding_delete(op->sb);
801 ovn_port_destroy(ports, op);
/* Bounds of the multicast group key range. */
805 #define OVN_MIN_MULTICAST 32768
806 #define OVN_MAX_MULTICAST 65535
/* A multicast group as a (name, key) pair; multicast_group_equal() compares
 * both members. */
808 struct multicast_group {
810 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
/* Predefined groups.  Their keys, 65535 and 65534, are the two highest values
 * of the range above. */
813 #define MC_FLOOD "_MC_flood"
814 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
816 #define MC_UNKNOWN "_MC_unknown"
817 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
/* Two groups are equal when their names compare equal with strcmp() and their
 * keys are the same. */
820 multicast_group_equal(const struct multicast_group *a,
821 const struct multicast_group *b)
823 return !strcmp(a->name, b->name) && a->key == b->key;
826 /* Multicast group entry. */
827 struct ovn_multicast {
828 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
829 struct ovn_datapath *datapath;
830 const struct multicast_group *group;
/* Member ports: 'n_ports' entries are in use out of 'allocated_ports'
 * allocated; ovn_multicast_add() grows the array when it is full. */
832 struct ovn_port **ports;
833 size_t n_ports, allocated_ports;
/* Hash for a (datapath, group) pair: the 'datapath' pointer hashed with the
 * group's key as the basis. */
837 ovn_multicast_hash(const struct ovn_datapath *datapath,
838 const struct multicast_group *group)
840 return hash_pointer(datapath, group->key);
/* Searches 'mcgroups' for the entry with the same 'datapath' pointer and a
 * group equal to 'group' according to multicast_group_equal(). */
843 static struct ovn_multicast *
844 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
845 const struct multicast_group *group)
847 struct ovn_multicast *mc;
849 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
850 ovn_multicast_hash(datapath, group), mcgroups) {
851 if (mc->datapath == datapath
852 && multicast_group_equal(mc->group, group)) {
/* Adds 'port' to multicast group 'group' on the port's own datapath.  If
 * 'mcgroups' has no entry for that (datapath, group) pair yet, one is created
 * with room for 4 ports.  The port array is grown with x2nrealloc() when it
 * is full. */
860 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
861 struct ovn_port *port)
863 struct ovn_datapath *od = port->od;
864 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
866 mc = xmalloc(sizeof *mc);
867 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
871 mc->allocated_ports = 4;
872 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
874 if (mc->n_ports >= mc->allocated_ports) {
875 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
878 mc->ports[mc->n_ports++] = port;
/* Removes 'mc' from 'mcgroups'. */
882 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
885 hmap_remove(mcgroups, &mc->hmap_node);
/* Sets the ports of southbound Multicast_Group row 'sb' to the southbound
 * Port_Binding rows of the ports in 'mc'.  The rows are gathered in a
 * temporary array of mc->n_ports pointers, with 'const' cast away because the
 * setter takes non-const pointers. */
892 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
893 const struct sbrec_multicast_group *sb)
895 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
896 for (size_t i = 0; i < mc->n_ports; i++) {
897 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
899 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
903 /* Logical flow generation.
905 * This code generates the Logical_Flow table in the southbound database, as a
906 * function of most of the northbound database.
910 struct hmap_node hmap_node;
912 struct ovn_datapath *od;
913 enum ovn_stage stage;
/* Hashes 'lflow' over the datapath's key UUID, the stage and priority
 * (combined into one word, stage in the upper bits), the match string and the
 * actions string, chaining each result into the next. */
920 ovn_lflow_hash(const struct ovn_lflow *lflow)
922 size_t hash = uuid_hash(&lflow->od->key);
923 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
924 hash = hash_string(lflow->match, hash);
925 return hash_string(lflow->actions, hash);
/* Two flows are equal when they refer to the same datapath object, have the
 * same stage and priority, and their match and actions strings compare equal
 * with strcmp(). */
929 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
931 return (a->od == b->od
932 && a->stage == b->stage
933 && a->priority == b->priority
934 && !strcmp(a->match, b->match)
935 && !strcmp(a->actions, b->actions));
/* Fills in 'lflow' from the arguments.  'match' and 'actions' are stored as
 * pointers, not copied, so the caller decides who owns the strings. */
939 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
940 enum ovn_stage stage, uint16_t priority,
941 char *match, char *actions)
944 lflow->stage = stage;
945 lflow->priority = priority;
946 lflow->match = match;
947 lflow->actions = actions;
950 /* Adds a row with the specified contents to the Logical_Flow table. */
/* The new "struct ovn_lflow" gets its own xstrdup() copies of 'match' and
 * 'actions' and is inserted into 'lflow_map' under ovn_lflow_hash().  No
 * lookup for an existing equal flow is made here. */
952 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
953 enum ovn_stage stage, uint16_t priority,
954 const char *match, const char *actions)
956 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
957 ovn_lflow_init(lflow, od, stage, priority,
958 xstrdup(match), xstrdup(actions));
959 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
/* Searches 'lflows' for a flow equal to the one described by the arguments.
 * A temporary "struct ovn_lflow" on the stack borrows 'match' and 'actions'
 * (const cast away, nothing copied) so that ovn_lflow_hash() and
 * ovn_lflow_equal() can be reused for the lookup. */
962 static struct ovn_lflow *
963 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
964 enum ovn_stage stage, uint16_t priority,
965 const char *match, const char *actions)
967 struct ovn_lflow target;
968 ovn_lflow_init(&target, od, stage, priority,
969 CONST_CAST(char *, match), CONST_CAST(char *, actions));
971 struct ovn_lflow *lflow;
972 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
974 if (ovn_lflow_equal(lflow, &target)) {
/* Removes 'lflow' from 'lflows' and frees its 'actions' string. */
982 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
985 hmap_remove(lflows, &lflow->hmap_node);
987 free(lflow->actions);
992 /* Appends port security constraints on L2 address field 'eth_addr_field'
993 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
994 * 'n_port_security' elements, is the collection of port_security constraints
995 * from an OVN_NB Logical_Switch_Port row. */
/* The appended text has the form " && <field> == {<mac> <mac> ...}", with one
 * address for each element that eth_addr_from_string() can parse.  The length
 * of 'match' on entry is saved so that it can be restored afterwards, which
 * discards whatever was appended. */
997 build_port_security_l2(const char *eth_addr_field,
998 char **port_security, size_t n_port_security,
1001 size_t base_len = match->length;
1002 ds_put_format(match, " && %s == {", eth_addr_field);
1005 for (size_t i = 0; i < n_port_security; i++) {
1008 if (eth_addr_from_string(port_security[i], &ea)) {
1009 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1010 ds_put_char(match, ' ');
1014 ds_chomp(match, ' ');
1015 ds_put_cstr(match, "}");
1018 match->length = base_len;
/* Appends IPv6 neighbor discovery constraints for MAC address 'ea' to
 * 'match'.  nd.sll and nd.tll must each be either the all-zeros address or
 * 'ea'.  When 'n_ipv6_addrs' is zero nothing else is required; otherwise, in
 * the nd.tll alternative, nd.target must additionally be the link-local
 * address generated from 'ea' or one of 'ipv6_addrs'. */
1023 build_port_security_ipv6_nd_flow(
1024 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1027 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1028 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1029 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1030 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1032 if (!n_ipv6_addrs) {
1033 ds_put_cstr(match, "))");
1037 char ip6_str[INET6_ADDRSTRLEN + 1];
1038 struct in6_addr lla;
1039 in6_generate_lla(ea, &lla);
1040 memset(ip6_str, 0, sizeof(ip6_str));
1041 ipv6_string_mapped(ip6_str, &lla);
1042 ds_put_format(match, " && (nd.target == %s", ip6_str);
1044 for(int i = 0; i < n_ipv6_addrs; i++) {
1045 memset(ip6_str, 0, sizeof(ip6_str));
1046 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1047 ds_put_format(match, " || nd.target == %s", ip6_str);
1050 ds_put_format(match, ")))");
/* Appends an IPv6 address-set constraint to 'match': on ip6.src for the
 * ingress pipeline, on ip6.dst for the egress pipeline.  The set always
 * contains the link-local address generated from 'ea', contains ff00::/8 in
 * the egress case, and then each of the 'n_ipv6_addrs' addresses in
 * 'ipv6_addrs'. */
1054 build_port_security_ipv6_flow(
1055 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1056 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1058 char ip6_str[INET6_ADDRSTRLEN + 1];
1060 ds_put_format(match, " && %s == {",
1061 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1063 /* Allow link-local address. */
1064 struct in6_addr lla;
1065 in6_generate_lla(ea, &lla);
1066 ipv6_string_mapped(ip6_str, &lla);
1067 ds_put_format(match, "%s, ", ip6_str);
1069 /* Allow ip6.dst=ff00::/8 for multicast packets */
1070 if (pipeline == P_OUT) {
1071 ds_put_cstr(match, "ff00::/8, ");
1073 for(int i = 0; i < n_ipv6_addrs; i++) {
1074 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1075 ds_put_format(match, "%s, ", ip6_str);
1077 /* Replace ", " by "}". */
1078 ds_chomp(match, ' ');
1079 ds_chomp(match, ',');
1080 ds_put_cstr(match, "}");
1084 * Build port security constraints on ARP and IPv6 ND fields
1085 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1087 * For each port security of the logical port, following
1088 * logical flows are added
1089 * - If the port security has no IP (both IPv4 and IPv6) or
1090 * if it has IPv4 address(es)
1091 * - Priority 90 flow to allow ARP packets for known MAC addresses
1092 * in the eth.src and arp.spa fields. If the port security
1093 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1095 * - If the port security has no IP (both IPv4 and IPv6) or
1096 * if it has IPv6 address(es)
1097 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1098 * in the eth.src and nd.sll/nd.tll fields. If the port security
1099 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1100 * for IPv6 Neighbor Advertisement packet.
1102 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1105 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1107 for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1108 struct lport_addresses ps;
1109 if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
1110 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1111 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port security. No MAC"
1112 " address found", op->nbs->port_security[i]);
1116 bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
1117 struct ds match = DS_EMPTY_INITIALIZER;
1119 if (ps.n_ipv4_addrs || no_ip) {
1121 &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
1122 ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
1123 ETH_ADDR_ARGS(ps.ea));
1125 if (ps.n_ipv4_addrs) {
1126 ds_put_cstr(&match, " && (");
/* NOTE(review): this 'i' shadows the index of the enclosing loop over
 * port_security entries.  Only the inner index is used inside this loop, so
 * behavior is unaffected, but a distinct name would avoid -Wshadow noise. */
1127 for (size_t i = 0; i < ps.n_ipv4_addrs; i++) {
1128 ds_put_cstr(&match, "arp.spa == ");
1129 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1130 /* When the netmask is applied, if the host portion is
1131 * non-zero, the host can only use the specified
1132 * address in the arp.spa. If zero, the host is allowed
1133 * to use any address in the subnet. */
1134 if (ps.ipv4_addrs[i].addr & ~mask) {
1135 ds_put_format(&match, IP_FMT,
1136 IP_ARGS(ps.ipv4_addrs[i].addr));
1138 ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1141 ds_put_cstr(&match, " || ");
/* Strip the trailing " || " left by the last iteration before closing the
 * parenthesis. */
1143 ds_chomp(&match, ' ');
1144 ds_chomp(&match, '|');
1145 ds_chomp(&match, '|');
1146 ds_put_cstr(&match, ")");
1148 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1149 ds_cstr(&match), "next;");
1153 if (ps.n_ipv6_addrs || no_ip) {
1155 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT,
1156 op->json_key, ETH_ADDR_ARGS(ps.ea));
1157 build_port_security_ipv6_nd_flow(&match, ps.ea, ps.ipv6_addrs,
1159 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1160 ds_cstr(&match), "next;");
1163 free(ps.ipv4_addrs);
1164 free(ps.ipv6_addrs);
1167 char *match = xasprintf("inport == %s && (arp || nd)", op->json_key);
1168 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
1174 * Build port security constraints on IPv4 and IPv6 src and dst fields
1175 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1177 * For each port security of the logical port, following
1178 * logical flows are added
1179 * - If the port security has IPv4 addresses,
1180 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1182 * - If the port security has IPv6 addresses,
1183 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1185 * - If the port security has IPv4 addresses or IPv6 addresses or both
1186 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
/* Emits the IP port-security flows for switch port 'op' into 'lflows'.
 * 'pipeline' selects the stage and the match direction: P_IN uses "inport",
 * eth.src/ip4.src and S_SWITCH_IN_PORT_SEC_IP; anything else uses "outport",
 * eth.dst/ip4.dst and S_SWITCH_OUT_PORT_SEC_IP.  Allow flows are added at
 * priority 90 and the catch-all IP "drop;" flow at priority 80.
 *
 * NOTE(review): the IPv4 address loop declares 'int i', which shadows the
 * 'size_t i' of the enclosing port_security loop. */
1189 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1190 struct hmap *lflows)
1192 char *port_direction;
1193 enum ovn_stage stage;
1194 if (pipeline == P_IN) {
1195 port_direction = "inport";
1196 stage = S_SWITCH_IN_PORT_SEC_IP;
1198 port_direction = "outport";
1199 stage = S_SWITCH_OUT_PORT_SEC_IP;
1202 for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1203 struct lport_addresses ps;
1204 if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
1208 if (!(ps.n_ipv4_addrs || ps.n_ipv6_addrs)) {
1212 if (ps.n_ipv4_addrs) {
1213 struct ds match = DS_EMPTY_INITIALIZER;
1214 if (pipeline == P_IN) {
1215 /* Permit use of the unspecified address for DHCP discovery */
1216 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
1217 ds_put_format(&dhcp_match, "inport == %s"
1218 " && eth.src == "ETH_ADDR_FMT
1219 " && ip4.src == 0.0.0.0"
1220 " && ip4.dst == 255.255.255.255"
1221 " && udp.src == 68 && udp.dst == 67", op->json_key,
1222 ETH_ADDR_ARGS(ps.ea));
1223 ovn_lflow_add(lflows, op->od, stage, 90,
1224 ds_cstr(&dhcp_match), "next;");
1225 ds_destroy(&dhcp_match);
1226 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
1227 " && ip4.src == {", op->json_key,
1228 ETH_ADDR_ARGS(ps.ea));
1230 ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
1231 " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1232 op->json_key, ETH_ADDR_ARGS(ps.ea));
1235 for (int i = 0; i < ps.n_ipv4_addrs; i++) {
1236 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1237 /* When the netmask is applied, if the host portion is
1238 * non-zero, the host can only use the specified
1239 * address. If zero, the host is allowed to use any
1240 * address in the subnet.
1242 if (ps.ipv4_addrs[i].addr & ~mask) {
1243 ds_put_format(&match, IP_FMT,
1244 IP_ARGS(ps.ipv4_addrs[i].addr));
1245 if (pipeline == P_OUT && ps.ipv4_addrs[i].plen != 32) {
1246 /* Host is also allowed to receive packets to the
1247 * broadcast address in the specified subnet.
1249 ds_put_format(&match, ", "IP_FMT,
1250 IP_ARGS(ps.ipv4_addrs[i].addr | ~mask));
1253 /* host portion is zero */
1254 ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1257 ds_put_cstr(&match, ", ");
1260 /* Replace ", " by "}". */
1261 ds_chomp(&match, ' ');
1262 ds_chomp(&match, ',');
1263 ds_put_cstr(&match, "}");
1264 ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1266 free(ps.ipv4_addrs);
1269 if (ps.n_ipv6_addrs) {
1270 struct ds match = DS_EMPTY_INITIALIZER;
1271 if (pipeline == P_IN) {
1272 /* Permit use of unspecified address for duplicate address
1274 struct ds dad_match = DS_EMPTY_INITIALIZER;
1275 ds_put_format(&dad_match, "inport == %s"
1276 " && eth.src == "ETH_ADDR_FMT
1278 " && ip6.dst == ff02::/16"
1279 " && icmp6.type == {131, 135, 143}", op->json_key,
1280 ETH_ADDR_ARGS(ps.ea));
1281 ovn_lflow_add(lflows, op->od, stage, 90,
1282 ds_cstr(&dad_match), "next;");
1283 ds_destroy(&dad_match);
1285 ds_put_format(&match, "%s == %s && %s == "ETH_ADDR_FMT"",
1286 port_direction, op->json_key,
1287 pipeline == P_IN ? "eth.src" : "eth.dst",
1288 ETH_ADDR_ARGS(ps.ea));
1289 build_port_security_ipv6_flow(pipeline, &match, ps.ea,
1290 ps.ipv6_addrs, ps.n_ipv6_addrs);
1291 ovn_lflow_add(lflows, op->od, stage, 90,
1292 ds_cstr(&match), "next;");
1294 free(ps.ipv6_addrs);
1297 char *match = xasprintf(
1298 "%s == %s && %s == "ETH_ADDR_FMT" && ip", port_direction,
1299 op->json_key, pipeline == P_IN ? "eth.src" : "eth.dst",
1300 ETH_ADDR_ARGS(ps.ea));
1301 ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
/* Returns true if 'lsp' counts as enabled, that is, if its 'enabled' column
 * is unset (null) or is set to true. */
1307 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
1309 return !lsp->enabled || *lsp->enabled;
/* Returns true if 'lsp' counts as up, that is, if its 'up' column is unset
 * (null) or is set to true. */
1313 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
1315 return !lsp->up || *lsp->up;
/* Walks the ACLs of the logical switch behind 'od' looking for one whose
 * action is "allow-related".  NOTE(review): the return statements fall
 * outside the lines visible here; presumably the result is true as soon as
 * such an ACL is found and false otherwise -- confirm. */
1319 has_stateful_acl(struct ovn_datapath *od)
1321 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1322 struct nbrec_acl *acl = od->nbs->acls[i];
1323 if (!strcmp(acl->action, "allow-related")) {
/* Adds the pre-ACL and ACL logical flows for logical switch 'od' to
 * 'lflows', in both the ingress and egress pipelines: priority-0 defaults
 * that pass everything to the next table, the conntrack-related flows
 * described in the comments below, and one flow per ACL row at
 * acl->priority + OVN_ACL_PRI_OFFSET.  'ports' is scanned for "router"-type
 * ports on 'od', which get priority-110 pre-ACL flows whose action is plain
 * "next;".  An ACL whose action is "reject" is installed as "drop;" after
 * logging that reject is unsupported. */
1332 build_acls(struct ovn_datapath *od, struct hmap *lflows, struct hmap *ports)
1334 bool has_stateful = has_stateful_acl(od);
1335 struct ovn_port *op;
1337 /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1338 * allowed by default. */
1339 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1340 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
1342 /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1343 * default. A related rule at priority 1 is added below if there
1344 * are any stateful ACLs in this datapath. */
1345 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1346 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1348 /* If there are any stateful ACL rules in this datapath, we must
1349 * send all IP packets through the conntrack action, which handles
1350 * defragmentation, in order to match L4 headers. */
1352 HMAP_FOR_EACH (op, key_node, ports) {
1353 if (op->od == od && !strcmp(op->nbs->type, "router")) {
1354 /* Can't use ct() for router ports. Consider the
1355 * following configuration: lp1(10.0.0.2) on
1356 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1357 * ping from lp1 to lp2, First, the response will go
1358 * through ct() with a zone for lp2 in the ls2 ingress
1359 * pipeline on hostB. That ct zone knows about this
1360 * connection. Next, it goes through ct() with the zone
1361 * for the router port in the egress pipeline of ls2 on
1362 * hostB. This zone does not know about the connection,
1363 * as the icmp request went through the logical router
1364 * on hostA, not hostB. This would only work with
1365 * distributed conntrack state across all chassis. */
1366 struct ds match_in = DS_EMPTY_INITIALIZER;
1367 struct ds match_out = DS_EMPTY_INITIALIZER;
1369 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1370 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1371 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
1372 ds_cstr(&match_in), "next;");
1373 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
1374 ds_cstr(&match_out), "next;");
1376 ds_destroy(&match_in);
1377 ds_destroy(&match_out);
1381 /* Ingress and Egress Pre-ACL Table (Priority 100).
1383 * Regardless of whether the ACL is "from-lport" or "to-lport",
1384 * we need rules in both the ingress and egress table, because
1385 * the return traffic needs to be followed. */
1386 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
1387 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
1389 /* Ingress and Egress ACL Table (Priority 1).
1391 * By default, traffic is allowed. This is partially handled by
1392 * the Priority 0 ACL flows added earlier, but we also need to
1393 * commit IP flows. This is because, while the initiator's
1394 * direction may not have any stateful rules, the server's may
1395 * and then its return traffic would not have an associated
1396 * conntrack entry and would return "+invalid". */
1397 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1398 "ct_commit; next;");
1399 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1400 "ct_commit; next;");
1402 /* Ingress and Egress ACL Table (Priority 65535).
1404 * Always drop traffic that's in an invalid state. This is
1405 * enforced at a higher priority than ACLs can be defined. */
1406 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1408 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1411 /* Ingress and Egress ACL Table (Priority 65535).
1413 * Always allow traffic that is established to a committed
1414 * conntrack entry. This is enforced at a higher priority than
1415 * ACLs can be defined. */
1416 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1417 "ct.est && !ct.rel && !ct.new && !ct.inv",
1419 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1420 "ct.est && !ct.rel && !ct.new && !ct.inv",
1423 /* Ingress and Egress ACL Table (Priority 65535).
1425 * Always allow traffic that is related to an existing conntrack
1426 * entry. This is enforced at a higher priority than ACLs can
1429 * NOTE: This does not support related data sessions (eg,
1430 * a dynamically negotiated FTP data channel), but will allow
1431 * related traffic such as an ICMP Port Unreachable through
1432 * that's generated from a non-listening UDP port. */
1433 ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1434 "!ct.est && ct.rel && !ct.new && !ct.inv",
1436 ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1437 "!ct.est && ct.rel && !ct.new && !ct.inv",
1441 /* Ingress or Egress ACL Table (Various priorities). */
1442 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1443 struct nbrec_acl *acl = od->nbs->acls[i];
1444 bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1445 enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1447 if (!strcmp(acl->action, "allow")) {
1448 /* If there are any stateful flows, we must even commit "allow"
1449 * actions. This is because, while the initiator's
1450 * direction may not have any stateful rules, the server's
1451 * may and then its return traffic would not have an
1452 * associated conntrack entry and would return "+invalid". */
1453 const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1454 ovn_lflow_add(lflows, od, stage,
1455 acl->priority + OVN_ACL_PRI_OFFSET,
1456 acl->match, actions);
1457 } else if (!strcmp(acl->action, "allow-related")) {
1458 struct ds match = DS_EMPTY_INITIALIZER;
1460 /* Commit the connection tracking entry, which allows all
1461 * other traffic related to this entry to flow due to the
1462 * 65535 priority flow defined earlier. */
1463 ds_put_format(&match, "ct.new && (%s)", acl->match);
1464 ovn_lflow_add(lflows, od, stage,
1465 acl->priority + OVN_ACL_PRI_OFFSET,
1466 ds_cstr(&match), "ct_commit; next;");
1469 } else if (!strcmp(acl->action, "drop")) {
1470 ovn_lflow_add(lflows, od, stage,
1471 acl->priority + OVN_ACL_PRI_OFFSET,
1472 acl->match, "drop;");
1473 } else if (!strcmp(acl->action, "reject")) {
1474 /* xxx Need to support "reject". */
1475 VLOG_INFO("reject is not a supported action");
1476 ovn_lflow_add(lflows, od, stage,
1477 acl->priority + OVN_ACL_PRI_OFFSET,
1478 acl->match, "drop;");
/* Populates 'lflows' (and the multicast groups in 'mcgroups') with the
 * logical switch flows derived from 'datapaths' and 'ports': ACLs, ingress
 * port security (L2, IP and ND), the ARP responder, L2 destination lookup,
 * and egress port security (IP and L2). */
1484 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1485 struct hmap *lflows, struct hmap *mcgroups)
1487 /* This flow table structure is documented in ovn-northd(8), so please
1488 * update ovn-northd.8.xml if you change anything. */
1490 /* Build pre-ACL and ACL tables for both ingress and egress.
1491 * Ingress tables 3 and 4. Egress tables 0 and 1. */
1492 struct ovn_datapath *od;
1493 HMAP_FOR_EACH (od, key_node, datapaths) {
1498 build_acls(od, lflows, ports);
1501 /* Logical switch ingress table 0: Admission control framework (priority
1503 HMAP_FOR_EACH (od, key_node, datapaths) {
1508 /* Logical VLANs not supported. */
1509 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
1512 /* Broadcast/multicast source address is invalid. */
1513 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
1516 /* Port security flows have priority 50 (see below) and will continue
1517 * to the next table if packet source is acceptable. */
1520 /* Logical switch ingress table 0: Ingress port security - L2
1522 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
1523 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
1525 struct ovn_port *op;
1526 HMAP_FOR_EACH (op, key_node, ports) {
1531 if (!lsp_is_enabled(op->nbs)) {
1532 /* Drop packets from disabled logical ports (since logical flow
1533 * tables are default-drop). */
1537 struct ds match = DS_EMPTY_INITIALIZER;
1538 ds_put_format(&match, "inport == %s", op->json_key);
1539 build_port_security_l2(
1540 "eth.src", op->nbs->port_security, op->nbs->n_port_security,
1542 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
1543 ds_cstr(&match), "next;");
1546 if (op->nbs->n_port_security) {
1547 build_port_security_ip(P_IN, op, lflows);
1548 build_port_security_nd(op, lflows);
1552 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
1554 HMAP_FOR_EACH (od, key_node, datapaths) {
1559 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
1560 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
1563 /* Ingress table 5: ARP responder, skip requests coming from localnet ports.
1564 * (priority 100). */
1565 HMAP_FOR_EACH (op, key_node, ports) {
1570 if (!strcmp(op->nbs->type, "localnet")) {
1571 char *match = xasprintf("inport == %s", op->json_key);
1572 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 100,
1578 /* Ingress table 5: ARP responder, reply for known IPs.
1580 HMAP_FOR_EACH (op, key_node, ports) {
1586 * Add ARP reply flows if either the
1588 * - port type is router
1590 if (!lsp_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
1594 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1595 struct lport_addresses laddrs;
1596 if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
1600 for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1601 char *match = xasprintf(
1602 "arp.tpa == "IP_FMT" && arp.op == 1",
1603 IP_ARGS(laddrs.ipv4_addrs[j].addr));
1604 char *actions = xasprintf(
1605 "eth.dst = eth.src; "
1606 "eth.src = "ETH_ADDR_FMT"; "
1607 "arp.op = 2; /* ARP reply */ "
1608 "arp.tha = arp.sha; "
1609 "arp.sha = "ETH_ADDR_FMT"; "
1610 "arp.tpa = arp.spa; "
1611 "arp.spa = "IP_FMT"; "
1612 "outport = inport; "
1613 "inport = \"\"; /* Allow sending out inport. */ "
1615 ETH_ADDR_ARGS(laddrs.ea),
1616 ETH_ADDR_ARGS(laddrs.ea),
1617 IP_ARGS(laddrs.ipv4_addrs[j].addr));
1618 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_RSP, 50,
1624 free(laddrs.ipv4_addrs);
1628 /* Ingress table 5: ARP responder, by default goto next.
1630 HMAP_FOR_EACH (od, key_node, datapaths) {
1635 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_RSP, 0, "1", "next;");
1638 /* Ingress table 6: Destination lookup, broadcast and multicast handling
1639 * (priority 100). */
1640 HMAP_FOR_EACH (op, key_node, ports) {
1645 if (lsp_is_enabled(op->nbs)) {
1646 ovn_multicast_add(mcgroups, &mc_flood, op);
1649 HMAP_FOR_EACH (od, key_node, datapaths) {
1654 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1655 "outport = \""MC_FLOOD"\"; output;");
1658 /* Ingress table 6: Destination lookup, unicast handling (priority 50). */
1659 HMAP_FOR_EACH (op, key_node, ports) {
1664 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1665 struct eth_addr mac;
1667 if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1668 struct ds match, actions;
1671 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1672 ETH_ADDR_ARGS(mac));
1675 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1676 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1677 ds_cstr(&match), ds_cstr(&actions));
1678 ds_destroy(&actions);
1680 } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1681 if (lsp_is_enabled(op->nbs)) {
1682 ovn_multicast_add(mcgroups, &mc_unknown, op);
1683 op->od->has_unknown = true;
1686 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1689 "%s: invalid syntax '%s' in addresses column",
1690 op->nbs->name, op->nbs->addresses[i]);
1695 /* Ingress table 6: Destination lookup for unknown MACs (priority 0). */
1696 HMAP_FOR_EACH (od, key_node, datapaths) {
1701 if (od->has_unknown) {
1702 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1703 "outport = \""MC_UNKNOWN"\"; output;");
1707 /* Egress table 2: Egress port security - IP (priority 0)
1708 * port security L2 - multicast/broadcast (priority
1710 HMAP_FOR_EACH (od, key_node, datapaths) {
1715 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
1716 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
1720 /* Egress table 2: Egress port security - IP (priorities 90 and 80)
1721 * if port security enabled.
1723 * Egress table 3: Egress port security - L2 (priorities 50 and 150).
1725 * Priority 50 rules implement port security for enabled logical port.
1727 * Priority 150 rules drop packets to disabled logical ports, so that they
1728 * don't even receive multicast or broadcast packets. */
1729 HMAP_FOR_EACH (op, key_node, ports) {
1734 struct ds match = DS_EMPTY_INITIALIZER;
1735 ds_put_format(&match, "outport == %s", op->json_key);
1736 if (lsp_is_enabled(op->nbs)) {
1737 build_port_security_l2("eth.dst", op->nbs->port_security,
1738 op->nbs->n_port_security, &match);
1739 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
1740 ds_cstr(&match), "output;");
1742 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
1743 ds_cstr(&match), "drop;");
1748 if (op->nbs->n_port_security) {
1749 build_port_security_ip(P_OUT, op, lflows);
/* Returns true if 'lrport' counts as enabled, that is, if its 'enabled'
 * column is unset (null) or is set to true. */
1755 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1757 return !lrport->enabled || *lrport->enabled;
/* Adds to 'lflows' an S_ROUTER_IN_IP_ROUTING flow, in the datapath of router
 * port 'op', that matches ip4.dst against 'network'/'mask'.  The flow's
 * priority is the number of 1-bits in 'mask', so that more specific routes
 * take precedence.  Its actions decrement ip.ttl and load reg0 with either
 * 'gateway' or ip4.dst; the remaining actions are formatted from 'op''s IP
 * address, MAC address and json_key.
 *
 * NOTE(review): the condition that chooses between 'gateway' and ip4.dst is
 * not visible here; presumably it is whether 'gateway' is nonzero --
 * confirm. */
1761 add_route(struct hmap *lflows, const struct ovn_port *op,
1762 ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1764 char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1765 IP_ARGS(network), IP_ARGS(mask));
1767 struct ds actions = DS_EMPTY_INITIALIZER;
1768 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1770 ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1772 ds_put_cstr(&actions, "ip4.dst");
1774 ds_put_format(&actions,
1777 "eth.src = "ETH_ADDR_FMT"; "
1780 IP_ARGS(op->ip), ETH_ADDR_ARGS(op->mac), op->json_key);
1782 /* The priority here is calculated to implement longest-prefix-match
1784 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING,
1785 count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1786 ds_destroy(&actions);
/* Validates the static route 'route' configured on router 'od' and, when it
 * is usable, installs it through add_route().  The next hop must parse as a
 * host address (all-ones mask) and the prefix as a CIDR network.  The
 * outgoing port is 'route->output_port' looked up in 'ports' when that is
 * set; otherwise the router's ports are searched for one whose network
 * contains the next hop.  Problems are reported with rate-limited
 * warnings. */
1791 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
1793 const struct nbrec_logical_router_static_route *route)
1795 ovs_be32 prefix, next_hop, mask;
1797 /* Verify that next hop is an IP address with a 32-bit mask. */
1798 char *error = ip_parse_masked(route->nexthop, &next_hop, &mask);
1799 if (error || mask != OVS_BE32_MAX) {
1800 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1801 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
1806 /* Verify that ip prefix is a valid CIDR address. */
1807 error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
1808 if (error || !ip_is_cidr(mask)) {
1809 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1810 VLOG_WARN_RL(&rl, "bad 'network' in static routes %s",
1816 /* Find the outgoing port. */
1817 struct ovn_port *out_port = NULL;
1818 if (route->output_port) {
1819 out_port = ovn_port_find(ports, route->output_port);
1821 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1822 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
1823 route->output_port, route->ip_prefix);
1827 /* output_port is not specified, find the
1828 * router port matching the next hop. */
1830 for (i = 0; i < od->nbr->n_ports; i++) {
1831 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
1832 out_port = ovn_port_find(ports, lrp->name);
1834 /* This should not happen. */
1838 if (out_port->network
1839 && !((out_port->network ^ next_hop) & out_port->mask)) {
1840 /* There should be only 1 interface that matches the next hop.
1841 * Otherwise, it's a configuration error, because subnets of
1842 * router's interfaces should NOT overlap. */
1846 if (i == od->nbr->n_ports) {
1847 /* There is no matched out port. */
1848 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1849 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
1850 route->ip_prefix, route->nexthop);
1855 add_route(lflows, out_port, prefix, mask, next_hop);
1859 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1860 struct hmap *lflows)
1862 /* This flow table structure is documented in ovn-northd(8), so please
1863 * update ovn-northd.8.xml if you change anything. */
1865 /* Logical router ingress table 0: Admission control framework. */
1866 struct ovn_datapath *od;
1867 HMAP_FOR_EACH (od, key_node, datapaths) {
1872 /* Logical VLANs not supported.
1873 * Broadcast/multicast source address is invalid. */
1874 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1875 "vlan.present || eth.src[40]", "drop;");
1878 /* Logical router ingress table 0: match (priority 50). */
1879 struct ovn_port *op;
1880 HMAP_FOR_EACH (op, key_node, ports) {
1885 if (!lrport_is_enabled(op->nbr)) {
1886 /* Drop packets from disabled logical ports (since logical flow
1887 * tables are default-drop). */
1891 char *match = xasprintf(
1892 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1893 ETH_ADDR_ARGS(op->mac), op->json_key);
1894 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1899 /* Logical router ingress table 1: IP Input. */
1900 HMAP_FOR_EACH (od, key_node, datapaths) {
1905 /* L3 admission control: drop multicast and broadcast source, localhost
1906 * source or destination, and zero network source or destination
1907 * (priority 100). */
1908 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1910 "ip4.src == 255.255.255.255 || "
1911 "ip4.src == 127.0.0.0/8 || "
1912 "ip4.dst == 127.0.0.0/8 || "
1913 "ip4.src == 0.0.0.0/8 || "
1914 "ip4.dst == 0.0.0.0/8",
1917 /* ARP reply handling. Use ARP replies to populate the logical
1918 * router's ARP table. */
1919 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
1920 "put_arp(inport, arp.spa, arp.sha);");
1922 /* Drop Ethernet local broadcast. By definition this traffic should
1923 * not be forwarded. */
1924 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1925 "eth.bcast", "drop;");
1927 /* Drop IP multicast. */
1928 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1929 "ip4.mcast", "drop;");
1933 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1934 char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1935 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1938 /* Pass other traffic not already handled to the next table for
1940 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1943 HMAP_FOR_EACH (op, key_node, ports) {
1948 /* L3 admission control: drop packets that originate from an IP address
1949 * owned by the router or a broadcast address known to the router
1950 * (priority 100). */
1951 char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1952 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1953 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1957 /* ICMP echo reply. These flows reply to ICMP echo requests
1958 * received for the router's IP address. Since packets only
1959 * get here as part of the logical router datapath, the inport
1960 * (i.e. the incoming locally attached net) does not matter.
1961 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
1963 "(ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1964 "icmp4.type == 8 && icmp4.code == 0",
1965 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1966 char *actions = xasprintf(
1967 "ip4.dst = ip4.src; "
1968 "ip4.src = "IP_FMT"; "
1971 "inport = \"\"; /* Allow sending out inport. */ "
1974 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1979 /* ARP reply. These flows reply to ARP requests for the router's own
1982 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1983 op->json_key, IP_ARGS(op->ip));
1984 actions = xasprintf(
1985 "eth.dst = eth.src; "
1986 "eth.src = "ETH_ADDR_FMT"; "
1987 "arp.op = 2; /* ARP reply */ "
1988 "arp.tha = arp.sha; "
1989 "arp.sha = "ETH_ADDR_FMT"; "
1990 "arp.tpa = arp.spa; "
1991 "arp.spa = "IP_FMT"; "
1993 "inport = \"\"; /* Allow sending out inport. */ "
1995 ETH_ADDR_ARGS(op->mac),
1996 ETH_ADDR_ARGS(op->mac),
1999 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2004 /* ARP handling for external IP addresses.
2006 * DNAT IP addresses are external IP addresses that need ARP
2008 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2009 const struct nbrec_nat *nat;
2011 nat = op->od->nbr->nat[i];
2013 if(!strcmp(nat->type, "snat")) {
2018 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2019 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2020 VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
2021 "for router %s", nat->external_ip, op->key);
2026 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2027 op->json_key, IP_ARGS(ip));
2028 actions = xasprintf(
2029 "eth.dst = eth.src; "
2030 "eth.src = "ETH_ADDR_FMT"; "
2031 "arp.op = 2; /* ARP reply */ "
2032 "arp.tha = arp.sha; "
2033 "arp.sha = "ETH_ADDR_FMT"; "
2034 "arp.tpa = arp.spa; "
2035 "arp.spa = "IP_FMT"; "
2037 "inport = \"\"; /* Allow sending out inport. */ "
2039 ETH_ADDR_ARGS(op->mac),
2040 ETH_ADDR_ARGS(op->mac),
2043 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2049 /* Drop IP traffic to this router. */
2050 match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
2051 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2056 /* NAT in Gateway routers. */
2057 HMAP_FOR_EACH (od, key_node, datapaths) {
2062 /* Packets are allowed by default. */
2063 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2064 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2065 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2067 /* NAT rules are only valid on Gateway routers. */
2068 if (!smap_get(&od->nbr->options, "chassis")) {
2072 for (int i = 0; i < od->nbr->n_nat; i++) {
2073 const struct nbrec_nat *nat;
2075 nat = od->nbr->nat[i];
2079 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2080 if (error || mask != OVS_BE32_MAX) {
2081 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2082 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2088 /* Check the validity of nat->logical_ip. 'logical_ip' can
2089 * be a subnet when the type is "snat". */
2090 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2091 if (!strcmp(nat->type, "snat")) {
2093 static struct vlog_rate_limit rl =
2094 VLOG_RATE_LIMIT_INIT(5, 1);
2095 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2096 "in router "UUID_FMT"",
2097 nat->logical_ip, UUID_ARGS(&od->key));
2102 if (error || mask != OVS_BE32_MAX) {
2103 static struct vlog_rate_limit rl =
2104 VLOG_RATE_LIMIT_INIT(5, 1);
2105 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2106 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2113 char *match, *actions;
2115 /* Ingress UNSNAT table: It is for already established connections'
2116 * reverse traffic. i.e., SNAT has already been done in egress
2117 * pipeline and now the packet has entered the ingress pipeline as
2118 * part of a reply. We undo the SNAT here.
2120 * Undoing SNAT has to happen before DNAT processing. This is
2121 * because when the packet was DNATed in ingress pipeline, it did
2122 * not know about the possibility of eventual additional SNAT in
2123 * egress pipeline. */
2124 if (!strcmp(nat->type, "snat")
2125 || !strcmp(nat->type, "dnat_and_snat")) {
2126 match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
2127 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
2128 match, "ct_snat; next;");
2132 /* Ingress DNAT table: Packets enter the pipeline with destination
2133 * IP address that needs to be DNATted from an external IP address
2134 * to a logical IP address. */
2135 if (!strcmp(nat->type, "dnat")
2136 || !strcmp(nat->type, "dnat_and_snat")) {
2137 /* Packet when it goes from the initiator to destination.
2138 * We need to zero the inport because the router can
2139 * send the packet back through the same interface. */
2140 match = xasprintf("ip && ip4.dst == %s", nat->external_ip);
2141 actions = xasprintf("inport = \"\"; ct_dnat(%s);",
2143 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
2149 /* Egress SNAT table: Packets enter the egress pipeline with
2150 * source ip address that needs to be SNATted to an external ip
2152 if (!strcmp(nat->type, "snat")
2153 || !strcmp(nat->type, "dnat_and_snat")) {
2154 match = xasprintf("ip && ip4.src == %s", nat->logical_ip);
2155 actions = xasprintf("ct_snat(%s);", nat->external_ip);
2157 /* The priority here is calculated such that the
2158 * nat->logical_ip with the longest mask gets a higher
2160 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
2161 count_1bits(ntohl(mask)) + 1, match, actions);
2167 /* Re-circulate every packet through the DNAT zone.
2168 * This helps with two things.
2170 * 1. Any packet that needs to be unDNATed in the reverse
2171 * direction gets unDNATed. Ideally this could be done in
2172 * the egress pipeline. But since the gateway router
2173 * does not have any feature that depends on the source
2174 * ip address being external IP address for IP routing,
2175 * we can do it here, saving a future re-circulation.
2177 * 2. Any packet that was sent through SNAT zone in the
2178 * previous table automatically gets re-circulated to get
2179 * back the new destination IP address that is needed for
2180 * routing in the openflow pipeline. */
2181 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2182 "ip", "inport = \"\"; ct_dnat;");
2185 /* Logical router ingress table 2: IP Routing.
2187 * A packet that arrives at this table is an IP packet that should be
2188 * routed to the address in ip4.dst. This table sets outport to the correct
2189 * output port, eth.src to the output port's MAC address, and reg0 to the
2190 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2191 * unchanged), and advances to the next table for ARP resolution. */
2192 HMAP_FOR_EACH (op, key_node, ports) {
2197 add_route(lflows, op, op->network, op->mask, 0);
2199 HMAP_FOR_EACH (od, key_node, datapaths) {
2204 /* Convert the static routes to flows. */
2205 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2206 const struct nbrec_logical_router_static_route *route;
2208 route = od->nbr->static_routes[i];
2209 build_static_route_flow(lflows, od, ports, route);
2212 if (od->gateway && od->gateway_port) {
2213 add_route(lflows, od->gateway_port, 0, 0, od->gateway);
2216 /* XXX destination unreachable */
2218 /* Logical router ingress table 3: ARP Resolution.
2220 * Any packet that reaches this table is an IP packet whose next-hop IP
2221 * address is in reg0. (ip4.dst is the final destination.) This table
2222 * resolves the IP address in reg0 into an output port in outport and an
2223 * Ethernet address in eth.dst. */
2224 HMAP_FOR_EACH (op, key_node, ports) {
2226 /* This is a logical router port. If next-hop IP address in 'reg0'
2227 * matches ip address of this router port, then the packet is
2228 * intended to eventually be sent to this logical port. Set the
2229 * destination mac address using this port's mac address.
2231 * The packet is still in peer's logical pipeline. So the match
2232 * should be on peer's outport. */
2233 if (op->nbr->peer) {
2234 struct ovn_port *peer = ovn_port_find(ports, op->nbr->peer);
2239 if (!peer->ip || !op->ip) {
2242 char *match = xasprintf("outport == %s && reg0 == "IP_FMT,
2243 peer->json_key, IP_ARGS(op->ip));
2244 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2245 "next;", ETH_ADDR_ARGS(op->mac));
2246 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2247 100, match, actions);
2251 } else if (op->od->n_router_ports && strcmp(op->nbs->type, "router")) {
2252 /* This is a logical switch port that backs a VM or a container.
2253 * Extract its addresses. For each of the address, go through all
2254 * the router ports attached to the switch (to which this port
2255 * connects) and if the address in question is reachable from the
2256 * router port, add an ARP entry in that router's pipeline. */
2258 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
2259 struct lport_addresses laddrs;
2260 if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
2265 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
2266 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
2267 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2268 /* Get the Logical_Router_Port that the
2269 * Logical_Switch_Port is connected to, as
2271 const char *peer_name = smap_get(
2272 &op->od->router_ports[j]->nbs->options,
2278 struct ovn_port *peer
2279 = ovn_port_find(ports, peer_name);
2280 if (!peer || !peer->nbr) {
2284 /* Make sure that 'ip' is in 'peer''s network. */
2285 if ((ip ^ peer->network) & peer->mask) {
2289 char *match = xasprintf(
2290 "outport == %s && reg0 == "IP_FMT,
2291 peer->json_key, IP_ARGS(ip));
2292 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; "
2294 ETH_ADDR_ARGS(laddrs.ea));
2295 ovn_lflow_add(lflows, peer->od,
2296 S_ROUTER_IN_ARP_RESOLVE,
2297 100, match, actions);
2304 free(laddrs.ipv4_addrs);
2306 } else if (!strcmp(op->nbs->type, "router")) {
2307 /* This is a logical switch port that connects to a router. */
2309 /* The peer of this switch port is the router port for which
2310 * we need to add logical flows such that it can resolve
2311 * ARP entries for all the other router ports connected to
2312 * the switch in question. */
2314 const char *peer_name = smap_get(&op->nbs->options,
2320 struct ovn_port *peer = ovn_port_find(ports, peer_name);
2321 if (!peer || !peer->nbr || !peer->ip) {
2325 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2326 const char *router_port_name = smap_get(
2327 &op->od->router_ports[j]->nbs->options,
2329 struct ovn_port *router_port = ovn_port_find(ports,
2331 if (!router_port || !router_port->nbr || !router_port->ip) {
2335 /* Skip the router port under consideration. */
2336 if (router_port == peer) {
2340 if (!router_port->ip) {
2343 char *match = xasprintf("outport == %s && reg0 == "IP_FMT,
2345 IP_ARGS(router_port->ip));
2346 char *actions = xasprintf("eth.dst = "ETH_ADDR_FMT"; next;",
2347 ETH_ADDR_ARGS(router_port->mac));
2348 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2349 100, match, actions);
2356 HMAP_FOR_EACH (od, key_node, datapaths) {
2361 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2362 "get_arp(outport, reg0); next;");
2365 /* Local router ingress table 4: ARP request.
2367 * In the common case where the Ethernet destination has been resolved,
2368 * this table outputs the packet (priority 100). Otherwise, it composes
2369 * and sends an ARP request (priority 0). */
2370 HMAP_FOR_EACH (od, key_node, datapaths) {
2375 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2376 "eth.dst == 00:00:00:00:00:00",
2378 "eth.dst = ff:ff:ff:ff:ff:ff; "
2380 "arp.op = 1; " /* ARP request */
2383 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2386 /* Logical router egress table 1: Delivery (priority 100).
2388 * Priority 100 rules deliver packets to enabled logical ports. */
2389 HMAP_FOR_EACH (op, key_node, ports) {
2394 if (!lrport_is_enabled(op->nbr)) {
2395 /* Drop packets to disabled logical ports (since logical flow
2396 * tables are default-drop). */
2400 char *match = xasprintf("outport == %s", op->json_key);
2401 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
2407 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2408 * constructing their contents based on the OVN_NB database. */
/* Reconciliation strategy, as far as the statements below show:
 *
 *   1. The desired flows and multicast groups are collected in the local
 *      hmaps lflows and mcgroups by build_lswitch_flows() and
 *      build_lrouter_flows().
 *   2. Every existing southbound row is looked up in the matching map.
 *   3. Entries still in a map afterwards are inserted as new rows through
 *      ctx->ovnsb_txn and destroyed, after which the map itself is destroyed.
 *
 * NOTE(review): the conditions that choose between keeping and deleting an
 * existing row are not spelled out here -- presumably a row whose datapath or
 * map entry cannot be found is deleted; confirm. */
2410 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
2413 struct hmap lflows = HMAP_INITIALIZER(&lflows);
2414 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
/* Fill both maps with the desired state.  Only the switch builder receives
 * the multicast group map. */
2416 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
2417 build_lrouter_flows(datapaths, ports, &lflows);
2419 /* Push changes to the Logical_Flow table to database. */
2420 const struct sbrec_logical_flow *sbflow, *next_sbflow;
2421 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
/* Map the row to its datapath through its logical_datapath reference. */
2422 struct ovn_datapath *od
2423 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* NOTE(review): presumably reached only when no datapath was found for the
 * row -- confirm the guarding condition. */
2425 sbrec_logical_flow_delete(sbflow);
/* A datapath with a northbound switch record (od->nbs) counts as a switch;
 * anything else is treated as a router. */
2429 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
/* Any pipeline string other than ingress is taken to be egress. */
2430 enum ovn_pipeline pipeline
2431 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
/* Search the desired flows for one with the same datapath, stage, priority,
 * match and actions as this row. */
2432 struct ovn_lflow *lflow = ovn_lflow_find(
2433 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
2434 sbflow->priority, sbflow->match, sbflow->actions);
/* NOTE(review): presumably the found-entry path, where the row is already
 * correct and only the local entry is dropped -- confirm. */
2436 ovn_lflow_destroy(&lflows, lflow);
/* NOTE(review): presumably the not-found path, where the row is stale --
 * confirm. */
2438 sbrec_logical_flow_delete(sbflow);
/* Insert one southbound row for every flow still left in the map. */
2441 struct ovn_lflow *lflow, *next_lflow;
2442 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
/* Split the stage into the pipeline and table number stored in the row. */
2443 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
2444 uint8_t table = ovn_stage_get_table(lflow->stage);
2446 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
2447 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
2448 sbrec_logical_flow_set_pipeline(
2449 sbflow, pipeline == P_IN ? "ingress" : "egress");
2450 sbrec_logical_flow_set_table_id(sbflow, table);
2451 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
2452 sbrec_logical_flow_set_match(sbflow, lflow->match);
2453 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Tag the row with the stage name under the external_ids key stage-name. */
2455 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
2456 ovn_stage_to_str(lflow->stage));
2457 sbrec_logical_flow_set_external_ids(sbflow, &ids);
/* The entry has been written out, so release it. */
2459 ovn_lflow_destroy(&lflows, lflow);
2461 hmap_destroy(&lflows);
2463 /* Push changes to the Multicast_Group table to database. */
2464 const struct sbrec_multicast_group *sbmc, *next_sbmc;
2465 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
2466 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
/* NOTE(review): presumably reached only when no datapath was found for the
 * row -- confirm the guarding condition. */
2469 sbrec_multicast_group_delete(sbmc);
/* Build a lookup key from the name and tunnel key of the row. */
2473 struct multicast_group group = { .name = sbmc->name,
2474 .key = sbmc->tunnel_key };
2475 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
/* NOTE(review): presumably only when a matching group exists: refresh the
 * row from it, then drop the local entry -- confirm. */
2477 ovn_multicast_update_sbrec(mc, sbmc);
2478 ovn_multicast_destroy(&mcgroups, mc);
/* NOTE(review): presumably the no-match path, where the row is stale --
 * confirm. */
2480 sbrec_multicast_group_delete(sbmc);
/* Create rows for the groups still left in the map, then release them. */
2483 struct ovn_multicast *mc, *next_mc;
2484 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
2485 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
2486 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
2487 sbrec_multicast_group_set_name(sbmc, mc->group->name);
2488 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
2489 ovn_multicast_update_sbrec(mc, sbmc);
2490 ovn_multicast_destroy(&mcgroups, mc);
2492 hmap_destroy(&mcgroups);
/* Regenerates southbound contents from the northbound database: builds the
 * datapath and port maps with build_datapaths() and build_ports(), hands them
 * to build_lflows(), and finally destroys every entry of both maps together
 * with the maps themselves.
 *
 * NOTE(review): the ctx->ovnsb_txn test presumably makes this a no-op while
 * no southbound transaction is available -- confirm. */
2496 ovnnb_db_run(struct northd_context *ctx)
2498 if (!ctx->ovnsb_txn) {
/* Temporary maps that live only for the duration of this call. */
2501 struct hmap datapaths, ports;
2502 build_datapaths(ctx, &datapaths);
2503 build_ports(ctx, &datapaths, &ports);
2504 build_lflows(ctx, &datapaths, &ports);
/* Release every datapath, then the map that held them. */
2506 struct ovn_datapath *dp, *next_dp;
2507 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
2508 ovn_datapath_destroy(&datapaths, dp);
2510 hmap_destroy(&datapaths);
/* Same for the ports. */
2512 struct ovn_port *port, *next_port;
2513 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
2514 ovn_port_destroy(&ports, port);
2516 hmap_destroy(&ports);
2520 * The only change we get notified about is if the 'chassis' column of the
2521 * 'Port_Binding' table changes. When this column is not empty, it means we
2522 * need to set the corresponding logical port as 'up' in the northbound DB.
/* Synchronizes the up column of northbound Logical_Switch_Port rows with the
 * chassis column of southbound Port_Binding rows.
 *
 * All northbound switch ports are first indexed by name in a temporary hmap.
 * Each Port_Binding is then matched to a port through its logical_port
 * string, and the up column is rewritten only when it is unset or disagrees
 * with whether a chassis is present.  The temporary index is emptied and
 * destroyed at the end.
 *
 * NOTE(review): the ctx->ovnnb_txn test presumably skips the run while no
 * northbound transaction is available -- confirm. */
2525 ovnsb_db_run(struct northd_context *ctx)
2527 if (!ctx->ovnnb_txn) {
2530 struct hmap lports_hmap;
2531 const struct sbrec_port_binding *sb;
2532 const struct nbrec_logical_switch_port *nb;
/* Index node: ties one northbound port record into lports_hmap. */
2534 struct lport_hash_node {
2535 struct hmap_node node;
2536 const struct nbrec_logical_switch_port *nb;
2539 hmap_init(&lports_hmap);
/* Build the index: one zero-filled heap node per northbound switch port,
 * hashed on the port name. */
2541 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
2542 hash_node = xzalloc(sizeof *hash_node);
2544 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
2547 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
/* Visit the index nodes whose hash equals that of the logical_port string,
 * and compare the names with strcmp() to confirm an exact match. */
2549 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
2550 hash_string(sb->logical_port, 0),
2552 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
2559 /* The logical port doesn't exist for this port binding. This can
2560 * happen under normal circumstances when ovn-northd hasn't gotten
2561 * around to pruning the Port_Binding yet. */
/* Write the up column only when it is unset or holds the opposite of the
 * current chassis state.
 * NOTE(review): presumably up is true in the first branch and false in the
 * second -- confirm. */
2565 if (sb->chassis && (!nb->up || !*nb->up)) {
2567 nbrec_logical_switch_port_set_up(nb, &up, 1);
2568 } else if (!sb->chassis && (!nb->up || *nb->up)) {
2570 nbrec_logical_switch_port_set_up(nb, &up, 1);
/* Drain the index before destroying the map.
 * NOTE(review): presumably each popped node is freed in the loop body --
 * confirm. */
2574 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
2577 hmap_destroy(&lports_hmap);
/* Cached default northbound database target.  It starts out unset, is filled
 * in on first use below, and is later released with free(). */
2581 static char *default_nb_db_;
/* Lazily formats the target as unix:RUNDIR/ovnnb_db.sock, where RUNDIR is the
 * value returned by ovs_rundir(), and hands back the cached string on this
 * and every later call.
 * NOTE(review): presumably the body of default_nb_db(), which parse_options()
 * calls -- confirm. */
2586 if (!default_nb_db_) {
2587 default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
2589 return default_nb_db_;
/* Cached default southbound database target.  It starts out unset, is filled
 * in on first use below, and is later released with free(). */
2592 static char *default_sb_db_;
/* Lazily formats the target as unix:RUNDIR/ovnsb_db.sock, where RUNDIR is the
 * value returned by ovs_rundir(), and hands back the cached string on this
 * and every later call.
 * NOTE(review): presumably the body of default_sb_db(), which parse_options()
 * calls -- confirm. */
2597 if (!default_sb_db_) {
2598 default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
2600 return default_sb_db_;
/* Parses the command line with getopt_long().
 *
 * The long option table below is the single source of truth: the short
 * option string is derived from it with
 * ovs_cmdl_long_options_to_short_options() and freed before returning.
 * The file-level ovnsb_db and ovnnb_db targets can be assigned the values of
 * default_sb_db() and default_nb_db().
 *
 * NOTE(review): both parameters carry OVS_UNUSED although they are passed to
 * getopt_long() below, so the annotation looks stale.
 * NOTE(review): presumably the defaults are applied only when the matching
 * option was not given -- confirm. */
2604 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
/* Option identifiers contributed by the daemon library. */
2607 DAEMON_OPTION_ENUMS,
/* ovnsb-db maps to the short option d and ovnnb-db to D; both require an
 * argument.  help, options and version take none. */
2610 static const struct option long_options[] = {
2611 {"ovnsb-db", required_argument, NULL, 'd'},
2612 {"ovnnb-db", required_argument, NULL, 'D'},
2613 {"help", no_argument, NULL, 'h'},
2614 {"options", no_argument, NULL, 'o'},
2615 {"version", no_argument, NULL, 'V'},
2616 DAEMON_LONG_OPTIONS,
2618 STREAM_SSL_LONG_OPTIONS,
2621 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
2626 c = getopt_long(argc, argv, short_options, long_options, NULL);
/* Handlers supplied by the daemon, vlog and stream-ssl libraries for the
 * options they contribute. */
2632 DAEMON_OPTION_HANDLERS;
2633 VLOG_OPTION_HANDLERS;
2634 STREAM_SSL_OPTION_HANDLERS;
/* NOTE(review): presumably the handler for the options option -- confirm. */
2649 ovs_cmdl_print_options(long_options);
/* NOTE(review): presumably the handler for the version option -- confirm. */
2653 ovs_print_version(0, 0);
2662 ovnsb_db = default_sb_db();
2666 ovnnb_db = default_nb_db();
2669 free(short_options);
/* Registers column with idl through ovsdb_idl_add_column() and then calls
 * ovsdb_idl_omit_alert() for the same column.
 *
 * NOTE(review): per the ovsdb-idl API this presumably keeps the column
 * replicated while changes to it are not reported as IDL changes -- confirm
 * against ovsdb_idl_omit_alert(). */
2673 add_column_noalert(struct ovsdb_idl *idl,
2674 const struct ovsdb_idl_column *column)
2676 ovsdb_idl_add_column(idl, column);
2677 ovsdb_idl_omit_alert(idl, column);
/* Program entry point.
 *
 * Prepares the process (signal handling, program name, service start, option
 * parsing, daemonization, a unixctl server with an exit command), creates the
 * IDL loops for the northbound and southbound databases, runs the processing
 * statements below, and finally tears down the unixctl server, both IDL loops
 * and the cached default database targets.
 *
 * NOTE(review): the statements from the northd_context initializer through
 * should_service_stop() presumably form the body of a loop that repeats until
 * the exit flag is set -- confirm. */
2681 main(int argc, char *argv[])
2683 int res = EXIT_SUCCESS;
2684 struct unixctl_server *unixctl;
2688 fatal_ignore_sigpipe();
2689 set_program_name(argv[0]);
2690 service_start(&argc, &argv);
2691 parse_options(argc, argv);
2693 daemonize_start(false);
2695 retval = unixctl_server_create(NULL, &unixctl);
/* The exit command takes no arguments (min and max both 0) and receives the
 * address of the exiting flag as its aux pointer. */
2699 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
2701 daemonize_complete();
2706 /* We want to detect all changes to the ovn-nb db. */
2707 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2708 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* The southbound IDL is created with its third argument false, and the
 * tables and columns of interest are then registered one by one.
 * NOTE(review): presumably that argument disables monitoring everything by
 * default -- confirm against ovsdb_idl_create(). */
2710 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
2711 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
/* Logical_Flow columns. */
2713 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
2714 add_column_noalert(ovnsb_idl_loop.idl,
2715 &sbrec_logical_flow_col_logical_datapath);
2716 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
2717 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
2718 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
2719 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
2720 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
/* Multicast_Group columns. */
2722 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
2723 add_column_noalert(ovnsb_idl_loop.idl,
2724 &sbrec_multicast_group_col_datapath);
2725 add_column_noalert(ovnsb_idl_loop.idl,
2726 &sbrec_multicast_group_col_tunnel_key);
2727 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
2728 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
/* Datapath_Binding columns. */
2730 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
2731 add_column_noalert(ovnsb_idl_loop.idl,
2732 &sbrec_datapath_binding_col_tunnel_key);
2733 add_column_noalert(ovnsb_idl_loop.idl,
2734 &sbrec_datapath_binding_col_external_ids);
/* Port_Binding columns.  Every column so far goes through
 * add_column_noalert(); chassis, the last one, is the only column added with
 * plain ovsdb_idl_add_column(). */
2736 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
2737 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
2738 add_column_noalert(ovnsb_idl_loop.idl,
2739 &sbrec_port_binding_col_logical_port);
2740 add_column_noalert(ovnsb_idl_loop.idl,
2741 &sbrec_port_binding_col_tunnel_key);
2742 add_column_noalert(ovnsb_idl_loop.idl,
2743 &sbrec_port_binding_col_parent_port);
2744 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
2745 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
2746 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
2747 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
2748 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
/* Per-pass context: the two IDL handles plus the transaction returned by
 * ovsdb_idl_loop_run() for each database. */
2753 struct northd_context ctx = {
2754 .ovnnb_idl = ovnnb_idl_loop.idl,
2755 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
2756 .ovnsb_idl = ovnsb_idl_loop.idl,
2757 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
/* Service control-socket requests and arrange to wake on new ones. */
2763 unixctl_server_run(unixctl);
2764 unixctl_server_wait(unixctl);
/* NOTE(review): presumably issued only once exit was requested, so that the
 * next poll does not block -- confirm the guarding condition. */
2766 poll_immediate_wake();
/* Commit whatever was written to the two transactions during this pass. */
2768 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
2769 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
2772 if (should_service_stop()) {
/* Teardown: control socket first, then both IDL loops, then the cached
 * default database target strings. */
2777 unixctl_server_destroy(unixctl);
2778 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
2779 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
2782 free(default_nb_db_);
2783 free(default_sb_db_);
/* unixctl callback that main() registers for the exit command.
 *
 * conn is the connection to answer; argc and argv are unused.  exiting_ is the
 * aux pointer supplied at registration, which main() sets to the address of
 * its exiting flag, so it is converted back to a bool pointer here.  The
 * command is acknowledged with a reply that carries no body.
 *
 * NOTE(review): presumably the flag is set to true through the pointer before
 * the reply is sent -- confirm. */
2788 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2789 const char *argv[] OVS_UNUSED, void *exiting_)
2791 bool *exiting = exiting_;
2794 unixctl_command_reply(conn, NULL);