2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "ovn/lib/ovn-util.h"
34 #include "poll-loop.h"
38 #include "stream-ssl.h"
42 #include "openvswitch/vlog.h"
/* NOTE(review): this file appears to be a line-numbered, partially extracted
 * dump of OVN's ovn-northd.c; many original lines (braces, blank lines,
 * some statements) are missing.  Comments below are hedged accordingly —
 * verify details against the upstream source. */
44 VLOG_DEFINE_THIS_MODULE(ovn_northd);
/* unixctl "exit" command handler; its definition is not visible here. */
46 static unixctl_cb_func ovn_northd_exit;
/* State for the two OVSDB sessions ovn-northd maintains: the northbound
 * and southbound IDLs plus their in-progress transactions.  (The closing
 * "};" of this struct is among the missing lines.) */
48 struct northd_context {
49 struct ovsdb_idl *ovnnb_idl;
50 struct ovsdb_idl *ovnsb_idl;
51 struct ovsdb_idl_txn *ovnnb_txn;
52 struct ovsdb_idl_txn *ovnsb_txn;
/* Database locations; presumably set from command-line options (the
 * option-parsing code is not visible in this view). */
55 static const char *ovnnb_db;
56 static const char *ovnsb_db;
58 static const char *default_nb_db(void);
59 static const char *default_sb_db(void);
61 /* Pipeline stages. */
63 /* The two pipelines in an OVN logical flow table. */
/* (The enum's opening line is missing from this dump; presumably
 * "enum ovn_pipeline {" — confirm against upstream.) */
65 P_IN, /* Ingress pipeline. */
66 P_OUT /* Egress pipeline. */
69 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
70 enum ovn_datapath_type {
71 DP_SWITCH, /* OVN logical switch. */
72 DP_ROUTER /* OVN logical router. */
75 /* Returns an "enum ovn_stage" built from the arguments.
77 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
78 * functions can't be used in enums or switch cases.) */
/* Bit layout: bit 9 = datapath type, bit 8 = pipeline, bits 0-7 = table. */
79 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
80 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
82 /* A stage within an OVN logical switch or router.
84 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
85 * or router, whether the stage is part of the ingress or egress pipeline, and
86 * the table within that pipeline. The first three components are combined to
87 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
88 * S_ROUTER_OUT_DELIVERY. */
/* X-macro table of every pipeline stage: (datapath type, pipeline,
 * stage name, table number, external string name). */
90 #define PIPELINE_STAGES \
91 /* Logical switch ingress stages. */ \
92 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
93 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
94 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
95 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
96 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
97 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
98 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
99 PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \
100 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \
101 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \
102 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 10, "ls_in_l2_lkup") \
104 /* Logical switch egress stages. */ \
105 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
106 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
107 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
108 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
109 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
110 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \
111 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \
112 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \
114 /* Logical router ingress stages. */ \
115 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
116 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
117 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \
118 PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \
119 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \
120 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \
121 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \
123 /* Logical router egress stages. */ \
124 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
125 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
/* Expands each table row into an "enum ovn_stage" enumerator named
 * S_<DP>_<PIPELINE>_<STAGE>.  (The "enum ovn_stage {" line, the
 * PIPELINE_STAGES invocation, and the closing "};" are among the
 * lines missing from this dump.) */
127 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
128 S_##DP_TYPE##_##PIPELINE##_##STAGE \
129 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
131 #undef PIPELINE_STAGE
134 /* Due to various hard-coded priorities need to implement ACLs, the
135 * northbound database supports a smaller range of ACL priorities than
136 * are available to logical flows. This value is added to an ACL
137 * priority to determine the ACL's logical flow priority. */
138 #define OVN_ACL_PRI_OFFSET 1000
/* Register bits used to communicate between conntrack-related stages. */
140 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
141 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
142 #define REGBIT_CONNTRACK_NAT "reg0[2]"
144 /* Returns an "enum ovn_stage" built from the arguments. */
145 static enum ovn_stage
146 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
/* (The rest of the parameter list and the opening brace are among the
 * lines missing from this dump.) */
149 return OVN_STAGE_BUILD(dp_type, pipeline, table);
152 /* Returns the pipeline to which 'stage' belongs. */
153 static enum ovn_pipeline
154 ovn_stage_get_pipeline(enum ovn_stage stage)
/* Extracts bit 8, the pipeline bit of OVN_STAGE_BUILD()'s encoding. */
156 return (stage >> 8) & 1;
159 /* Returns the table to which 'stage' belongs. */
161 ovn_stage_get_table(enum ovn_stage stage)
166 /* Returns a string name for 'stage'. */
168 ovn_stage_to_str(enum ovn_stage stage)
/* A switch on 'stage' presumably opens here (line missing from this
 * dump); the X-macro generates one case per pipeline stage. */
171 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
172 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
174 #undef PIPELINE_STAGE
175 default: return "<unknown>";
/* Fragment of usage(): the body of a printf() help-text format string
 * plus the trailing stream_usage() call.  The printf( opening and other
 * lines are missing from this dump; the string text itself is runtime
 * output and is left byte-for-byte untouched. */
183 %s: OVN northbound management daemon\n\
184 usage: %s [OPTIONS]\n\
187 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
189 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
191 -h, --help display this help message\n\
192 -o, --options list available options\n\
193 -V, --version display version information\n\
194 ", program_name, program_name, default_nb_db(), default_sb_db());
197 stream_usage("database", true, true, false);
/* Tunnel-ID allocation helpers: a set of in-use tunnel keys kept as
 * tnlid_node entries in an hmap, hashed by hash_int(tnlid, 0).  (The
 * "struct tnlid_node {" line and its 'tnlid' member are among the
 * lines missing from this dump.) */
201 struct hmap_node hmap_node;
/* Frees every node in 'tnlids' and destroys the map itself. */
206 destroy_tnlids(struct hmap *tnlids)
208 struct tnlid_node *node;
209 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
/* free(node) presumably follows here — line missing from this dump. */
212 hmap_destroy(tnlids);
/* Records 'tnlid' as in use in 'set'.  Does not check for duplicates;
 * callers use tnlid_in_use() first. */
216 add_tnlid(struct hmap *set, uint32_t tnlid)
218 struct tnlid_node *node = xmalloc(sizeof *node);
219 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
/* Returns true if 'tnlid' is already present in 'set'. */
224 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
226 const struct tnlid_node *node;
227 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
228 if (node->tnlid == tnlid) {
/* Allocates an unused tunnel ID in [1, max], scanning circularly from
 * '*hint' + 1 so successive calls tend to hand out increasing IDs. */
236 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
239 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
240 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
241 if (!tnlid_in_use(set, tnlid)) {
242 add_tnlid(set, tnlid);
/* Exhaustion path: warn (rate-limited); presumably returns 0 — the
 * return line is missing from this dump. */
248 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
249 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
253 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
254 * sb->external_ids:logical-switch. */
255 struct ovn_datapath {
256 struct hmap_node key_node; /* Index on 'key'. */
257 struct uuid key; /* (nbs/nbr)->header_.uuid. */
259 const struct nbrec_logical_switch *nbs; /* May be NULL. */
260 const struct nbrec_logical_router *nbr; /* May be NULL. */
261 const struct sbrec_datapath_binding *sb; /* May be NULL. */
263 struct ovs_list list; /* In list of similar records. */
265 /* Logical switch data. */
266 struct ovn_port **router_ports;
267 size_t n_router_ports;
269 struct hmap port_tnlids;
270 uint32_t port_key_hint;
/* Creates an ovn_datapath keyed on 'key' and indexes it into
 * 'datapaths' by uuid_hash(key).  (The nbs/nbr/sb assignments and the
 * return statement are among the lines missing from this dump.) */
275 static struct ovn_datapath *
276 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
277 const struct nbrec_logical_switch *nbs,
278 const struct nbrec_logical_router *nbr,
279 const struct sbrec_datapath_binding *sb)
281 struct ovn_datapath *od = xzalloc(sizeof *od);
286 hmap_init(&od->port_tnlids);
287 od->port_key_hint = 0;
288 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
/* Removes 'od' from 'datapaths' and frees its owned storage. */
293 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
296 /* Don't remove od->list. It is used within build_datapaths() as a
297 * private list and once we've exited that function it is not safe to
299 hmap_remove(datapaths, &od->key_node);
300 destroy_tnlids(&od->port_tnlids);
301 free(od->router_ports);
/* Looks up the datapath with UUID 'uuid' in 'datapaths'.  (The
 * not-found return path is among the lines missing from this dump.) */
306 static struct ovn_datapath *
307 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
309 struct ovn_datapath *od;
311 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
312 if (uuid_equals(uuid, &od->key)) {
/* Maps a southbound Datapath_Binding back to its ovn_datapath via the
 * UUID stored in external-ids:logical-switch or :logical-router. */
319 static struct ovn_datapath *
320 ovn_datapath_from_sbrec(struct hmap *datapaths,
321 const struct sbrec_datapath_binding *sb)
325 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
329 return ovn_datapath_find(datapaths, &key);
/* A router is enabled unless its 'enabled' column is explicitly false;
 * an empty column (NULL pointer) means enabled. */
333 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
335 return !lrouter->enabled || *lrouter->enabled;
/* Partitions datapath records into three lists: southbound-only
 * (stale), northbound-only (new), and those present in both databases.
 * Also populates 'datapaths' with one ovn_datapath per record. */
339 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
340 struct ovs_list *sb_only, struct ovs_list *nb_only,
341 struct ovs_list *both)
343 hmap_init(datapaths);
344 ovs_list_init(sb_only);
345 ovs_list_init(nb_only);
/* Pass 1: southbound Datapath_Bindings.  Rows lacking the back-pointer
 * external-ids, or duplicating an already-seen UUID, are deleted;
 * everything else starts out on 'sb_only'. */
348 const struct sbrec_datapath_binding *sb, *sb_next;
349 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
351 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
352 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
353 ovsdb_idl_txn_add_comment(
355 "deleting Datapath_Binding "UUID_FMT" that lacks "
356 "external-ids:logical-switch and "
357 "external-ids:logical-router",
358 UUID_ARGS(&sb->header_.uuid));
359 sbrec_datapath_binding_delete(sb);
363 if (ovn_datapath_find(datapaths, &key)) {
364 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
366 &rl, "deleting Datapath_Binding "UUID_FMT" with "
367 "duplicate external-ids:logical-switch/router "UUID_FMT,
368 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
369 sbrec_datapath_binding_delete(sb);
373 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
375 ovs_list_push_back(sb_only, &od->list);
/* Pass 2: northbound logical switches.  A switch whose UUID matches an
 * sb_only entry moves to 'both'; otherwise a fresh record goes on
 * 'nb_only'. */
378 const struct nbrec_logical_switch *nbs;
379 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
380 struct ovn_datapath *od = ovn_datapath_find(datapaths,
384 ovs_list_remove(&od->list);
385 ovs_list_push_back(both, &od->list);
387 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
389 ovs_list_push_back(nb_only, &od->list);
/* Pass 3: northbound logical routers, same matching scheme.  Disabled
 * routers are skipped; a router UUID colliding with an existing NB
 * record draws a rate-limited warning. */
393 const struct nbrec_logical_router *nbr;
394 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
395 if (!lrouter_is_enabled(nbr)) {
399 struct ovn_datapath *od = ovn_datapath_find(datapaths,
404 ovs_list_remove(&od->list);
405 ovs_list_push_back(both, &od->list);
408 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
410 "duplicate UUID "UUID_FMT" in OVN_Northbound",
411 UUID_ARGS(&nbr->header_.uuid));
415 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
417 ovs_list_push_back(nb_only, &od->list);
/* Allocates an unused datapath tunnel key in [1, 2^24 - 1], keeping
 * the last allocation in a static hint across calls. */
423 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
425 static uint32_t hint;
426 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
429 /* Updates the southbound Datapath_Binding table so that it contains the
430 * logical switches and routers specified by the northbound database.
432 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
433 * switch and router. */
435 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
437 struct ovs_list sb_only, nb_only, both;
439 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
441 if (!ovs_list_is_empty(&nb_only)) {
442 /* First index the in-use datapath tunnel IDs. */
443 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
444 struct ovn_datapath *od;
445 LIST_FOR_EACH (od, list, &both) {
446 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
449 /* Add southbound record for each unmatched northbound record. */
450 LIST_FOR_EACH (od, list, &nb_only) {
/* NOTE(review): ovn_datapath_allocate_key() hands out keys up to
 * (1u << 24) - 1, so storing the result in a uint16_t truncates any
 * key above 65535.  Upstream ovn-northd uses uint32_t here —
 * confirm against the original source. */
451 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
456 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
/* Record which NB row this SB row mirrors in external-ids, keyed
 * "logical-switch" or "logical-router" by record type. */
458 char uuid_s[UUID_LEN + 1];
459 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
460 const char *key = od->nbs ? "logical-switch" : "logical-router";
461 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
462 sbrec_datapath_binding_set_external_ids(od->sb, &id);
464 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
466 destroy_tnlids(&dp_tnlids);
469 /* Delete southbound records without northbound matches. */
470 struct ovn_datapath *od, *next;
471 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
472 ovs_list_remove(&od->list);
473 sbrec_datapath_binding_delete(od->sb);
474 ovn_datapath_destroy(datapaths, od);
/* A logical port: a NB switch port, a NB router port, or a stale SB
 * Port_Binding awaiting deletion.  (The "struct ovn_port {" opening
 * line is among the lines missing from this dump.) */
479 struct hmap_node key_node; /* Index on 'key'. */
480 char *key; /* nbs->name, nbr->name, sb->logical_port. */
481 char *json_key; /* 'key', quoted for use in JSON. */
483 const struct nbrec_logical_switch_port *nbs; /* May be NULL. */
484 const struct nbrec_logical_router_port *nbr; /* May be NULL. */
485 const struct sbrec_port_binding *sb; /* May be NULL. */
487 /* Logical router port data. */
488 char *ip_s; /* "192.168.10.123" */
489 char *network_s; /* "192.168.10.0" */
490 char *bcast_s; /* "192.168.10.255" */
491 int plen; /* CIDR prefix: 24 */
493 ovs_be32 ip; /* 192.168.10.123 */
494 ovs_be32 mask; /* 255.255.255.0 */
495 ovs_be32 network; /* 192.168.10.255 */
498 struct ovn_port *peer;
500 struct ovn_datapath *od;
502 struct ovs_list list; /* In list of similar records. */
/* Creates an ovn_port named 'key' and indexes it into 'ports' by name
 * hash.  (The nbs/nbr/sb assignments and return are among the lines
 * missing from this dump.) */
505 static struct ovn_port *
506 ovn_port_create(struct hmap *ports, const char *key,
507 const struct nbrec_logical_switch_port *nbs,
508 const struct nbrec_logical_router_port *nbr,
509 const struct sbrec_port_binding *sb)
511 struct ovn_port *op = xzalloc(sizeof *op);
513 struct ds json_key = DS_EMPTY_INITIALIZER;
514 json_string_escape(key, &json_key);
515 op->json_key = ds_steal_cstr(&json_key);
517 op->key = xstrdup(key);
521 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
/* Removes 'port' from 'ports' and frees its owned strings. */
526 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
529 /* Don't remove port->list. It is used within build_ports() as a
530 * private list and once we've exited that function it is not safe to
532 hmap_remove(ports, &port->key_node);
534 free(port->network_s);
536 free(port->json_key);
/* Looks up a port by name in 'ports'; uses the same hash_string() hash
 * as ovn_port_create(). */
542 static struct ovn_port *
543 ovn_port_find(struct hmap *ports, const char *name)
547 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
548 if (!strcmp(op->key, name)) {
/* Allocates an unused 15-bit port tunnel key within datapath 'od',
 * using the per-datapath hint to avoid rescanning from 1. */
556 ovn_port_allocate_key(struct ovn_datapath *od)
558 return allocate_tnlid(&od->port_tnlids, "port",
559 (1u << 15) - 1, &od->port_key_hint);
/* Builds an ovn_port for every southbound Port_Binding, northbound
 * switch port, and northbound router port, partitioning them into
 * 'sb_only', 'nb_only' and 'both' just as join_datapaths() does for
 * datapaths.  Router-port MAC/CIDR configuration is validated and
 * parsed here, and router<->switch peer links are wired up at the end. */
563 join_logical_ports(struct northd_context *ctx,
564 struct hmap *datapaths, struct hmap *ports,
565 struct ovs_list *sb_only, struct ovs_list *nb_only,
566 struct ovs_list *both)
569 ovs_list_init(sb_only);
570 ovs_list_init(nb_only);
/* Start with every southbound binding on 'sb_only'. */
573 const struct sbrec_port_binding *sb;
574 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
575 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
577 ovs_list_push_back(sb_only, &op->list);
/* Walk each datapath's northbound ports, matching by name. */
580 struct ovn_datapath *od;
581 HMAP_FOR_EACH (od, key_node, datapaths) {
583 for (size_t i = 0; i < od->nbs->n_ports; i++) {
584 const struct nbrec_logical_switch_port *nbs = od->nbs->ports[i];
585 struct ovn_port *op = ovn_port_find(ports, nbs->name);
587 if (op->nbs || op->nbr) {
588 static struct vlog_rate_limit rl
589 = VLOG_RATE_LIMIT_INIT(5, 1);
590 VLOG_WARN_RL(&rl, "duplicate logical port %s",
595 ovs_list_remove(&op->list);
596 ovs_list_push_back(both, &op->list);
598 op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
599 ovs_list_push_back(nb_only, &op->list);
/* Router ports: validate the MAC and network before creating
 * the port; invalid entries are skipped with a warning. */
605 for (size_t i = 0; i < od->nbr->n_ports; i++) {
606 const struct nbrec_logical_router_port *nbr
610 if (!eth_addr_from_string(nbr->mac, &mac)) {
611 static struct vlog_rate_limit rl
612 = VLOG_RATE_LIMIT_INIT(5, 1);
613 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
/* All-ones masks (/32) and non-contiguous masks are rejected. */
618 char *error = ip_parse_masked(nbr->network, &ip, &mask);
619 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
620 static struct vlog_rate_limit rl
621 = VLOG_RATE_LIMIT_INIT(5, 1);
622 VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
627 struct ovn_port *op = ovn_port_find(ports, nbr->name);
629 if (op->nbs || op->nbr) {
630 static struct vlog_rate_limit rl
631 = VLOG_RATE_LIMIT_INIT(5, 1);
632 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
637 ovs_list_remove(&op->list);
638 ovs_list_push_back(both, &op->list);
640 op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
641 ovs_list_push_back(nb_only, &op->list);
/* Precompute printable forms of the port address, network, and
 * directed broadcast address. */
646 op->network = ip & mask;
647 op->plen = ip_count_cidr_bits(mask);
649 op->ip_s = xasprintf(IP_FMT, IP_ARGS(ip));
650 op->network_s = xasprintf(IP_FMT, IP_ARGS(op->network));
651 op->bcast_s = xasprintf(IP_FMT, IP_ARGS(ip | ~mask));
659 /* Connect logical router ports, and logical switch ports of type "router",
662 HMAP_FOR_EACH (op, key_node, ports) {
663 if (op->nbs && !strcmp(op->nbs->type, "router")) {
664 const char *peer_name = smap_get(&op->nbs->options, "router-port");
669 struct ovn_port *peer = ovn_port_find(ports, peer_name);
670 if (!peer || !peer->nbr) {
676 op->od->router_ports = xrealloc(
677 op->od->router_ports,
678 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
679 op->od->router_ports[op->od->n_router_ports++] = op;
680 } else if (op->nbr && op->nbr->peer) {
681 op->peer = ovn_port_find(ports, op->nbr->peer);
/* Propagates 'op' into its southbound Port_Binding row: type, options,
 * parent port, tag, and MAC list, with separate branches for router
 * ports (op->nbr) and switch ports (op->nbs). */
687 ovn_port_update_sbrec(const struct ovn_port *op)
689 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
691 /* If the router is for l3 gateway, it resides on a chassis
692 * and its port type is "gateway". */
693 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
695 sbrec_port_binding_set_type(op->sb, "gateway");
697 sbrec_port_binding_set_type(op->sb, "patch");
/* options:peer names the port on the other side of the patch;
 * "<error>" marks a router port with no resolved peer. */
700 const char *peer = op->peer ? op->peer->key : "<error>";
703 smap_add(&new, "peer", peer);
705 smap_add(&new, "gateway-chassis", chassis);
707 sbrec_port_binding_set_options(op->sb, &new);
/* Router ports carry no container parent, tag, or MAC list. */
710 sbrec_port_binding_set_parent_port(op->sb, NULL);
711 sbrec_port_binding_set_tag(op->sb, NULL, 0);
712 sbrec_port_binding_set_mac(op->sb, NULL, 0);
/* Switch-port branch: non-"router" ports copy type and options
 * through from the northbound row verbatim. */
714 if (strcmp(op->nbs->type, "router")) {
715 sbrec_port_binding_set_type(op->sb, op->nbs->type);
716 sbrec_port_binding_set_options(op->sb, &op->nbs->options);
718 const char *chassis = NULL;
719 if (op->peer && op->peer->od && op->peer->od->nbr) {
720 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
723 /* A switch port connected to a gateway router is also of
726 sbrec_port_binding_set_type(op->sb, "gateway");
728 sbrec_port_binding_set_type(op->sb, "patch");
/* The peer router port comes from the NB options:router-port. */
731 const char *router_port = smap_get(&op->nbs->options,
734 router_port = "<error>";
738 smap_add(&new, "peer", router_port);
740 smap_add(&new, "gateway-chassis", chassis);
742 sbrec_port_binding_set_options(op->sb, &new);
745 sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
746 sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
747 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
748 op->nbs->n_addresses);
752 /* Updates the southbound Port_Binding table so that it contains the logical
753 * switch ports specified by the northbound database.
755 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
756 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
759 build_ports(struct northd_context *ctx, struct hmap *datapaths,
762 struct ovs_list sb_only, nb_only, both;
764 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
766 /* For logical ports that are in both databases, update the southbound
767 * record based on northbound data. Also index the in-use tunnel_keys. */
768 struct ovn_port *op, *next;
769 LIST_FOR_EACH_SAFE (op, next, list, &both) {
770 ovn_port_update_sbrec(op);
772 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
773 if (op->sb->tunnel_key > op->od->port_key_hint) {
774 op->od->port_key_hint = op->sb->tunnel_key;
778 /* Add southbound record for each unmatched northbound record. */
779 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
/* uint16_t is sufficient here: port keys are capped at 2^15 - 1 by
 * ovn_port_allocate_key(). */
780 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
785 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
786 ovn_port_update_sbrec(op);
788 sbrec_port_binding_set_logical_port(op->sb, op->key);
789 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
792 /* Delete southbound records without northbound matches. */
793 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
794 ovs_list_remove(&op->list);
795 sbrec_port_binding_delete(op->sb);
796 ovn_port_destroy(ports, op);
800 #define OVN_MIN_MULTICAST 32768
801 #define OVN_MAX_MULTICAST 65535
/* A named multicast group with a tunnel key in
 * [OVN_MIN_MULTICAST, OVN_MAX_MULTICAST].  (The 'name' member line is
 * among the lines missing from this dump.) */
803 struct multicast_group {
805 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
/* The two well-known groups: flood-to-all and unknown-MAC delivery. */
808 #define MC_FLOOD "_MC_flood"
809 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
811 #define MC_UNKNOWN "_MC_unknown"
812 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
/* Groups are equal when both name and key match. */
815 multicast_group_equal(const struct multicast_group *a,
816 const struct multicast_group *b)
818 return !strcmp(a->name, b->name) && a->key == b->key;
821 /* Multicast group entry. */
822 struct ovn_multicast {
823 struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
824 struct ovn_datapath *datapath;
825 const struct multicast_group *group;
827 struct ovn_port **ports;
828 size_t n_ports, allocated_ports;
/* Hashes on the datapath pointer and group key — the same identity
 * that ovn_multicast_find() compares. */
832 ovn_multicast_hash(const struct ovn_datapath *datapath,
833 const struct multicast_group *group)
835 return hash_pointer(datapath, group->key);
/* Finds the (datapath, group) entry in 'mcgroups', if any. */
838 static struct ovn_multicast *
839 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
840 const struct multicast_group *group)
842 struct ovn_multicast *mc;
844 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
845 ovn_multicast_hash(datapath, group), mcgroups) {
846 if (mc->datapath == datapath
847 && multicast_group_equal(mc->group, group)) {
/* Adds 'port' to the (port->od, group) entry, creating it on first use
 * with room for 4 ports and growing via x2nrealloc() thereafter. */
855 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
856 struct ovn_port *port)
858 struct ovn_datapath *od = port->od;
859 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
861 mc = xmalloc(sizeof *mc);
862 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
866 mc->allocated_ports = 4;
867 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
869 if (mc->n_ports >= mc->allocated_ports) {
870 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
873 mc->ports[mc->n_ports++] = port;
/* Removes 'mc' from 'mcgroups'; the free() calls for its storage are
 * presumably among the lines missing from this dump. */
877 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
880 hmap_remove(mcgroups, &mc->hmap_node);
/* Writes mc's port set into southbound Multicast_Group row 'sb'. */
887 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
888 const struct sbrec_multicast_group *sb)
890 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
891 for (size_t i = 0; i < mc->n_ports; i++) {
892 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
894 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
898 /* Logical flow generation.
900 * This code generates the Logical_Flow table in the southbound database, as a
901 * function of most of the northbound database.
/* One logical flow: (datapath, stage, priority, match, actions).
 * (The "struct ovn_lflow {" line and the match/actions members are
 * among the lines missing from this dump.) */
905 struct hmap_node hmap_node;
907 struct ovn_datapath *od;
908 enum ovn_stage stage;
/* Hash covers datapath UUID, stage, priority, match, and actions —
 * exactly the fields ovn_lflow_equal() compares. */
915 ovn_lflow_hash(const struct ovn_lflow *lflow)
917 size_t hash = uuid_hash(&lflow->od->key);
918 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
919 hash = hash_string(lflow->match, hash);
920 return hash_string(lflow->actions, hash);
/* Full structural equality of two flows. */
924 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
926 return (a->od == b->od
927 && a->stage == b->stage
928 && a->priority == b->priority
929 && !strcmp(a->match, b->match)
930 && !strcmp(a->actions, b->actions));
/* Initializes 'lflow'; 'match' and 'actions' pointers are stored
 * as-is (ownership depends on the caller — see ovn_lflow_add() vs.
 * ovn_lflow_find()). */
934 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
935 enum ovn_stage stage, uint16_t priority,
936 char *match, char *actions)
939 lflow->stage = stage;
940 lflow->priority = priority;
941 lflow->match = match;
942 lflow->actions = actions;
945 /* Adds a row with the specified contents to the Logical_Flow table. */
947 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
948 enum ovn_stage stage, uint16_t priority,
949 const char *match, const char *actions)
951 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
952 ovn_lflow_init(lflow, od, stage, priority,
953 xstrdup(match), xstrdup(actions));
954 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
/* Finds a flow identical to (od, stage, priority, match, actions);
 * builds a stack-local 'target' to reuse the hash/equal helpers. */
957 static struct ovn_lflow *
958 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
959 enum ovn_stage stage, uint16_t priority,
960 const char *match, const char *actions)
962 struct ovn_lflow target;
963 ovn_lflow_init(&target, od, stage, priority,
964 CONST_CAST(char *, match), CONST_CAST(char *, actions));
966 struct ovn_lflow *lflow;
967 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
969 if (ovn_lflow_equal(lflow, &target)) {
/* Removes 'lflow' from 'lflows' and frees its owned strings. */
977 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
980 hmap_remove(lflows, &lflow->hmap_node);
982 free(lflow->actions);
987 /* Appends port security constraints on L2 address field 'eth_addr_field'
988 * (e.g. "eth.src" or "eth.dst") to 'match'. 'port_security', with
989 * 'n_port_security' elements, is the collection of port_security constraints
990 * from an OVN_NB Logical_Switch_Port row. */
992 build_port_security_l2(const char *eth_addr_field,
993 char **port_security, size_t n_port_security,
/* Remember where the clause begins so it can be rolled back if no
 * entry parsed as a valid MAC (see the length reset below). */
996 size_t base_len = match->length;
997 ds_put_format(match, " && %s == {", eth_addr_field);
1000 for (size_t i = 0; i < n_port_security; i++) {
/* Entries that do not parse as a MAC address are skipped. */
1003 if (eth_addr_from_string(port_security[i], &ea)) {
1004 ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1005 ds_put_char(match, ' ');
1009 ds_chomp(match, ' ');
1010 ds_put_cstr(match, "}");
/* No valid MACs were appended: truncate 'match' back to where the
 * clause started, leaving the original string untouched. */
1013 match->length = base_len;
/* Appends an IPv6 ND clause allowing solicitations/advertisements only
 * for MAC 'ea' (or the all-zeros MAC) in nd.sll/nd.tll and, when
 * 'n_ipv6_addrs' > 0, restricting nd.target to the EUI-64 link-local
 * address derived from 'ea' plus each address in 'ipv6_addrs'. */
1018 build_port_security_ipv6_nd_flow(
1019 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1022 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1023 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1024 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1025 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1027 if (!n_ipv6_addrs) {
1028 ds_put_cstr(match, "))");
/* With explicit addresses, add the nd.target restriction: the
 * link-local address derived from 'ea' plus each configured one. */
1032 char ip6_str[INET6_ADDRSTRLEN + 1];
1033 struct in6_addr lla;
1034 in6_generate_lla(ea, &lla);
1035 memset(ip6_str, 0, sizeof(ip6_str));
1036 ipv6_string_mapped(ip6_str, &lla);
1037 ds_put_format(match, " && (nd.target == %s", ip6_str);
1039 for(int i = 0; i < n_ipv6_addrs; i++) {
1040 memset(ip6_str, 0, sizeof(ip6_str));
1041 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1042 ds_put_format(match, " || nd.target == %s", ip6_str);
1045 ds_put_format(match, ")))");
/* Appends an "ip6.src == {...}" (ingress) or "ip6.dst == {...}"
 * (egress) clause listing the EUI-64 link-local address derived from
 * 'ea', each address in 'ipv6_addrs', and — egress only — the
 * ff00::/8 multicast range. */
1049 build_port_security_ipv6_flow(
1050 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1051 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1053 char ip6_str[INET6_ADDRSTRLEN + 1];
1055 ds_put_format(match, " && %s == {",
1056 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1058 /* Allow link-local address. */
1059 struct in6_addr lla;
1060 in6_generate_lla(ea, &lla);
1061 ipv6_string_mapped(ip6_str, &lla);
1062 ds_put_format(match, "%s, ", ip6_str);
1064 /* Allow ip6.dst=ff00::/8 for multicast packets */
1065 if (pipeline == P_OUT) {
1066 ds_put_cstr(match, "ff00::/8, ");
1068 for(int i = 0; i < n_ipv6_addrs; i++) {
1069 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1070 ds_put_format(match, "%s, ", ip6_str);
1072 /* Replace ", " by "}". */
1073 ds_chomp(match, ' ');
1074 ds_chomp(match, ',');
1075 ds_put_cstr(match, "}");
1079 * Build port security constraints on ARP and IPv6 ND fields
1080 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1082 * For each port security of the logical port, following
1083 * logical flows are added
1084 * - If the port security has no IP (both IPv4 and IPv6) or
1085 * if it has IPv4 address(es)
1086 * - Priority 90 flow to allow ARP packets for known MAC addresses
1087 * in the eth.src and arp.spa fields. If the port security
1088 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1090 * - If the port security has no IP (both IPv4 and IPv6) or
1091 * if it has IPv6 address(es)
1092 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1093 * in the eth.src and nd.sll/nd.tll fields. If the port security
1094 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1095 * for IPv6 Neighbor Advertisement packet.
1097 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1100 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1102 for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1103 struct lport_addresses ps;
1104 if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
1105 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1106 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port security. No MAC"
1107 " address found", op->nbs->port_security[i]);
/* A MAC-only entry (no IPv4 and no IPv6 addresses) gets both the
 * ARP and the ND allow flows below. */
1111 bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
1112 struct ds match = DS_EMPTY_INITIALIZER;
1114 if (ps.n_ipv4_addrs || no_ip) {
1116 &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
1117 ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
1118 ETH_ADDR_ARGS(ps.ea));
1120 if (ps.n_ipv4_addrs) {
1121 ds_put_cstr(&match, " && (");
1122 for (size_t i = 0; i < ps.n_ipv4_addrs; i++) {
1123 ds_put_cstr(&match, "arp.spa == ");
1124 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1125 /* When the netmask is applied, if the host portion is
1126 * non-zero, the host can only use the specified
1127 * address in the arp.spa. If zero, the host is allowed
1128 * to use any address in the subnet. */
1129 if (ps.ipv4_addrs[i].addr & ~mask) {
1130 ds_put_format(&match, IP_FMT,
1131 IP_ARGS(ps.ipv4_addrs[i].addr));
1133 ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1136 ds_put_cstr(&match, " || ");
/* Strip the trailing " || " separator before closing the list. */
1138 ds_chomp(&match, ' ');
1139 ds_chomp(&match, '|');
1140 ds_chomp(&match, '|');
1141 ds_put_cstr(&match, ")");
1143 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1144 ds_cstr(&match), "next;");
1148 if (ps.n_ipv6_addrs || no_ip) {
1150 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT,
1151 op->json_key, ETH_ADDR_ARGS(ps.ea));
1152 build_port_security_ipv6_nd_flow(&match, ps.ea, ps.ipv6_addrs,
1154 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1155 ds_cstr(&match), "next;");
1158 destroy_lport_addresses(&ps);
/* Catch-all: drop any ARP/ND on this port that no priority-90 allow
 * flow matched. */
1161 char *match = xasprintf("inport == %s && (arp || nd)", op->json_key);
1162 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
1168 * Build port security constraints on IPv4 and IPv6 src and dst fields
1169 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1171 * For each port security of the logical port, following
1172 * logical flows are added
1173 * - If the port security has IPv4 addresses,
1174 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1176 * - If the port security has IPv6 addresses,
1177 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1179 * - If the port security has IPv4 addresses or IPv6 addresses or both
1180 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
    /* Chosen once up front so the same code below can serve both the
     * ingress (src-address) and egress (dst-address) pipelines. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;

    /* One pass per row of the logical port's port_security column. */
    for (size_t i = 0; i < op->nbs->n_port_security; i++) {
        struct lport_addresses ps;
        /* Skip entries that do not parse. */
        if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
        /* A MAC-only entry adds no IP constraints here. */
        if (!(ps.n_ipv4_addrs || ps.n_ipv6_addrs)) {

        if (ps.n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == "ETH_ADDR_FMT
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                /* Start the allow-list of permitted source addresses. */
                ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
                              " && ip4.src == {", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
            /* Egress: limited broadcast and multicast are always
             * deliverable in addition to the configured addresses. */
            ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
                          " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                          op->json_key, ETH_ADDR_ARGS(ps.ea));

        for (int i = 0; i < ps.n_ipv4_addrs; i++) {
            ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
            /* When the netmask is applied, if the host portion is
             * non-zero, the host can only use the specified
             * address.  If zero, the host is allowed to use any
             * address in the subnet. */
            if (ps.ipv4_addrs[i].addr & ~mask) {
                ds_put_format(&match, IP_FMT,
                              IP_ARGS(ps.ipv4_addrs[i].addr));
                if (pipeline == P_OUT && ps.ipv4_addrs[i].plen != 32) {
                    /* Host is also allowed to receive packets to the
                     * broadcast address in the specified subnet. */
                    ds_put_format(&match, ", "IP_FMT,
                                  IP_ARGS(ps.ipv4_addrs[i].addr | ~mask));
                /* host portion is zero */
                ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
            ds_put_cstr(&match, ", ");
        /* Replace ", " by "}". */
        ds_chomp(&match, ' ');
        ds_chomp(&match, ',');
        ds_put_cstr(&match, "}");
        ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");

        if (ps.n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection (MLD report/done and neighbor solicitation). */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == "ETH_ADDR_FMT
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ETH_ADDR_ARGS(ps.ea));
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            /* Match on the direction-appropriate Ethernet address, then
             * let the helper append the IPv6 address constraints. */
            ds_put_format(&match, "%s == %s && %s == "ETH_ADDR_FMT"",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst",
                          ETH_ADDR_ARGS(ps.ea));
            build_port_security_ipv6_flow(pipeline, &match, ps.ea,
                                          ps.ipv6_addrs, ps.n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");

        destroy_lport_addresses(&ps);
        /* Priority-80 catch-all: drop any other IP traffic for this
         * MAC on this port. */
        char *match = xasprintf(
            "%s == %s && %s == "ETH_ADDR_FMT" && ip", port_direction,
            op->json_key, pipeline == P_IN ? "eth.src" : "eth.dst",
            ETH_ADDR_ARGS(ps.ea));
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
1302 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
1304 return !lsp->enabled || *lsp->enabled;
1308 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
1310 return !lsp->up || *lsp->up;
1314 has_stateful_acl(struct ovn_datapath *od)
1316 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1317 struct nbrec_acl *acl = od->nbs->acls[i];
1318 if (!strcmp(acl->action, "allow-related")) {
/* Builds the ingress and egress pre-ACL stages, which decide which
 * packets are flagged for conntrack before the ACL stages proper. */
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
    bool has_stateful = has_stateful_acl(od);
    struct ovn_port *op;

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (op->od == od && !strcmp(op->nbs->type, "router")) {
            /* Can't use ct() for router ports.  Consider the
             * following configuration: lp1(10.0.0.2) on
             * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
             * ping from lp1 to lp2, First, the response will go
             * through ct() with a zone for lp2 in the ls2 ingress
             * pipeline on hostB.  That ct zone knows about this
             * connection.  Next, it goes through ct() with the zone
             * for the router port in the egress pipeline of ls2 on
             * hostB.  This zone does not know about the connection,
             * as the icmp request went through the logical router
             * on hostA, not hostB.  This would only work with
             * distributed conntrack state across all chassis. */
            struct ds match_in = DS_EMPTY_INITIALIZER;
            struct ds match_out = DS_EMPTY_INITIALIZER;

            /* Priority 110 beats the priority-100 "send to ct" flows
             * below, exempting router-attached traffic. */
            ds_put_format(&match_in, "ip && inport == %s", op->json_key);
            ds_put_format(&match_out, "ip && outport == %s", op->json_key);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                          ds_cstr(&match_in), "next;");
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                          ds_cstr(&match_out), "next;");

            ds_destroy(&match_in);
            ds_destroy(&match_out);

    /* Ingress and Egress Pre-ACL Table (Priority 110).
     *
     * Not to do conntrack on ND packets. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");

    /* Ingress and Egress Pre-ACL Table (Priority 100).
     *
     * Regardless of whether the ACL is "from-lport" or "to-lport",
     * we need rules in both the ingress and egress table, because
     * the return traffic needs to be followed.
     *
     * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
     * it to conntrack for tracking and defragmentation. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                  REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                  REGBIT_CONNTRACK_DEFRAG" = 1; next;");
/* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
 * 'ip_address'.  The caller must free() the memory allocated for
 * 'ip_address'. */
ip_address_and_port_from_lb_key(const char *key, char **ip_address,
    char *ip_str, *start, *next;

    /* Work on a private copy, since strsep() modifies its argument. */
    next = start = xstrdup(key);
    ip_str = strsep(&next, ":");
    if (!ip_str || !ip_str[0]) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);

    /* The IP part must be a plain host address (implicit /32). */
    char *error = ip_parse_masked(ip_str, &ip, &mask);
    if (error || mask != OVS_BE32_MAX) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);

    /* Optional L4 port follows the ':', if present. */
    if (next && next[0]) {
        if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);

    /* Ownership of this copy transfers to the caller. */
    *ip_address = strdup(ip_str);
/* Builds the ingress/egress pre-load-balancer stages: flags traffic
 * destined to any configured VIP for conntrack defragmentation. */
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
    /* Allow all packets to go to next tables by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");

    /* Collect the set of distinct VIP IPs on this datapath. */
    struct sset all_ips = SSET_INITIALIZER(&all_ips);
    if (od->nbs->load_balancer) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer;
        struct smap *vips = &lb->vips;
        struct smap_node *node;
        bool vip_configured = false;

        SMAP_FOR_EACH (node, vips) {
            vip_configured = true;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);

            if (!sset_contains(&all_ips, ip_address)) {
                sset_add(&all_ips, ip_address);

            /* Ignore L4 port information in the key because fragmented packets
             * may not have L4 information.  The pre-stateful table will send
             * the packet through ct() action to de-fragment.  In stateful
             * table, we will eventually look at L4 information. */

        /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * packet to conntrack for defragmentation. */
        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            char *match = xasprintf("ip && ip4.dst == %s", ip_address);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
                          100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");

        sset_destroy(&all_ips);

        /* Egress: any VIP at all means return traffic must also be
         * defragmented/tracked. */
        if (vip_configured) {
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
                          100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
/* Builds the ingress/egress pre-stateful stages: packets flagged by the
 * pre-ACL or pre-LB stages are pushed through ct() here. */
build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
    /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
     * sent to conntrack for tracking and defragmentation. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
                  REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
                  REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
/* Builds the ingress and egress ACL stages from the northbound ACL rows
 * of logical switch 'od'.  User ACL priorities are offset by
 * OVN_ACL_PRI_OFFSET so the fixed flows below always win. */
build_acls(struct ovn_datapath *od, struct hmap *lflows)
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    /* Ingress and Egress ACL Table (Priority 1).
     *
     * By default, traffic is allowed.  This is partially handled by
     * the Priority 0 ACL flows added earlier, but we also need to
     * commit IP flows.  This is because, while the initiator's
     * direction may not have any stateful rules, the server's may
     * and then its return traffic would not have an associated
     * conntrack entry and would return "+invalid". */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
                  REGBIT_CONNTRACK_COMMIT" = 1; next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
                  REGBIT_CONNTRACK_COMMIT" = 1; next;");

    /* Ingress and Egress ACL Table (Priority 65535).
     *
     * Always drop traffic that's in an invalid state.  This is
     * enforced at a higher priority than ACLs can be defined. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,

    /* Ingress and Egress ACL Table (Priority 65535).
     *
     * Always allow traffic that is established to a committed
     * conntrack entry.  This is enforced at a higher priority than
     * ACLs can be defined. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                  "ct.est && !ct.rel && !ct.new && !ct.inv",
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                  "ct.est && !ct.rel && !ct.new && !ct.inv",

    /* Ingress and Egress ACL Table (Priority 65535).
     *
     * Always allow traffic that is related to an existing conntrack
     * entry.  This is enforced at a higher priority than ACLs can
     * be defined.
     *
     * NOTE: This does not support related data sessions (eg,
     * a dynamically negotiated FTP data channel), but will allow
     * related traffic such as an ICMP Port Unreachable through
     * that's generated from a non-listening UDP port. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                  "!ct.est && ct.rel && !ct.new && !ct.inv",
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                  "!ct.est && ct.rel && !ct.new && !ct.inv",

    /* Ingress and Egress ACL Table (Priority 65535).
     *
     * Not to do conntrack on ND packets. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiator's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            const char *actions = has_stateful
                ? REGBIT_CONNTRACK_COMMIT" = 1; next;"
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, actions);
        } else if (!strcmp(acl->action, "allow-related")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* Commit the connection tracking entry, which allows all
             * other traffic related to this entry to flow due to the
             * 65535 priority flow defined earlier. */
            ds_put_format(&match, "ct.new && (%s)", acl->match);
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          REGBIT_CONNTRACK_COMMIT" = 1; next;");
        } else if (!strcmp(acl->action, "drop")) {
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
        } else if (!strcmp(acl->action, "reject")) {
            /* xxx Need to support "reject". */
            VLOG_INFO("reject is not a supported action");
            ovn_lflow_add(lflows, od, stage,
                          acl->priority + OVN_ACL_PRI_OFFSET,
                          acl->match, "drop;");
/* Builds the ingress/egress load-balancer stages (S_SWITCH_IN/OUT_LB). */
build_lb(struct ovn_datapath *od, struct hmap *lflows)
    /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
     * default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");

    if (od->nbs->load_balancer) {
        /* Ingress and Egress LB Table (Priority 65535).
         *
         * Send established traffic through conntrack for just NAT. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",
                      REGBIT_CONNTRACK_NAT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv",
                      REGBIT_CONNTRACK_NAT" = 1; next;");
/* Builds the ingress/egress stateful stages: commits flagged
 * connections to conntrack and installs per-VIP ct_lb flows. */
build_stateful(struct ovn_datapath *od, struct hmap *lflows)
    /* Ingress and Egress stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
     * committed to conntrack. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit; next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit; next;");

    /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
     * through nat (without committing).
     *
     * REGBIT_CONNTRACK_COMMIT is set for new connections and
     * REGBIT_CONNTRACK_NAT is set for established connections, so the
     * two priority-100 matches cannot overlap. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");

    /* Load balancing rules for new connections get committed to conntrack
     * table.  So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
     * a higher priority rule for load balancing below also commits the
     * connection, so it is okay if we do not hit the above match on
     * REGBIT_CONNTRACK_COMMIT. */
    if (od->nbs->load_balancer) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer;
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        SMAP_FOR_EACH (node, vips) {
            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);

            /* New connections in Ingress table. */
            char *action = xasprintf("ct_lb(%s);", node->value);
            struct ds match = DS_EMPTY_INITIALIZER;
            ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
            /* NOTE(review): no leading space before "&&" here, so the match
             * becomes "...ip4.dst == IP&& udp ..."; upstream uses " && udp".
             * Confirm the OVN expression lexer tolerates this. */
            if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                ds_put_format(&match, "&& udp && udp.dst == %d", port);
            ds_put_format(&match, "&& tcp && tcp.dst == %d", port);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                          120, ds_cstr(&match), action);
            /* IP-only VIPs (no L4 port) get the lower priority 110. */
            ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                          110, ds_cstr(&match), action);
/* Generates all logical flows for the logical switches in 'datapaths',
 * adding them to 'lflows' and multicast groups to 'mcgroups'. */
build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
                    struct hmap *lflows, struct hmap *mcgroups)
    /* This flow table structure is documented in ovn-northd(8), so please
     * update ovn-northd.8.xml if you change anything. */

    /* Scratch buffers reused (and cleared) across the loops below. */
    struct ds match = DS_EMPTY_INITIALIZER;
    struct ds actions = DS_EMPTY_INITIALIZER;

    /* Build pre-ACL and ACL tables for both ingress and egress.
     * Ingress tables 3 and 4.  Egress tables 0 and 1. */
    struct ovn_datapath *od;
    HMAP_FOR_EACH (od, key_node, datapaths) {
        build_pre_acls(od, lflows, ports);
        build_pre_lb(od, lflows);
        build_pre_stateful(od, lflows);
        build_acls(od, lflows);
        build_lb(od, lflows);
        build_stateful(od, lflows);

    /* Logical switch ingress table 0: Admission control framework (priority
     * 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        /* Logical VLANs not supported. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",

        /* Broadcast/multicast source address is invalid. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",

        /* Port security flows have priority 50 (see below) and will continue
         * to the next table if packet source is acceptable. */

    /* Logical switch ingress table 0: Ingress port security - L2
     * (priority 50).
     * Ingress table 1: Ingress port security - IP (priority 90 and 80)
     * Ingress table 2: Ingress port security - ND (priority 90 and 80)
     */
    struct ovn_port *op;
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!lsp_is_enabled(op->nbs)) {
            /* Drop packets from disabled logical ports (since logical flow
             * tables are default-drop). */

        ds_put_format(&match, "inport == %s", op->json_key);
        build_port_security_l2(
            "eth.src", op->nbs->port_security, op->nbs->n_port_security,
        ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
                      ds_cstr(&match), "next;");

        if (op->nbs->n_port_security) {
            build_port_security_ip(P_IN, op, lflows);
            build_port_security_nd(op, lflows);

    /* Ingress table 1 and 2: Port security - IP and ND, by default goto next
     * (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");

    /* Ingress table 9: ARP responder, skip requests coming from localnet ports.
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (!strcmp(op->nbs->type, "localnet")) {
            ds_put_format(&match, "inport == %s", op->json_key);
            ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
                          ds_cstr(&match), "next;");

    /* Ingress table 9: ARP/ND responder, reply for known IPs
     * (priority 50). */
    HMAP_FOR_EACH (op, key_node, ports) {
        /*
         * Add ARP/ND reply flows if either the
         *  - port is up or
         *  - port type is router
         */
        if (!lsp_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {

        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct lport_addresses laddrs;
            if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,

            /* One ARP-responder flow per known IPv4 address. */
            for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
                ds_put_format(&match, "arp.tpa == "IP_FMT" && arp.op == 1",
                              IP_ARGS(laddrs.ipv4_addrs[j].addr));
                ds_put_format(&actions,
                    "eth.dst = eth.src; "
                    "eth.src = "ETH_ADDR_FMT"; "
                    "arp.op = 2; /* ARP reply */ "
                    "arp.tha = arp.sha; "
                    "arp.sha = "ETH_ADDR_FMT"; "
                    "arp.tpa = arp.spa; "
                    "arp.spa = "IP_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                    ETH_ADDR_ARGS(laddrs.ea),
                    ETH_ADDR_ARGS(laddrs.ea),
                    IP_ARGS(laddrs.ipv4_addrs[j].addr));
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

            /* One neighbor-advertisement flow covering all IPv6
             * addresses of this entry. */
            if (laddrs.n_ipv6_addrs > 0) {
                char ip6_str[INET6_ADDRSTRLEN + 1];
                ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
                if (laddrs.n_ipv6_addrs == 1) {
                    ipv6_string_mapped(ip6_str,
                                       &(laddrs.ipv6_addrs[0].addr));
                    ds_put_format(&match, "nd.target == %s", ip6_str);
                    ds_put_cstr(&match, "(");
                    for (size_t j = 0; j < laddrs.n_ipv6_addrs; j++) {
                        ipv6_string_mapped(ip6_str,
                                           &(laddrs.ipv6_addrs[j].addr));
                        ds_put_format(&match, "nd.target == %s || ", ip6_str);
                    /* Strip the trailing " || " separator. */
                    ds_chomp(&match, ' ');
                    ds_chomp(&match, '|');
                    ds_chomp(&match, '|');
                    ds_chomp(&match, ' ');
                    ds_put_cstr(&match, ")");

                ds_put_format(&actions,
                    "na { eth.src = "ETH_ADDR_FMT"; "
                    "nd.tll = "ETH_ADDR_FMT"; "
                    "outport = inport; "
                    "inport = \"\"; /* Allow sending out inport. */ "
                    ETH_ADDR_ARGS(laddrs.ea),
                    ETH_ADDR_ARGS(laddrs.ea));

                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
                              ds_cstr(&match), ds_cstr(&actions));

            destroy_lport_addresses(&laddrs);

    /* Ingress table 9: ARP/ND responder, by default goto next
     * (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");

    /* Ingress table 10: Destination lookup, broadcast and multicast handling
     * (priority 100). */
    HMAP_FOR_EACH (op, key_node, ports) {
        if (lsp_is_enabled(op->nbs)) {
            ovn_multicast_add(mcgroups, &mc_flood, op);

    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
                      "outport = \""MC_FLOOD"\"; output;");

    /* Ingress table 10: Destination lookup, unicast handling (priority 50), */
    HMAP_FOR_EACH (op, key_node, ports) {
        for (size_t i = 0; i < op->nbs->n_addresses; i++) {
            struct eth_addr mac;

            if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
                              ETH_ADDR_ARGS(mac));

                ds_put_format(&actions, "outport = %s; output;", op->json_key);
                ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
                              ds_cstr(&match), ds_cstr(&actions));
            } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
                if (lsp_is_enabled(op->nbs)) {
                    ovn_multicast_add(mcgroups, &mc_unknown, op);
                    op->od->has_unknown = true;
                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
                    "%s: invalid syntax '%s' in addresses column",
                    op->nbs->name, op->nbs->addresses[i]);

    /* Ingress table 10: Destination lookup for unknown MACs (priority 0). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        if (od->has_unknown) {
            ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
                          "outport = \""MC_UNKNOWN"\"; output;");

    /* Egress tables 6: Egress port security - IP (priority 0)
     * Egress table 7: Egress port security L2 - multicast/broadcast
     *                 (priority 100). */
    HMAP_FOR_EACH (od, key_node, datapaths) {
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",

    /* Egress table 6: Egress port security - IP (priorities 90 and 80)
     * if port security enabled.
     *
     * Egress table 7: Egress port security - L2 (priorities 50 and 150).
     *
     * Priority 50 rules implement port security for enabled logical port.
     *
     * Priority 150 rules drop packets to disabled logical ports, so that they
     * don't even receive multicast or broadcast packets. */
    HMAP_FOR_EACH (op, key_node, ports) {
        ds_put_format(&match, "outport == %s", op->json_key);
        if (lsp_is_enabled(op->nbs)) {
            build_port_security_l2("eth.dst", op->nbs->port_security,
                                   op->nbs->n_port_security, &match);
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
                          ds_cstr(&match), "output;");
            ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
                          ds_cstr(&match), "drop;");

        if (op->nbs->n_port_security) {
            build_port_security_ip(P_OUT, op, lflows);

    ds_destroy(&actions);
2025 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2027 return !lrport->enabled || *lrport->enabled;
/* Adds to 'lflows' an IP-routing flow on router port 'op' for the prefix
 * 'network_s'/'plen'.  A non-null 'gateway' is the next hop; otherwise
 * the destination is treated as directly attached. */
add_route(struct hmap *lflows, const struct ovn_port *op,
          const char *network_s, int plen, const char *gateway)
    char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);

    struct ds actions = DS_EMPTY_INITIALIZER;
    /* reg0 receives the next-hop IP: the gateway when given, else the
     * packet's own destination. */
    ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
    ds_put_cstr(&actions, gateway);
    ds_put_cstr(&actions, "ip4.dst");
    ds_put_format(&actions,
                  "eth.src = "ETH_ADDR_FMT"; "
                  op->ip_s, ETH_ADDR_ARGS(op->mac), op->json_key);

    /* The priority here is calculated to implement longest-prefix-match
     * routing: longer prefixes get higher priorities. */
    ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match,
    ds_destroy(&actions);
/* Translates one northbound static route on router 'od' into a routing
 * flow, validating the next hop, the prefix, and the output port. */
build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
                        const struct nbrec_logical_router_static_route *route)
    ovs_be32 prefix, next_hop, mask;

    /* Verify that next hop is an IP address with 32 bits mask. */
    char *error = ip_parse_masked(route->nexthop, &next_hop, &mask);
    if (error || mask != OVS_BE32_MAX) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);

    /* Verify that ip prefix is a valid CIDR address. */
    error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
    if (error || !ip_is_cidr(mask)) {
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
        VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",

    /* Find the outgoing port. */
    struct ovn_port *out_port = NULL;
    if (route->output_port) {
        /* Explicit output port given: it must exist. */
        out_port = ovn_port_find(ports, route->output_port);
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
                         route->output_port, route->ip_prefix);
        /* output_port is not specified, find the
         * router port matching the next hop. */
        for (i = 0; i < od->nbr->n_ports; i++) {
            struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
            out_port = ovn_port_find(ports, lrp->name);
            /* This should not happen. */

            /* Next hop is on this port's subnet? */
            if (out_port->network
                && !((out_port->network ^ next_hop) & out_port->mask)) {
                /* There should be only 1 interface that matches the next hop.
                 * Otherwise, it's a configuration error, because subnets of
                 * router's interfaces should NOT overlap. */

        if (i == od->nbr->n_ports) {
            /* There is no matched out port. */
            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
            VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
                         route->ip_prefix, route->nexthop);

    char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix));
    add_route(lflows, out_port, prefix_s, ip_count_cidr_bits(mask),
2131 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
2132 struct hmap *lflows)
2134 /* This flow table structure is documented in ovn-northd(8), so please
2135 * update ovn-northd.8.xml if you change anything. */
2137 struct ds match = DS_EMPTY_INITIALIZER;
2138 struct ds actions = DS_EMPTY_INITIALIZER;
2140 /* Logical router ingress table 0: Admission control framework. */
2141 struct ovn_datapath *od;
2142 HMAP_FOR_EACH (od, key_node, datapaths) {
2147 /* Logical VLANs not supported.
2148 * Broadcast/multicast source address is invalid. */
2149 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
2150 "vlan.present || eth.src[40]", "drop;");
2153 /* Logical router ingress table 0: match (priority 50). */
2154 struct ovn_port *op;
2155 HMAP_FOR_EACH (op, key_node, ports) {
2160 if (!lrport_is_enabled(op->nbr)) {
2161 /* Drop packets from disabled logical ports (since logical flow
2162 * tables are default-drop). */
2167 ds_put_format(&match,
2168 "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
2169 ETH_ADDR_ARGS(op->mac), op->json_key);
2170 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
2171 ds_cstr(&match), "next;");
2174 /* Logical router ingress table 1: IP Input. */
2175 HMAP_FOR_EACH (od, key_node, datapaths) {
2180 /* L3 admission control: drop multicast and broadcast source, localhost
2181 * source or destination, and zero network source or destination
2182 * (priority 100). */
2183 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
2185 "ip4.src == 255.255.255.255 || "
2186 "ip4.src == 127.0.0.0/8 || "
2187 "ip4.dst == 127.0.0.0/8 || "
2188 "ip4.src == 0.0.0.0/8 || "
2189 "ip4.dst == 0.0.0.0/8",
2192 /* ARP reply handling. Use ARP replies to populate the logical
2193 * router's ARP table. */
2194 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
2195 "put_arp(inport, arp.spa, arp.sha);");
2197 /* Drop Ethernet local broadcast. By definition this traffic should
2198 * not be forwarded.*/
2199 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2200 "eth.bcast", "drop;");
2202 /* Drop IP multicast. */
2203 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2204 "ip4.mcast", "drop;");
2208 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
2210 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
2211 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
2212 ds_cstr(&match), "drop;");
2214 /* Pass other traffic not already handled to the next table for
2216 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
2219 HMAP_FOR_EACH (op, key_node, ports) {
2224 /* L3 admission control: drop packets that originate from an IP address
2225 * owned by the router or a broadcast address known to the router
2226 * (priority 100). */
2228 ds_put_format(&match, "ip4.src == {%s, %s}", op->ip_s, op->bcast_s);
2229 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
2230 ds_cstr(&match), "drop;");
2232 /* ICMP echo reply. These flows reply to ICMP echo requests
2233 * received for the router's IP address. Since packets only
2234 * get here as part of the logical router datapath, the inport
2235 * (i.e. the incoming locally attached net) does not matter.
2236 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
2238 ds_put_format(&match,
2239 "ip4.dst == %s && icmp4.type == 8 && icmp4.code == 0",
2242 ds_put_format(&actions,
2243 "ip4.dst = ip4.src; "
2247 "inport = \"\"; /* Allow sending out inport. */ "
2250 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2251 ds_cstr(&match), ds_cstr(&actions));
2253 /* ARP reply. These flows reply to ARP requests for the router's own
2256 ds_put_format(&match, "inport == %s && arp.tpa == %s && arp.op == 1",
2257 op->json_key, op->ip_s);
2259 ds_put_format(&actions,
2260 "eth.dst = eth.src; "
2261 "eth.src = "ETH_ADDR_FMT"; "
2262 "arp.op = 2; /* ARP reply */ "
2263 "arp.tha = arp.sha; "
2264 "arp.sha = "ETH_ADDR_FMT"; "
2265 "arp.tpa = arp.spa; "
2268 "inport = \"\"; /* Allow sending out inport. */ "
2270 ETH_ADDR_ARGS(op->mac),
2271 ETH_ADDR_ARGS(op->mac),
2274 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2275 ds_cstr(&match), ds_cstr(&actions));
2277 /* ARP handling for external IP addresses.
2279 * DNAT IP addresses are external IP addresses that need ARP
2281 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2282 const struct nbrec_nat *nat;
2284 nat = op->od->nbr->nat[i];
2286 if(!strcmp(nat->type, "snat")) {
2291 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2292 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2293 VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
2294 "for router %s", nat->external_ip, op->key);
2299 ds_put_format(&match,
2300 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2301 op->json_key, IP_ARGS(ip));
2303 ds_put_format(&actions,
2304 "eth.dst = eth.src; "
2305 "eth.src = "ETH_ADDR_FMT"; "
2306 "arp.op = 2; /* ARP reply */ "
2307 "arp.tha = arp.sha; "
2308 "arp.sha = "ETH_ADDR_FMT"; "
2309 "arp.tpa = arp.spa; "
2310 "arp.spa = "IP_FMT"; "
2312 "inport = \"\"; /* Allow sending out inport. */ "
2314 ETH_ADDR_ARGS(op->mac),
2315 ETH_ADDR_ARGS(op->mac),
2318 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2319 ds_cstr(&match), ds_cstr(&actions));
2322 /* Drop IP traffic to this router, unless the router ip is used as
2324 bool snat_ip_is_router_ip = false;
2325 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2326 const struct nbrec_nat *nat;
2329 nat = op->od->nbr->nat[i];
2330 if (strcmp(nat->type, "snat")) {
2334 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2335 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2336 VLOG_WARN_RL(&rl, "bad ip address %s in snat configuration "
2337 "for router %s", nat->external_ip, op->key);
2342 snat_ip_is_router_ip = true;
2347 if (!snat_ip_is_router_ip) {
2349 ds_put_format(&match, "ip4.dst == %s", op->ip_s);
2350 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2351 ds_cstr(&match), "drop;");
2355 /* NAT in Gateway routers. */
2356 HMAP_FOR_EACH (od, key_node, datapaths) {
2361 /* Packets are allowed by default. */
2362 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2363 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2364 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2366 /* NAT rules are only valid on Gateway routers. */
2367 if (!smap_get(&od->nbr->options, "chassis")) {
2371 for (int i = 0; i < od->nbr->n_nat; i++) {
2372 const struct nbrec_nat *nat;
2374 nat = od->nbr->nat[i];
2378 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2379 if (error || mask != OVS_BE32_MAX) {
2380 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2381 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2387 /* Check the validity of nat->logical_ip. 'logical_ip' can
2388 * be a subnet when the type is "snat". */
2389 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2390 if (!strcmp(nat->type, "snat")) {
2392 static struct vlog_rate_limit rl =
2393 VLOG_RATE_LIMIT_INIT(5, 1);
2394 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2395 "in router "UUID_FMT"",
2396 nat->logical_ip, UUID_ARGS(&od->key));
2401 if (error || mask != OVS_BE32_MAX) {
2402 static struct vlog_rate_limit rl =
2403 VLOG_RATE_LIMIT_INIT(5, 1);
2404 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2405 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2411 /* Ingress UNSNAT table: It is for already established connections'
2412 * reverse traffic. i.e., SNAT has already been done in egress
2413 * pipeline and now the packet has entered the ingress pipeline as
2414 * part of a reply. We undo the SNAT here.
2416 * Undoing SNAT has to happen before DNAT processing. This is
2417 * because when the packet was DNATed in ingress pipeline, it did
2418 * not know about the possibility of eventual additional SNAT in
2419 * egress pipeline. */
2420 if (!strcmp(nat->type, "snat")
2421 || !strcmp(nat->type, "dnat_and_snat")) {
2423 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2424 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
2425 ds_cstr(&match), "ct_snat; next;");
2428 /* Ingress DNAT table: Packets enter the pipeline with destination
2429 * IP address that needs to be DNATted from a external IP address
2430 * to a logical IP address. */
2431 if (!strcmp(nat->type, "dnat")
2432 || !strcmp(nat->type, "dnat_and_snat")) {
2433 /* Packet when it goes from the initiator to destination.
2434 * We need to zero the inport because the router can
2435 * send the packet back through the same interface. */
2437 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2439 ds_put_format(&actions,"inport = \"\"; ct_dnat(%s);",
2441 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
2442 ds_cstr(&match), ds_cstr(&actions));
2445 /* Egress SNAT table: Packets enter the egress pipeline with
2446 * source ip address that needs to be SNATted to a external ip
2448 if (!strcmp(nat->type, "snat")
2449 || !strcmp(nat->type, "dnat_and_snat")) {
2451 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
2453 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
2455 /* The priority here is calculated such that the
2456 * nat->logical_ip with the longest mask gets a higher
2458 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
2459 count_1bits(ntohl(mask)) + 1,
2460 ds_cstr(&match), ds_cstr(&actions));
2464 /* Re-circulate every packet through the DNAT zone.
2465 * This helps with two things.
2467 * 1. Any packet that needs to be unDNATed in the reverse
2468 * direction gets unDNATed. Ideally this could be done in
2469 * the egress pipeline. But since the gateway router
2470 * does not have any feature that depends on the source
2471 * ip address being external IP address for IP routing,
2472 * we can do it here, saving a future re-circulation.
2474 * 2. Any packet that was sent through SNAT zone in the
2475 * previous table automatically gets re-circulated to get
2476 * back the new destination IP address that is needed for
2477 * routing in the openflow pipeline. */
2478 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2479 "ip", "inport = \"\"; ct_dnat;");
2482 /* Logical router ingress table 4: IP Routing.
2484 * A packet that arrives at this table is an IP packet that should be
2485 * routed to the address in ip4.dst. This table sets outport to the correct
2486 * output port, eth.src to the output port's MAC address, and reg0 to the
2487 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2488 * unchanged), and advances to the next table for ARP resolution. */
2489 HMAP_FOR_EACH (op, key_node, ports) {
2494 add_route(lflows, op, op->network_s, op->plen, NULL);
2496 HMAP_FOR_EACH (od, key_node, datapaths) {
2501 /* Convert the static routes to flows. */
2502 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2503 const struct nbrec_logical_router_static_route *route;
2505 route = od->nbr->static_routes[i];
2506 build_static_route_flow(lflows, od, ports, route);
2509 /* XXX destination unreachable */
2511 /* Local router ingress table 5: ARP Resolution.
2513 * Any packet that reaches this table is an IP packet whose next-hop IP
2514 * address is in reg0. (ip4.dst is the final destination.) This table
2515 * resolves the IP address in reg0 into an output port in outport and an
2516 * Ethernet address in eth.dst. */
2517 HMAP_FOR_EACH (op, key_node, ports) {
2519 /* This is a logical router port. If next-hop IP address in 'reg0'
2520 * matches ip address of this router port, then the packet is
2521 * intended to eventually be sent to this logical port. Set the
2522 * destination mac address using this port's mac address.
2524 * The packet is still in peer's logical pipeline. So the match
2525 * should be on peer's outport. */
2526 if (op->nbr->peer) {
2527 struct ovn_port *peer = ovn_port_find(ports, op->nbr->peer);
2532 if (!peer->ip || !op->ip) {
2536 ds_put_format(&match, "outport == %s && reg0 == %s",
2537 peer->json_key, op->ip_s);
2539 ds_put_format(&actions, "eth.dst = "ETH_ADDR_FMT"; next;",
2540 ETH_ADDR_ARGS(op->mac));
2541 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2542 100, ds_cstr(&match), ds_cstr(&actions));
2544 } else if (op->od->n_router_ports && strcmp(op->nbs->type, "router")) {
2545 /* This is a logical switch port that backs a VM or a container.
2546 * Extract its addresses. For each of the address, go through all
2547 * the router ports attached to the switch (to which this port
2548 * connects) and if the address in question is reachable from the
2549 * router port, add an ARP entry in that router's pipeline. */
2551 for (size_t i = 0; i < op->nbs->n_addresses; i++) {
2552 struct lport_addresses laddrs;
2553 if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
2558 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
2559 ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
2560 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2561 /* Get the Logical_Router_Port that the
2562 * Logical_Switch_Port is connected to, as
2564 const char *peer_name = smap_get(
2565 &op->od->router_ports[j]->nbs->options,
2571 struct ovn_port *peer
2572 = ovn_port_find(ports, peer_name);
2573 if (!peer || !peer->nbr) {
2577 /* Make sure that 'ip' is in 'peer''s network. */
2578 if ((ip ^ peer->network) & peer->mask) {
2583 ds_put_format(&match, "outport == %s && reg0 == "IP_FMT,
2584 peer->json_key, IP_ARGS(ip));
2586 ds_put_format(&actions,
2587 "eth.dst = "ETH_ADDR_FMT"; next;",
2588 ETH_ADDR_ARGS(laddrs.ea));
2589 ovn_lflow_add(lflows, peer->od,
2590 S_ROUTER_IN_ARP_RESOLVE, 100,
2591 ds_cstr(&match), ds_cstr(&actions));
2596 destroy_lport_addresses(&laddrs);
2598 } else if (!strcmp(op->nbs->type, "router")) {
2599 /* This is a logical switch port that connects to a router. */
2601 /* The peer of this switch port is the router port for which
2602 * we need to add logical flows such that it can resolve
2603 * ARP entries for all the other router ports connected to
2604 * the switch in question. */
2606 const char *peer_name = smap_get(&op->nbs->options,
2612 struct ovn_port *peer = ovn_port_find(ports, peer_name);
2613 if (!peer || !peer->nbr || !peer->ip) {
2617 for (size_t j = 0; j < op->od->n_router_ports; j++) {
2618 const char *router_port_name = smap_get(
2619 &op->od->router_ports[j]->nbs->options,
2621 struct ovn_port *router_port = ovn_port_find(ports,
2623 if (!router_port || !router_port->nbr || !router_port->ip) {
2627 /* Skip the router port under consideration. */
2628 if (router_port == peer) {
2632 if (!router_port->ip) {
2636 ds_put_format(&match, "outport == %s && reg0 == %s",
2637 peer->json_key, router_port->ip_s);
2639 ds_put_format(&actions, "eth.dst = "ETH_ADDR_FMT"; next;",
2640 ETH_ADDR_ARGS(router_port->mac));
2641 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2642 100, ds_cstr(&match), ds_cstr(&actions));
2647 HMAP_FOR_EACH (od, key_node, datapaths) {
2652 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2653 "get_arp(outport, reg0); next;");
2656 /* Local router ingress table 6: ARP request.
2658 * In the common case where the Ethernet destination has been resolved,
2659 * this table outputs the packet (priority 0). Otherwise, it composes
2660 * and sends an ARP request (priority 100). */
2661 HMAP_FOR_EACH (od, key_node, datapaths) {
2666 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2667 "eth.dst == 00:00:00:00:00:00",
2669 "eth.dst = ff:ff:ff:ff:ff:ff; "
2671 "arp.op = 1; " /* ARP request */
2674 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2677 /* Logical router egress table 1: Delivery (priority 100).
2679 * Priority 100 rules deliver packets to enabled logical ports. */
2680 HMAP_FOR_EACH (op, key_node, ports) {
2685 if (!lrport_is_enabled(op->nbr)) {
2686 /* Drop packets to disabled logical ports (since logical flow
2687 * tables are default-drop). */
2692 ds_put_format(&match, "outport == %s", op->json_key);
2693 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
2694 ds_cstr(&match), "output;");
2698 ds_destroy(&actions);
2701 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2702 * constructing their contents based on the OVN_NB database. */
2704 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
/* Strategy: compute the full desired set of logical flows (and multicast
 * groups) in memory, then reconcile against the current SB contents --
 * SB rows that match a desired entry are kept (and the in-memory entry is
 * consumed), stale SB rows are deleted, and whatever remains in memory
 * afterwards is inserted as new SB rows.
 * NOTE(review): this excerpt elides some original lines (return type,
 * closing braces, parts of loop bodies); comments cover visible code only. */
2707 struct hmap lflows = HMAP_INITIALIZER(&lflows);
2708 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
/* Populate 'lflows' (and, for switches, 'mcgroups') from the NB database. */
2710 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
2711 build_lrouter_flows(datapaths, ports, &lflows);
2713 /* Push changes to the Logical_Flow table to database. */
2714 const struct sbrec_logical_flow *sbflow, *next_sbflow;
2715 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
2716 struct ovn_datapath *od
2717 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* SB flow whose logical datapath no longer exists: delete it. */
2719 sbrec_logical_flow_delete(sbflow);
/* Rebuild the stage key for this SB row so it can be looked up in the
 * freshly computed 'lflows' set. */
2723 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
2724 enum ovn_pipeline pipeline
2725 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
2726 struct ovn_lflow *lflow = ovn_lflow_find(
2727 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
2728 sbflow->priority, sbflow->match, sbflow->actions);
/* Found: the SB row already matches; consume the in-memory copy so it
 * is not re-inserted below. */
2730 ovn_lflow_destroy(&lflows, lflow);
/* Not found: the SB row is stale; delete it. */
2732 sbrec_logical_flow_delete(sbflow);
/* Whatever survives in 'lflows' has no SB counterpart yet; insert one
 * SB row per remaining entry. */
2735 struct ovn_lflow *lflow, *next_lflow;
2736 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
2737 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
2738 uint8_t table = ovn_stage_get_table(lflow->stage);
2740 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
2741 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
2742 sbrec_logical_flow_set_pipeline(
2743 sbflow, pipeline == P_IN ? "ingress" : "egress");
2744 sbrec_logical_flow_set_table_id(sbflow, table);
2745 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
2746 sbrec_logical_flow_set_match(sbflow, lflow->match);
2747 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name in external_ids for debugging. */
2749 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
2750 ovn_stage_to_str(lflow->stage));
2751 sbrec_logical_flow_set_external_ids(sbflow, &ids);
2753 ovn_lflow_destroy(&lflows, lflow);
2755 hmap_destroy(&lflows);
2757 /* Push changes to the Multicast_Group table to database. */
/* Same reconcile-then-insert pattern as for logical flows above. */
2758 const struct sbrec_multicast_group *sbmc, *next_sbmc;
2759 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
2760 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
/* Datapath gone: the group is stale; delete it. */
2763 sbrec_multicast_group_delete(sbmc);
2767 struct multicast_group group = { .name = sbmc->name,
2768 .key = sbmc->tunnel_key };
2769 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
/* Matching desired group: refresh the SB row's port set in place and
 * consume the in-memory copy; otherwise delete the stale SB row. */
2771 ovn_multicast_update_sbrec(mc, sbmc);
2772 ovn_multicast_destroy(&mcgroups, mc);
2774 sbrec_multicast_group_delete(sbmc);
/* Insert SB rows for desired groups that did not already exist. */
2777 struct ovn_multicast *mc, *next_mc;
2778 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
2779 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
2780 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
2781 sbrec_multicast_group_set_name(sbmc, mc->group->name);
2782 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
2783 ovn_multicast_update_sbrec(mc, sbmc);
2784 ovn_multicast_destroy(&mcgroups, mc);
2786 hmap_destroy(&mcgroups);
2789 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
2790 * We always update OVN_Southbound to match the current data in
2791 * OVN_Northbound, so that the address sets used in Logical_Flows in
2792 * OVN_Southbound is checked against the proper set.*/
2794 sync_address_sets(struct northd_context *ctx)
/* Index the existing SB address sets by name; entries are removed from the
 * index as they are matched against NB below, so whatever is left at the
 * end has no NB counterpart and gets deleted. */
2796 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
2798 const struct sbrec_address_set *sb_address_set;
2799 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
2800 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
/* For every NB set, reuse the same-named SB row or create a new one, then
 * overwrite its address list unconditionally with the NB contents. */
2803 const struct nbrec_address_set *nb_address_set;
2804 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
2805 sb_address_set = shash_find_and_delete(&sb_address_sets,
2806 nb_address_set->name);
2807 if (!sb_address_set) {
2808 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
2809 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
2812 sbrec_address_set_set_addresses(sb_address_set,
2813 /* "char **" is not compatible with "const char **" */
2814 (const char **) nb_address_set->addresses,
2815 nb_address_set->n_addresses);
/* Delete SB sets that no longer exist in NB. */
2818 struct shash_node *node, *next;
2819 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
2820 sbrec_address_set_delete(node->data);
2821 shash_delete(&sb_address_sets, node);
2823 shash_destroy(&sb_address_sets);
2827 ovnnb_db_run(struct northd_context *ctx)
/* One full NB -> SB translation pass: recompute the datapath and port maps,
 * rebuild the logical flows and multicast groups, and sync the address
 * sets, then free the transient in-memory maps.  Bails out early when
 * there is no open SB transaction to write into. */
2829 if (!ctx->ovnsb_txn) {
2832 struct hmap datapaths, ports;
2833 build_datapaths(ctx, &datapaths);
2834 build_ports(ctx, &datapaths, &ports);
2835 build_lflows(ctx, &datapaths, &ports);
2837 sync_address_sets(ctx);
/* The maps only live for the duration of this pass; tear them down. */
2839 struct ovn_datapath *dp, *next_dp;
2840 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
2841 ovn_datapath_destroy(&datapaths, dp);
2843 hmap_destroy(&datapaths);
2845 struct ovn_port *port, *next_port;
2846 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
2847 ovn_port_destroy(&ports, port);
2849 hmap_destroy(&ports);
2853 * The only change we get notified about is if the 'chassis' column of the
2854 * 'Port_Binding' table changes. When this column is not empty, it means we
2855 * need to set the corresponding logical port as 'up' in the northbound DB.
2858 ovnsb_db_run(struct northd_context *ctx)
/* Mirrors Port_Binding 'chassis' state from SB back into the NB 'up'
 * column.  Skips the pass when there is no NB transaction to write into. */
2860 if (!ctx->ovnnb_txn) {
2863 struct hmap lports_hmap;
2864 const struct sbrec_port_binding *sb;
2865 const struct nbrec_logical_switch_port *nb;
/* Temporary hash of NB logical switch ports keyed by name, so each SB
 * port binding can be matched in O(1) instead of a nested scan. */
2867 struct lport_hash_node {
2868 struct hmap_node node;
2869 const struct nbrec_logical_switch_port *nb;
2872 hmap_init(&lports_hmap);
2874 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
2875 hash_node = xzalloc(sizeof *hash_node);
2877 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
2880 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
/* Resolve the NB port with the same name as this binding; compare names
 * explicitly because distinct names may hash to the same bucket. */
2882 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
2883 hash_string(sb->logical_port, 0),
2885 if (!strcmp(sb->logical_port, hash_node->nb->name)) {
2892 /* The logical port doesn't exist for this port binding. This can
2893 * happen under normal circumstances when ovn-northd hasn't gotten
2894 * around to pruning the Port_Binding yet. */
/* Only write NB when 'up' actually needs to change, to avoid churning
 * the NB database.  NOTE(review): the lines defining 'up' for each
 * branch are elided in this excerpt -- presumably true when a chassis
 * is bound and false otherwise; confirm against the full source. */
2898 if (sb->chassis && (!nb->up || !*nb->up)) {
2900 nbrec_logical_switch_port_set_up(nb, &up, 1);
2901 } else if (!sb->chassis && (!nb->up || *nb->up)) {
2903 nbrec_logical_switch_port_set_up(nb, &up, 1);
/* Free the temporary index. */
2907 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
2910 hmap_destroy(&lports_hmap);
2914 static char *default_nb_db_;
/* Returns the default OVN northbound DB connection string
 * ("unix:<rundir>/ovnnb_db.sock"), computed on first use and cached in
 * 'default_nb_db_' (freed in main() at shutdown). */
2919 if (!default_nb_db_) {
2920 default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
2922 return default_nb_db_;
2925 static char *default_sb_db_;
/* Returns the default OVN southbound DB connection string
 * ("unix:<rundir>/ovnsb_db.sock"), computed on first use and cached in
 * 'default_sb_db_' (freed in main() at shutdown). */
2930 if (!default_sb_db_) {
2931 default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
2933 return default_sb_db_;
2937 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
/* Parses the command line into the file-scope 'ovnnb_db'/'ovnsb_db'
 * globals, falling back to the default socket paths when the corresponding
 * options are not given.  Also handles the shared daemon/vlog/SSL option
 * groups.  NOTE(review): the option enum, several switch cases and the
 * getopt loop structure are elided in this excerpt. */
2940 DAEMON_OPTION_ENUMS,
2943 static const struct option long_options[] = {
2944 {"ovnsb-db", required_argument, NULL, 'd'},
2945 {"ovnnb-db", required_argument, NULL, 'D'},
2946 {"help", no_argument, NULL, 'h'},
2947 {"options", no_argument, NULL, 'o'},
2948 {"version", no_argument, NULL, 'V'},
2949 DAEMON_LONG_OPTIONS,
2951 STREAM_SSL_LONG_OPTIONS,
/* Derive the short-option string from the long options; must be freed. */
2954 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
2959 c = getopt_long(argc, argv, short_options, long_options, NULL);
2965 DAEMON_OPTION_HANDLERS;
2966 VLOG_OPTION_HANDLERS;
2967 STREAM_SSL_OPTION_HANDLERS;
2982 ovs_cmdl_print_options(long_options);
2986 ovs_print_version(0, 0);
/* Apply defaults for any DB endpoint not set on the command line. */
2995 ovnsb_db = default_sb_db();
2999 ovnnb_db = default_nb_db();
3002 free(short_options);
3006 add_column_noalert(struct ovsdb_idl *idl,
3007 const struct ovsdb_idl_column *column)
/* Registers 'column' with the IDL but suppresses change alerts for it:
 * ovn-northd writes these columns itself, so it does not need to be woken
 * up when they change. */
3009 ovsdb_idl_add_column(idl, column);
3010 ovsdb_idl_omit_alert(idl, column);
3014 main(int argc, char *argv[])
/* ovn-northd entry point: sets up daemonization and the unixctl server,
 * creates the two OVSDB IDL loops (full monitoring of OVN_NB; a
 * hand-picked set of OVN_SB tables/columns), then runs the main
 * translation loop until told to exit.  NOTE(review): the main loop body
 * and several declarations are elided in this excerpt. */
3016 int res = EXIT_SUCCESS;
3017 struct unixctl_server *unixctl;
3021 fatal_ignore_sigpipe();
3022 set_program_name(argv[0]);
3023 service_start(&argc, &argv);
3024 parse_options(argc, argv);
3026 daemonize_start(false);
3028 retval = unixctl_server_create(NULL, &unixctl);
3032 unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
3034 daemonize_complete();
3039 /* We want to detect all changes to the ovn-nb db. */
3040 struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3041 ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* For the SB db, monitor only the tables and columns northd uses; the
 * columns registered through add_column_noalert() are written by northd
 * itself and must not wake the main loop. */
3043 struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3044 ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
3046 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
3047 add_column_noalert(ovnsb_idl_loop.idl,
3048 &sbrec_logical_flow_col_logical_datapath);
3049 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
3050 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
3051 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
3052 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
3053 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
3055 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
3056 add_column_noalert(ovnsb_idl_loop.idl,
3057 &sbrec_multicast_group_col_datapath);
3058 add_column_noalert(ovnsb_idl_loop.idl,
3059 &sbrec_multicast_group_col_tunnel_key);
3060 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
3061 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
3063 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
3064 add_column_noalert(ovnsb_idl_loop.idl,
3065 &sbrec_datapath_binding_col_tunnel_key);
3066 add_column_noalert(ovnsb_idl_loop.idl,
3067 &sbrec_datapath_binding_col_external_ids);
3069 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
3070 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
3071 add_column_noalert(ovnsb_idl_loop.idl,
3072 &sbrec_port_binding_col_logical_port);
3073 add_column_noalert(ovnsb_idl_loop.idl,
3074 &sbrec_port_binding_col_tunnel_key);
3075 add_column_noalert(ovnsb_idl_loop.idl,
3076 &sbrec_port_binding_col_parent_port);
3077 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
3078 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
3079 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
3080 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
/* Unlike the columns above, 'chassis' changes MUST wake us: they drive
 * the NB 'up' updates in ovnsb_db_run(). */
3081 ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
3083 ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
3084 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
3085 add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
/* Per-iteration context: a fresh transaction is opened on each database
 * by ovsdb_idl_loop_run() every time around the loop. */
3090 struct northd_context ctx = {
3091 .ovnnb_idl = ovnnb_idl_loop.idl,
3092 .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
3093 .ovnsb_idl = ovnsb_idl_loop.idl,
3094 .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
3100 unixctl_server_run(unixctl);
3101 unixctl_server_wait(unixctl);
/* When exiting, wake immediately so the loop terminates without
 * waiting on the poll. */
3103 poll_immediate_wake();
3105 ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
3106 ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
3109 if (should_service_stop()) {
/* Shutdown: release the unixctl server, both IDL loops, and the cached
 * default DB path strings. */
3114 unixctl_server_destroy(unixctl);
3115 ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
3116 ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
3119 free(default_nb_db_);
3120 free(default_sb_db_);
3125 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
3126 const char *argv[] OVS_UNUSED, void *exiting_)
3128 bool *exiting = exiting_;
3131 unixctl_command_reply(conn, NULL);