2 * Licensed under the Apache License, Version 2.0 (the "License");
3 * you may not use this file except in compliance with the License.
4 * You may obtain a copy of the License at:
6 * http://www.apache.org/licenses/LICENSE-2.0
8 * Unless required by applicable law or agreed to in writing, software
9 * distributed under the License is distributed on an "AS IS" BASIS,
10 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 * See the License for the specific language governing permissions and
12 * limitations under the License.
21 #include "command-line.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
27 #include "openvswitch/hmap.h"
28 #include "openvswitch/json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-dhcp.h"
31 #include "ovn/lib/ovn-nb-idl.h"
32 #include "ovn/lib/ovn-sb-idl.h"
33 #include "ovn/lib/ovn-util.h"
35 #include "poll-loop.h"
39 #include "stream-ssl.h"
43 #include "openvswitch/vlog.h"
/* NOTE(review): this extract is ovn-northd, the OVN northbound management
 * daemon; many original lines (including braces and struct closers) are
 * missing from this view, so comments below are grounded only in the
 * visible code. */
45 VLOG_DEFINE_THIS_MODULE(ovn_northd);
47 static unixctl_cb_func ovn_northd_exit;
/* Bundles the IDL connections and current transactions for the OVN
 * northbound and southbound databases; passed through the processing
 * functions below. */
49 struct northd_context {
50 struct ovsdb_idl *ovnnb_idl;
51 struct ovsdb_idl *ovnsb_idl;
52 struct ovsdb_idl_txn *ovnnb_txn;
53 struct ovsdb_idl_txn *ovnsb_txn;
/* Connection targets for the two databases (presumably set from the
 * command line or the defaults below -- the parsing code is not visible
 * in this extract). */
56 static const char *ovnnb_db;
57 static const char *ovnsb_db;
59 static const char *default_nb_db(void);
60 static const char *default_sb_db(void);
62 /* Pipeline stages. */
64 /* The two pipelines in an OVN logical flow table. */
66 P_IN, /* Ingress pipeline. */
67 P_OUT /* Egress pipeline. */
70 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
71 enum ovn_datapath_type {
72 DP_SWITCH, /* OVN logical switch. */
73 DP_ROUTER /* OVN logical router. */
76 /* Returns an "enum ovn_stage" built from the arguments.
78 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
79 * functions can't be used in enums or switch cases.) */
/* Bit layout of a stage value: bit 9 = datapath type (DP_SWITCH/DP_ROUTER),
 * bit 8 = pipeline (P_IN/P_OUT), bits 0-7 = table number. */
80 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
81 (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
83 /* A stage within an OVN logical switch or router.
85 * An "enum ovn_stage" indicates whether the stage is part of a logical switch
86 * or router, whether the stage is part of the ingress or egress pipeline, and
87 * the table within that pipeline. The first three components are combined to
88 * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
89 * S_ROUTER_OUT_DELIVERY. */
91 #define PIPELINE_STAGES \
92 /* Logical switch ingress stages. */ \
93 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \
94 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \
95 PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \
96 PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \
97 PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \
98 PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \
99 PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \
100 PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \
101 PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \
102 PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \
103 PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 10, "ls_in_dhcp_options") \
104 PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 11, "ls_in_dhcp_response") \
105 PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 12, "ls_in_l2_lkup") \
107 /* Logical switch egress stages. */ \
108 PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \
109 PIPELINE_STAGE(SWITCH, OUT, PRE_ACL, 1, "ls_out_pre_acl") \
110 PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \
111 PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \
112 PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \
113 PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \
114 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \
115 PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \
117 /* Logical router ingress stages. */ \
118 PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \
119 PIPELINE_STAGE(ROUTER, IN, IP_INPUT, 1, "lr_in_ip_input") \
120 PIPELINE_STAGE(ROUTER, IN, UNSNAT, 2, "lr_in_unsnat") \
121 PIPELINE_STAGE(ROUTER, IN, DNAT, 3, "lr_in_dnat") \
122 PIPELINE_STAGE(ROUTER, IN, IP_ROUTING, 4, "lr_in_ip_routing") \
123 PIPELINE_STAGE(ROUTER, IN, ARP_RESOLVE, 5, "lr_in_arp_resolve") \
124 PIPELINE_STAGE(ROUTER, IN, ARP_REQUEST, 6, "lr_in_arp_request") \
126 /* Logical router egress stages. */ \
127 PIPELINE_STAGE(ROUTER, OUT, SNAT, 0, "lr_out_snat") \
128 PIPELINE_STAGE(ROUTER, OUT, DELIVERY, 1, "lr_out_delivery")
/* Expand each PIPELINE_STAGE(...) above into an "enum ovn_stage" constant
 * named S_<DP>_<PIPELINE>_<STAGE>, encoded via OVN_STAGE_BUILD. */
130 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
131 S_##DP_TYPE##_##PIPELINE##_##STAGE \
132 = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
134 #undef PIPELINE_STAGE
137 /* Due to various hard-coded priorities need to implement ACLs, the
138 * northbound database supports a smaller range of ACL priorities than
139 * are available to logical flows. This value is added to an ACL
140 * priority to determine the ACL's logical flow priority. */
141 #define OVN_ACL_PRI_OFFSET 1000
/* Symbolic names for logical register bits ("reg0[N]") used as scratch
 * state between pipeline stages: three conntrack-related bits and one for
 * DHCP option processing results. */
143 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
144 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
145 #define REGBIT_CONNTRACK_NAT "reg0[2]"
146 #define REGBIT_DHCP_OPTS_RESULT "reg0[3]"
148 /* Returns an "enum ovn_stage" built from the arguments. */
149 static enum ovn_stage
150 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
153 return OVN_STAGE_BUILD(dp_type, pipeline, table);
156 /* Returns the pipeline to which 'stage' belongs. */
157 static enum ovn_pipeline
158 ovn_stage_get_pipeline(enum ovn_stage stage)
160 return (stage >> 8) & 1; /* Pipeline lives in bit 8 (see OVN_STAGE_BUILD). */
163 /* Returns the table to which 'stage' belongs. */
165 ovn_stage_get_table(enum ovn_stage stage)
170 /* Returns a string name for 'stage'. */
172 ovn_stage_to_str(enum ovn_stage stage)
/* Switch over every known stage via the PIPELINE_STAGES X-macro, mapping
 * each S_* constant to its human-readable name. */
175 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
176 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
178 #undef PIPELINE_STAGE
179 default: return "<unknown>";
183 /* Returns the type of the datapath to which a flow with the given 'stage' may
185 static enum ovn_datapath_type
186 ovn_stage_to_datapath_type(enum ovn_stage stage)
189 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME) \
190 case S_##DP_TYPE##_##PIPELINE##_##STAGE: return DP_##DP_TYPE;
192 #undef PIPELINE_STAGE
193 default: OVS_NOT_REACHED();
201 %s: OVN northbound management daemon\n\
202 usage: %s [OPTIONS]\n\
205 --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
207 --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
209 -h, --help display this help message\n\
210 -o, --options list available options\n\
211 -V, --version display version information\n\
212 ", program_name, program_name, default_nb_db(), default_sb_db());
/* NOTE(review): presumably this also prints help for the database stream
 * (connection method) options -- confirm the boolean argument meanings
 * against stream_usage()'s declaration in stream.h. */
215 stream_usage("database", true, true, false);
/* A member of a set of in-use tunnel IDs, hashed on the ID itself. */
219 struct hmap_node hmap_node;
/* Frees every node in 'tnlids' and destroys the map itself. */
224 destroy_tnlids(struct hmap *tnlids)
226 struct tnlid_node *node;
227 HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
230 hmap_destroy(tnlids);
/* Records 'tnlid' as in use in 'set'.
 * NOTE(review): node->tnlid is never assigned in the lines visible here --
 * the assignment may be missing from this extract; confirm. */
234 add_tnlid(struct hmap *set, uint32_t tnlid)
236 struct tnlid_node *node = xmalloc(sizeof *node);
237 hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
/* Checks whether 'tnlid' is already present in 'set'. */
242 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
244 const struct tnlid_node *node;
245 HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
246 if (node->tnlid == tnlid) {
/* Searches circularly, starting just after '*hint', for a tunnel ID in
 * [1, max] not yet in 'set'; the first free ID found is added to 'set'.
 * Logs a rate-limited warning when the whole space is exhausted. */
254 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
257 for (uint32_t tnlid = *hint + 1; tnlid != *hint;
258 tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
259 if (!tnlid_in_use(set, tnlid)) {
260 add_tnlid(set, tnlid);
266 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
267 VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
271 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
272 * sb->external_ids:logical-switch. */
273 struct ovn_datapath {
274 struct hmap_node key_node; /* Index on 'key'. */
275 struct uuid key; /* (nbs/nbr)->header_.uuid. */
277 const struct nbrec_logical_switch *nbs; /* May be NULL. */
278 const struct nbrec_logical_router *nbr; /* May be NULL. */
279 const struct sbrec_datapath_binding *sb; /* May be NULL. */
281 struct ovs_list list; /* In list of similar records. */
283 /* Logical switch data. */
284 struct ovn_port **router_ports;
285 size_t n_router_ports;
287 struct hmap port_tnlids; /* In-use port tunnel keys (see build_ports()). */
288 uint32_t port_key_hint; /* Start point for port key allocation. */
/* Allocates a zeroed ovn_datapath and indexes it in 'datapaths' by the
 * hash of its UUID key.  (Assignment of key/nbs/nbr/sb is not visible in
 * this extract.) */
293 static struct ovn_datapath *
294 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
295 const struct nbrec_logical_switch *nbs,
296 const struct nbrec_logical_router *nbr,
297 const struct sbrec_datapath_binding *sb)
299 struct ovn_datapath *od = xzalloc(sizeof *od);
304 hmap_init(&od->port_tnlids);
305 od->port_key_hint = 0;
306 hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
/* Removes 'od' from 'datapaths' and frees the resources it owns. */
311 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
314 /* Don't remove od->list. It is used within build_datapaths() as a
315 * private list and once we've exited that function it is not safe to
317 hmap_remove(datapaths, &od->key_node);
318 destroy_tnlids(&od->port_tnlids);
319 free(od->router_ports);
324 /* Returns 'od''s datapath type. */
325 static enum ovn_datapath_type
326 ovn_datapath_get_type(const struct ovn_datapath *od)
328 return od->nbs ? DP_SWITCH : DP_ROUTER;
/* Looks up a datapath by UUID in 'datapaths'. */
331 static struct ovn_datapath *
332 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
334 struct ovn_datapath *od;
336 HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
337 if (uuid_equals(uuid, &od->key)) {
/* Maps a southbound Datapath_Binding row back to its ovn_datapath via the
 * external-ids:logical-switch / logical-router UUID. */
344 static struct ovn_datapath *
345 ovn_datapath_from_sbrec(struct hmap *datapaths,
346 const struct sbrec_datapath_binding *sb)
350 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
351 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
354 return ovn_datapath_find(datapaths, &key);
/* A router whose 'enabled' column is unset (NULL) is treated as enabled. */
358 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
360 return !lrouter->enabled || *lrouter->enabled;
/* Pairs southbound Datapath_Binding rows with northbound logical
 * switches/routers, building a "struct ovn_datapath" per datapath in
 * 'datapaths'.  On return, 'sb_only' holds datapaths with only an SB row,
 * 'nb_only' those with only an NB row, and 'both' those matched in both. */
364 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
365 struct ovs_list *sb_only, struct ovs_list *nb_only,
366 struct ovs_list *both)
368 hmap_init(datapaths);
369 ovs_list_init(sb_only);
370 ovs_list_init(nb_only);
/* Pass 1: index every SB Datapath_Binding, deleting rows that lack (or
 * duplicate) the external-ids back-pointer to their NB row. */
373 const struct sbrec_datapath_binding *sb, *sb_next;
374 SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
376 if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
377 !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
378 ovsdb_idl_txn_add_comment(
380 "deleting Datapath_Binding "UUID_FMT" that lacks "
381 "external-ids:logical-switch and "
382 "external-ids:logical-router",
383 UUID_ARGS(&sb->header_.uuid));
384 sbrec_datapath_binding_delete(sb);
388 if (ovn_datapath_find(datapaths, &key)) {
389 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
391 &rl, "deleting Datapath_Binding "UUID_FMT" with "
392 "duplicate external-ids:logical-switch/router "UUID_FMT,
393 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
394 sbrec_datapath_binding_delete(sb);
398 struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
400 ovs_list_push_back(sb_only, &od->list);
/* Pass 2: walk NB logical switches, moving matches from 'sb_only' to
 * 'both' and creating new entries on 'nb_only' for the rest. */
403 const struct nbrec_logical_switch *nbs;
404 NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
405 struct ovn_datapath *od = ovn_datapath_find(datapaths,
409 ovs_list_remove(&od->list);
410 ovs_list_push_back(both, &od->list);
412 od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
414 ovs_list_push_back(nb_only, &od->list);
/* Pass 3: same for NB logical routers, skipping disabled routers. */
418 const struct nbrec_logical_router *nbr;
419 NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
420 if (!lrouter_is_enabled(nbr)) {
424 struct ovn_datapath *od = ovn_datapath_find(datapaths,
429 ovs_list_remove(&od->list);
430 ovs_list_push_back(both, &od->list);
433 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
435 "duplicate UUID "UUID_FMT" in OVN_Northbound",
436 UUID_ARGS(&nbr->header_.uuid));
440 od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
442 ovs_list_push_back(nb_only, &od->list);
/* Allocates a fresh datapath tunnel key (24-bit space) not already present
 * in 'dp_tnlids'. */
448 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
450 static uint32_t hint;
451 return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
454 /* Updates the southbound Datapath_Binding table so that it contains the
455 * logical switches and routers specified by the northbound database.
457 * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
458 * switch and router. */
460 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
462 struct ovs_list sb_only, nb_only, both;
464 join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
466 if (!ovs_list_is_empty(&nb_only)) {
467 /* First index the in-use datapath tunnel IDs. */
468 struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
469 struct ovn_datapath *od;
470 LIST_FOR_EACH (od, list, &both) {
471 add_tnlid(&dp_tnlids, od->sb->tunnel_key);
474 /* Add southbound record for each unmatched northbound record. */
475 LIST_FOR_EACH (od, list, &nb_only) {
/* NOTE(review): ovn_datapath_allocate_key() hands out keys up to
 * (1u << 24) - 1, but this variable is only uint16_t, which would
 * silently truncate any key above 65535 -- uint32_t looks intended;
 * confirm. */
476 uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
481 od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
/* Record the NB row's UUID in external-ids so join_datapaths() can
 * re-associate this SB row on subsequent runs. */
483 char uuid_s[UUID_LEN + 1];
484 sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
485 const char *key = od->nbs ? "logical-switch" : "logical-router";
486 const struct smap id = SMAP_CONST1(&id, key, uuid_s);
487 sbrec_datapath_binding_set_external_ids(od->sb, &id);
489 sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
491 destroy_tnlids(&dp_tnlids);
494 /* Delete southbound records without northbound matches. */
495 struct ovn_datapath *od, *next;
496 LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
497 ovs_list_remove(&od->list);
498 sbrec_datapath_binding_delete(od->sb);
499 ovn_datapath_destroy(datapaths, od);
504 struct hmap_node key_node; /* Index on 'key'. */
505 char *key; /* nbs->name, nbr->name, sb->logical_port. */
506 char *json_key; /* 'key', quoted for use in JSON. */
508 const struct sbrec_port_binding *sb; /* May be NULL. */
510 /* Logical switch port data. */
511 const struct nbrec_logical_switch_port *nbsp; /* May be NULL. */
513 struct lport_addresses *lsp_addrs; /* Logical switch port addresses. */
514 unsigned int n_lsp_addrs;
516 struct lport_addresses *ps_addrs; /* Port security addresses. */
517 unsigned int n_ps_addrs;
519 /* Logical router port data. */
520 const struct nbrec_logical_router_port *nbrp; /* May be NULL. */
522 struct lport_addresses lrp_networks;
526 * - A switch port S of type "router" has a router port R as a peer,
527 * and R in turn has S as its peer.
529 * - Two connected logical router ports have each other as peer. */
530 struct ovn_port *peer;
532 struct ovn_datapath *od; /* Datapath this port belongs to. */
534 struct ovs_list list; /* In list of similar records. */
/* Creates a new ovn_port indexed in 'ports' by the hash of 'key'; also
 * precomputes the JSON-quoted form of 'key' (used when building logical
 * flow match strings, e.g. "inport == %s"). */
537 static struct ovn_port *
538 ovn_port_create(struct hmap *ports, const char *key,
539 const struct nbrec_logical_switch_port *nbsp,
540 const struct nbrec_logical_router_port *nbrp,
541 const struct sbrec_port_binding *sb)
543 struct ovn_port *op = xzalloc(sizeof *op);
545 struct ds json_key = DS_EMPTY_INITIALIZER;
546 json_string_escape(key, &json_key);
547 op->json_key = ds_steal_cstr(&json_key);
549 op->key = xstrdup(key);
553 hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
/* Removes 'port' from 'ports' and frees the memory it owns (address
 * arrays, router-port networks, key strings). */
558 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
561 /* Don't remove port->list. It is used within build_ports() as a
562 * private list and once we've exited that function it is not safe to
564 hmap_remove(ports, &port->key_node);
566 for (int i = 0; i < port->n_lsp_addrs; i++) {
567 destroy_lport_addresses(&port->lsp_addrs[i]);
569 free(port->lsp_addrs);
571 for (int i = 0; i < port->n_ps_addrs; i++) {
572 destroy_lport_addresses(&port->ps_addrs[i]);
574 free(port->ps_addrs);
576 destroy_lport_addresses(&port->lrp_networks);
577 free(port->json_key);
/* Looks up a port by name in 'ports'. */
583 static struct ovn_port *
584 ovn_port_find(struct hmap *ports, const char *name)
588 HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
589 if (!strcmp(op->key, name)) {
/* Allocates a fresh port tunnel key (15-bit space) within datapath 'od'. */
597 ovn_port_allocate_key(struct ovn_datapath *od)
599 return allocate_tnlid(&od->port_tnlids, "port",
600 (1u << 15) - 1, &od->port_key_hint);
/* Pairs southbound Port_Binding rows with northbound switch and router
 * ports, building a "struct ovn_port" per port in 'ports' and sorting the
 * results into 'sb_only', 'nb_only', and 'both', analogously to
 * join_datapaths().  Also parses each NB port's addresses and wires up
 * peer pointers between router ports and "router"-type switch ports. */
604 join_logical_ports(struct northd_context *ctx,
605 struct hmap *datapaths, struct hmap *ports,
606 struct ovs_list *sb_only, struct ovs_list *nb_only,
607 struct ovs_list *both)
610 ovs_list_init(sb_only);
611 ovs_list_init(nb_only);
/* Index every existing SB Port_Binding; initially assume SB-only. */
614 const struct sbrec_port_binding *sb;
615 SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
616 struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
618 ovs_list_push_back(sb_only, &op->list);
/* Walk every NB datapath's ports, matching them against the SB index. */
621 struct ovn_datapath *od;
622 HMAP_FOR_EACH (od, key_node, datapaths) {
624 for (size_t i = 0; i < od->nbs->n_ports; i++) {
625 const struct nbrec_logical_switch_port *nbsp
627 struct ovn_port *op = ovn_port_find(ports, nbsp->name);
629 if (op->nbsp || op->nbrp) {
630 static struct vlog_rate_limit rl
631 = VLOG_RATE_LIMIT_INIT(5, 1);
632 VLOG_WARN_RL(&rl, "duplicate logical port %s",
637 ovs_list_remove(&op->list);
638 ovs_list_push_back(both, &op->list);
640 /* This port exists due to a SB binding, but should
641 * not have been initialized fully. */
642 ovs_assert(!op->n_lsp_addrs && !op->n_ps_addrs);
644 op = ovn_port_create(ports, nbsp->name, nbsp, NULL, NULL);
645 ovs_list_push_back(nb_only, &op->list);
/* Parse the NB "addresses" column; "unknown" is a keyword, not a
 * MAC/IP set, so it is skipped here. */
649 = xmalloc(sizeof *op->lsp_addrs * nbsp->n_addresses);
650 for (size_t j = 0; j < nbsp->n_addresses; j++) {
651 if (!strcmp(nbsp->addresses[j], "unknown")) {
654 if (!extract_lsp_addresses(nbsp->addresses[j],
655 &op->lsp_addrs[op->n_lsp_addrs])) {
656 static struct vlog_rate_limit rl
657 = VLOG_RATE_LIMIT_INIT(1, 1);
658 VLOG_INFO_RL(&rl, "invalid syntax '%s' in logical "
659 "switch port addresses. No MAC "
661 op->nbsp->addresses[j]);
/* Parse the NB "port_security" column the same way. */
668 = xmalloc(sizeof *op->ps_addrs * nbsp->n_port_security);
669 for (size_t j = 0; j < nbsp->n_port_security; j++) {
670 if (!extract_lsp_addresses(nbsp->port_security[j],
671 &op->ps_addrs[op->n_ps_addrs])) {
672 static struct vlog_rate_limit rl
673 = VLOG_RATE_LIMIT_INIT(1, 1);
674 VLOG_INFO_RL(&rl, "invalid syntax '%s' in port "
675 "security. No MAC address found",
676 op->nbsp->port_security[j]);
/* Router datapaths: ports with an unparsable 'mac' or no usable
 * IPv4/IPv6 networks are skipped entirely. */
685 for (size_t i = 0; i < od->nbr->n_ports; i++) {
686 const struct nbrec_logical_router_port *nbrp
689 struct lport_addresses lrp_networks;
690 if (!extract_lrp_networks(nbrp, &lrp_networks)) {
691 static struct vlog_rate_limit rl
692 = VLOG_RATE_LIMIT_INIT(5, 1);
693 VLOG_WARN_RL(&rl, "bad 'mac' %s", nbrp->mac);
697 if (!lrp_networks.n_ipv4_addrs && !lrp_networks.n_ipv6_addrs) {
701 struct ovn_port *op = ovn_port_find(ports, nbrp->name);
703 if (op->nbsp || op->nbrp) {
704 static struct vlog_rate_limit rl
705 = VLOG_RATE_LIMIT_INIT(5, 1);
706 VLOG_WARN_RL(&rl, "duplicate logical router port %s",
711 ovs_list_remove(&op->list);
712 ovs_list_push_back(both, &op->list);
714 /* This port exists but should not have been
715 * initialized fully. */
716 ovs_assert(!op->lrp_networks.n_ipv4_addrs
717 && !op->lrp_networks.n_ipv6_addrs);
719 op = ovn_port_create(ports, nbrp->name, NULL, nbrp, NULL);
720 ovs_list_push_back(nb_only, &op->list);
723 op->lrp_networks = lrp_networks;
729 /* Connect logical router ports, and logical switch ports of type "router",
732 HMAP_FOR_EACH (op, key_node, ports) {
733 if (op->nbsp && !strcmp(op->nbsp->type, "router")) {
734 const char *peer_name = smap_get(&op->nbsp->options, "router-port");
739 struct ovn_port *peer = ovn_port_find(ports, peer_name);
740 if (!peer || !peer->nbrp) {
/* Track, per datapath, which switch ports attach to routers. */
746 op->od->router_ports = xrealloc(
747 op->od->router_ports,
748 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
749 op->od->router_ports[op->od->n_router_ports++] = op;
750 } else if (op->nbrp && op->nbrp->peer) {
751 struct ovn_port *peer = ovn_port_find(ports, op->nbrp->peer);
756 /* An ovn_port for a switch port of type "router" does have
757 * a router port as its peer (see the case above for
758 * "router" ports), but this is set via options:router-port
759 * in Logical_Switch_Port and does not involve the
760 * Logical_Router_Port's 'peer' column. */
761 static struct vlog_rate_limit rl =
762 VLOG_RATE_LIMIT_INIT(5, 1);
763 VLOG_WARN_RL(&rl, "Bad configuration: The peer of router "
764 "port %s is a switch port", op->key);
/* Pushes 'op''s northbound configuration into its southbound Port_Binding
 * row (type, options, parent port, tag, MACs). */
772 ovn_port_update_sbrec(const struct ovn_port *op)
774 sbrec_port_binding_set_datapath(op->sb, op->od->sb);
/* Router-port branch. */
776 /* If the router is for l3 gateway, it resides on a chassis
777 * and its port type is "gateway". */
778 const char *chassis = smap_get(&op->od->nbr->options, "chassis");
780 sbrec_port_binding_set_type(op->sb, "gateway");
782 sbrec_port_binding_set_type(op->sb, "patch");
785 const char *peer = op->peer ? op->peer->key : "<error>";
788 smap_add(&new, "peer", peer);
790 smap_add(&new, "gateway-chassis", chassis);
792 sbrec_port_binding_set_options(op->sb, &new);
/* Router ports carry no parent port, VLAN tag, or MAC list in SB. */
795 sbrec_port_binding_set_parent_port(op->sb, NULL);
796 sbrec_port_binding_set_tag(op->sb, NULL, 0);
797 sbrec_port_binding_set_mac(op->sb, NULL, 0);
/* Switch-port branch: non-"router" ports copy type and options from the
 * NB row verbatim; "router" ports are translated to SB "patch" (or
 * "gateway" when peered with an l3 gateway router) ports. */
799 if (strcmp(op->nbsp->type, "router")) {
800 sbrec_port_binding_set_type(op->sb, op->nbsp->type);
801 sbrec_port_binding_set_options(op->sb, &op->nbsp->options);
803 const char *chassis = NULL;
804 if (op->peer && op->peer->od && op->peer->od->nbr) {
805 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
808 /* A switch port connected to a gateway router is also of
811 sbrec_port_binding_set_type(op->sb, "gateway");
813 sbrec_port_binding_set_type(op->sb, "patch");
816 const char *router_port = smap_get(&op->nbsp->options,
819 router_port = "<error>";
823 smap_add(&new, "peer", router_port);
825 smap_add(&new, "gateway-chassis", chassis);
827 sbrec_port_binding_set_options(op->sb, &new);
830 sbrec_port_binding_set_parent_port(op->sb, op->nbsp->parent_name);
831 sbrec_port_binding_set_tag(op->sb, op->nbsp->tag, op->nbsp->n_tag);
832 sbrec_port_binding_set_mac(op->sb, (const char **) op->nbsp->addresses,
833 op->nbsp->n_addresses);
837 /* Updates the southbound Port_Binding table so that it contains the logical
838 * switch ports specified by the northbound database.
840 * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
841 * using the "struct ovn_datapath"s in 'datapaths' to look up logical
844 build_ports(struct northd_context *ctx, struct hmap *datapaths,
847 struct ovs_list sb_only, nb_only, both;
849 join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
851 /* For logical ports that are in both databases, update the southbound
852 * record based on northbound data. Also index the in-use tunnel_keys. */
853 struct ovn_port *op, *next;
854 LIST_FOR_EACH_SAFE (op, next, list, &both) {
855 ovn_port_update_sbrec(op);
857 add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
858 if (op->sb->tunnel_key > op->od->port_key_hint) {
859 op->od->port_key_hint = op->sb->tunnel_key;
863 /* Add southbound record for each unmatched northbound record. */
864 LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
/* Port keys max out at (1u << 15) - 1 (see ovn_port_allocate_key()),
 * so uint16_t is wide enough here. */
865 uint16_t tunnel_key = ovn_port_allocate_key(op->od);
870 op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
871 ovn_port_update_sbrec(op);
873 sbrec_port_binding_set_logical_port(op->sb, op->key);
874 sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
877 /* Delete southbound records without northbound matches. */
878 LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
879 ovs_list_remove(&op->list);
880 sbrec_port_binding_delete(op->sb);
881 ovn_port_destroy(ports, op);
885 #define OVN_MIN_MULTICAST 32768
886 #define OVN_MAX_MULTICAST 65535
888 struct multicast_group {
890 uint16_t key; /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
/* Well-known multicast groups.  NOTE(review): the names suggest MC_FLOOD
 * receives broadcast/multicast traffic and MC_UNKNOWN traffic to unknown
 * MACs; the code that populates them is not in this extract -- confirm. */
893 #define MC_FLOOD "_MC_flood"
894 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
896 #define MC_UNKNOWN "_MC_unknown"
897 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
/* True if 'a' and 'b' have the same name and key. */
900 multicast_group_equal(const struct multicast_group *a,
901 const struct multicast_group *b)
903 return !strcmp(a->name, b->name) && a->key == b->key;
923 static struct ovn_multicast *
924 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
925 const struct multicast_group *group)
927 struct ovn_multicast *mc;
929 HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
930 ovn_multicast_hash(datapath, group), mcgroups) {
931 if (mc->datapath == datapath
932 && multicast_group_equal(mc->group, group)) {
940 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
941 struct ovn_port *port)
943 struct ovn_datapath *od = port->od;
944 struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
946 mc = xmalloc(sizeof *mc);
947 hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
951 mc->allocated_ports = 4;
952 mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
954 if (mc->n_ports >= mc->allocated_ports) {
955 mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
958 mc->ports[mc->n_ports++] = port;
962 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
965 hmap_remove(mcgroups, &mc->hmap_node);
972 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
973 const struct sbrec_multicast_group *sb)
975 struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
976 for (size_t i = 0; i < mc->n_ports; i++) {
977 ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
979 sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
983 /* Logical flow generation.
985 * This code generates the Logical_Flow table in the southbound database, as a
986 * function of most of the northbound database.
990 struct hmap_node hmap_node;
992 struct ovn_datapath *od;
993 enum ovn_stage stage;
/* The hash folds in the datapath UUID, stage, priority, match, and
 * actions -- exactly the fields that ovn_lflow_equal() compares. */
1000 ovn_lflow_hash(const struct ovn_lflow *lflow)
1002 size_t hash = uuid_hash(&lflow->od->key);
1003 hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
1004 hash = hash_string(lflow->match, hash);
1005 return hash_string(lflow->actions, hash);
1009 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
1011 return (a->od == b->od
1012 && a->stage == b->stage
1013 && a->priority == b->priority
1014 && !strcmp(a->match, b->match)
1015 && !strcmp(a->actions, b->actions));
/* Initializes 'lflow', storing 'match' and 'actions' without copying
 * (ovn_lflow_destroy() frees them, so heap-allocated strings are expected
 * there; ovn_lflow_find() passes stack-owned strings into a temporary). */
1019 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
1020 enum ovn_stage stage, uint16_t priority,
1021 char *match, char *actions)
1024 lflow->stage = stage;
1025 lflow->priority = priority;
1026 lflow->match = match;
1027 lflow->actions = actions;
1030 /* Adds a row with the specified contents to the Logical_Flow table. */
1032 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
1033 enum ovn_stage stage, uint16_t priority,
1034 const char *match, const char *actions)
/* A flow's stage must agree with its datapath's type (switch vs. router). */
1036 ovs_assert(ovn_stage_to_datapath_type(stage) == ovn_datapath_get_type(od));
1038 struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
1039 ovn_lflow_init(lflow, od, stage, priority,
1040 xstrdup(match), xstrdup(actions));
1041 hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
/* Finds an existing flow identical to the given fields, via a temporary
 * 'target' flow used only for hashing and comparison. */
1044 static struct ovn_lflow *
1045 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
1046 enum ovn_stage stage, uint16_t priority,
1047 const char *match, const char *actions)
1049 struct ovn_lflow target;
1050 ovn_lflow_init(&target, od, stage, priority,
1051 CONST_CAST(char *, match), CONST_CAST(char *, actions));
1053 struct ovn_lflow *lflow;
1054 HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
1056 if (ovn_lflow_equal(lflow, &target)) {
1064 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
1067 hmap_remove(lflows, &lflow->hmap_node);
1069 free(lflow->actions);
1074 /* Appends port security constraints on L2 address field 'eth_addr_field'
1075 * (e.g. "eth.src" or "eth.dst") to 'match'. 'ps_addrs', with 'n_ps_addrs'
1076 * elements, is the collection of port_security constraints from an
1077 * OVN_NB Logical_Switch_Port row generated by extract_lsp_addresses(). */
1079 build_port_security_l2(const char *eth_addr_field,
1080 struct lport_addresses *ps_addrs,
1081 unsigned int n_ps_addrs,
/* Emit: " && <field> == {ea1 ea2 ...}", trimming the trailing space. */
1088 ds_put_format(match, " && %s == {", eth_addr_field);
1090 for (size_t i = 0; i < n_ps_addrs; i++) {
1091 ds_put_format(match, "%s ", ps_addrs[i].ea_s);
1093 ds_chomp(match, ' ');
1094 ds_put_cstr(match, "}");
/* Appends an IPv6 neighbor-discovery port-security clause to 'match':
 * nd.sll/nd.tll restricted to all-zeros or 'ea', and (when IPv6 addresses
 * are configured) nd.target restricted to the EUI-64 link-local address
 * derived from 'ea' plus the addresses in 'ipv6_addrs'. */
1098 build_port_security_ipv6_nd_flow(
1099 struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1102 ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1103 "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1104 "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1105 ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1107 if (!n_ipv6_addrs) {
1108 ds_put_cstr(match, "))");
1112 char ip6_str[INET6_ADDRSTRLEN + 1];
1113 struct in6_addr lla;
1114 in6_generate_lla(ea, &lla);
1115 memset(ip6_str, 0, sizeof(ip6_str));
1116 ipv6_string_mapped(ip6_str, &lla);
1117 ds_put_format(match, " && (nd.target == %s", ip6_str);
1119 for(int i = 0; i < n_ipv6_addrs; i++) {
1120 memset(ip6_str, 0, sizeof(ip6_str));
1121 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1122 ds_put_format(match, " || nd.target == %s", ip6_str);
1125 ds_put_format(match, ")))");
/* Appends an IPv6 source/destination port-security clause to 'match':
 * ip6.src (ingress) or ip6.dst (egress) must be the link-local address
 * derived from 'ea' or one of 'ipv6_addrs'; egress additionally permits
 * multicast (ff00::/8) destinations. */
1129 build_port_security_ipv6_flow(
1130 enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1131 struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1133 char ip6_str[INET6_ADDRSTRLEN + 1];
1135 ds_put_format(match, " && %s == {",
1136 pipeline == P_IN ? "ip6.src" : "ip6.dst");
1138 /* Allow link-local address. */
1139 struct in6_addr lla;
1140 in6_generate_lla(ea, &lla);
1141 ipv6_string_mapped(ip6_str, &lla);
1142 ds_put_format(match, "%s, ", ip6_str);
1144 /* Allow ip6.dst=ff00::/8 for multicast packets */
1145 if (pipeline == P_OUT) {
1146 ds_put_cstr(match, "ff00::/8, ");
1148 for(int i = 0; i < n_ipv6_addrs; i++) {
1149 ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1150 ds_put_format(match, "%s, ", ip6_str);
1152 /* Replace ", " by "}". */
1153 ds_chomp(match, ' ');
1154 ds_chomp(match, ',');
1155 ds_put_cstr(match, "}");
1159 * Build port security constraints on ARP and IPv6 ND fields
1160 * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1162 * For each port security of the logical port, following
1163 * logical flows are added
1164 * - If the port security has no IP (both IPv4 and IPv6) or
1165 * if it has IPv4 address(es)
1166 * - Priority 90 flow to allow ARP packets for known MAC addresses
1167 * in the eth.src and arp.spa fields. If the port security
1168 * has IPv4 addresses, allow known IPv4 addresses in the arp.tpa field.
1170 * - If the port security has no IP (both IPv4 and IPv6) or
1171 * if it has IPv6 address(es)
1172 * - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1173 * in the eth.src and nd.sll/nd.tll fields. If the port security
1174 * has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1175 * for IPv6 Neighbor Advertisement packet.
1177 * - Priority 80 flow to drop ARP and IPv6 ND packets.
1180 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1182 struct ds match = DS_EMPTY_INITIALIZER;
1184 for (size_t i = 0; i < op->n_ps_addrs; i++) {
1185 struct lport_addresses *ps = &op->ps_addrs[i];
1187 bool no_ip = !(ps->n_ipv4_addrs || ps->n_ipv6_addrs);
/* ARP allow-flow (priority 90): only emitted when the port-security
 * entry has IPv4 addresses or no IP constraint at all. */
1190 if (ps->n_ipv4_addrs || no_ip) {
1191 ds_put_format(&match,
1192 "inport == %s && eth.src == %s && arp.sha == %s",
1193 op->json_key, ps->ea_s, ps->ea_s);
1195 if (ps->n_ipv4_addrs) {
1196 ds_put_cstr(&match, " && arp.spa == {");
1197 for (size_t j = 0; j < ps->n_ipv4_addrs; j++) {
1198 /* When the netmask is applied, if the host portion is
1199 * non-zero, the host can only use the specified
1200 * address in the arp.spa. If zero, the host is allowed
1201 * to use any address in the subnet. */
1202 if (ps->ipv4_addrs[j].plen == 32
1203 || ps->ipv4_addrs[j].addr & ~ps->ipv4_addrs[j].mask) {
1204 ds_put_cstr(&match, ps->ipv4_addrs[j].addr_s);
1206 ds_put_format(&match, "%s/%d",
1207 ps->ipv4_addrs[j].network_s,
1208 ps->ipv4_addrs[j].plen);
1210 ds_put_cstr(&match, ", ");
1212 ds_chomp(&match, ' ');
1213 ds_chomp(&match, ',');
1214 ds_put_cstr(&match, "}");
1216 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1217 ds_cstr(&match), "next;");
/* IPv6 ND allow-flow (priority 90), analogous to the ARP case. */
1220 if (ps->n_ipv6_addrs || no_ip) {
1222 ds_put_format(&match, "inport == %s && eth.src == %s",
1223 op->json_key, ps->ea_s);
1224 build_port_security_ipv6_nd_flow(&match, ps->ea, ps->ipv6_addrs,
1226 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1227 ds_cstr(&match), "next;");
/* Priority-80 catch-all: drop any ARP/ND from this port that the
 * allow-flows above did not match. */
1232 ds_put_format(&match, "inport == %s && (arp || nd)", op->json_key);
1233 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
1234 ds_cstr(&match), "drop;");
1239 * Build port security constraints on IPv4 and IPv6 src and dst fields
1240 * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1242 * For each port security of the logical port, following
1243 * logical flows are added
1244 * - If the port security has IPv4 addresses,
1245 * - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1247 * - If the port security has IPv6 addresses,
1248 * - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1250 * - If the port security has IPv4 addresses or IPv6 addresses or both
1251 * - Priority 80 flow to drop all IPv4 and IPv6 traffic
build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
                       struct hmap *lflows)
{
    /* Direction-dependent pieces: the port/MAC fields to match and the
     * stage to install into. */
    char *port_direction;
    enum ovn_stage stage;
    if (pipeline == P_IN) {
        port_direction = "inport";
        stage = S_SWITCH_IN_PORT_SEC_IP;
    } else {
        port_direction = "outport";
        stage = S_SWITCH_OUT_PORT_SEC_IP;
    }

    for (size_t i = 0; i < op->n_ps_addrs; i++) {
        struct lport_addresses *ps = &op->ps_addrs[i];

        /* MAC-only port security entries get no IP flows at all. */
        if (!(ps->n_ipv4_addrs || ps->n_ipv6_addrs)) {
            continue;
        }

        if (ps->n_ipv4_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of the unspecified address for DHCP discovery */
                struct ds dhcp_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dhcp_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip4.src == 0.0.0.0"
                              " && ip4.dst == 255.255.255.255"
                              " && udp.src == 68 && udp.dst == 67",
                              op->json_key, ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dhcp_match), "next;");
                ds_destroy(&dhcp_match);
                ds_put_format(&match, "inport == %s && eth.src == %s"
                              " && ip4.src == {", op->json_key,
                              ps->ea_s);
            } else {
                /* Egress also always admits broadcast and multicast. */
                ds_put_format(&match, "outport == %s && eth.dst == %s"
                              " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
                              op->json_key, ps->ea_s);
            }

            for (int j = 0; j < ps->n_ipv4_addrs; j++) {
                ovs_be32 mask = ps->ipv4_addrs[j].mask;
                /* When the netmask is applied, if the host portion is
                 * non-zero, the host can only use the specified
                 * address.  If zero, the host is allowed to use any
                 * address in the subnet.
                 */
                if (ps->ipv4_addrs[j].plen == 32
                    || ps->ipv4_addrs[j].addr & ~mask) {
                    ds_put_format(&match, "%s", ps->ipv4_addrs[j].addr_s);
                    if (pipeline == P_OUT && ps->ipv4_addrs[j].plen != 32) {
                        /* Host is also allowed to receive packets to the
                         * broadcast address in the specified subnet. */
                        ds_put_format(&match, ", %s",
                                      ps->ipv4_addrs[j].bcast_s);
                    }
                } else {
                    /* host portion is zero */
                    ds_put_format(&match, "%s/%d", ps->ipv4_addrs[j].network_s,
                                  ps->ipv4_addrs[j].plen);
                }
                ds_put_cstr(&match, ", ");
            }

            /* Replace ", " by "}". */
            ds_chomp(&match, ' ');
            ds_chomp(&match, ',');
            ds_put_cstr(&match, "}");
            ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        if (ps->n_ipv6_addrs) {
            struct ds match = DS_EMPTY_INITIALIZER;
            if (pipeline == P_IN) {
                /* Permit use of unspecified address for duplicate address
                 * detection */
                struct ds dad_match = DS_EMPTY_INITIALIZER;
                ds_put_format(&dad_match, "inport == %s"
                              " && eth.src == %s"
                              " && ip6.src == ::"
                              " && ip6.dst == ff02::/16"
                              " && icmp6.type == {131, 135, 143}", op->json_key,
                              ps->ea_s);
                ovn_lflow_add(lflows, op->od, stage, 90,
                              ds_cstr(&dad_match), "next;");
                ds_destroy(&dad_match);
            }
            /* The helper appends the ip6.src/ip6.dst constraints. */
            ds_put_format(&match, "%s == %s && %s == %s",
                          port_direction, op->json_key,
                          pipeline == P_IN ? "eth.src" : "eth.dst", ps->ea_s);
            build_port_security_ipv6_flow(pipeline, &match, ps->ea,
                                          ps->ipv6_addrs, ps->n_ipv6_addrs);
            ovn_lflow_add(lflows, op->od, stage, 90,
                          ds_cstr(&match), "next;");
            ds_destroy(&match);
        }

        /* Catch-all: drop IP traffic for this MAC not matched above. */
        char *match = xasprintf("%s == %s && %s == %s && ip",
                                port_direction, op->json_key,
                                pipeline == P_IN ? "eth.src" : "eth.dst",
                                ps->ea_s);
        ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
        free(match);
    }
}
1366 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
1368 return !lsp->enabled || *lsp->enabled;
1372 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
1374 return !lsp->up || *lsp->up;
build_dhcpv4_action(struct ovn_port *op, ovs_be32 offer_ip,
                    struct ds *options_action, struct ds *response_action)
{
    /* Builds the put_dhcp_opts options action and the DHCP reply rewrite
     * action for 'op' offering 'offer_ip'.  Returns true and fills in the
     * two actions on success, false if native DHCPv4 does not apply. */
    if (!op->nbsp->dhcpv4_options) {
        /* CMS has disabled native DHCPv4 for this lport. */
        return false;
    }

    ovs_be32 host_ip, mask;
    char *error = ip_parse_masked(op->nbsp->dhcpv4_options->cidr, &host_ip,
                                  &mask);
    if (error || ((offer_ip ^ host_ip) & mask)) {
        /* Either
         *  - cidr defined is invalid or
         *  - the offer ip of the logical port doesn't belong to the cidr
         *    defined in the DHCPv4 options.
         */
        free(error);
        return false;
    }

    const char *server_ip = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_id");
    const char *server_mac = smap_get(
        &op->nbsp->dhcpv4_options->options, "server_mac");
    const char *lease_time = smap_get(
        &op->nbsp->dhcpv4_options->options, "lease_time");
    const char *router = smap_get(
        &op->nbsp->dhcpv4_options->options, "router");

    if (!(server_ip && server_mac && lease_time && router)) {
        /* "server_id", "server_mac", "lease_time" and "router" should be
         * present in the dhcp_options. */
        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
        VLOG_WARN_RL(&rl, "Required DHCPv4 options not defined for lport - %s",
                     op->json_key);
        return false;
    }

    /* Work on a copy so the NB record's options are not modified. */
    struct smap dhcpv4_options = SMAP_INITIALIZER(&dhcpv4_options);
    smap_clone(&dhcpv4_options, &op->nbsp->dhcpv4_options->options);

    /* server_mac is not DHCPv4 option, delete it from the smap. */
    smap_remove(&dhcpv4_options, "server_mac");
    char *netmask = xasprintf(IP_FMT, IP_ARGS(mask));
    smap_add(&dhcpv4_options, "netmask", netmask);
    free(netmask);

    /* Emit "REGBIT = put_dhcp_opts(offerip = ..., k = v, ...); next;". */
    ds_put_format(options_action,
                  REGBIT_DHCP_OPTS_RESULT" = put_dhcp_opts(offerip = "
                  IP_FMT", ", IP_ARGS(offer_ip));
    struct smap_node *node;
    SMAP_FOR_EACH(node, &dhcpv4_options) {
        ds_put_format(options_action, "%s = %s, ", node->key, node->value);
    }

    /* Drop the trailing ", " and close the action. */
    ds_chomp(options_action, ' ');
    ds_chomp(options_action, ',');
    ds_put_cstr(options_action, "); next;");

    /* Turn the request around: swap MACs, set server/offer addresses and
     * DHCP server ports, then hairpin out the ingress port. */
    ds_put_format(response_action, "eth.dst = eth.src; eth.src = %s; "
                  "ip4.dst = "IP_FMT"; ip4.src = %s; udp.src = 67; "
                  "udp.dst = 68; outport = inport; inport = \"\";"
                  " /* Allow sending out inport. */ output;",
                  server_mac, IP_ARGS(offer_ip), server_ip);

    smap_destroy(&dhcpv4_options);
    return true;
}
1449 has_stateful_acl(struct ovn_datapath *od)
1451 for (size_t i = 0; i < od->nbs->n_acls; i++) {
1452 struct nbrec_acl *acl = od->nbs->acls[i];
1453 if (!strcmp(acl->action, "allow-related")) {
build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
               struct hmap *ports)
{
    bool has_stateful = has_stateful_acl(od);
    struct ovn_port *op;

    /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");

    /* If there are any stateful ACL rules in this datapath, we must
     * send all IP packets through the conntrack action, which handles
     * defragmentation, in order to match L4 headers. */
    if (has_stateful) {
        HMAP_FOR_EACH (op, key_node, ports) {
            if (op->od == od && !strcmp(op->nbsp->type, "router")) {
                /* Can't use ct() for router ports.  Consider the
                 * following configuration: lp1(10.0.0.2) on
                 * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
                 * ping from lp1 to lp2, First, the response will go
                 * through ct() with a zone for lp2 in the ls2 ingress
                 * pipeline on hostB.  That ct zone knows about this
                 * connection.  Next, it goes through ct() with the zone
                 * for the router port in the egress pipeline of ls2 on
                 * hostB.  This zone does not know about the connection,
                 * as the icmp request went through the logical router
                 * on hostA, not hostB.  This would only work with
                 * distributed conntrack state across all chassis. */
                struct ds match_in = DS_EMPTY_INITIALIZER;
                struct ds match_out = DS_EMPTY_INITIALIZER;

                /* Bypass conntrack for traffic to/from this router port. */
                ds_put_format(&match_in, "ip && inport == %s", op->json_key);
                ds_put_format(&match_out, "ip && outport == %s", op->json_key);
                ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
                              ds_cstr(&match_in), "next;");
                ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
                              ds_cstr(&match_out), "next;");

                ds_destroy(&match_in);
                ds_destroy(&match_out);
            }
        }
        /* Ingress and Egress Pre-ACL Table (Priority 110).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");

        /* Ingress and Egress Pre-ACL Table (Priority 100).
         *
         * Regardless of whether the ACL is "from-lport" or "to-lport",
         * we need rules in both the ingress and egress table, because
         * the return traffic needs to be followed.
         *
         * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * it to conntrack for tracking and defragmentation. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
                      REGBIT_CONNTRACK_DEFRAG" = 1; next;");
    }
}
1526 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
1527 * 'ip_address'. The caller must free() the memory allocated for
1530 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1533 char *ip_str, *start, *next;
1537 next = start = xstrdup(key);
1538 ip_str = strsep(&next, ":");
1539 if (!ip_str || !ip_str[0]) {
1540 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1541 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1547 char *error = ip_parse_masked(ip_str, &ip, &mask);
1548 if (error || mask != OVS_BE32_MAX) {
1549 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1550 VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1557 if (next && next[0]) {
1558 if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
1559 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1560 VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
1567 *ip_address = strdup(ip_str);
build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Allow all packets to go to next tables by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");

    /* Collect the unique VIP addresses so we emit one flow per IP even if
     * several "IP:port" keys share the address. */
    struct sset all_ips = SSET_INITIALIZER(&all_ips);
    if (od->nbs->load_balancer) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer;
        struct smap *vips = &lb->vips;
        struct smap_node *node;
        bool vip_configured = false;

        SMAP_FOR_EACH (node, vips) {
            vip_configured = true;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            uint16_t port;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                /* Malformed key; already logged by the parser. */
                continue;
            }

            if (!sset_contains(&all_ips, ip_address)) {
                sset_add(&all_ips, ip_address);
            }

            free(ip_address);

            /* Ignore L4 port information in the key because fragmented packets
             * may not have L4 information.  The pre-stateful table will send
             * the packet through ct() action to de-fragment.  In stateful
             * table, we will eventually look at L4 information. */
        }

        /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
         * packet to conntrack for defragmentation. */
        const char *ip_address;
        SSET_FOR_EACH(ip_address, &all_ips) {
            char *match = xasprintf("ip && ip4.dst == %s", ip_address);
            ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
                          100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
            free(match);
        }

        sset_destroy(&all_ips);

        /* Egress: defragment everything so return traffic of load-balanced
         * connections is tracked too. */
        if (vip_configured) {
            ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
                          100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
        }
    }
}
1628 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
1630 /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
1631 * allowed by default. */
1632 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
1633 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
1635 /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
1636 * sent to conntrack for tracking and defragmentation. */
1637 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
1638 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1639 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
1640 REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
build_acls(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Translates the datapath's ACLs into ingress/egress ACL-stage flows,
     * plus the fixed conntrack-related flows that stateful ACLs require. */
    bool has_stateful = has_stateful_acl(od);

    /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
     * default.  A related rule at priority 1 is added below if there
     * are any stateful ACLs in this datapath. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");

    if (has_stateful) {
        /* Ingress and Egress ACL Table (Priority 1).
         *
         * By default, traffic is allowed.  This is partially handled by
         * the Priority 0 ACL flows added earlier, but we also need to
         * commit IP flows.  This is because, while the initiator's
         * direction may not have any stateful rules, the server's may
         * and then its return traffic would not have an associated
         * conntrack entry and would return "+invalid".
         *
         * We use "ct_commit" for a connection that is not already known
         * by the connection tracker.  Once a connection is committed,
         * subsequent packets will hit the flow at priority 0 that just
         * uses "next;".
         *
         * We also check for established connections that have ct_label[0]
         * set on them.  That's a connection that was disallowed, but is
         * now allowed by policy again since it hit this default-allow flow.
         * We need to set ct_label[0]=0 to let the connection continue,
         * which will be done by ct_commit() in the "stateful" stage.
         * Subsequent packets will hit the flow at priority 0 that just
         * uses "next;". */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1,
                      "ip && (!ct.est || (ct.est && ct_label[0] == 1))",
                       REGBIT_CONNTRACK_COMMIT" = 1; next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Always drop traffic that's in an invalid state.  Also drop
         * reply direction packets for connections that have been marked
         * for deletion (bit 0 of ct_label is set).
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
                      "drop;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.inv || (ct.est && ct.rpl && ct_label[0] == 1)",
                      "drop;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow reply traffic that is part of an established
         * conntrack entry that has not been marked for deletion
         * (bit 0 of ct_label).  We only match traffic in the
         * reply direction because we want traffic in the request
         * direction to hit the currently defined policy from ACLs.
         *
         * This is enforced at a higher priority than ACLs can be defined. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label[0] == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "ct.est && !ct.rel && !ct.new && !ct.inv "
                      "&& ct.rpl && ct_label[0] == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Allow traffic that is related to an existing conntrack entry that
         * has not been marked for deletion (bit 0 of ct_label).
         *
         * This is enforced at a higher priority than ACLs can be defined.
         *
         * NOTE: This does not support related data sessions (eg,
         * a dynamically negotiated FTP data channel), but will allow
         * related traffic such as an ICMP Port Unreachable through
         * that's generated from a non-listening UDP port. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label[0] == 0",
                      "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
                      "!ct.est && ct.rel && !ct.new && !ct.inv "
                      "&& ct_label[0] == 0",
                      "next;");

        /* Ingress and Egress ACL Table (Priority 65535).
         *
         * Not to do conntrack on ND packets. */
        ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
        ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
    }

    /* Ingress or Egress ACL Table (Various priorities). */
    for (size_t i = 0; i < od->nbs->n_acls; i++) {
        struct nbrec_acl *acl = od->nbs->acls[i];
        bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
        enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;

        if (!strcmp(acl->action, "allow")
            || !strcmp(acl->action, "allow-related")) {
            /* If there are any stateful flows, we must even commit "allow"
             * actions.  This is because, while the initiator's
             * direction may not have any stateful rules, the server's
             * may and then its return traffic would not have an
             * associated conntrack entry and would return "+invalid". */
            if (!has_stateful) {
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "next;");
            } else {
                struct ds match = DS_EMPTY_INITIALIZER;

                /* Commit the connection tracking entry if it's a new
                 * connection that matches this ACL.  After this commit,
                 * the reply traffic is allowed by a flow we create at
                 * priority 65535, defined earlier.
                 *
                 * It's also possible that a known connection was marked for
                 * deletion after a policy was deleted, but the policy was
                 * re-added while that connection is still known.  We catch
                 * that case here and un-set ct_label[0] (which will be done
                 * by ct_commit in the "stateful" stage) to indicate that the
                 * connection should be allowed to resume.
                 */
                ds_put_format(&match, "((ct.new && !ct.est)"
                                      " || (!ct.new && ct.est && !ct.rpl "
                                      "&& ct_label[0] == 1)) "
                                      "&& (%s)", acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match),
                               REGBIT_CONNTRACK_COMMIT" = 1; next;");

                /* Match on traffic in the request direction for an established
                 * connection tracking entry that has not been marked for
                 * deletion.  There is no need to commit here, so we can just
                 * proceed to the next table.  We use this to ensure that this
                 * connection is still allowed by the currently defined
                 * policy. */
                ds_clear(&match);
                ds_put_format(&match,
                              "!ct.new && ct.est && !ct.rpl"
                              " && ct_label[0] == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "next;");

                ds_destroy(&match);
            }
        } else if (!strcmp(acl->action, "drop")
                   || !strcmp(acl->action, "reject")) {
            struct ds match = DS_EMPTY_INITIALIZER;

            /* XXX Need to support "reject", treat it as "drop;" for now. */
            if (!strcmp(acl->action, "reject")) {
                VLOG_INFO("reject is not a supported action");
            }

            /* The implementation of "drop" differs if stateful ACLs are in
             * use for this datapath.  In that case, the actions differ
             * depending on whether the connection was previously committed
             * to the connection tracker with ct_commit. */
            if (has_stateful) {
                /* If the packet is not part of an established connection, then
                 * we can simply drop it. */
                ds_put_format(&match,
                              "(!ct.est || (ct.est && ct_label[0] == 1)) "
                              "&& (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage, acl->priority +
                              OVN_ACL_PRI_OFFSET, ds_cstr(&match), "drop;");

                /* For an existing connection without ct_label set, we've
                 * encountered a policy change.  ACLs previously allowed
                 * this connection and we committed the connection tracking
                 * entry.  Current policy says that we should drop this
                 * connection.  First, we set bit 0 of ct_label to indicate
                 * that this connection is set for deletion.  By not
                 * specifying "next;", we implicitly drop the packet after
                 * updating conntrack state.  We would normally defer
                 * ct_commit() to the "stateful" stage, but since we're
                 * dropping the packet, we go ahead and do it here. */
                ds_clear(&match);
                ds_put_format(&match,
                              "ct.est && ct_label[0] == 0 && (%s)",
                              acl->match);
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              ds_cstr(&match), "ct_commit(ct_label=1/1);");
            } else {
                /* There are no stateful ACLs in use on this datapath,
                 * so a "drop" ACL is simply the "drop" logical flow action
                 * in all cases. */
                ovn_lflow_add(lflows, od, stage,
                              acl->priority + OVN_ACL_PRI_OFFSET,
                              acl->match, "drop;");
            }
            ds_destroy(&match);
        }
    }

    /* Add 34000 priority flow to allow DHCP reply from ovn-controller to all
     * logical ports of the datapath if the CMS has configured DHCPv4
     * options. */
    if (od->nbs && od->nbs->n_ports) {
        for (size_t i = 0; i < od->nbs->n_ports; i++) {
            if (od->nbs->ports[i]->dhcpv4_options) {
                const char *server_id = smap_get(
                    &od->nbs->ports[i]->dhcpv4_options->options, "server_id");
                const char *server_mac = smap_get(
                    &od->nbs->ports[i]->dhcpv4_options->options, "server_mac");
                const char *lease_time = smap_get(
                    &od->nbs->ports[i]->dhcpv4_options->options, "lease_time");
                const char *router = smap_get(
                    &od->nbs->ports[i]->dhcpv4_options->options, "router");
                if (server_id && server_mac && lease_time && router) {
                    struct ds match = DS_EMPTY_INITIALIZER;
                    /* With stateful ACLs, commit the reply so later stages
                     * see it as an established connection. */
                    const char *actions =
                        has_stateful ? "ct_commit; next;" : "next;";
                    ds_put_format(&match, "outport == \"%s\" && eth.src == %s "
                                  "&& ip4.src == %s && udp && udp.src == 67 "
                                  "&& udp.dst == 68", od->nbs->ports[i]->name,
                                  server_mac, server_id);
                    ovn_lflow_add(
                        lflows, od, S_SWITCH_OUT_ACL, 34000, ds_cstr(&match),
                        actions);
                    ds_destroy(&match);
                }
            }
        }
    }
}
1884 build_lb(struct ovn_datapath *od, struct hmap *lflows)
1886 /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
1888 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
1889 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
1891 if (od->nbs->load_balancer) {
1892 /* Ingress and Egress LB Table (Priority 65535).
1894 * Send established traffic through conntrack for just NAT. */
1895 ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
1896 "ct.est && !ct.rel && !ct.new && !ct.inv",
1897 REGBIT_CONNTRACK_NAT" = 1; next;");
1898 ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
1899 "ct.est && !ct.rel && !ct.new && !ct.inv",
1900 REGBIT_CONNTRACK_NAT" = 1; next;");
build_stateful(struct ovn_datapath *od, struct hmap *lflows)
{
    /* Ingress and Egress stateful Table (Priority 0): Packets are
     * allowed by default. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");

    /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
     * committed to conntrack.  We always set ct_label[0] to 0 here as
     * any packet that makes it this far is part of a connection we
     * want to allow to continue. */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit(ct_label=0/1); next;");

    /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
     * through nat (without committing).
     *
     * REGBIT_CONNTRACK_COMMIT is set for new connections and
     * REGBIT_CONNTRACK_NAT is set for established connections.  So they
     * don't overlap.
     */
    ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
    ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
                  REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");

    /* Load balancing rules for new connections get committed to conntrack
     * table.  So even if REGBIT_CONNTRACK_COMMIT is set in a previous table
     * a higher priority rule for load balancing below also commits the
     * connection, so it is okay if we do not hit the above match on
     * REGBIT_CONNTRACK_COMMIT. */
    if (od->nbs->load_balancer) {
        struct nbrec_load_balancer *lb = od->nbs->load_balancer;
        struct smap *vips = &lb->vips;
        struct smap_node *node;

        SMAP_FOR_EACH (node, vips) {
            uint16_t port = 0;

            /* node->key contains IP:port or just IP. */
            char *ip_address = NULL;
            ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
            if (!ip_address) {
                /* Malformed VIP key; already logged by the parser. */
                continue;
            }

            /* New connections in Ingress table. */
            char *action = xasprintf("ct_lb(%s);", node->value);
            struct ds match = DS_EMPTY_INITIALIZER;
            ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
            if (port) {
                /* NOTE(review): no space before "&&" here, so the match
                 * reads e.g. "ip4.dst == 10.0.0.1&& udp".  The OVN lexer
                 * appears to tokenize this anyway, but confirm — every
                 * other builder in this file writes " && ". */
                if (lb->protocol && !strcmp(lb->protocol, "udp")) {
                    ds_put_format(&match, "&& udp && udp.dst == %d", port);
                } else {
                    ds_put_format(&match, "&& tcp && tcp.dst == %d", port);
                }
                /* Port-specific VIPs take precedence (120) over
                 * address-only VIPs (110). */
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              120, ds_cstr(&match), action);
            } else {
                ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
                              110, ds_cstr(&match), action);
            }

            free(ip_address);
            ds_destroy(&match);
            free(action);
        }
    }
}
1977 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1978 struct hmap *lflows, struct hmap *mcgroups)
1980 /* This flow table structure is documented in ovn-northd(8), so please
1981 * update ovn-northd.8.xml if you change anything. */
1983 struct ds match = DS_EMPTY_INITIALIZER;
1984 struct ds actions = DS_EMPTY_INITIALIZER;
1986 /* Build pre-ACL and ACL tables for both ingress and egress.
1987 * Ingress tables 3 and 4. Egress tables 0 and 1. */
1988 struct ovn_datapath *od;
1989 HMAP_FOR_EACH (od, key_node, datapaths) {
1994 build_pre_acls(od, lflows, ports);
1995 build_pre_lb(od, lflows);
1996 build_pre_stateful(od, lflows);
1997 build_acls(od, lflows);
1998 build_lb(od, lflows);
1999 build_stateful(od, lflows);
2002 /* Logical switch ingress table 0: Admission control framework (priority
2004 HMAP_FOR_EACH (od, key_node, datapaths) {
2009 /* Logical VLANs not supported. */
2010 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
2013 /* Broadcast/multicast source address is invalid. */
2014 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
2017 /* Port security flows have priority 50 (see below) and will continue
2018 * to the next table if packet source is acceptable. */
2021 /* Logical switch ingress table 0: Ingress port security - L2
2023 * Ingress table 1: Ingress port security - IP (priority 90 and 80)
2024 * Ingress table 2: Ingress port security - ND (priority 90 and 80)
2026 struct ovn_port *op;
2027 HMAP_FOR_EACH (op, key_node, ports) {
2032 if (!lsp_is_enabled(op->nbsp)) {
2033 /* Drop packets from disabled logical ports (since logical flow
2034 * tables are default-drop). */
2039 ds_put_format(&match, "inport == %s", op->json_key);
2040 build_port_security_l2("eth.src", op->ps_addrs, op->n_ps_addrs,
2042 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
2043 ds_cstr(&match), "next;");
2045 if (op->nbsp->n_port_security) {
2046 build_port_security_ip(P_IN, op, lflows);
2047 build_port_security_nd(op, lflows);
2051 /* Ingress table 1 and 2: Port security - IP and ND, by default goto next.
2053 HMAP_FOR_EACH (od, key_node, datapaths) {
2058 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
2059 ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
2062 /* Ingress table 9: ARP/ND responder, skip requests coming from localnet
2063 * ports. (priority 100). */
2064 HMAP_FOR_EACH (op, key_node, ports) {
2069 if (!strcmp(op->nbsp->type, "localnet")) {
2071 ds_put_format(&match, "inport == %s", op->json_key);
2072 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
2073 ds_cstr(&match), "next;");
2077 /* Ingress table 9: ARP/ND responder, reply for known IPs.
2079 HMAP_FOR_EACH (op, key_node, ports) {
2085 * Add ARP/ND reply flows if either the
2087 * - port type is router
2089 if (!lsp_is_up(op->nbsp) && strcmp(op->nbsp->type, "router")) {
2093 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2094 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2096 ds_put_format(&match, "arp.tpa == %s && arp.op == 1",
2097 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
2099 ds_put_format(&actions,
2100 "eth.dst = eth.src; "
2102 "arp.op = 2; /* ARP reply */ "
2103 "arp.tha = arp.sha; "
2105 "arp.tpa = arp.spa; "
2107 "outport = inport; "
2108 "inport = \"\"; /* Allow sending out inport. */ "
2110 op->lsp_addrs[i].ea_s, op->lsp_addrs[i].ea_s,
2111 op->lsp_addrs[i].ipv4_addrs[j].addr_s);
2112 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
2113 ds_cstr(&match), ds_cstr(&actions));
2116 if (op->lsp_addrs[i].n_ipv6_addrs > 0) {
2118 ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
2119 if (op->lsp_addrs[i].n_ipv6_addrs == 1) {
2120 ds_put_format(&match, "nd.target == %s",
2121 op->lsp_addrs[i].ipv6_addrs[0].addr_s);
2123 ds_put_format(&match, "nd.target == {");
2124 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv6_addrs; j++) {
2125 ds_put_format(&match, "%s, ",
2126 op->lsp_addrs[i].ipv6_addrs[j].addr_s);
2128 ds_chomp(&match, ' ');
2129 ds_chomp(&match, ',');
2130 ds_put_cstr(&match, "}");
2133 ds_put_format(&actions,
2134 "na { eth.src = %s; "
2136 "outport = inport; "
2137 "inport = \"\"; /* Allow sending out inport. */ "
2139 op->lsp_addrs[i].ea_s,
2140 op->lsp_addrs[i].ea_s);
2142 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
2143 ds_cstr(&match), ds_cstr(&actions));
2149 /* Ingress table 9: ARP/ND responder, by default goto next.
2151 HMAP_FOR_EACH (od, key_node, datapaths) {
2156 ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
2159 /* Logical switch ingress table 10 and 11: DHCP options and response
2160 * priority 100 flows. */
2161 HMAP_FOR_EACH (op, key_node, ports) {
2166 if (!lsp_is_enabled(op->nbsp) || !strcmp(op->nbsp->type, "router")) {
2167 /* Don't add the DHCP flows if the port is not enabled or if the
2168 * port is a router port. */
2172 if (!op->nbsp->dhcpv4_options) {
2173 /* CMS has disabled native DHCPv4 for this lport. */
2177 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2178 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2179 struct ds options_action = DS_EMPTY_INITIALIZER;
2180 struct ds response_action = DS_EMPTY_INITIALIZER;
2181 if (build_dhcpv4_action(
2182 op, op->lsp_addrs[i].ipv4_addrs[j].addr,
2183 &options_action, &response_action)) {
2184 struct ds match = DS_EMPTY_INITIALIZER;
2186 &match, "inport == %s && eth.src == %s && "
2187 "ip4.src == 0.0.0.0 && ip4.dst == 255.255.255.255 && "
2188 "udp.src == 68 && udp.dst == 67", op->json_key,
2189 op->lsp_addrs[i].ea_s);
2191 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_OPTIONS,
2192 100, ds_cstr(&match),
2193 ds_cstr(&options_action));
2194 /* If REGBIT_DHCP_OPTS_RESULT is set, it means the
2195 * put_dhcp_opts action is successful */
2196 ds_put_cstr(&match, " && "REGBIT_DHCP_OPTS_RESULT);
2197 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_DHCP_RESPONSE,
2198 100, ds_cstr(&match),
2199 ds_cstr(&response_action));
2201 ds_destroy(&options_action);
2202 ds_destroy(&response_action);
2209 /* Ingress table 10 and 11: DHCP options and response, by default goto next.
2212 HMAP_FOR_EACH (od, key_node, datapaths) {
2217 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_OPTIONS, 0, "1", "next;");
2218 ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;");
2221 /* Ingress table 12: Destination lookup, broadcast and multicast handling
2222 * (priority 100). */
2223 HMAP_FOR_EACH (op, key_node, ports) {
2228 if (lsp_is_enabled(op->nbsp)) {
2229 ovn_multicast_add(mcgroups, &mc_flood, op);
2232 HMAP_FOR_EACH (od, key_node, datapaths) {
2237 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
2238 "outport = \""MC_FLOOD"\"; output;");
2241 /* Ingress table 12: Destination lookup, unicast handling (priority 50), */
2242 HMAP_FOR_EACH (op, key_node, ports) {
2247 for (size_t i = 0; i < op->nbsp->n_addresses; i++) {
2248 struct eth_addr mac;
2250 if (eth_addr_from_string(op->nbsp->addresses[i], &mac)) {
2252 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
2253 ETH_ADDR_ARGS(mac));
2256 ds_put_format(&actions, "outport = %s; output;", op->json_key);
2257 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
2258 ds_cstr(&match), ds_cstr(&actions));
2259 } else if (!strcmp(op->nbsp->addresses[i], "unknown")) {
2260 if (lsp_is_enabled(op->nbsp)) {
2261 ovn_multicast_add(mcgroups, &mc_unknown, op);
2262 op->od->has_unknown = true;
2265 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
2268 "%s: invalid syntax '%s' in addresses column",
2269 op->nbsp->name, op->nbsp->addresses[i]);
2274 /* Ingress table 12: Destination lookup for unknown MACs (priority 0). */
2275 HMAP_FOR_EACH (od, key_node, datapaths) {
2280 if (od->has_unknown) {
2281 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
2282 "outport = \""MC_UNKNOWN"\"; output;");
2286 /* Egress tables 6: Egress port security - IP (priority 0)
2287 * Egress table 7: Egress port security L2 - multicast/broadcast
2288 * (priority 100). */
2289 HMAP_FOR_EACH (od, key_node, datapaths) {
2294 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
2295 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
2299 /* Egress table 6: Egress port security - IP (priorities 90 and 80)
2300 * if port security enabled.
2302 * Egress table 7: Egress port security - L2 (priorities 50 and 150).
2304 * Priority 50 rules implement port security for enabled logical port.
2306 * Priority 150 rules drop packets to disabled logical ports, so that they
2307 * don't even receive multicast or broadcast packets. */
2308 HMAP_FOR_EACH (op, key_node, ports) {
2314 ds_put_format(&match, "outport == %s", op->json_key);
2315 if (lsp_is_enabled(op->nbsp)) {
2316 build_port_security_l2("eth.dst", op->ps_addrs, op->n_ps_addrs,
2318 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
2319 ds_cstr(&match), "output;");
2321 ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
2322 ds_cstr(&match), "drop;");
2325 if (op->nbsp->n_port_security) {
2326 build_port_security_ip(P_OUT, op, lflows);
2331 ds_destroy(&actions);
/* Returns true if the logical router port is administratively enabled.
 * An unset 'enabled' column (NULL pointer) defaults to enabled. */
2335 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2337 return !lrport->enabled || *lrport->enabled;
2340 /* Returns a string of the IP address of the router port 'op' that
2341 * overlaps with 'ip_s'. If one is not found, returns NULL.
2343 * The caller must not free the returned string. */
/* Looks up, among 'op''s configured IPv4 networks, the one that contains
 * 'ip_s'.  Logs (rate-limited) and bails out if 'ip_s' does not parse.
 * The matching address string is owned by 'op'; the caller must not free
 * it. */
2345 find_lrp_member_ip(const struct ovn_port *op, const char *ip_s)
2349 if (!ip_parse(ip_s, &ip)) {
2350 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2351 VLOG_WARN_RL(&rl, "bad ip address %s", ip_s);
2355 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2356 const struct ipv4_netaddr *na = &op->lrp_networks.ipv4_addrs[i];
/* Same-subnet test: (network ^ ip) & mask is zero iff 'ip' lies inside
 * network 'na'. */
2358 if (!((na->network ^ ip) & na->mask)) {
2359 /* There should be only 1 interface that matches the
2360 * next hop. Otherwise, it's a configuration error,
2361 * because subnets of router's interfaces should NOT
/* Emits an IP routing flow on 'op''s datapath for network_s/plen:
 * decrement TTL, load the next hop into reg0 ('gateway' when one is
 * given, otherwise ip4.dst for a directly attached network), and send
 * the packet out 'op'.  The flow priority equals 'plen' so that longer
 * prefixes win, implementing longest-prefix match.
 * NOTE(review): 'lrp_addr_s' is consumed on lines elided from this view —
 * confirm its use against the full source. */
2371 add_route(struct hmap *lflows, const struct ovn_port *op,
2372 const char *lrp_addr_s, const char *network_s, int plen,
2373 const char *gateway)
2375 char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);
2377 struct ds actions = DS_EMPTY_INITIALIZER;
2378 ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
2380 ds_put_cstr(&actions, gateway);
2382 ds_put_cstr(&actions, "ip4.dst");
2384 ds_put_format(&actions, "; "
2388 "inport = \"\"; /* Allow sending out inport. */ "
2391 op->lrp_networks.ea_s,
2394 /* The priority here is calculated to implement longest-prefix-match
2396 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match,
2398 ds_destroy(&actions);
/* Converts one northbound static route on router 'od' into a routing flow.
 * Validation: the nexthop must be a host IP address (/32 mask) and
 * ip_prefix a valid CIDR; bad values are logged (rate-limited) and the
 * route is skipped.  The outgoing port is either the explicitly named
 * 'output_port' or, failing that, the first router port whose subnet
 * contains the nexthop.  With a port and its member IP in hand, the flow
 * itself is emitted through add_route(). */
2403 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
2405 const struct nbrec_logical_router_static_route *route)
2407 ovs_be32 prefix, nexthop, mask;
2408 const char *lrp_addr_s;
2410 /* Verify that next hop is an IP address with 32 bits mask. */
2411 char *error = ip_parse_masked(route->nexthop, &nexthop, &mask);
2412 if (error || mask != OVS_BE32_MAX) {
2413 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2414 VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
2419 /* Verify that ip prefix is a valid CIDR address. */
2420 error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
2421 if (error || !ip_is_cidr(mask)) {
2422 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2423 VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
2429 /* Find the outgoing port. */
2430 struct ovn_port *out_port = NULL;
2431 if (route->output_port) {
2432 out_port = ovn_port_find(ports, route->output_port);
2434 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2435 VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
2436 route->output_port, route->ip_prefix);
2439 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
2441 /* output_port is not specified, find the
2442 * router port matching the next hop. */
2444 for (i = 0; i < od->nbr->n_ports; i++) {
2445 struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
2446 out_port = ovn_port_find(ports, lrp->name);
2448 /* This should not happen. */
2452 lrp_addr_s = find_lrp_member_ip(out_port, route->nexthop);
2460 /* There is no matched out port. */
2461 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2462 VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
2463 route->ip_prefix, route->nexthop);
/* Canonicalize the prefix (mask off host bits) before building the flow. */
2467 char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix & mask));
2468 add_route(lflows, out_port, lrp_addr_s, prefix_s,
2469 ip_count_cidr_bits(mask), route->nexthop);
/* Appends 'op''s IPv4 addresses to 'ds' in logical-flow expression form.
 * A lone address (when broadcast addresses are not requested) is emitted
 * bare; otherwise a "{a, b, ...}" set is built, additionally containing
 * each network's broadcast address when 'add_bcast' is true. */
2474 op_put_networks(struct ds *ds, const struct ovn_port *op, bool add_bcast)
2476 if (!add_bcast && op->lrp_networks.n_ipv4_addrs == 1) {
2477 ds_put_format(ds, "%s", op->lrp_networks.ipv4_addrs[0].addr_s);
2481 ds_put_cstr(ds, "{");
2482 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2483 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].addr_s);
2485 ds_put_format(ds, "%s, ", op->lrp_networks.ipv4_addrs[i].bcast_s);
2490 ds_put_cstr(ds, "}");
/* Builds the logical flows for every logical router datapath and router
 * port: admission control, IP input (sanity drops, ICMP echo replies,
 * ARP handling), NAT processing on gateway routers, IP routing, ARP
 * resolution, ARP request generation, and final delivery. */
2494 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
2495 struct hmap *lflows)
2497 /* This flow table structure is documented in ovn-northd(8), so please
2498 * update ovn-northd.8.xml if you change anything. */
2500 struct ds match = DS_EMPTY_INITIALIZER;
2501 struct ds actions = DS_EMPTY_INITIALIZER;
2503 /* Logical router ingress table 0: Admission control framework. */
2504 struct ovn_datapath *od;
2505 HMAP_FOR_EACH (od, key_node, datapaths) {
2510 /* Logical VLANs not supported.
2511 * Broadcast/multicast source address is invalid. */
2512 ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
2513 "vlan.present || eth.src[40]", "drop;");
2516 /* Logical router ingress table 0: match (priority 50). */
2517 struct ovn_port *op;
2518 HMAP_FOR_EACH (op, key_node, ports) {
2523 if (!lrport_is_enabled(op->nbrp)) {
2524 /* Drop packets from disabled logical ports (since logical flow
2525 * tables are default-drop). */
/* Accept only unicast to this port's MAC, or multicast, on this inport. */
2530 ds_put_format(&match, "(eth.mcast || eth.dst == %s) && inport == %s",
2531 op->lrp_networks.ea_s, op->json_key);
2532 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
2533 ds_cstr(&match), "next;");
2536 /* Logical router ingress table 1: IP Input. */
2537 HMAP_FOR_EACH (od, key_node, datapaths) {
2542 /* L3 admission control: drop multicast and broadcast source, localhost
2543 * source or destination, and zero network source or destination
2544 * (priority 100). */
2545 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
2547 "ip4.src == 255.255.255.255 || "
2548 "ip4.src == 127.0.0.0/8 || "
2549 "ip4.dst == 127.0.0.0/8 || "
2550 "ip4.src == 0.0.0.0/8 || "
2551 "ip4.dst == 0.0.0.0/8",
2554 /* ARP reply handling. Use ARP replies to populate the logical
2555 * router's ARP table. */
2556 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
2557 "put_arp(inport, arp.spa, arp.sha);");
2559 /* Drop Ethernet local broadcast. By definition this traffic should
2560 * not be forwarded.*/
2561 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2562 "eth.bcast", "drop;");
/* TTL 0 or 1 packets are dropped rather than forwarded. */
2566 * XXX Need to send ICMP time exceeded if !ip.later_frag. */
2568 ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
2569 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
2570 ds_cstr(&match), "drop;");
2572 /* Pass other traffic not already handled to the next table for
2574 ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
2577 HMAP_FOR_EACH (op, key_node, ports) {
2582 /* L3 admission control: drop packets that originate from an IP address
2583 * owned by the router or a broadcast address known to the router
2584 * (priority 100). */
2586 ds_put_cstr(&match, "ip4.src == ");
2587 op_put_networks(&match, op, true);
2588 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
2589 ds_cstr(&match), "drop;");
2591 /* ICMP echo reply. These flows reply to ICMP echo requests
2592 * received for the router's IP address. Since packets only
2593 * get here as part of the logical router datapath, the inport
2594 * (i.e. the incoming locally attached net) does not matter.
2595 * The ip.ttl also does not matter (RFC1812 section 4.2.2.9) */
2597 ds_put_cstr(&match, "ip4.dst == ");
2598 op_put_networks(&match, op, false);
2599 ds_put_cstr(&match, " && icmp4.type == 8 && icmp4.code == 0");
2602 ds_put_format(&actions,
2603 "ip4.dst <-> ip4.src; "
2606 "inport = \"\"; /* Allow sending out inport. */ "
2608 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2609 ds_cstr(&match), ds_cstr(&actions));
2611 /* ARP reply. These flows reply to ARP requests for the router's own
2613 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2615 ds_put_format(&match,
2616 "inport == %s && arp.tpa == %s && arp.op == 1",
2617 op->json_key, op->lrp_networks.ipv4_addrs[i].addr_s);
2620 ds_put_format(&actions,
2621 "eth.dst = eth.src; "
2623 "arp.op = 2; /* ARP reply */ "
2624 "arp.tha = arp.sha; "
2626 "arp.tpa = arp.spa; "
2629 "inport = \"\"; /* Allow sending out inport. */ "
2631 op->lrp_networks.ea_s,
2632 op->lrp_networks.ea_s,
2633 op->lrp_networks.ipv4_addrs[i].addr_s,
2635 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2636 ds_cstr(&match), ds_cstr(&actions));
/* Collect the router's SNAT external IPs; they are spared from the
 * priority-60 "drop IP traffic to this router" flow built below. */
2639 ovs_be32 *snat_ips = xmalloc(sizeof *snat_ips * op->od->nbr->n_nat);
2640 size_t n_snat_ips = 0;
2641 for (int i = 0; i < op->od->nbr->n_nat; i++) {
2642 const struct nbrec_nat *nat;
2644 nat = op->od->nbr->nat[i];
2647 if (!ip_parse(nat->external_ip, &ip) || !ip) {
2648 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2649 VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
2650 "for router %s", nat->external_ip, op->key);
2654 if (!strcmp(nat->type, "snat")) {
2655 snat_ips[n_snat_ips++] = ip;
2659 /* ARP handling for external IP addresses.
2661 * DNAT IP addresses are external IP addresses that need ARP
2664 ds_put_format(&match,
2665 "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2666 op->json_key, IP_ARGS(ip));
2669 ds_put_format(&actions,
2670 "eth.dst = eth.src; "
2672 "arp.op = 2; /* ARP reply */ "
2673 "arp.tha = arp.sha; "
2675 "arp.tpa = arp.spa; "
2676 "arp.spa = "IP_FMT"; "
2678 "inport = \"\"; /* Allow sending out inport. */ "
2680 op->lrp_networks.ea_s,
2681 op->lrp_networks.ea_s,
2684 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2685 ds_cstr(&match), ds_cstr(&actions));
2689 ds_put_cstr(&match, "ip4.dst == {");
2690 bool has_drop_ips = false;
2691 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2692 for (int j = 0; j < n_snat_ips; j++) {
2693 /* Packets to SNAT IPs should not be dropped. */
2694 if (op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j]) {
2698 ds_put_format(&match, "%s, ",
2699 op->lrp_networks.ipv4_addrs[i].addr_s);
2700 has_drop_ips = true;
2702 ds_chomp(&match, ' ');
2703 ds_chomp(&match, ',');
2704 ds_put_cstr(&match, "}");
2707 /* Drop IP traffic to this router. */
2708 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2709 ds_cstr(&match), "drop;");
2715 /* NAT in Gateway routers. */
2716 HMAP_FOR_EACH (od, key_node, datapaths) {
2721 /* Packets are allowed by default. */
2722 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2723 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2724 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2726 /* NAT rules are only valid on Gateway routers. */
2727 if (!smap_get(&od->nbr->options, "chassis")) {
2731 for (int i = 0; i < od->nbr->n_nat; i++) {
2732 const struct nbrec_nat *nat;
2734 nat = od->nbr->nat[i];
/* external_ip must be a host address (/32) for every NAT type. */
2738 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2739 if (error || mask != OVS_BE32_MAX) {
2740 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2741 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2747 /* Check the validity of nat->logical_ip. 'logical_ip' can
2748 * be a subnet when the type is "snat". */
2749 error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2750 if (!strcmp(nat->type, "snat")) {
2752 static struct vlog_rate_limit rl =
2753 VLOG_RATE_LIMIT_INIT(5, 1);
2754 VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2755 "in router "UUID_FMT"",
2756 nat->logical_ip, UUID_ARGS(&od->key));
2761 if (error || mask != OVS_BE32_MAX) {
2762 static struct vlog_rate_limit rl =
2763 VLOG_RATE_LIMIT_INIT(5, 1);
2764 VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2765 ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2771 /* Ingress UNSNAT table: It is for already established connections'
2772 * reverse traffic. i.e., SNAT has already been done in egress
2773 * pipeline and now the packet has entered the ingress pipeline as
2774 * part of a reply. We undo the SNAT here.
2776 * Undoing SNAT has to happen before DNAT processing. This is
2777 * because when the packet was DNATed in ingress pipeline, it did
2778 * not know about the possibility of eventual additional SNAT in
2779 * egress pipeline. */
2780 if (!strcmp(nat->type, "snat")
2781 || !strcmp(nat->type, "dnat_and_snat")) {
2783 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2784 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
2785 ds_cstr(&match), "ct_snat; next;");
2788 /* Ingress DNAT table: Packets enter the pipeline with destination
2789 * IP address that needs to be DNATted from a external IP address
2790 * to a logical IP address. */
2791 if (!strcmp(nat->type, "dnat")
2792 || !strcmp(nat->type, "dnat_and_snat")) {
2793 /* Packet when it goes from the initiator to destination.
2794 * We need to zero the inport because the router can
2795 * send the packet back through the same interface. */
2797 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2799 ds_put_format(&actions,"inport = \"\"; ct_dnat(%s);",
2801 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
2802 ds_cstr(&match), ds_cstr(&actions));
2805 /* Egress SNAT table: Packets enter the egress pipeline with
2806 * source ip address that needs to be SNATted to a external ip
2808 if (!strcmp(nat->type, "snat")
2809 || !strcmp(nat->type, "dnat_and_snat")) {
2811 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
2813 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
2815 /* The priority here is calculated such that the
2816 * nat->logical_ip with the longest mask gets a higher
2818 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
2819 count_1bits(ntohl(mask)) + 1,
2820 ds_cstr(&match), ds_cstr(&actions));
2824 /* Re-circulate every packet through the DNAT zone.
2825 * This helps with two things.
2827 * 1. Any packet that needs to be unDNATed in the reverse
2828 * direction gets unDNATed. Ideally this could be done in
2829 * the egress pipeline. But since the gateway router
2830 * does not have any feature that depends on the source
2831 * ip address being external IP address for IP routing,
2832 * we can do it here, saving a future re-circulation.
2834 * 2. Any packet that was sent through SNAT zone in the
2835 * previous table automatically gets re-circulated to get
2836 * back the new destination IP address that is needed for
2837 * routing in the openflow pipeline. */
2838 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2839 "ip", "inport = \"\"; ct_dnat;");
2842 /* Logical router ingress table 4: IP Routing.
2844 * A packet that arrives at this table is an IP packet that should be
2845 * routed to the address in ip4.dst. This table sets outport to the correct
2846 * output port, eth.src to the output port's MAC address, and reg0 to the
2847 * next-hop IP address (leaving ip4.dst, the packet’s final destination,
2848 * unchanged), and advances to the next table for ARP resolution. */
2849 HMAP_FOR_EACH (op, key_node, ports) {
/* Connected routes: one per network directly attached to the port. */
2854 for (int i = 0; i < op->lrp_networks.n_ipv4_addrs; i++) {
2855 add_route(lflows, op, op->lrp_networks.ipv4_addrs[i].addr_s,
2856 op->lrp_networks.ipv4_addrs[i].network_s,
2857 op->lrp_networks.ipv4_addrs[i].plen, NULL);
2861 HMAP_FOR_EACH (od, key_node, datapaths) {
2866 /* Convert the static routes to flows. */
2867 for (int i = 0; i < od->nbr->n_static_routes; i++) {
2868 const struct nbrec_logical_router_static_route *route;
2870 route = od->nbr->static_routes[i];
2871 build_static_route_flow(lflows, od, ports, route);
2874 /* XXX destination unreachable */
2876 /* Local router ingress table 5: ARP Resolution.
2878 * Any packet that reaches this table is an IP packet whose next-hop IP
2879 * address is in reg0. (ip4.dst is the final destination.) This table
2880 * resolves the IP address in reg0 into an output port in outport and an
2881 * Ethernet address in eth.dst. */
2882 HMAP_FOR_EACH (op, key_node, ports) {
2884 /* This is a logical router port. If next-hop IP address in 'reg0'
2885 * matches ip address of this router port, then the packet is
2886 * intended to eventually be sent to this logical port. Set the
2887 * destination mac address using this port's mac address.
2889 * The packet is still in peer's logical pipeline. So the match
2890 * should be on peer's outport. */
2891 if (op->peer && op->peer->nbrp) {
2893 ds_put_format(&match, "outport == %s && reg0 == ",
2894 op->peer->json_key);
2895 op_put_networks(&match, op, false);
2898 ds_put_format(&actions, "eth.dst = %s; next;",
2899 op->lrp_networks.ea_s);
2900 ovn_lflow_add(lflows, op->peer->od, S_ROUTER_IN_ARP_RESOLVE,
2901 100, ds_cstr(&match), ds_cstr(&actions));
2903 } else if (op->od->n_router_ports && strcmp(op->nbsp->type, "router")) {
2904 /* This is a logical switch port that backs a VM or a container.
2905 * Extract its addresses. For each of the address, go through all
2906 * the router ports attached to the switch (to which this port
2907 * connects) and if the address in question is reachable from the
2908 * router port, add an ARP entry in that router's pipeline. */
2910 for (size_t i = 0; i < op->n_lsp_addrs; i++) {
2911 const char *ea_s = op->lsp_addrs[i].ea_s;
2912 for (size_t j = 0; j < op->lsp_addrs[i].n_ipv4_addrs; j++) {
2913 const char *ip_s = op->lsp_addrs[i].ipv4_addrs[j].addr_s;
2914 for (size_t k = 0; k < op->od->n_router_ports; k++) {
2915 /* Get the Logical_Router_Port that the
2916 * Logical_Switch_Port is connected to, as
2918 const char *peer_name = smap_get(
2919 &op->od->router_ports[k]->nbsp->options,
2925 struct ovn_port *peer = ovn_port_find(ports, peer_name);
2926 if (!peer || !peer->nbrp) {
2930 if (!find_lrp_member_ip(peer, ip_s)) {
2935 ds_put_format(&match, "outport == %s && reg0 == %s",
2936 peer->json_key, ip_s);
2939 ds_put_format(&actions, "eth.dst = %s; next;", ea_s);
2940 ovn_lflow_add(lflows, peer->od,
2941 S_ROUTER_IN_ARP_RESOLVE, 100,
2942 ds_cstr(&match), ds_cstr(&actions));
2946 } else if (!strcmp(op->nbsp->type, "router")) {
2947 /* This is a logical switch port that connects to a router. */
2949 /* The peer of this switch port is the router port for which
2950 * we need to add logical flows such that it can resolve
2951 * ARP entries for all the other router ports connected to
2952 * the switch in question. */
2954 const char *peer_name = smap_get(&op->nbsp->options,
2960 struct ovn_port *peer = ovn_port_find(ports, peer_name);
2961 if (!peer || !peer->nbrp) {
2965 for (size_t i = 0; i < op->od->n_router_ports; i++) {
2966 const char *router_port_name = smap_get(
2967 &op->od->router_ports[i]->nbsp->options,
2969 struct ovn_port *router_port = ovn_port_find(ports,
2971 if (!router_port || !router_port->nbrp) {
2975 /* Skip the router port under consideration. */
2976 if (router_port == peer) {
2981 ds_put_format(&match, "outport == %s && reg0 == ",
2983 op_put_networks(&match, router_port, false);
2986 ds_put_format(&actions, "eth.dst = %s; next;",
2987 router_port->lrp_networks.ea_s);
2988 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2989 100, ds_cstr(&match), ds_cstr(&actions));
2994 HMAP_FOR_EACH (od, key_node, datapaths) {
/* Fall back to the dynamically learned ARP table (put_arp above). */
2999 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
3000 "get_arp(outport, reg0); next;");
3003 /* Local router ingress table 6: ARP request.
3005 * In the common case where the Ethernet destination has been resolved,
3006 * this table outputs the packet (priority 0). Otherwise, it composes
3007 * and sends an ARP request (priority 100). */
3008 HMAP_FOR_EACH (od, key_node, datapaths) {
3013 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
3014 "eth.dst == 00:00:00:00:00:00",
3016 "eth.dst = ff:ff:ff:ff:ff:ff; "
3018 "arp.op = 1; " /* ARP request */
3021 ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
3024 /* Logical router egress table 1: Delivery (priority 100).
3026 * Priority 100 rules deliver packets to enabled logical ports. */
3027 HMAP_FOR_EACH (op, key_node, ports) {
3032 if (!lrport_is_enabled(op->nbrp)) {
3033 /* Drop packets to disabled logical ports (since logical flow
3034 * tables are default-drop). */
3039 ds_put_format(&match, "outport == %s", op->json_key);
3040 ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
3041 ds_cstr(&match), "output;");
3045 ds_destroy(&actions);
3048 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
3049 * constructing their contents based on the OVN_NB database. */
/* Recomputes all logical flows and multicast groups from the northbound
 * data and reconciles them against the southbound Logical_Flow and
 * Multicast_Group tables: existing SB rows that still match a computed
 * entry are kept (and the computed entry is discarded); stale SB rows are
 * deleted; anything left over is inserted as a new SB row. */
3051 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
3054 struct hmap lflows = HMAP_INITIALIZER(&lflows);
3055 struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
3057 build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
3058 build_lrouter_flows(datapaths, ports, &lflows);
3060 /* Push changes to the Logical_Flow table to database. */
3061 const struct sbrec_logical_flow *sbflow, *next_sbflow;
3062 SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
3063 struct ovn_datapath *od
3064 = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
/* SB flow's datapath no longer exists: prune the flow. */
3066 sbrec_logical_flow_delete(sbflow);
3070 enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
3071 enum ovn_pipeline pipeline
3072 = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
3073 struct ovn_lflow *lflow = ovn_lflow_find(
3074 &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
3075 sbflow->priority, sbflow->match, sbflow->actions);
/* Matching computed flow found: the SB row is already up to date, so
 * drop the computed copy; otherwise the SB row is stale. */
3077 ovn_lflow_destroy(&lflows, lflow);
3079 sbrec_logical_flow_delete(sbflow);
/* Whatever remains in 'lflows' has no SB counterpart: insert it. */
3082 struct ovn_lflow *lflow, *next_lflow;
3083 HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
3084 enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
3085 uint8_t table = ovn_stage_get_table(lflow->stage);
3087 sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
3088 sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
3089 sbrec_logical_flow_set_pipeline(
3090 sbflow, pipeline == P_IN ? "ingress" : "egress");
3091 sbrec_logical_flow_set_table_id(sbflow, table);
3092 sbrec_logical_flow_set_priority(sbflow, lflow->priority);
3093 sbrec_logical_flow_set_match(sbflow, lflow->match);
3094 sbrec_logical_flow_set_actions(sbflow, lflow->actions);
/* Record the human-readable stage name for debugging (e.g. ovn-sbctl). */
3096 const struct smap ids = SMAP_CONST1(&ids, "stage-name",
3097 ovn_stage_to_str(lflow->stage));
3098 sbrec_logical_flow_set_external_ids(sbflow, &ids);
3100 ovn_lflow_destroy(&lflows, lflow);
3102 hmap_destroy(&lflows);
3104 /* Push changes to the Multicast_Group table to database. */
3105 const struct sbrec_multicast_group *sbmc, *next_sbmc;
3106 SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
3107 struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
3110 sbrec_multicast_group_delete(sbmc);
3114 struct multicast_group group = { .name = sbmc->name,
3115 .key = sbmc->tunnel_key };
3116 struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
3118 ovn_multicast_update_sbrec(mc, sbmc);
3119 ovn_multicast_destroy(&mcgroups, mc);
3121 sbrec_multicast_group_delete(sbmc);
/* Insert multicast groups that had no existing SB row. */
3124 struct ovn_multicast *mc, *next_mc;
3125 HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
3126 sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
3127 sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
3128 sbrec_multicast_group_set_name(sbmc, mc->group->name);
3129 sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
3130 ovn_multicast_update_sbrec(mc, sbmc);
3131 ovn_multicast_destroy(&mcgroups, mc);
3133 hmap_destroy(&mcgroups);
3136 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
3137 * We always update OVN_Southbound to match the current data in
3138 * OVN_Northbound, so that the address sets used in Logical_Flows in
3139 * OVN_Southbound are checked against the proper set. */
/* Mirrors the NB Address_Set table into the SB database: every NB set is
 * upserted (inserted if missing, addresses always rewritten), and SB rows
 * with no NB counterpart are deleted. */
3141 sync_address_sets(struct northd_context *ctx)
3143 struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
/* Index the existing SB rows by name. */
3145 const struct sbrec_address_set *sb_address_set;
3146 SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
3147 shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
/* Walk NB sets, consuming the matching SB entry or creating one. */
3150 const struct nbrec_address_set *nb_address_set;
3151 NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
3152 sb_address_set = shash_find_and_delete(&sb_address_sets,
3153 nb_address_set->name);
3154 if (!sb_address_set) {
3155 sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
3156 sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
3159 sbrec_address_set_set_addresses(sb_address_set,
3160 /* "char **" is not compatible with "const char **" */
3161 (const char **) nb_address_set->addresses,
3162 nb_address_set->n_addresses);
/* Anything still in the shash exists only in SB: delete it. */
3165 struct shash_node *node, *next;
3166 SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
3167 sbrec_address_set_delete(node->data);
3168 shash_delete(&sb_address_sets, node);
3170 shash_destroy(&sb_address_sets);
/* One northbound processing pass: recompute datapaths, ports, logical
 * flows, and address sets from the NB contents, then free the temporary
 * indexes.  Does nothing when there is no open southbound transaction. */
3174 ovnnb_db_run(struct northd_context *ctx)
3176 if (!ctx->ovnsb_txn) {
3179 struct hmap datapaths, ports;
3180 build_datapaths(ctx, &datapaths);
3181 build_ports(ctx, &datapaths, &ports);
3182 build_lflows(ctx, &datapaths, &ports);
3184 sync_address_sets(ctx);
/* Tear down the per-pass indexes; they are rebuilt on the next run. */
3186 struct ovn_datapath *dp, *next_dp;
3187 HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
3188 ovn_datapath_destroy(&datapaths, dp);
3190 hmap_destroy(&datapaths);
3192 struct ovn_port *port, *next_port;
3193 HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
3194 ovn_port_destroy(&ports, port);
3196 hmap_destroy(&ports);
3200 * The only change we get notified about is if the 'chassis' column of the
3201 * 'Port_Binding' table changes. When this column is not empty, it means we
3202 * need to set the corresponding logical port as 'up' in the northbound DB.
/* Southbound pass: propagate each Port_Binding's chassis presence into the
 * matching NB logical switch port's 'up' column (see the comment above).
 * Does nothing when there is no open northbound transaction. */
3205 ovnsb_db_run(struct northd_context *ctx)
3207 if (!ctx->ovnnb_txn) {
3210 struct hmap lports_hmap;
3211 const struct sbrec_port_binding *sb;
3212 const struct nbrec_logical_switch_port *nbsp;
3214 struct lport_hash_node {
3215 struct hmap_node node;
3216 const struct nbrec_logical_switch_port *nbsp;
3219 hmap_init(&lports_hmap);
/* Index NB switch ports by name for O(1) lookup per binding. */
3221 NBREC_LOGICAL_SWITCH_PORT_FOR_EACH(nbsp, ctx->ovnnb_idl) {
3222 hash_node = xzalloc(sizeof *hash_node);
3223 hash_node->nbsp = nbsp;
3224 hmap_insert(&lports_hmap, &hash_node->node, hash_string(nbsp->name, 0));
3227 SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
3229 HMAP_FOR_EACH_WITH_HASH(hash_node, node,
3230 hash_string(sb->logical_port, 0),
3232 if (!strcmp(sb->logical_port, hash_node->nbsp->name)) {
3233 nbsp = hash_node->nbsp;
3239 /* The logical port doesn't exist for this port binding. This can
3240 * happen under normal circumstances when ovn-northd hasn't gotten
3241 * around to pruning the Port_Binding yet. */
/* Update 'up' only when it would actually change (avoids NB churn).
 * NOTE(review): the 'up' value written comes from lines elided here —
 * confirm against the full source. */
3245 if (sb->chassis && (!nbsp->up || !*nbsp->up)) {
3247 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
3248 } else if (!sb->chassis && (!nbsp->up || *nbsp->up)) {
3250 nbrec_logical_switch_port_set_up(nbsp, &up, 1);
3254 HMAP_FOR_EACH_POP(hash_node, node, &lports_hmap) {
3257 hmap_destroy(&lports_hmap);
/* DHCPv4 options that ovn-northd supports.  This list is synced into the
 * southbound DHCP_Options table by
 * check_and_add_supported_dhcp_opts_to_sb_db(). */
3261 static struct dhcp_opts_map supported_dhcp_opts[] = {
3265 DHCP_OPT_DNS_SERVER,
3266 DHCP_OPT_LOG_SERVER,
3267 DHCP_OPT_LPR_SERVER,
3268 DHCP_OPT_SWAP_SERVER,
3269 DHCP_OPT_POLICY_FILTER,
3270 DHCP_OPT_ROUTER_SOLICITATION,
3271 DHCP_OPT_NIS_SERVER,
3272 DHCP_OPT_NTP_SERVER,
3274 DHCP_OPT_TFTP_SERVER,
3275 DHCP_OPT_CLASSLESS_STATIC_ROUTE,
3276 DHCP_OPT_MS_CLASSLESS_STATIC_ROUTE,
3277 DHCP_OPT_IP_FORWARD_ENABLE,
3278 DHCP_OPT_ROUTER_DISCOVERY,
3279 DHCP_OPT_ETHERNET_ENCAP,
3280 DHCP_OPT_DEFAULT_TTL,
3283 DHCP_OPT_LEASE_TIME,
/* Reconciles the SB DHCP_Options table with supported_dhcp_opts[]:
 * SB rows naming a supported option are kept, unrecognized rows are
 * deleted, and options with no SB row are inserted. */
3289 check_and_add_supported_dhcp_opts_to_sb_db(struct northd_context *ctx)
3291 struct hmap dhcp_opts_to_add = HMAP_INITIALIZER(&dhcp_opts_to_add);
/* Start with every supported option as a candidate to insert. */
3292 for (size_t i = 0; (i < sizeof(supported_dhcp_opts) /
3293 sizeof(supported_dhcp_opts[0])); i++) {
3294 hmap_insert(&dhcp_opts_to_add, &supported_dhcp_opts[i].hmap_node,
3295 dhcp_opt_hash(supported_dhcp_opts[i].name));
/* Drop candidates that already have an SB row; delete unknown SB rows. */
3298 const struct sbrec_dhcp_options *opt_row, *opt_row_next;
3299 SBREC_DHCP_OPTIONS_FOR_EACH_SAFE(opt_row, opt_row_next, ctx->ovnsb_idl) {
3300 struct dhcp_opts_map *dhcp_opt =
3301 dhcp_opts_find(&dhcp_opts_to_add, opt_row->name);
3303 hmap_remove(&dhcp_opts_to_add, &dhcp_opt->hmap_node);
3305 sbrec_dhcp_options_delete(opt_row);
/* Insert whatever is still missing from the SB table. */
3309 struct dhcp_opts_map *opt;
3310 HMAP_FOR_EACH (opt, hmap_node, &dhcp_opts_to_add) {
3311 struct sbrec_dhcp_options *sbrec_dhcp_option =
3312 sbrec_dhcp_options_insert(ctx->ovnsb_txn);
3313 sbrec_dhcp_options_set_name(sbrec_dhcp_option, opt->name);
3314 sbrec_dhcp_options_set_code(sbrec_dhcp_option, opt->code);
3315 sbrec_dhcp_options_set_type(sbrec_dhcp_option, opt->type);
3318 hmap_destroy(&dhcp_opts_to_add);
/* Cached default northbound DB address, computed on first use and kept
 * for the life of the process. */
3321 static char *default_nb_db_;
3326 if (!default_nb_db_) {
3327 default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
3329 return default_nb_db_;
/* Cached default southbound DB address, computed on first use and kept
 * for the life of the process. */
3332 static char *default_sb_db_;
3337 if (!default_sb_db_) {
3338 default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
3340 return default_sb_db_;
/* Parses ovn-northd's command-line options.  Sets the ovnnb_db/ovnsb_db
 * globals (falling back to the default unix-socket paths when not given)
 * and dispatches the shared daemon, vlog, and SSL option groups to their
 * standard handlers. */
3344 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
3347 DAEMON_OPTION_ENUMS,
3350 static const struct option long_options[] = {
3351 {"ovnsb-db", required_argument, NULL, 'd'},
3352 {"ovnnb-db", required_argument, NULL, 'D'},
3353 {"help", no_argument, NULL, 'h'},
3354 {"options", no_argument, NULL, 'o'},
3355 {"version", no_argument, NULL, 'V'},
3356 DAEMON_LONG_OPTIONS,
3358 STREAM_SSL_LONG_OPTIONS,
3361 char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
3366 c = getopt_long(argc, argv, short_options, long_options, NULL);
/* Option groups owned by shared OVS libraries handle themselves. */
3372 DAEMON_OPTION_HANDLERS;
3373 VLOG_OPTION_HANDLERS;
3374 STREAM_SSL_OPTION_HANDLERS;
3389 ovs_cmdl_print_options(long_options);
3393 ovs_print_version(0, 0);
/* Apply defaults for any DB address not given on the command line. */
3402 ovnsb_db = default_sb_db();
3406 ovnnb_db = default_nb_db();
3409 free(short_options);
/* Registers 'column' with the IDL for replication but suppresses change
 * alerts for it: ovn-northd writes these columns itself, so being woken up
 * by its own updates would cause needless recomputation. */
3413 add_column_noalert(struct ovsdb_idl *idl,
3414                    const struct ovsdb_idl_column *column)
3416     ovsdb_idl_add_column(idl, column);
3417     ovsdb_idl_omit_alert(idl, column);
/* ovn-northd entry point.  Sets up daemon/unixctl plumbing, opens IDL
 * connections to the NB database (monitor everything) and SB database
 * (monitor only the tables/columns northd maintains), then runs the main
 * loop translating NB configuration into SB logical flows until told to
 * exit.  Several error checks, the loop construct itself, and the final
 * teardown lines are elided from this view. */
3421 main(int argc, char *argv[])
3423     int res = EXIT_SUCCESS;
3424     struct unixctl_server *unixctl;
/* Standard OVS process setup: ignore SIGPIPE, name the process, handle
 * Windows service start, then parse our options. */
3428     fatal_ignore_sigpipe();
3429     set_program_name(argv[0]);
3430     service_start(&argc, &argv);
3431     parse_options(argc, argv);
3433     daemonize_start(false);
3435     retval = unixctl_server_create(NULL, &unixctl);
/* "ovs-appctl exit" sets the 'exiting' flag checked by the main loop. */
3439     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
3441     daemonize_complete();
3446     /* We want to detect all changes to the ovn-nb db. */
3447     struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3448         ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
/* SB IDL: monitor_everything_by_default=false -- only the tables and
 * columns registered below are replicated. */
3450     struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3451         ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
/* Logical_Flow: fully owned by northd, so alerts are suppressed on every
 * column (see add_column_noalert()). */
3453     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
3454     add_column_noalert(ovnsb_idl_loop.idl,
3455                        &sbrec_logical_flow_col_logical_datapath);
3456     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
3457     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
3458     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
3459     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
3460     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
/* Multicast_Group: also fully owned by northd. */
3462     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
3463     add_column_noalert(ovnsb_idl_loop.idl,
3464                        &sbrec_multicast_group_col_datapath);
3465     add_column_noalert(ovnsb_idl_loop.idl,
3466                        &sbrec_multicast_group_col_tunnel_key);
3467     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
3468     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
/* Datapath_Binding: northd-owned. */
3470     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
3471     add_column_noalert(ovnsb_idl_loop.idl,
3472                        &sbrec_datapath_binding_col_tunnel_key);
3473     add_column_noalert(ovnsb_idl_loop.idl,
3474                        &sbrec_datapath_binding_col_external_ids);
/* Port_Binding: northd owns most columns, but 'chassis' is written by
 * ovn-controller, so it is added WITH alerts -- a chassis change must
 * wake northd (e.g. to update NB 'up' state). */
3476     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
3477     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
3478     add_column_noalert(ovnsb_idl_loop.idl,
3479                        &sbrec_port_binding_col_logical_port);
3480     add_column_noalert(ovnsb_idl_loop.idl,
3481                        &sbrec_port_binding_col_tunnel_key);
3482     add_column_noalert(ovnsb_idl_loop.idl,
3483                        &sbrec_port_binding_col_parent_port);
3484     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
3485     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
3486     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
3487     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
3488     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
/* DHCP_Options: kept in sync with supported_dhcp_opts (see
 * check_and_add_supported_dhcp_opts_to_sb_db()). */
3489     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_dhcp_options);
3490     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_code);
3491     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_type);
3492     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_dhcp_options_col_name);
/* Address_Set: northd-owned. */
3494     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
3495     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
3496     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
/* Main loop body (loop construct elided): run both IDL loops; the txn
 * pointers are NULL when no transaction could be started this iteration. */
3501         struct northd_context ctx = {
3502             .ovnnb_idl = ovnnb_idl_loop.idl,
3503             .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
3504             .ovnsb_idl = ovnsb_idl_loop.idl,
3505             .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
/* Only touch the SB DHCP_Options table when we have a transaction. */
3510         if (ctx.ovnsb_txn) {
3511             check_and_add_supported_dhcp_opts_to_sb_db(&ctx);
3514         unixctl_server_run(unixctl);
3515         unixctl_server_wait(unixctl);
/* When exiting, wake immediately instead of blocking in poll(). */
3517             poll_immediate_wake();
3519         ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
3520         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
/* Windows service stop request also terminates the loop (body elided). */
3523         if (should_service_stop()) {
/* Teardown: destroy servers/IDL loops and free the cached default DB
 * address strings allocated by default_nb_db()/default_sb_db(). */
3528     unixctl_server_destroy(unixctl);
3529     ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
3530     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
3533     free(default_nb_db_);
3534     free(default_sb_db_);
/* unixctl handler for "exit": flags the main loop to terminate by setting
 * the bool that 'exiting_' points to (the assignment line is elided from
 * this view), then acks the appctl request with an empty reply. */
3539 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
3540                 const char *argv[] OVS_UNUSED, void *exiting_)
3542     bool *exiting = exiting_;
3545     unixctl_command_reply(conn, NULL);