ovn-northd: Support pinging logical router ports.
[cascardo/ovs.git] / ovn / northd / ovn-northd.c
1 /*
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use this file except in compliance with the License.
4  * You may obtain a copy of the License at:
5  *
6  *     http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
33 #include "smap.h"
34 #include "stream.h"
35 #include "stream-ssl.h"
36 #include "unixctl.h"
37 #include "util.h"
38 #include "uuid.h"
39 #include "openvswitch/vlog.h"
40
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
42
43 static unixctl_cb_func ovn_northd_exit;
44
/* Handles shared by the code that synchronizes the two OVN databases. */
struct northd_context {
    /* IDL connections to the northbound and southbound databases. */
    struct ovsdb_idl *ovnnb_idl;
    struct ovsdb_idl *ovnsb_idl;

    /* Transactions against the northbound and southbound databases. */
    struct ovsdb_idl_txn *ovnnb_txn;
    struct ovsdb_idl_txn *ovnsb_txn;
};
51
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
54
55 static const char *default_db(void);
56 \f
57 /* Pipeline stages. */
58
/* The two pipelines in an OVN logical flow table.
 *
 * The numeric values matter: OVN_STAGE_BUILD() stores the pipeline in a
 * single bit of an "enum ovn_stage". */
enum ovn_pipeline {
    P_IN = 0,                   /* Ingress pipeline. */
    P_OUT = 1                   /* Egress pipeline. */
};
64
/* The two purposes for which ovn-northd uses OVN logical datapaths. */
enum ovn_datapath_type {
    DP_SWITCH = 0,              /* OVN logical switch. */
    DP_ROUTER = 1               /* OVN logical router. */
};
70
/* Returns an "enum ovn_stage" built from the arguments: TABLE occupies the
 * low 8 bits, PIPELINE bit 8, and DP_TYPE the bits from 9 upward.
 *
 * (It's better to use ovn_stage_build() for type-safety reasons, but inline
 * functions can't be used in enums or switch cases.) */
#define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
    ((TABLE) | ((PIPELINE) << 8) | ((DP_TYPE) << 9))
77
78 /* A stage within an OVN logical switch or router.
79  *
80  * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81  * or router, whether the stage is part of the ingress or egress pipeline, and
82  * the table within that pipeline.  The first three components are combined to
83  * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84  * S_ROUTER_OUT_DELIVERY. */
85 enum ovn_stage {
86 #define PIPELINE_STAGES                                               \
87     /* Logical switch ingress stages. */                              \
88     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC,    0, "ls_in_port_sec")     \
89     PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,     1, "ls_in_pre_acl")      \
90     PIPELINE_STAGE(SWITCH, IN,  ACL,         2, "ls_in_acl")          \
91     PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,     3, "ls_in_l2_lkup")      \
92                                                                       \
93     /* Logical switch egress stages. */                               \
94     PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,     0, "ls_out_pre_acl")     \
95     PIPELINE_STAGE(SWITCH, OUT, ACL,         1, "ls_out_acl")         \
96     PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,    2, "ls_out_port_sec")    \
97                                                                       \
98     /* Logical router ingress stages. */                              \
99     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
100     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
101     PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "lr_in_ip_routing")   \
102     PIPELINE_STAGE(ROUTER, IN,  ARP,         3, "lr_in_arp")          \
103                                                                       \
104     /* Logical router egress stages. */                               \
105     PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "lr_out_delivery")
106
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
108     S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
109         = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
110     PIPELINE_STAGES
111 #undef PIPELINE_STAGE
112 };
113
/* Due to various hard-coded priorities needed to implement ACLs, the
 * northbound database supports a smaller range of ACL priorities than
 * are available to logical flows.  This value is added to an ACL
 * priority to determine the ACL's logical flow priority. */
118 #define OVN_ACL_PRI_OFFSET 1000
119
120 /* Returns an "enum ovn_stage" built from the arguments. */
121 static enum ovn_stage
122 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
123                 uint8_t table)
124 {
125     return OVN_STAGE_BUILD(dp_type, pipeline, table);
126 }
127
128 /* Returns the pipeline to which 'stage' belongs. */
129 static enum ovn_pipeline
130 ovn_stage_get_pipeline(enum ovn_stage stage)
131 {
132     return (stage >> 8) & 1;
133 }
134
135 /* Returns the table to which 'stage' belongs. */
136 static uint8_t
137 ovn_stage_get_table(enum ovn_stage stage)
138 {
139     return stage & 0xff;
140 }
141
142 /* Returns a string name for 'stage'. */
143 static const char *
144 ovn_stage_to_str(enum ovn_stage stage)
145 {
146     switch (stage) {
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
148         case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
149     PIPELINE_STAGES
150 #undef PIPELINE_STAGE
151         default: return "<unknown>";
152     }
153 }
154 \f
155 static void
156 usage(void)
157 {
158     printf("\
159 %s: OVN northbound management daemon\n\
160 usage: %s [OPTIONS]\n\
161 \n\
162 Options:\n\
163   --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
164                             (default: %s)\n\
165   --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
166                             (default: %s)\n\
167   -h, --help                display this help message\n\
168   -o, --options             list available options\n\
169   -V, --version             display version information\n\
170 ", program_name, program_name, default_db(), default_db());
171     daemon_usage();
172     vlog_usage();
173     stream_usage("database", true, true, false);
174 }
175 \f
176 struct tnlid_node {
177     struct hmap_node hmap_node;
178     uint32_t tnlid;
179 };
180
181 static void
182 destroy_tnlids(struct hmap *tnlids)
183 {
184     struct tnlid_node *node, *next;
185     HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
186         hmap_remove(tnlids, &node->hmap_node);
187         free(node);
188     }
189     hmap_destroy(tnlids);
190 }
191
192 static void
193 add_tnlid(struct hmap *set, uint32_t tnlid)
194 {
195     struct tnlid_node *node = xmalloc(sizeof *node);
196     hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
197     node->tnlid = tnlid;
198 }
199
200 static bool
201 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
202 {
203     const struct tnlid_node *node;
204     HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
205         if (node->tnlid == tnlid) {
206             return true;
207         }
208     }
209     return false;
210 }
211
212 static uint32_t
213 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
214                uint32_t *hint)
215 {
216     for (uint32_t tnlid = *hint + 1; tnlid != *hint;
217          tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
218         if (!tnlid_in_use(set, tnlid)) {
219             add_tnlid(set, tnlid);
220             *hint = tnlid;
221             return tnlid;
222         }
223     }
224
225     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
226     VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
227     return 0;
228 }
229 \f
230 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
231  * sb->external_ids:logical-switch. */
232 struct ovn_datapath {
233     struct hmap_node key_node;  /* Index on 'key'. */
234     struct uuid key;            /* (nbs/nbr)->header_.uuid. */
235
236     const struct nbrec_logical_switch *nbs;  /* May be NULL. */
237     const struct nbrec_logical_router *nbr;  /* May be NULL. */
238     const struct sbrec_datapath_binding *sb; /* May be NULL. */
239
240     struct ovs_list list;       /* In list of similar records. */
241
242     /* Logical router data (digested from nbr). */
243     ovs_be32 gateway;
244
245     /* Logical switch data. */
246     struct ovn_port **router_ports;
247     size_t n_router_ports;
248
249     struct hmap port_tnlids;
250     uint32_t port_key_hint;
251
252     bool has_unknown;
253 };
254
255 static struct ovn_datapath *
256 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
257                     const struct nbrec_logical_switch *nbs,
258                     const struct nbrec_logical_router *nbr,
259                     const struct sbrec_datapath_binding *sb)
260 {
261     struct ovn_datapath *od = xzalloc(sizeof *od);
262     od->key = *key;
263     od->sb = sb;
264     od->nbs = nbs;
265     od->nbr = nbr;
266     hmap_init(&od->port_tnlids);
267     od->port_key_hint = 0;
268     hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
269     return od;
270 }
271
272 static void
273 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
274 {
275     if (od) {
276         /* Don't remove od->list.  It is used within build_datapaths() as a
277          * private list and once we've exited that function it is not safe to
278          * use it. */
279         hmap_remove(datapaths, &od->key_node);
280         destroy_tnlids(&od->port_tnlids);
281         free(od->router_ports);
282         free(od);
283     }
284 }
285
286 static struct ovn_datapath *
287 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
288 {
289     struct ovn_datapath *od;
290
291     HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
292         if (uuid_equals(uuid, &od->key)) {
293             return od;
294         }
295     }
296     return NULL;
297 }
298
299 static struct ovn_datapath *
300 ovn_datapath_from_sbrec(struct hmap *datapaths,
301                         const struct sbrec_datapath_binding *sb)
302 {
303     struct uuid key;
304
305     if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
306         !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
307         return NULL;
308     }
309     return ovn_datapath_find(datapaths, &key);
310 }
311
312 static void
313 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
314                struct ovs_list *sb_only, struct ovs_list *nb_only,
315                struct ovs_list *both)
316 {
317     hmap_init(datapaths);
318     list_init(sb_only);
319     list_init(nb_only);
320     list_init(both);
321
322     const struct sbrec_datapath_binding *sb, *sb_next;
323     SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
324         struct uuid key;
325         if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326             !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327             ovsdb_idl_txn_add_comment(
328                 ctx->ovnsb_txn,
329                 "deleting Datapath_Binding "UUID_FMT" that lacks "
330                 "external-ids:logical-switch and "
331                 "external-ids:logical-router",
332                 UUID_ARGS(&sb->header_.uuid));
333             sbrec_datapath_binding_delete(sb);
334             continue;
335         }
336
337         if (ovn_datapath_find(datapaths, &key)) {
338             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
339             VLOG_INFO_RL(
340                 &rl, "deleting Datapath_Binding "UUID_FMT" with "
341                 "duplicate external-ids:logical-switch/router "UUID_FMT,
342                 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
343             sbrec_datapath_binding_delete(sb);
344             continue;
345         }
346
347         struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
348                                                       NULL, NULL, sb);
349         list_push_back(sb_only, &od->list);
350     }
351
352     const struct nbrec_logical_switch *nbs;
353     NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
354         struct ovn_datapath *od = ovn_datapath_find(datapaths,
355                                                     &nbs->header_.uuid);
356         if (od) {
357             od->nbs = nbs;
358             list_remove(&od->list);
359             list_push_back(both, &od->list);
360         } else {
361             od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
362                                      nbs, NULL, NULL);
363             list_push_back(nb_only, &od->list);
364         }
365     }
366
367     const struct nbrec_logical_router *nbr;
368     NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
369         struct ovn_datapath *od = ovn_datapath_find(datapaths,
370                                                     &nbr->header_.uuid);
371         if (od) {
372             if (!od->nbs) {
373                 od->nbr = nbr;
374                 list_remove(&od->list);
375                 list_push_back(both, &od->list);
376             } else {
377                 /* Can't happen! */
378                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
379                 VLOG_WARN_RL(&rl,
380                              "duplicate UUID "UUID_FMT" in OVN_Northbound",
381                              UUID_ARGS(&nbr->header_.uuid));
382                 continue;
383             }
384         } else {
385             od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
386                                      NULL, nbr, NULL);
387             list_push_back(nb_only, &od->list);
388         }
389
390         od->gateway = 0;
391         if (nbr->default_gw) {
392             ovs_be32 ip, mask;
393             char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
394             if (error || !ip || mask != OVS_BE32_MAX) {
395                 static struct vlog_rate_limit rl
396                     = VLOG_RATE_LIMIT_INIT(5, 1);
397                 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
398                 free(error);
399             } else {
400                 od->gateway = ip;
401             }
402         }
403     }
404 }
405
/* Allocates and returns a datapath tunnel key in the range 1...2**24-1 that
 * is not in 'dp_tnlids', adding it to 'dp_tnlids'.  Returns 0 if
 * allocate_tnlid() finds none available.  The search hint persists across
 * calls. */
static uint32_t
ovn_datapath_allocate_key(struct hmap *dp_tnlids)
{
    static uint32_t last_key;
    const uint32_t max_key = (1u << 24) - 1;

    return allocate_tnlid(dp_tnlids, "datapath", max_key, &last_key);
}
412
413 static void
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
415 {
416     struct ovs_list sb_only, nb_only, both;
417
418     join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
419
420     if (!list_is_empty(&nb_only)) {
421         /* First index the in-use datapath tunnel IDs. */
422         struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423         struct ovn_datapath *od;
424         LIST_FOR_EACH (od, list, &both) {
425             add_tnlid(&dp_tnlids, od->sb->tunnel_key);
426         }
427
428         /* Add southbound record for each unmatched northbound record. */
429         LIST_FOR_EACH (od, list, &nb_only) {
430             uint16_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
431             if (!tunnel_key) {
432                 break;
433             }
434
435             od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
436
437             char uuid_s[UUID_LEN + 1];
438             sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439             const char *key = od->nbs ? "logical-switch" : "logical-router";
440             const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441             sbrec_datapath_binding_set_external_ids(od->sb, &id);
442
443             sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
444         }
445         destroy_tnlids(&dp_tnlids);
446     }
447
448     /* Delete southbound records without northbound matches. */
449     struct ovn_datapath *od, *next;
450     LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451         list_remove(&od->list);
452         sbrec_datapath_binding_delete(od->sb);
453         ovn_datapath_destroy(datapaths, od);
454     }
455 }
456 \f
457 struct ovn_port {
458     struct hmap_node key_node;  /* Index on 'key'. */
459     char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
460     char *json_key;             /* 'key', quoted for use in JSON. */
461
462     const struct nbrec_logical_port *nbs;        /* May be NULL. */
463     const struct nbrec_logical_router_port *nbr; /* May be NULL. */
464     const struct sbrec_port_binding *sb;         /* May be NULL. */
465
466     /* Logical router port data. */
467     ovs_be32 ip, mask;          /* 192.168.10.123/24. */
468     ovs_be32 network;           /* 192.168.10.0. */
469     ovs_be32 bcast;             /* 192.168.10.255. */
470     struct eth_addr mac;
471     struct ovn_port *peer;
472
473     struct ovn_datapath *od;
474
475     struct ovs_list list;       /* In list of similar records. */
476 };
477
478 static struct ovn_port *
479 ovn_port_create(struct hmap *ports, const char *key,
480                 const struct nbrec_logical_port *nbs,
481                 const struct nbrec_logical_router_port *nbr,
482                 const struct sbrec_port_binding *sb)
483 {
484     struct ovn_port *op = xzalloc(sizeof *op);
485
486     struct ds json_key = DS_EMPTY_INITIALIZER;
487     json_string_escape(key, &json_key);
488     op->json_key = ds_steal_cstr(&json_key);
489
490     op->key = xstrdup(key);
491     op->sb = sb;
492     op->nbs = nbs;
493     op->nbr = nbr;
494     hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
495     return op;
496 }
497
498 static void
499 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
500 {
501     if (port) {
502         /* Don't remove port->list.  It is used within build_ports() as a
503          * private list and once we've exited that function it is not safe to
504          * use it. */
505         hmap_remove(ports, &port->key_node);
506         free(port->json_key);
507         free(port->key);
508         free(port);
509     }
510 }
511
512 static struct ovn_port *
513 ovn_port_find(struct hmap *ports, const char *name)
514 {
515     struct ovn_port *op;
516
517     HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
518         if (!strcmp(op->key, name)) {
519             return op;
520         }
521     }
522     return NULL;
523 }
524
525 static uint32_t
526 ovn_port_allocate_key(struct ovn_datapath *od)
527 {
528     return allocate_tnlid(&od->port_tnlids, "port",
529                           (1u << 15) - 1, &od->port_key_hint);
530 }
531
532 static void
533 join_logical_ports(struct northd_context *ctx,
534                    struct hmap *datapaths, struct hmap *ports,
535                    struct ovs_list *sb_only, struct ovs_list *nb_only,
536                    struct ovs_list *both)
537 {
538     hmap_init(ports);
539     list_init(sb_only);
540     list_init(nb_only);
541     list_init(both);
542
543     const struct sbrec_port_binding *sb;
544     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545         struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
546                                               NULL, NULL, sb);
547         list_push_back(sb_only, &op->list);
548     }
549
550     struct ovn_datapath *od;
551     HMAP_FOR_EACH (od, key_node, datapaths) {
552         if (od->nbs) {
553             for (size_t i = 0; i < od->nbs->n_ports; i++) {
554                 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555                 struct ovn_port *op = ovn_port_find(ports, nbs->name);
556                 if (op) {
557                     if (op->nbs || op->nbr) {
558                         static struct vlog_rate_limit rl
559                             = VLOG_RATE_LIMIT_INIT(5, 1);
560                         VLOG_WARN_RL(&rl, "duplicate logical port %s",
561                                      nbs->name);
562                         continue;
563                     }
564                     op->nbs = nbs;
565                     list_remove(&op->list);
566                     list_push_back(both, &op->list);
567                 } else {
568                     op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569                     list_push_back(nb_only, &op->list);
570                 }
571
572                 op->od = od;
573             }
574         } else {
575             for (size_t i = 0; i < od->nbr->n_ports; i++) {
576                 const struct nbrec_logical_router_port *nbr
577                     = od->nbr->ports[i];
578
579                 struct eth_addr mac;
580                 if (!eth_addr_from_string(nbr->mac, &mac)) {
581                     static struct vlog_rate_limit rl
582                         = VLOG_RATE_LIMIT_INIT(5, 1);
583                     VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
584                     continue;
585                 }
586
587                 ovs_be32 ip, mask;
588                 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589                 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590                     static struct vlog_rate_limit rl
591                         = VLOG_RATE_LIMIT_INIT(5, 1);
592                     VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
593                     free(error);
594                     continue;
595                 }
596
597                 char name[UUID_LEN + 1];
598                 snprintf(name, sizeof name, UUID_FMT,
599                          UUID_ARGS(&nbr->header_.uuid));
600                 struct ovn_port *op = ovn_port_find(ports, name);
601                 if (op) {
602                     if (op->nbs || op->nbr) {
603                         static struct vlog_rate_limit rl
604                             = VLOG_RATE_LIMIT_INIT(5, 1);
605                         VLOG_WARN_RL(&rl, "duplicate logical router port %s",
606                                      name);
607                         continue;
608                     }
609                     op->nbr = nbr;
610                     list_remove(&op->list);
611                     list_push_back(both, &op->list);
612                 } else {
613                     op = ovn_port_create(ports, name, NULL, nbr, NULL);
614                     list_push_back(nb_only, &op->list);
615                 }
616
617                 op->ip = ip;
618                 op->mask = mask;
619                 op->network = ip & mask;
620                 op->bcast = ip | ~mask;
621                 op->mac = mac;
622
623                 op->od = od;
624             }
625         }
626     }
627
628     /* Connect logical router ports, and logical switch ports of type "router",
629      * to their peers. */
630     struct ovn_port *op;
631     HMAP_FOR_EACH (op, key_node, ports) {
632         if (op->nbs && !strcmp(op->nbs->type, "router")) {
633             const char *peer_name = smap_get(&op->nbs->options, "router-port");
634             if (!peer_name) {
635                 continue;
636             }
637
638             struct ovn_port *peer = ovn_port_find(ports, peer_name);
639             if (!peer || !peer->nbr) {
640                 continue;
641             }
642
643             peer->peer = op;
644             op->peer = peer;
645             op->od->router_ports = xrealloc(
646                 op->od->router_ports,
647                 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
648             op->od->router_ports[op->od->n_router_ports++] = op;
649         } else if (op->nbr && op->nbr->peer) {
650             char peer_name[UUID_LEN + 1];
651             snprintf(peer_name, sizeof peer_name, UUID_FMT,
652                      UUID_ARGS(&op->nbr->peer->header_.uuid));
653             op->peer = ovn_port_find(ports, peer_name);
654         }
655     }
656 }
657
658 static void
659 ovn_port_update_sbrec(const struct ovn_port *op)
660 {
661     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
662     if (op->nbr) {
663         sbrec_port_binding_set_type(op->sb, "patch");
664
665         const char *peer = op->peer ? op->peer->key : "<error>";
666         const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
667         sbrec_port_binding_set_options(op->sb, &ids);
668
669         sbrec_port_binding_set_parent_port(op->sb, NULL);
670         sbrec_port_binding_set_tag(op->sb, NULL, 0);
671         sbrec_port_binding_set_mac(op->sb, NULL, 0);
672     } else {
673         if (strcmp(op->nbs->type, "router")) {
674             sbrec_port_binding_set_type(op->sb, op->nbs->type);
675             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
676         } else {
677             sbrec_port_binding_set_type(op->sb, "patch");
678
679             const char *router_port = smap_get(&op->nbs->options,
680                                                "router-port");
681             if (!router_port) {
682                 router_port = "<error>";
683             }
684             const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
685             sbrec_port_binding_set_options(op->sb, &ids);
686         }
687         sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
688         sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
689         sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
690                                    op->nbs->n_addresses);
691     }
692 }
693
694 static void
695 build_ports(struct northd_context *ctx, struct hmap *datapaths,
696             struct hmap *ports)
697 {
698     struct ovs_list sb_only, nb_only, both;
699
700     join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
701
702     /* For logical ports that are in both databases, update the southbound
703      * record based on northbound data.  Also index the in-use tunnel_keys. */
704     struct ovn_port *op, *next;
705     LIST_FOR_EACH_SAFE (op, next, list, &both) {
706         ovn_port_update_sbrec(op);
707
708         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
709         if (op->sb->tunnel_key > op->od->port_key_hint) {
710             op->od->port_key_hint = op->sb->tunnel_key;
711         }
712     }
713
714     /* Add southbound record for each unmatched northbound record. */
715     LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
716         uint16_t tunnel_key = ovn_port_allocate_key(op->od);
717         if (!tunnel_key) {
718             continue;
719         }
720
721         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
722         ovn_port_update_sbrec(op);
723
724         sbrec_port_binding_set_logical_port(op->sb, op->key);
725         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
726     }
727
728     /* Delete southbound records without northbound matches. */
729     LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
730         list_remove(&op->list);
731         sbrec_port_binding_delete(op->sb);
732         ovn_port_destroy(ports, op);
733     }
734 }
735 \f
/* Range of tunnel keys used for multicast groups. */
#define OVN_MIN_MULTICAST 32768
#define OVN_MAX_MULTICAST 65535

/* A named multicast group and the tunnel key that identifies it. */
struct multicast_group {
    const char *name;
    uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
};

#define MC_FLOOD "_MC_flood"
static const struct multicast_group mc_flood = {
    .name = MC_FLOOD,
    .key = 65535,
};

#define MC_UNKNOWN "_MC_unknown"
static const struct multicast_group mc_unknown = {
    .name = MC_UNKNOWN,
    .key = 65534,
};

/* Returns true if 'a' and 'b' have the same name and the same key. */
static bool
multicast_group_equal(const struct multicast_group *a,
                      const struct multicast_group *b)
{
    if (a->key != b->key) {
        return false;
    }
    return strcmp(a->name, b->name) == 0;
}
756
757 /* Multicast group entry. */
758 struct ovn_multicast {
759     struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
760     struct ovn_datapath *datapath;
761     const struct multicast_group *group;
762
763     struct ovn_port **ports;
764     size_t n_ports, allocated_ports;
765 };
766
767 static uint32_t
768 ovn_multicast_hash(const struct ovn_datapath *datapath,
769                    const struct multicast_group *group)
770 {
771     return hash_pointer(datapath, group->key);
772 }
773
774 static struct ovn_multicast *
775 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
776                    const struct multicast_group *group)
777 {
778     struct ovn_multicast *mc;
779
780     HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
781                              ovn_multicast_hash(datapath, group), mcgroups) {
782         if (mc->datapath == datapath
783             && multicast_group_equal(mc->group, group)) {
784             return mc;
785         }
786     }
787     return NULL;
788 }
789
790 static void
791 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
792                   struct ovn_port *port)
793 {
794     struct ovn_datapath *od = port->od;
795     struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
796     if (!mc) {
797         mc = xmalloc(sizeof *mc);
798         hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
799         mc->datapath = od;
800         mc->group = group;
801         mc->n_ports = 0;
802         mc->allocated_ports = 4;
803         mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
804     }
805     if (mc->n_ports >= mc->allocated_ports) {
806         mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
807                                sizeof *mc->ports);
808     }
809     mc->ports[mc->n_ports++] = port;
810 }
811
812 static void
813 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
814 {
815     if (mc) {
816         hmap_remove(mcgroups, &mc->hmap_node);
817         free(mc->ports);
818         free(mc);
819     }
820 }
821
822 static void
823 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
824                            const struct sbrec_multicast_group *sb)
825 {
826     struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
827     for (size_t i = 0; i < mc->n_ports; i++) {
828         ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
829     }
830     sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
831     free(ports);
832 }
833 \f
834 /* Logical flow generation.
835  *
836  * This code generates the Logical_Flow table in the southbound database, as a
837  * function of most of the northbound database.
838  */
839
840 struct ovn_lflow {
841     struct hmap_node hmap_node;
842
843     struct ovn_datapath *od;
844     enum ovn_stage stage;
845     uint16_t priority;
846     char *match;
847     char *actions;
848 };
849
850 static size_t
851 ovn_lflow_hash(const struct ovn_lflow *lflow)
852 {
853     size_t hash = uuid_hash(&lflow->od->key);
854     hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
855     hash = hash_string(lflow->match, hash);
856     return hash_string(lflow->actions, hash);
857 }
858
859 static bool
860 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
861 {
862     return (a->od == b->od
863             && a->stage == b->stage
864             && a->priority == b->priority
865             && !strcmp(a->match, b->match)
866             && !strcmp(a->actions, b->actions));
867 }
868
869 static void
870 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
871               enum ovn_stage stage, uint16_t priority,
872               char *match, char *actions)
873 {
874     lflow->od = od;
875     lflow->stage = stage;
876     lflow->priority = priority;
877     lflow->match = match;
878     lflow->actions = actions;
879 }
880
881 /* Adds a row with the specified contents to the Logical_Flow table. */
882 static void
883 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
884               enum ovn_stage stage, uint16_t priority,
885               const char *match, const char *actions)
886 {
887     struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
888     ovn_lflow_init(lflow, od, stage, priority,
889                    xstrdup(match), xstrdup(actions));
890     hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
891 }
892
893 static struct ovn_lflow *
894 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
895                enum ovn_stage stage, uint16_t priority,
896                const char *match, const char *actions)
897 {
898     struct ovn_lflow target;
899     ovn_lflow_init(&target, od, stage, priority,
900                    CONST_CAST(char *, match), CONST_CAST(char *, actions));
901
902     struct ovn_lflow *lflow;
903     HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
904                              lflows) {
905         if (ovn_lflow_equal(lflow, &target)) {
906             return lflow;
907         }
908     }
909     return NULL;
910 }
911
912 static void
913 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
914 {
915     if (lflow) {
916         hmap_remove(lflows, &lflow->hmap_node);
917         free(lflow->match);
918         free(lflow->actions);
919         free(lflow);
920     }
921 }
922
923 /* Appends port security constraints on L2 address field 'eth_addr_field'
924  * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
925  * 'n_port_security' elements, is the collection of port_security constraints
926  * from an OVN_NB Logical_Port row. */
927 static void
928 build_port_security(const char *eth_addr_field,
929                     char **port_security, size_t n_port_security,
930                     struct ds *match)
931 {
932     size_t base_len = match->length;
933     ds_put_format(match, " && %s == {", eth_addr_field);
934
935     size_t n = 0;
936     for (size_t i = 0; i < n_port_security; i++) {
937         struct eth_addr ea;
938
939         if (eth_addr_from_string(port_security[i], &ea)) {
940             ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
941             ds_put_char(match, ' ');
942             n++;
943         }
944     }
945     ds_chomp(match, ' ');
946     ds_put_cstr(match, "}");
947
948     if (!n) {
949         match->length = base_len;
950     }
951 }
952
953 static bool
954 lport_is_enabled(const struct nbrec_logical_port *lport)
955 {
956     return !lport->enabled || *lport->enabled;
957 }
958
959 static bool
960 has_stateful_acl(struct ovn_datapath *od)
961 {
962     for (size_t i = 0; i < od->nbs->n_acls; i++) {
963         struct nbrec_acl *acl = od->nbs->acls[i];
964         if (!strcmp(acl->action, "allow-related")) {
965             return true;
966         }
967     }
968
969     return false;
970 }
971
972 static void
973 build_acls(struct ovn_datapath *od, struct hmap *lflows)
974 {
975     bool has_stateful = has_stateful_acl(od);
976
977     /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
978      * allowed by default. */
979     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
980     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
981
982     /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
983      * default.  A related rule at priority 1 is added below if there
984      * are any stateful ACLs in this datapath. */
985     ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
986     ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
987
988     /* If there are any stateful ACL rules in this dapapath, we must
989      * send all IP packets through the conntrack action, which handles
990      * defragmentation, in order to match L4 headers. */
991     if (has_stateful) {
992         /* Ingress and Egress Pre-ACL Table (Priority 100).
993          *
994          * Regardless of whether the ACL is "from-lport" or "to-lport",
995          * we need rules in both the ingress and egress table, because
996          * the return traffic needs to be followed. */
997         ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
998         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
999
1000         /* Ingress and Egress ACL Table (Priority 1).
1001          *
1002          * By default, traffic is allowed.  This is partially handled by
1003          * the Priority 0 ACL flows added earlier, but we also need to
1004          * commit IP flows.  This is because, while the initiater's
1005          * direction may not have any stateful rules, the server's may
1006          * and then its return traffic would not have an associated
1007          * conntrack entry and would return "+invalid". */
1008         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1009                       "ct_commit; next;");
1010         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1011                       "ct_commit; next;");
1012
1013         /* Ingress and Egress ACL Table (Priority 65535).
1014          *
1015          * Always drop traffic that's in an invalid state.  This is
1016          * enforced at a higher priority than ACLs can be defined. */
1017         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1018                       "ct.inv", "drop;");
1019         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1020                       "ct.inv", "drop;");
1021
1022         /* Ingress and Egress ACL Table (Priority 65535).
1023          *
1024          * Always allow traffic that is established to a committed
1025          * conntrack entry.  This is enforced at a higher priority than
1026          * ACLs can be defined. */
1027         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1028                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1029                       "next;");
1030         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1031                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1032                       "next;");
1033
1034         /* Ingress and Egress ACL Table (Priority 65535).
1035          *
1036          * Always allow traffic that is related to an existing conntrack
1037          * entry.  This is enforced at a higher priority than ACLs can
1038          * be defined.
1039          *
1040          * NOTE: This does not support related data sessions (eg,
1041          * a dynamically negotiated FTP data channel), but will allow
1042          * related traffic such as an ICMP Port Unreachable through
1043          * that's generated from a non-listening UDP port.  */
1044         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1045                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1046                       "next;");
1047         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1048                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1049                       "next;");
1050     }
1051
1052     /* Ingress or Egress ACL Table (Various priorities). */
1053     for (size_t i = 0; i < od->nbs->n_acls; i++) {
1054         struct nbrec_acl *acl = od->nbs->acls[i];
1055         bool ingress = !strcmp(acl->direction, "from-lport") ? true :false;
1056         enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1057
1058         if (!strcmp(acl->action, "allow")) {
1059             /* If there are any stateful flows, we must even commit "allow"
1060              * actions.  This is because, while the initiater's
1061              * direction may not have any stateful rules, the server's
1062              * may and then its return traffic would not have an
1063              * associated conntrack entry and would return "+invalid". */
1064             const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1065             ovn_lflow_add(lflows, od, stage,
1066                           acl->priority + OVN_ACL_PRI_OFFSET,
1067                           acl->match, actions);
1068         } else if (!strcmp(acl->action, "allow-related")) {
1069             struct ds match = DS_EMPTY_INITIALIZER;
1070
1071             /* Commit the connection tracking entry, which allows all
1072              * other traffic related to this entry to flow due to the
1073              * 65535 priority flow defined earlier. */
1074             ds_put_format(&match, "ct.new && (%s)", acl->match);
1075             ovn_lflow_add(lflows, od, stage,
1076                           acl->priority + OVN_ACL_PRI_OFFSET,
1077                           ds_cstr(&match), "ct_commit; next;");
1078
1079             ds_destroy(&match);
1080         } else if (!strcmp(acl->action, "drop")) {
1081             ovn_lflow_add(lflows, od, stage,
1082                           acl->priority + OVN_ACL_PRI_OFFSET,
1083                           acl->match, "drop;");
1084         } else if (!strcmp(acl->action, "reject")) {
1085             /* xxx Need to support "reject". */
1086             VLOG_INFO("reject is not a supported action");
1087             ovn_lflow_add(lflows, od, stage,
1088                           acl->priority + OVN_ACL_PRI_OFFSET,
1089                           acl->match, "drop;");
1090         }
1091     }
1092 }
1093
1094 static void
1095 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1096                     struct hmap *lflows, struct hmap *mcgroups)
1097 {
1098     /* This flow table structure is documented in ovn-northd(8), so please
1099      * update ovn-northd.8.xml if you change anything. */
1100
1101     /* Build pre-ACL and ACL tables for both ingress and egress.
1102      * Ingress tables 1 and 2.  Egress tables 0 and 1. */
1103     struct ovn_datapath *od;
1104     HMAP_FOR_EACH (od, key_node, datapaths) {
1105         if (!od->nbs) {
1106             continue;
1107         }
1108
1109         build_acls(od, lflows);
1110     }
1111
1112     /* Logical switch ingress table 0: Admission control framework (priority
1113      * 100). */
1114     HMAP_FOR_EACH (od, key_node, datapaths) {
1115         if (!od->nbs) {
1116             continue;
1117         }
1118
1119         /* Logical VLANs not supported. */
1120         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1121                       "drop;");
1122
1123         /* Broadcast/multicast source address is invalid. */
1124         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1125                       "drop;");
1126
1127         /* Port security flows have priority 50 (see below) and will continue
1128          * to the next table if packet source is acceptable. */
1129     }
1130
1131     /* Logical switch ingress table 0: Ingress port security (priority 50). */
1132     struct ovn_port *op;
1133     HMAP_FOR_EACH (op, key_node, ports) {
1134         if (!op->nbs) {
1135             continue;
1136         }
1137
1138         if (!lport_is_enabled(op->nbs)) {
1139             /* Drop packets from disabled logical ports (since logical flow
1140              * tables are default-drop). */
1141             continue;
1142         }
1143
1144         struct ds match = DS_EMPTY_INITIALIZER;
1145         ds_put_format(&match, "inport == %s", op->json_key);
1146         build_port_security("eth.src",
1147                             op->nbs->port_security, op->nbs->n_port_security,
1148                             &match);
1149         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1150                       ds_cstr(&match), "next;");
1151         ds_destroy(&match);
1152     }
1153
1154     /* Ingress table 3: Destination lookup, broadcast and multicast handling
1155      * (priority 100). */
1156     HMAP_FOR_EACH (op, key_node, ports) {
1157         if (!op->nbs) {
1158             continue;
1159         }
1160
1161         if (lport_is_enabled(op->nbs)) {
1162             ovn_multicast_add(mcgroups, &mc_flood, op);
1163         }
1164     }
1165     HMAP_FOR_EACH (od, key_node, datapaths) {
1166         if (!od->nbs) {
1167             continue;
1168         }
1169
1170         ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1171                       "outport = \""MC_FLOOD"\"; output;");
1172     }
1173
1174     /* Ingress table 3: Destination lookup, unicast handling (priority 50), */
1175     HMAP_FOR_EACH (op, key_node, ports) {
1176         if (!op->nbs) {
1177             continue;
1178         }
1179
1180         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1181             struct eth_addr mac;
1182
1183             if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1184                 struct ds match, actions;
1185
1186                 ds_init(&match);
1187                 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1188                               ETH_ADDR_ARGS(mac));
1189
1190                 ds_init(&actions);
1191                 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1192                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1193                               ds_cstr(&match), ds_cstr(&actions));
1194                 ds_destroy(&actions);
1195                 ds_destroy(&match);
1196             } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1197                 if (lport_is_enabled(op->nbs)) {
1198                     ovn_multicast_add(mcgroups, &mc_unknown, op);
1199                     op->od->has_unknown = true;
1200                 }
1201             } else {
1202                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1203
1204                 VLOG_INFO_RL(&rl,
1205                              "%s: invalid syntax '%s' in addresses column",
1206                              op->nbs->name, op->nbs->addresses[i]);
1207             }
1208         }
1209     }
1210
1211     /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1212     HMAP_FOR_EACH (od, key_node, datapaths) {
1213         if (!od->nbs) {
1214             continue;
1215         }
1216
1217         if (od->has_unknown) {
1218             ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1219                           "outport = \""MC_UNKNOWN"\"; output;");
1220         }
1221     }
1222
1223     /* Egress table 2: Egress port security multicast/broadcast (priority
1224      * 100). */
1225     HMAP_FOR_EACH (od, key_node, datapaths) {
1226         if (!od->nbs) {
1227             continue;
1228         }
1229
1230         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1231                       "output;");
1232     }
1233
1234     /* Egress table 2: Egress port security (priorities 50 and 150).
1235      *
1236      * Priority 50 rules implement port security for enabled logical port.
1237      *
1238      * Priority 150 rules drop packets to disabled logical ports, so that they
1239      * don't even receive multicast or broadcast packets. */
1240     HMAP_FOR_EACH (op, key_node, ports) {
1241         if (!op->nbs) {
1242             continue;
1243         }
1244
1245         struct ds match = DS_EMPTY_INITIALIZER;
1246         ds_put_format(&match, "outport == %s", op->json_key);
1247         if (lport_is_enabled(op->nbs)) {
1248             build_port_security("eth.dst", op->nbs->port_security,
1249                                 op->nbs->n_port_security, &match);
1250             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1251                           ds_cstr(&match), "output;");
1252         } else {
1253             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1254                           ds_cstr(&match), "drop;");
1255         }
1256
1257         ds_destroy(&match);
1258     }
1259 }
1260
1261 static bool
1262 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1263 {
1264     return !lrport->enabled || *lrport->enabled;
1265 }
1266
1267 static void
1268 add_route(struct hmap *lflows, struct ovn_datapath *od,
1269           ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1270 {
1271     char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1272                             IP_ARGS(network), IP_ARGS(mask));
1273
1274     struct ds actions = DS_EMPTY_INITIALIZER;
1275     ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1276     if (gateway) {
1277         ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1278     } else {
1279         ds_put_cstr(&actions, "ip4.dst");
1280     }
1281     ds_put_cstr(&actions, "; next;");
1282
1283     /* The priority here is calculated to implement longest-prefix-match
1284      * routing. */
1285     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1286                   count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1287     ds_destroy(&actions);
1288     free(match);
1289 }
1290
1291 static void
1292 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1293                     struct hmap *lflows)
1294 {
1295     /* This flow table structure is documented in ovn-northd(8), so please
1296      * update ovn-northd.8.xml if you change anything. */
1297
1298     /* Logical router ingress table 0: Admission control framework. */
1299     struct ovn_datapath *od;
1300     HMAP_FOR_EACH (od, key_node, datapaths) {
1301         if (!od->nbr) {
1302             continue;
1303         }
1304
1305         /* Logical VLANs not supported.
1306          * Broadcast/multicast source address is invalid. */
1307         ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1308                       "vlan.present || eth.src[40]", "drop;");
1309     }
1310
1311     /* Logical router ingress table 0: match (priority 50). */
1312     struct ovn_port *op;
1313     HMAP_FOR_EACH (op, key_node, ports) {
1314         if (!op->nbr) {
1315             continue;
1316         }
1317
1318         if (!lrport_is_enabled(op->nbr)) {
1319             /* Drop packets from disabled logical ports (since logical flow
1320              * tables are default-drop). */
1321             continue;
1322         }
1323
1324         char *match = xasprintf(
1325             "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1326             ETH_ADDR_ARGS(op->mac), op->json_key);
1327         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1328                       match, "next;");
1329         free(match);
1330     }
1331
1332     /* Logical router ingress table 1: IP Input. */
1333     HMAP_FOR_EACH (od, key_node, datapaths) {
1334         if (!od->nbr) {
1335             continue;
1336         }
1337
1338         /* L3 admission control: drop multicast and broadcast source, localhost
1339          * source or destination, and zero network source or destination
1340          * (priority 100). */
1341         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1342                       "ip4.mcast || "
1343                       "ip4.src == 255.255.255.255 || "
1344                       "ip4.src == 127.0.0.0/8 || "
1345                       "ip4.dst == 127.0.0.0/8 || "
1346                       "ip4.src == 0.0.0.0/8 || "
1347                       "ip4.dst == 0.0.0.0/8",
1348                       "drop;");
1349
1350         /* Drop Ethernet local broadcast.  By definition this traffic should
1351          * not be forwarded.*/
1352         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1353                       "eth.bcast", "drop;");
1354
1355         /* Drop IP multicast. */
1356         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1357                       "ip4.mcast", "drop;");
1358
1359         /* TTL discard.
1360          *
1361          * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1362         char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1363         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1364         free(match);
1365
1366         /* Pass other traffic not already handled to the next table for
1367          * routing. */
1368         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1369     }
1370
1371     HMAP_FOR_EACH (op, key_node, ports) {
1372         if (!op->nbr) {
1373             continue;
1374         }
1375
1376         /* L3 admission control: drop packets that originate from an IP address
1377          * owned by the router or a broadcast address known to the router
1378          * (priority 100). */
1379         char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1380                                 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1381         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1382                       match, "drop;");
1383         free(match);
1384
1385         /* ICMP echo reply.  These flows reply to ICMP echo requests
1386          * received for the router's IP address. */
1387         match = xasprintf(
1388             "inport == %s && (ip4.dst == "IP_FMT" || ip4.dst == "IP_FMT") && "
1389             "icmp4.type == 8 && icmp4.code == 0",
1390             op->json_key, IP_ARGS(op->ip), IP_ARGS(op->bcast));
1391         char *actions = xasprintf(
1392             "ip4.dst = ip4.src; "
1393             "ip4.src = "IP_FMT"; "
1394             "ip.ttl = 255; "
1395             "icmp4.type = 0; "
1396             "inport = \"\"; /* Allow sending out inport. */ "
1397             "next; ",
1398             IP_ARGS(op->ip));
1399         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1400                       match, actions);
1401         free(match);
1402         free(actions);
1403
1404         /* ARP reply.  These flows reply to ARP requests for the router's own
1405          * IP address. */
1406         match = xasprintf(
1407             "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1408             op->json_key, IP_ARGS(op->ip));
1409         actions = xasprintf(
1410             "eth.dst = eth.src; "
1411             "eth.src = "ETH_ADDR_FMT"; "
1412             "arp.op = 2; /* ARP reply */ "
1413             "arp.tha = arp.sha; "
1414             "arp.sha = "ETH_ADDR_FMT"; "
1415             "arp.tpa = arp.spa; "
1416             "arp.spa = "IP_FMT"; "
1417             "outport = %s; "
1418             "inport = \"\"; /* Allow sending out inport. */ "
1419             "output;",
1420             ETH_ADDR_ARGS(op->mac),
1421             ETH_ADDR_ARGS(op->mac),
1422             IP_ARGS(op->ip),
1423             op->json_key);
1424         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1425                       match, actions);
1426         free(match);
1427         free(actions);
1428
1429         /* Drop IP traffic to this router. */
1430         match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1431         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1432                       match, "drop;");
1433         free(match);
1434     }
1435
1436     /* Logical router ingress table 2: IP Routing.
1437      *
1438      * A packet that arrives at this table is an IP packet that should be
1439      * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1440      * IP address (leaving ip4.dst, the packet’s final destination, unchanged)
1441      * and advances to the next table for ARP resolution. */
1442     HMAP_FOR_EACH (op, key_node, ports) {
1443         if (!op->nbr) {
1444             continue;
1445         }
1446
1447         add_route(lflows, op->od, op->network, op->mask, 0);
1448     }
1449     HMAP_FOR_EACH (od, key_node, datapaths) {
1450         if (!od->nbr) {
1451             continue;
1452         }
1453
1454         if (od->gateway) {
1455             add_route(lflows, od, 0, 0, od->gateway);
1456         }
1457     }
1458     /* XXX destination unreachable */
1459
1460     /* Local router ingress table 3: ARP Resolution.
1461      *
1462      * Any packet that reaches this table is an IP packet whose next-hop IP
1463      * address is in reg0. (ip4.dst is the final destination.) This table
1464      * resolves the IP address in reg0 into an output port in outport and an
1465      * Ethernet address in eth.dst. */
1466     HMAP_FOR_EACH (op, key_node, ports) {
1467         if (op->nbr) {
1468             /* XXX ARP for neighboring router */
1469         } else if (op->od->n_router_ports) {
1470             for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1471                 struct eth_addr ea;
1472                 ovs_be32 ip;
1473
1474                 if (ovs_scan(op->nbs->addresses[i],
1475                              ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1476                              ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1477                     for (size_t j = 0; j < op->od->n_router_ports; j++) {
1478                         /* Get the Logical_Router_Port that the Logical_Port is
1479                          * connected to, as 'peer'. */
1480                         const char *peer_name = smap_get(
1481                             &op->od->router_ports[j]->nbs->options,
1482                             "router-port");
1483                         if (!peer_name) {
1484                             continue;
1485                         }
1486
1487                         struct ovn_port *peer
1488                             = ovn_port_find(ports, peer_name);
1489                         if (!peer || !peer->nbr) {
1490                             continue;
1491                         }
1492
1493                         /* Make sure that 'ip' is in 'peer''s network. */
1494                         if ((ip ^ peer->network) & peer->mask) {
1495                             continue;
1496                         }
1497
1498                         char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1499                         char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1500                                                   "eth.dst = "ETH_ADDR_FMT"; "
1501                                                   "outport = %s; "
1502                                                   "output;",
1503                                                   ETH_ADDR_ARGS(peer->mac),
1504                                                   ETH_ADDR_ARGS(ea),
1505                                                   peer->json_key);
1506                         ovn_lflow_add(lflows, peer->od,
1507                                       S_ROUTER_IN_ARP, 200, match, actions);
1508                         free(actions);
1509                         free(match);
1510                         break;
1511                     }
1512                 }
1513             }
1514         }
1515     }
1516
1517     /* Logical router egress table 0: Delivery (priority 100).
1518      *
1519      * Priority 100 rules deliver packets to enabled logical ports. */
1520     HMAP_FOR_EACH (op, key_node, ports) {
1521         if (!op->nbr) {
1522             continue;
1523         }
1524
1525         if (!lrport_is_enabled(op->nbr)) {
1526             /* Drop packets to disabled logical ports (since logical flow
1527              * tables are default-drop). */
1528             continue;
1529         }
1530
1531         char *match = xasprintf("outport == %s", op->json_key);
1532         ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1533                       match, "output;");
1534         free(match);
1535     }
1536 }
1537
1538 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1539  * constructing their contents based on the OVN_NB database. */
1540 static void
1541 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1542              struct hmap *ports)
1543 {
1544     struct hmap lflows = HMAP_INITIALIZER(&lflows);
1545     struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1546
1547     build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1548     build_lrouter_flows(datapaths, ports, &lflows);
1549
1550     /* Push changes to the Logical_Flow table to database. */
1551     const struct sbrec_logical_flow *sbflow, *next_sbflow;
1552     SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1553         struct ovn_datapath *od
1554             = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1555         if (!od) {
1556             sbrec_logical_flow_delete(sbflow);
1557             continue;
1558         }
1559
1560         enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1561         enum ovn_pipeline pipeline
1562             = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1563         struct ovn_lflow *lflow = ovn_lflow_find(
1564             &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1565             sbflow->priority, sbflow->match, sbflow->actions);
1566         if (lflow) {
1567             ovn_lflow_destroy(&lflows, lflow);
1568         } else {
1569             sbrec_logical_flow_delete(sbflow);
1570         }
1571     }
1572     struct ovn_lflow *lflow, *next_lflow;
1573     HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1574         enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1575         uint8_t table = ovn_stage_get_table(lflow->stage);
1576
1577         sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1578         sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1579         sbrec_logical_flow_set_pipeline(
1580             sbflow, pipeline == P_IN ? "ingress" : "egress");
1581         sbrec_logical_flow_set_table_id(sbflow, table);
1582         sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1583         sbrec_logical_flow_set_match(sbflow, lflow->match);
1584         sbrec_logical_flow_set_actions(sbflow, lflow->actions);
1585
1586         const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1587                                             ovn_stage_to_str(lflow->stage));
1588         sbrec_logical_flow_set_external_ids(sbflow, &ids);
1589
1590         ovn_lflow_destroy(&lflows, lflow);
1591     }
1592     hmap_destroy(&lflows);
1593
1594     /* Push changes to the Multicast_Group table to database. */
1595     const struct sbrec_multicast_group *sbmc, *next_sbmc;
1596     SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1597         struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1598                                                           sbmc->datapath);
1599         if (!od) {
1600             sbrec_multicast_group_delete(sbmc);
1601             continue;
1602         }
1603
1604         struct multicast_group group = { .name = sbmc->name,
1605                                          .key = sbmc->tunnel_key };
1606         struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1607         if (mc) {
1608             ovn_multicast_update_sbrec(mc, sbmc);
1609             ovn_multicast_destroy(&mcgroups, mc);
1610         } else {
1611             sbrec_multicast_group_delete(sbmc);
1612         }
1613     }
1614     struct ovn_multicast *mc, *next_mc;
1615     HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1616         sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1617         sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1618         sbrec_multicast_group_set_name(sbmc, mc->group->name);
1619         sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1620         ovn_multicast_update_sbrec(mc, sbmc);
1621         ovn_multicast_destroy(&mcgroups, mc);
1622     }
1623     hmap_destroy(&mcgroups);
1624 }
1625 \f
1626 static void
1627 ovnnb_db_changed(struct northd_context *ctx)
1628 {
1629     VLOG_DBG("ovn-nb db contents have changed.");
1630
1631     struct hmap datapaths, ports;
1632     build_datapaths(ctx, &datapaths);
1633     build_ports(ctx, &datapaths, &ports);
1634     build_lflows(ctx, &datapaths, &ports);
1635
1636     struct ovn_datapath *dp, *next_dp;
1637     HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1638         ovn_datapath_destroy(&datapaths, dp);
1639     }
1640     hmap_destroy(&datapaths);
1641
1642     struct ovn_port *port, *next_port;
1643     HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1644         ovn_port_destroy(&ports, port);
1645     }
1646     hmap_destroy(&ports);
1647 }
1648
1649 /*
1650  * The only change we get notified about is if the 'chassis' column of the
1651  * 'Port_Binding' table changes.  When this column is not empty, it means we
1652  * need to set the corresponding logical port as 'up' in the northbound DB.
1653  */
1654 static void
1655 ovnsb_db_changed(struct northd_context *ctx)
1656 {
1657     struct hmap lports_hmap;
1658     const struct sbrec_port_binding *sb;
1659     const struct nbrec_logical_port *nb;
1660
1661     struct lport_hash_node {
1662         struct hmap_node node;
1663         const struct nbrec_logical_port *nb;
1664     } *hash_node, *hash_node_next;
1665
1666     VLOG_DBG("Recalculating port up states for ovn-nb db.");
1667
1668     hmap_init(&lports_hmap);
1669
1670     NBREC_LOGICAL_PORT_FOR_EACH(nb, ctx->ovnnb_idl) {
1671         hash_node = xzalloc(sizeof *hash_node);
1672         hash_node->nb = nb;
1673         hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1674     }
1675
1676     SBREC_PORT_BINDING_FOR_EACH(sb, ctx->ovnsb_idl) {
1677         nb = NULL;
1678         HMAP_FOR_EACH_WITH_HASH(hash_node, node,
1679                                 hash_string(sb->logical_port, 0),
1680                                 &lports_hmap) {
1681             if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1682                 nb = hash_node->nb;
1683                 break;
1684             }
1685         }
1686
1687         if (!nb) {
1688             /* The logical port doesn't exist for this port binding.  This can
1689              * happen under normal circumstances when ovn-northd hasn't gotten
1690              * around to pruning the Port_Binding yet. */
1691             continue;
1692         }
1693
1694         if (sb->chassis && (!nb->up || !*nb->up)) {
1695             bool up = true;
1696             nbrec_logical_port_set_up(nb, &up, 1);
1697         } else if (!sb->chassis && (!nb->up || *nb->up)) {
1698             bool up = false;
1699             nbrec_logical_port_set_up(nb, &up, 1);
1700         }
1701     }
1702
1703     HMAP_FOR_EACH_SAFE(hash_node, hash_node_next, node, &lports_hmap) {
1704         hmap_remove(&lports_hmap, &hash_node->node);
1705         free(hash_node);
1706     }
1707     hmap_destroy(&lports_hmap);
1708 }
1709 \f
1710
/* Cached result of default_db(); NULL until the first call. */
static char *default_db_;

/* Returns the default database target: a "unix:" socket named db.sock in
 * the directory returned by ovs_rundir().  The string is formatted on the
 * first call and the cached copy is returned afterwards. */
static const char *
default_db(void)
{
    if (default_db_) {
        return default_db_;
    }

    default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
    return default_db_;
}
1721
1722 static void
1723 parse_options(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
1724 {
1725     enum {
1726         DAEMON_OPTION_ENUMS,
1727         VLOG_OPTION_ENUMS,
1728     };
1729     static const struct option long_options[] = {
1730         {"ovnsb-db", required_argument, NULL, 'd'},
1731         {"ovnnb-db", required_argument, NULL, 'D'},
1732         {"help", no_argument, NULL, 'h'},
1733         {"options", no_argument, NULL, 'o'},
1734         {"version", no_argument, NULL, 'V'},
1735         DAEMON_LONG_OPTIONS,
1736         VLOG_LONG_OPTIONS,
1737         STREAM_SSL_LONG_OPTIONS,
1738         {NULL, 0, NULL, 0},
1739     };
1740     char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1741
1742     for (;;) {
1743         int c;
1744
1745         c = getopt_long(argc, argv, short_options, long_options, NULL);
1746         if (c == -1) {
1747             break;
1748         }
1749
1750         switch (c) {
1751         DAEMON_OPTION_HANDLERS;
1752         VLOG_OPTION_HANDLERS;
1753         STREAM_SSL_OPTION_HANDLERS;
1754
1755         case 'd':
1756             ovnsb_db = optarg;
1757             break;
1758
1759         case 'D':
1760             ovnnb_db = optarg;
1761             break;
1762
1763         case 'h':
1764             usage();
1765             exit(EXIT_SUCCESS);
1766
1767         case 'o':
1768             ovs_cmdl_print_options(long_options);
1769             exit(EXIT_SUCCESS);
1770
1771         case 'V':
1772             ovs_print_version(0, 0);
1773             exit(EXIT_SUCCESS);
1774
1775         default:
1776             break;
1777         }
1778     }
1779
1780     if (!ovnsb_db) {
1781         ovnsb_db = default_db();
1782     }
1783
1784     if (!ovnnb_db) {
1785         ovnnb_db = default_db();
1786     }
1787
1788     free(short_options);
1789 }
1790
/* Registers 'column' with 'idl' through ovsdb_idl_add_column() and then
 * calls ovsdb_idl_omit_alert() for it.
 *
 * NOTE(review): presumably this keeps the column replicated while stopping
 * changes to it from being reported as database changes -- confirm against
 * the ovsdb-idl documentation. */
static void
add_column_noalert(struct ovsdb_idl *idl,
                   const struct ovsdb_idl_column *column)
{
    /* The column is added first, then its alerts are omitted. */
    ovsdb_idl_add_column(idl, column);
    ovsdb_idl_omit_alert(idl, column);
}
1798
1799 int
1800 main(int argc, char *argv[])
1801 {
1802     extern struct vlog_module VLM_reconnect;
1803     struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
1804     unsigned int ovnnb_seqno, ovn_seqno;
1805     int res = EXIT_SUCCESS;
1806     struct northd_context ctx = {
1807         .ovnsb_txn = NULL,
1808     };
1809     bool ovnnb_changes_pending = false;
1810     bool ovn_changes_pending = false;
1811     struct unixctl_server *unixctl;
1812     int retval;
1813     bool exiting;
1814
1815     fatal_ignore_sigpipe();
1816     set_program_name(argv[0]);
1817     service_start(&argc, &argv);
1818     vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1819     vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1820     parse_options(argc, argv);
1821
1822     daemonize_start(false);
1823
1824     retval = unixctl_server_create(NULL, &unixctl);
1825     if (retval) {
1826         exit(EXIT_FAILURE);
1827     }
1828     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1829
1830     daemonize_complete();
1831
1832     nbrec_init();
1833     sbrec_init();
1834
1835     /* We want to detect all changes to the ovn-nb db. */
1836     ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
1837             &nbrec_idl_class, true, true);
1838
1839     ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
1840             &sbrec_idl_class, false, true);
1841
1842     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
1843     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
1844     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
1845     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
1846     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
1847     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
1848     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
1849
1850     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
1851     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
1852     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
1853     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
1854     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
1855
1856     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
1857     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
1858     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
1859
1860     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
1861     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
1862     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
1863     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
1864     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
1865     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
1866     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
1867     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
1868     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
1869     ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
1870
1871     /*
1872      * The loop here just runs the IDL in a loop waiting for the seqno to
1873      * change, which indicates that the contents of the db have changed.
1874      *
1875      * If the contents of the ovn-nb db change, the mappings to the ovn-sb
1876      * db must be recalculated.
1877      *
1878      * If the contents of the ovn-sb db change, it means the 'up' state of
1879      * a port may have changed, as that's the only type of change ovn-northd is
1880      * watching for.
1881      */
1882
1883     ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1884     ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1885     exiting = false;
1886     while (!exiting) {
1887         ovsdb_idl_run(ovnnb_idl);
1888         ovsdb_idl_run(ovnsb_idl);
1889         unixctl_server_run(unixctl);
1890
1891         if (!ovsdb_idl_is_alive(ovnnb_idl)) {
1892             int retval = ovsdb_idl_get_last_error(ovnnb_idl);
1893             VLOG_ERR("%s: database connection failed (%s)",
1894                     ovnnb_db, ovs_retval_to_string(retval));
1895             res = EXIT_FAILURE;
1896             break;
1897         }
1898
1899         if (!ovsdb_idl_is_alive(ovnsb_idl)) {
1900             int retval = ovsdb_idl_get_last_error(ovnsb_idl);
1901             VLOG_ERR("%s: database connection failed (%s)",
1902                     ovnsb_db, ovs_retval_to_string(retval));
1903             res = EXIT_FAILURE;
1904             break;
1905         }
1906
1907         if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
1908             ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1909             ovnnb_changes_pending = true;
1910         }
1911
1912         if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
1913             ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1914             ovn_changes_pending = true;
1915         }
1916
1917         /*
1918          * If there are any pending changes, we delay recalculating the
1919          * necessary updates until after an existing transaction finishes.
1920          * This avoids the possibility of rapid updates causing ovn-northd to
1921          * never be able to successfully make the corresponding updates to the
1922          * other db.  Instead, pending changes are batched up until the next
1923          * time we get a chance to calculate the new state and apply it.
1924          */
1925
1926         if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
1927             /*
1928              * The OVN-nb db contents have changed, so create a transaction for
1929              * updating the OVN-sb DB.
1930              */
1931             ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
1932             ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
1933                                       "ovn-northd: northbound db changed");
1934             ovnnb_db_changed(&ctx);
1935             ovnnb_changes_pending = false;
1936         }
1937
1938         if (ovn_changes_pending && !ctx.ovnnb_txn) {
1939             /*
1940              * The OVN-sb db contents have changed, so create a transaction for
1941              * updating the northbound DB.
1942              */
1943             ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
1944             ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
1945                                       "ovn-northd: southbound db changed");
1946             ovnsb_db_changed(&ctx);
1947             ovn_changes_pending = false;
1948         }
1949
1950         if (ctx.ovnnb_txn) {
1951             enum ovsdb_idl_txn_status txn_status;
1952             txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
1953             switch (txn_status) {
1954             case TXN_UNCOMMITTED:
1955             case TXN_INCOMPLETE:
1956                 /* Come back around and try to commit this transaction again */
1957                 break;
1958             case TXN_ABORTED:
1959             case TXN_TRY_AGAIN:
1960             case TXN_NOT_LOCKED:
1961             case TXN_ERROR:
1962                 /* Something went wrong, so try creating a new transaction. */
1963                 ovn_changes_pending = true;
1964             case TXN_UNCHANGED:
1965             case TXN_SUCCESS:
1966                 ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
1967                 ctx.ovnnb_txn = NULL;
1968             }
1969         }
1970
1971         if (ctx.ovnsb_txn) {
1972             enum ovsdb_idl_txn_status txn_status;
1973             txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
1974             switch (txn_status) {
1975             case TXN_UNCOMMITTED:
1976             case TXN_INCOMPLETE:
1977                 /* Come back around and try to commit this transaction again */
1978                 break;
1979             case TXN_ABORTED:
1980             case TXN_TRY_AGAIN:
1981             case TXN_NOT_LOCKED:
1982             case TXN_ERROR:
1983                 /* Something went wrong, so try creating a new transaction. */
1984                 ovnnb_changes_pending = true;
1985             case TXN_UNCHANGED:
1986             case TXN_SUCCESS:
1987                 ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
1988                 ctx.ovnsb_txn = NULL;
1989             }
1990         }
1991
1992         if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
1993                 ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
1994             ovsdb_idl_wait(ovnnb_idl);
1995             ovsdb_idl_wait(ovnsb_idl);
1996             if (ctx.ovnnb_txn) {
1997                 ovsdb_idl_txn_wait(ctx.ovnnb_txn);
1998             }
1999             if (ctx.ovnsb_txn) {
2000                 ovsdb_idl_txn_wait(ctx.ovnsb_txn);
2001             }
2002             unixctl_server_wait(unixctl);
2003             if (exiting) {
2004                 poll_immediate_wake();
2005             }
2006             poll_block();
2007         }
2008         if (should_service_stop()) {
2009             exiting = true;
2010         }
2011     }
2012
2013     unixctl_server_destroy(unixctl);
2014     ovsdb_idl_destroy(ovnsb_idl);
2015     ovsdb_idl_destroy(ovnnb_idl);
2016     service_stop();
2017
2018     free(default_db_);
2019
2020     exit(res);
2021 }
2022
2023 static void
2024 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2025                 const char *argv[] OVS_UNUSED, void *exiting_)
2026 {
2027     bool *exiting = exiting_;
2028     *exiting = true;
2029
2030     unixctl_command_reply(conn, NULL);
2031 }