ovn: Use "ip.ttl--" instead of "ip4.ttl--".
cascardo/ovs.git: ovn/northd/ovn-northd.c
1 /*
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use this file except in compliance with the License.
4  * You may obtain a copy of the License at:
5  *
6  *     http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "poll-loop.h"
33 #include "smap.h"
34 #include "stream.h"
35 #include "stream-ssl.h"
36 #include "unixctl.h"
37 #include "util.h"
38 #include "uuid.h"
39 #include "openvswitch/vlog.h"
40
41 VLOG_DEFINE_THIS_MODULE(ovn_northd);
42
43 static unixctl_cb_func ovn_northd_exit;
44
45 struct northd_context {
46     struct ovsdb_idl *ovnnb_idl;
47     struct ovsdb_idl *ovnsb_idl;
48     struct ovsdb_idl_txn *ovnnb_txn;
49     struct ovsdb_idl_txn *ovnsb_txn;
50 };
51
52 static const char *ovnnb_db;
53 static const char *ovnsb_db;
54
55 static const char *default_db(void);
56 \f
57 /* Pipeline stages. */
58
59 /* The two pipelines in an OVN logical flow table. */
60 enum ovn_pipeline {
61     P_IN,                       /* Ingress pipeline. */
62     P_OUT                       /* Egress pipeline. */
63 };
64
65 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
66 enum ovn_datapath_type {
67     DP_SWITCH,                  /* OVN logical switch. */
68     DP_ROUTER                   /* OVN logical router. */
69 };
70
71 /* Returns an "enum ovn_stage" built from the arguments.
72  *
73  * (It's better to use ovn_stage_build() for type-safety reasons, but inline
74  * functions can't be used in enums or switch cases.) */
75 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
76     (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
77
78 /* A stage within an OVN logical switch or router.
79  *
80  * An "enum ovn_stage" indicates whether the stage is part of a logical switch
81  * or router, whether the stage is part of the ingress or egress pipeline, and
82  * the table within that pipeline.  The first three components are combined to
83  * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC,
84  * S_ROUTER_OUT_DELIVERY. */
85 enum ovn_stage {
86 #define PIPELINE_STAGES                                                 \
87     /* Logical switch ingress stages. */                                \
88     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC,    0, "switch_in_port_sec")   \
89     PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,     1, "switch_in_pre_acl")    \
90     PIPELINE_STAGE(SWITCH, IN,  ACL,         2, "switch_in_acl")        \
91     PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,     3, "switch_in_l2_lkup")    \
92                                                                         \
93     /* Logical switch egress stages. */                                 \
94     PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,     0, "switch_out_pre_acl")   \
95     PIPELINE_STAGE(SWITCH, OUT, ACL,         1, "switch_out_acl")       \
96     PIPELINE_STAGE(SWITCH, OUT, PORT_SEC,    2, "switch_out_port_sec")  \
97                                                                         \
98     /* Logical router ingress stages. */                                \
99     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "router_in_admission")  \
100     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "router_in_ip_input")   \
101     PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  2, "router_in_ip_routing") \
102     PIPELINE_STAGE(ROUTER, IN,  ARP,         3, "router_in_arp")        \
103                                                                         \
104     /* Logical router egress stages. */                                 \
105     PIPELINE_STAGE(ROUTER, OUT, DELIVERY,    0, "router_out_delivery")
106
107 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
108     S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
109         = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
110     PIPELINE_STAGES
111 #undef PIPELINE_STAGE
112 };
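/* For example, S_SWITCH_IN_L2_LKUP == OVN_STAGE_BUILD(DP_SWITCH, P_IN, 3)
 * == (0 << 9) | (0 << 8) | 3 == 3, while S_ROUTER_OUT_DELIVERY
 * == OVN_STAGE_BUILD(DP_ROUTER, P_OUT, 0) == (1 << 9) | (1 << 8) | 0 == 0x300.
 * The accessors below (ovn_stage_get_pipeline(), ovn_stage_get_table()) read
 * bit 8 and the low 8 bits back out of the encoded value. */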
113
114 /* Due to various hard-coded priorities needed to implement ACLs, the
115  * northbound database supports a smaller range of ACL priorities than
116  * are available to logical flows.  This value is added to an ACL
117  * priority to determine the ACL's logical flow priority. */
118 #define OVN_ACL_PRI_OFFSET 1000
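/* For example, a northbound ACL at priority 1000 becomes a logical flow at
 * priority 1000 + OVN_ACL_PRI_OFFSET == 2000, which keeps user ACLs above the
 * priority-0 and priority-1 default flows that build_acls() installs in the
 * same ACL tables. */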
119
120 /* Returns an "enum ovn_stage" built from the arguments. */
121 static enum ovn_stage
122 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
123                 uint8_t table)
124 {
125     return OVN_STAGE_BUILD(dp_type, pipeline, table);
126 }
127
128 /* Returns the pipeline to which 'stage' belongs. */
129 static enum ovn_pipeline
130 ovn_stage_get_pipeline(enum ovn_stage stage)
131 {
132     return (stage >> 8) & 1;
133 }
134
135 /* Returns the table to which 'stage' belongs. */
136 static uint8_t
137 ovn_stage_get_table(enum ovn_stage stage)
138 {
139     return stage & 0xff;
140 }
141
142 /* Returns a string name for 'stage'. */
143 static const char *
144 ovn_stage_to_str(enum ovn_stage stage)
145 {
146     switch (stage) {
147 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
148         case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
149     PIPELINE_STAGES
150 #undef PIPELINE_STAGE
151         default: return "<unknown>";
152     }
153 }
154 \f
155 static void
156 usage(void)
157 {
158     printf("\
159 %s: OVN northbound management daemon\n\
160 usage: %s [OPTIONS]\n\
161 \n\
162 Options:\n\
163   --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
164                             (default: %s)\n\
165   --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
166                             (default: %s)\n\
167   -h, --help                display this help message\n\
168   -o, --options             list available options\n\
169   -V, --version             display version information\n\
170 ", program_name, program_name, default_db(), default_db());
171     daemon_usage();
172     vlog_usage();
173     stream_usage("database", true, true, false);
174 }
175 \f
176 struct tnlid_node {
177     struct hmap_node hmap_node;
178     uint32_t tnlid;
179 };
180
181 static void
182 destroy_tnlids(struct hmap *tnlids)
183 {
184     struct tnlid_node *node, *next;
185     HMAP_FOR_EACH_SAFE (node, next, hmap_node, tnlids) {
186         hmap_remove(tnlids, &node->hmap_node);
187         free(node);
188     }
189     hmap_destroy(tnlids);
190 }
191
192 static void
193 add_tnlid(struct hmap *set, uint32_t tnlid)
194 {
195     struct tnlid_node *node = xmalloc(sizeof *node);
196     hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
197     node->tnlid = tnlid;
198 }
199
200 static bool
201 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
202 {
203     const struct tnlid_node *node;
204     HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
205         if (node->tnlid == tnlid) {
206             return true;
207         }
208     }
209     return false;
210 }
211
212 static uint32_t
213 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
214                uint32_t *hint)
215 {
216     for (uint32_t tnlid = *hint + 1; tnlid != *hint;
217          tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
218         if (!tnlid_in_use(set, tnlid)) {
219             add_tnlid(set, tnlid);
220             *hint = tnlid;
221             return tnlid;
222         }
223     }
224
225     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
226     VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
227     return 0;
228 }
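/* Example: with an empty 'set', max == 10, and *hint == 0, successive calls
 * to allocate_tnlid() return 1, 2, 3, ..., advancing *hint each time so later
 * calls do not rescan low ids.  Once every id in 1...max is in use, the loop
 * wraps around to *hint, a rate-limited warning is logged, and 0 is returned,
 * which callers treat as allocation failure. */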
229 \f
230 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
231  * sb->external_ids:logical-switch. */
232 struct ovn_datapath {
233     struct hmap_node key_node;  /* Index on 'key'. */
234     struct uuid key;            /* (nbs/nbr)->header_.uuid. */
235
236     const struct nbrec_logical_switch *nbs;  /* May be NULL. */
237     const struct nbrec_logical_router *nbr;  /* May be NULL. */
238     const struct sbrec_datapath_binding *sb; /* May be NULL. */
239
240     struct ovs_list list;       /* In list of similar records. */
241
242     /* Logical router data (digested from nbr). */
243     ovs_be32 gateway;
244
245     /* Logical switch data. */
246     struct ovn_port **router_ports;
247     size_t n_router_ports;
248
249     struct hmap port_tnlids;
250     uint32_t port_key_hint;
251
252     bool has_unknown;
253 };
254
255 static struct ovn_datapath *
256 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
257                     const struct nbrec_logical_switch *nbs,
258                     const struct nbrec_logical_router *nbr,
259                     const struct sbrec_datapath_binding *sb)
260 {
261     struct ovn_datapath *od = xzalloc(sizeof *od);
262     od->key = *key;
263     od->sb = sb;
264     od->nbs = nbs;
265     od->nbr = nbr;
266     hmap_init(&od->port_tnlids);
267     od->port_key_hint = 0;
268     hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
269     return od;
270 }
271
272 static void
273 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
274 {
275     if (od) {
276         /* Don't remove od->list.  It is used within build_datapaths() as a
277          * private list and once we've exited that function it is not safe to
278          * use it. */
279         hmap_remove(datapaths, &od->key_node);
280         destroy_tnlids(&od->port_tnlids);
281         free(od->router_ports);
282         free(od);
283     }
284 }
285
286 static struct ovn_datapath *
287 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
288 {
289     struct ovn_datapath *od;
290
291     HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
292         if (uuid_equals(uuid, &od->key)) {
293             return od;
294         }
295     }
296     return NULL;
297 }
298
299 static struct ovn_datapath *
300 ovn_datapath_from_sbrec(struct hmap *datapaths,
301                         const struct sbrec_datapath_binding *sb)
302 {
303     struct uuid key;
304
305     if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
306         !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
307         return NULL;
308     }
309     return ovn_datapath_find(datapaths, &key);
310 }
311
312 static void
313 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
314                struct ovs_list *sb_only, struct ovs_list *nb_only,
315                struct ovs_list *both)
316 {
317     hmap_init(datapaths);
318     list_init(sb_only);
319     list_init(nb_only);
320     list_init(both);
321
322     const struct sbrec_datapath_binding *sb, *sb_next;
323     SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
324         struct uuid key;
325         if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326             !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327             ovsdb_idl_txn_add_comment(
328                 ctx->ovnsb_txn,
329                 "deleting Datapath_Binding "UUID_FMT" that lacks "
330                 "external-ids:logical-switch and "
331                 "external-ids:logical-router",
332                 UUID_ARGS(&sb->header_.uuid));
333             sbrec_datapath_binding_delete(sb);
334             continue;
335         }
336
337         if (ovn_datapath_find(datapaths, &key)) {
338             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
339             VLOG_INFO_RL(
340                 &rl, "deleting Datapath_Binding "UUID_FMT" with "
341                 "duplicate external-ids:logical-switch/router "UUID_FMT,
342                 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
343             sbrec_datapath_binding_delete(sb);
344             continue;
345         }
346
347         struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
348                                                       NULL, NULL, sb);
349         list_push_back(sb_only, &od->list);
350     }
351
352     const struct nbrec_logical_switch *nbs;
353     NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
354         struct ovn_datapath *od = ovn_datapath_find(datapaths,
355                                                     &nbs->header_.uuid);
356         if (od) {
357             od->nbs = nbs;
358             list_remove(&od->list);
359             list_push_back(both, &od->list);
360         } else {
361             od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
362                                      nbs, NULL, NULL);
363             list_push_back(nb_only, &od->list);
364         }
365     }
366
367     const struct nbrec_logical_router *nbr;
368     NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
369         struct ovn_datapath *od = ovn_datapath_find(datapaths,
370                                                     &nbr->header_.uuid);
371         if (od) {
372             if (!od->nbs) {
373                 od->nbr = nbr;
374                 list_remove(&od->list);
375                 list_push_back(both, &od->list);
376             } else {
377                 /* Can't happen! */
378                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
379                 VLOG_WARN_RL(&rl,
380                              "duplicate UUID "UUID_FMT" in OVN_Northbound",
381                              UUID_ARGS(&nbr->header_.uuid));
382                 continue;
383             }
384         } else {
385             od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
386                                      NULL, nbr, NULL);
387             list_push_back(nb_only, &od->list);
388         }
389
390         od->gateway = 0;
391         if (nbr->default_gw) {
392             ovs_be32 ip, mask;
393             char *error = ip_parse_masked(nbr->default_gw, &ip, &mask);
394             if (error || !ip || mask != OVS_BE32_MAX) {
395                 static struct vlog_rate_limit rl
396                     = VLOG_RATE_LIMIT_INIT(5, 1);
397                 VLOG_WARN_RL(&rl, "bad 'gateway' %s", nbr->default_gw);
398                 free(error);
399             } else {
400                 od->gateway = ip;
401             }
402         }
403     }
404 }
405
406 static uint32_t
407 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
408 {
409     static uint32_t hint;
410     return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
411 }
412
413 static void
414 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
415 {
416     struct ovs_list sb_only, nb_only, both;
417
418     join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
419
420     if (!list_is_empty(&nb_only)) {
421         /* First index the in-use datapath tunnel IDs. */
422         struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
423         struct ovn_datapath *od;
424         LIST_FOR_EACH (od, list, &both) {
425             add_tnlid(&dp_tnlids, od->sb->tunnel_key);
426         }
427
428         /* Add southbound record for each unmatched northbound record. */
429         LIST_FOR_EACH (od, list, &nb_only) {
430             uint32_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
431             if (!tunnel_key) {
432                 break;
433             }
434
435             od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
436
437             char uuid_s[UUID_LEN + 1];
438             sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
439             const char *key = od->nbs ? "logical-switch" : "logical-router";
440             const struct smap id = SMAP_CONST1(&id, key, uuid_s);
441             sbrec_datapath_binding_set_external_ids(od->sb, &id);
442
443             sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
444         }
445         destroy_tnlids(&dp_tnlids);
446     }
447
448     /* Delete southbound records without northbound matches. */
449     struct ovn_datapath *od, *next;
450     LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
451         list_remove(&od->list);
452         sbrec_datapath_binding_delete(od->sb);
453         ovn_datapath_destroy(datapaths, od);
454     }
455 }
456 \f
457 struct ovn_port {
458     struct hmap_node key_node;  /* Index on 'key'. */
459     char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
460     char *json_key;             /* 'key', quoted for use in JSON. */
461
462     const struct nbrec_logical_port *nbs;        /* May be NULL. */
463     const struct nbrec_logical_router_port *nbr; /* May be NULL. */
464     const struct sbrec_port_binding *sb;         /* May be NULL. */
465
466     /* Logical router port data. */
467     ovs_be32 ip, mask;          /* 192.168.10.123/24. */
468     ovs_be32 network;           /* 192.168.10.0. */
469     ovs_be32 bcast;             /* 192.168.10.255. */
470     struct eth_addr mac;
471     struct ovn_port *peer;
472
473     struct ovn_datapath *od;
474
475     struct ovs_list list;       /* In list of similar records. */
476 };
477
478 static struct ovn_port *
479 ovn_port_create(struct hmap *ports, const char *key,
480                 const struct nbrec_logical_port *nbs,
481                 const struct nbrec_logical_router_port *nbr,
482                 const struct sbrec_port_binding *sb)
483 {
484     struct ovn_port *op = xzalloc(sizeof *op);
485
486     struct ds json_key = DS_EMPTY_INITIALIZER;
487     json_string_escape(key, &json_key);
488     op->json_key = ds_steal_cstr(&json_key);
489
490     op->key = xstrdup(key);
491     op->sb = sb;
492     op->nbs = nbs;
493     op->nbr = nbr;
494     hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
495     return op;
496 }
497
498 static void
499 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
500 {
501     if (port) {
502         /* Don't remove port->list.  It is used within build_ports() as a
503          * private list and once we've exited that function it is not safe to
504          * use it. */
505         hmap_remove(ports, &port->key_node);
506         free(port->json_key);
507         free(port->key);
508         free(port);
509     }
510 }
511
512 static struct ovn_port *
513 ovn_port_find(struct hmap *ports, const char *name)
514 {
515     struct ovn_port *op;
516
517     HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
518         if (!strcmp(op->key, name)) {
519             return op;
520         }
521     }
522     return NULL;
523 }
524
525 static uint32_t
526 ovn_port_allocate_key(struct ovn_datapath *od)
527 {
528     return allocate_tnlid(&od->port_tnlids, "port",
529                           (1u << 15) - 1, &od->port_key_hint);
530 }
531
532 static void
533 join_logical_ports(struct northd_context *ctx,
534                    struct hmap *datapaths, struct hmap *ports,
535                    struct ovs_list *sb_only, struct ovs_list *nb_only,
536                    struct ovs_list *both)
537 {
538     hmap_init(ports);
539     list_init(sb_only);
540     list_init(nb_only);
541     list_init(both);
542
543     const struct sbrec_port_binding *sb;
544     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
545         struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
546                                               NULL, NULL, sb);
547         list_push_back(sb_only, &op->list);
548     }
549
550     struct ovn_datapath *od;
551     HMAP_FOR_EACH (od, key_node, datapaths) {
552         if (od->nbs) {
553             for (size_t i = 0; i < od->nbs->n_ports; i++) {
554                 const struct nbrec_logical_port *nbs = od->nbs->ports[i];
555                 struct ovn_port *op = ovn_port_find(ports, nbs->name);
556                 if (op) {
557                     if (op->nbs || op->nbr) {
558                         static struct vlog_rate_limit rl
559                             = VLOG_RATE_LIMIT_INIT(5, 1);
560                         VLOG_WARN_RL(&rl, "duplicate logical port %s",
561                                      nbs->name);
562                         continue;
563                     }
564                     op->nbs = nbs;
565                     list_remove(&op->list);
566                     list_push_back(both, &op->list);
567                 } else {
568                     op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
569                     list_push_back(nb_only, &op->list);
570                 }
571
572                 op->od = od;
573             }
574         } else {
575             for (size_t i = 0; i < od->nbr->n_ports; i++) {
576                 const struct nbrec_logical_router_port *nbr
577                     = od->nbr->ports[i];
578
579                 struct eth_addr mac;
580                 if (!eth_addr_from_string(nbr->mac, &mac)) {
581                     static struct vlog_rate_limit rl
582                         = VLOG_RATE_LIMIT_INIT(5, 1);
583                     VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
584                     continue;
585                 }
586
587                 ovs_be32 ip, mask;
588                 char *error = ip_parse_masked(nbr->network, &ip, &mask);
589                 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
590                     static struct vlog_rate_limit rl
591                         = VLOG_RATE_LIMIT_INIT(5, 1);
592                     VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
593                     free(error);
594                     continue;
595                 }
596
597                 char name[UUID_LEN + 1];
598                 snprintf(name, sizeof name, UUID_FMT,
599                          UUID_ARGS(&nbr->header_.uuid));
600                 struct ovn_port *op = ovn_port_find(ports, name);
601                 if (op) {
602                     if (op->nbs || op->nbr) {
603                         static struct vlog_rate_limit rl
604                             = VLOG_RATE_LIMIT_INIT(5, 1);
605                         VLOG_WARN_RL(&rl, "duplicate logical router port %s",
606                                      name);
607                         continue;
608                     }
609                     op->nbr = nbr;
610                     list_remove(&op->list);
611                     list_push_back(both, &op->list);
612                 } else {
613                     op = ovn_port_create(ports, name, NULL, nbr, NULL);
614                     list_push_back(nb_only, &op->list);
615                 }
616
617                 op->ip = ip;
618                 op->mask = mask;
619                 op->network = ip & mask;
620                 op->bcast = ip | ~mask;
621                 op->mac = mac;
622
623                 op->od = od;
624             }
625         }
626     }
627
628     /* Connect logical router ports, and logical switch ports of type "router",
629      * to their peers. */
630     struct ovn_port *op;
631     HMAP_FOR_EACH (op, key_node, ports) {
632         if (op->nbs && !strcmp(op->nbs->type, "router")) {
633             const char *peer_name = smap_get(&op->nbs->options, "router-port");
634             if (!peer_name) {
635                 continue;
636             }
637
638             struct ovn_port *peer = ovn_port_find(ports, peer_name);
639             if (!peer || !peer->nbr) {
640                 continue;
641             }
642
643             peer->peer = op;
644             op->peer = peer;
645             op->od->router_ports = xrealloc(
646                 op->od->router_ports,
647                 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
648             op->od->router_ports[op->od->n_router_ports++] = op;
649         } else if (op->nbr && op->nbr->peer) {
650             char peer_name[UUID_LEN + 1];
651             snprintf(peer_name, sizeof peer_name, UUID_FMT,
652                      UUID_ARGS(&op->nbr->peer->header_.uuid));
653             op->peer = ovn_port_find(ports, peer_name);
654         }
655     }
656 }
657
658 static void
659 ovn_port_update_sbrec(const struct ovn_port *op)
660 {
661     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
662     if (op->nbr) {
663         sbrec_port_binding_set_type(op->sb, "patch");
664
665         const char *peer = op->peer ? op->peer->key : "<error>";
666         const struct smap ids = SMAP_CONST1(&ids, "peer", peer);
667         sbrec_port_binding_set_options(op->sb, &ids);
668
669         sbrec_port_binding_set_parent_port(op->sb, NULL);
670         sbrec_port_binding_set_tag(op->sb, NULL, 0);
671         sbrec_port_binding_set_mac(op->sb, NULL, 0);
672     } else {
673         if (strcmp(op->nbs->type, "router")) {
674             sbrec_port_binding_set_type(op->sb, op->nbs->type);
675             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
676         } else {
677             sbrec_port_binding_set_type(op->sb, "patch");
678
679             const char *router_port = smap_get(&op->nbs->options,
680                                                "router-port");
681             if (!router_port) {
682                 router_port = "<error>";
683             }
684             const struct smap ids = SMAP_CONST1(&ids, "peer", router_port);
685             sbrec_port_binding_set_options(op->sb, &ids);
686         }
687         sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
688         sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
689         sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
690                                    op->nbs->n_addresses);
691     }
692 }
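/* Note: a logical router port and the logical switch port of type "router"
 * that faces it both end up as southbound Port_Binding rows of type "patch",
 * each with options:peer naming the other; ovn-controller uses that option to
 * connect the two logical datapaths. */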
693
694 static void
695 build_ports(struct northd_context *ctx, struct hmap *datapaths,
696             struct hmap *ports)
697 {
698     struct ovs_list sb_only, nb_only, both;
699
700     join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
701
702     /* For logical ports that are in both databases, update the southbound
703      * record based on northbound data.  Also index the in-use tunnel_keys. */
704     struct ovn_port *op, *next;
705     LIST_FOR_EACH_SAFE (op, next, list, &both) {
706         ovn_port_update_sbrec(op);
707
708         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
709         if (op->sb->tunnel_key > op->od->port_key_hint) {
710             op->od->port_key_hint = op->sb->tunnel_key;
711         }
712     }
713
714     /* Add southbound record for each unmatched northbound record. */
715     LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
716         uint16_t tunnel_key = ovn_port_allocate_key(op->od);
717         if (!tunnel_key) {
718             continue;
719         }
720
721         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
722         ovn_port_update_sbrec(op);
723
724         sbrec_port_binding_set_logical_port(op->sb, op->key);
725         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
726     }
727
728     /* Delete southbound records without northbound matches. */
729     LIST_FOR_EACH_SAFE(op, next, list, &sb_only) {
730         list_remove(&op->list);
731         sbrec_port_binding_delete(op->sb);
732         ovn_port_destroy(ports, op);
733     }
734 }
735 \f
736 #define OVN_MIN_MULTICAST 32768
737 #define OVN_MAX_MULTICAST 65535
738
739 struct multicast_group {
740     const char *name;
741     uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
742 };
743
744 #define MC_FLOOD "_MC_flood"
745 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
746
747 #define MC_UNKNOWN "_MC_unknown"
748 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
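/* Both built-in groups live at the top of the OVN_MIN_MULTICAST...
 * OVN_MAX_MULTICAST range.  For example, a logical flow whose actions set
 * outport = "_MC_flood" delivers the packet to every enabled logical port
 * that build_lswitch_flows() adds to the mc_flood group for that datapath. */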
749
750 static bool
751 multicast_group_equal(const struct multicast_group *a,
752                       const struct multicast_group *b)
753 {
754     return !strcmp(a->name, b->name) && a->key == b->key;
755 }
756
757 /* Multicast group entry. */
758 struct ovn_multicast {
759     struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
760     struct ovn_datapath *datapath;
761     const struct multicast_group *group;
762
763     struct ovn_port **ports;
764     size_t n_ports, allocated_ports;
765 };
766
767 static uint32_t
768 ovn_multicast_hash(const struct ovn_datapath *datapath,
769                    const struct multicast_group *group)
770 {
771     return hash_pointer(datapath, group->key);
772 }
773
774 static struct ovn_multicast *
775 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
776                    const struct multicast_group *group)
777 {
778     struct ovn_multicast *mc;
779
780     HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
781                              ovn_multicast_hash(datapath, group), mcgroups) {
782         if (mc->datapath == datapath
783             && multicast_group_equal(mc->group, group)) {
784             return mc;
785         }
786     }
787     return NULL;
788 }
789
790 static void
791 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
792                   struct ovn_port *port)
793 {
794     struct ovn_datapath *od = port->od;
795     struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
796     if (!mc) {
797         mc = xmalloc(sizeof *mc);
798         hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
799         mc->datapath = od;
800         mc->group = group;
801         mc->n_ports = 0;
802         mc->allocated_ports = 4;
803         mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
804     }
805     if (mc->n_ports >= mc->allocated_ports) {
806         mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
807                                sizeof *mc->ports);
808     }
809     mc->ports[mc->n_ports++] = port;
810 }
811
812 static void
813 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
814 {
815     if (mc) {
816         hmap_remove(mcgroups, &mc->hmap_node);
817         free(mc->ports);
818         free(mc);
819     }
820 }
821
822 static void
823 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
824                            const struct sbrec_multicast_group *sb)
825 {
826     struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
827     for (size_t i = 0; i < mc->n_ports; i++) {
828         ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
829     }
830     sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
831     free(ports);
832 }
833 \f
834 /* Logical flow generation.
835  *
836  * This code generates the Logical_Flow table in the southbound database, as a
837  * function of most of the northbound database.
838  */
839
840 struct ovn_lflow {
841     struct hmap_node hmap_node;
842
843     struct ovn_datapath *od;
844     enum ovn_stage stage;
845     uint16_t priority;
846     char *match;
847     char *actions;
848 };
849
850 static size_t
851 ovn_lflow_hash(const struct ovn_lflow *lflow)
852 {
853     size_t hash = uuid_hash(&lflow->od->key);
854     hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
855     hash = hash_string(lflow->match, hash);
856     return hash_string(lflow->actions, hash);
857 }
858
859 static bool
860 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
861 {
862     return (a->od == b->od
863             && a->stage == b->stage
864             && a->priority == b->priority
865             && !strcmp(a->match, b->match)
866             && !strcmp(a->actions, b->actions));
867 }
868
869 static void
870 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
871               enum ovn_stage stage, uint16_t priority,
872               char *match, char *actions)
873 {
874     lflow->od = od;
875     lflow->stage = stage;
876     lflow->priority = priority;
877     lflow->match = match;
878     lflow->actions = actions;
879 }
880
881 /* Adds a row with the specified contents to the Logical_Flow table. */
882 static void
883 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
884               enum ovn_stage stage, uint16_t priority,
885               const char *match, const char *actions)
886 {
887     struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
888     ovn_lflow_init(lflow, od, stage, priority,
889                    xstrdup(match), xstrdup(actions));
890     hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
891 }
892
893 static struct ovn_lflow *
894 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
895                enum ovn_stage stage, uint16_t priority,
896                const char *match, const char *actions)
897 {
898     struct ovn_lflow target;
899     ovn_lflow_init(&target, od, stage, priority,
900                    CONST_CAST(char *, match), CONST_CAST(char *, actions));
901
902     struct ovn_lflow *lflow;
903     HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
904                              lflows) {
905         if (ovn_lflow_equal(lflow, &target)) {
906             return lflow;
907         }
908     }
909     return NULL;
910 }
911
912 static void
913 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
914 {
915     if (lflow) {
916         hmap_remove(lflows, &lflow->hmap_node);
917         free(lflow->match);
918         free(lflow->actions);
919         free(lflow);
920     }
921 }
922
923 /* Appends port security constraints on L2 address field 'eth_addr_field'
924  * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
925  * 'n_port_security' elements, is the collection of port_security constraints
926  * from an OVN_NB Logical_Port row. */
927 static void
928 build_port_security(const char *eth_addr_field,
929                     char **port_security, size_t n_port_security,
930                     struct ds *match)
931 {
932     size_t base_len = match->length;
933     ds_put_format(match, " && %s == {", eth_addr_field);
934
935     size_t n = 0;
936     for (size_t i = 0; i < n_port_security; i++) {
937         struct eth_addr ea;
938
939         if (eth_addr_from_string(port_security[i], &ea)) {
940             ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
941             ds_put_char(match, ' ');
942             n++;
943         }
944     }
945     ds_chomp(match, ' ');
946     ds_put_cstr(match, "}");
947
948     if (!n) {
949         match->length = base_len;
950     }
951 }
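/* For example (illustrative port name and addresses), if 'match' already
 * contains "inport == "lp1"" and port_security is {"00:00:00:00:00:01",
 * "00:00:00:00:00:02"}, the result is
 * "inport == "lp1" && eth.src == {00:00:00:00:00:01 00:00:00:00:00:02}".
 * Unparsable addresses are skipped, and if none parse the match is rolled
 * back to its original length so no constraint is appended. */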
952
953 static bool
954 lport_is_enabled(const struct nbrec_logical_port *lport)
955 {
956     return !lport->enabled || *lport->enabled;
957 }
958
959 static bool
960 has_stateful_acl(struct ovn_datapath *od)
961 {
962     for (size_t i = 0; i < od->nbs->n_acls; i++) {
963         struct nbrec_acl *acl = od->nbs->acls[i];
964         if (!strcmp(acl->action, "allow-related")) {
965             return true;
966         }
967     }
968
969     return false;
970 }
971
972 static void
973 build_acls(struct ovn_datapath *od, struct hmap *lflows)
974 {
975     bool has_stateful = has_stateful_acl(od);
976
977     /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
978      * allowed by default. */
979     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
980     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
981
982     /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
983      * default.  A related rule at priority 1 is added below if there
984      * are any stateful ACLs in this datapath. */
985     ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
986     ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
987
988     /* If there are any stateful ACL rules in this datapath, we must
989      * send all IP packets through the conntrack action, which handles
990      * defragmentation, in order to match L4 headers. */
991     if (has_stateful) {
992         /* Ingress and Egress Pre-ACL Table (Priority 100).
993          *
994          * Regardless of whether the ACL is "from-lport" or "to-lport",
995          * we need rules in both the ingress and egress table, because
996          * the return traffic needs to be followed. */
997         ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip", "ct_next;");
998         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip", "ct_next;");
999
1000         /* Ingress and Egress ACL Table (Priority 1).
1001          *
1002          * By default, traffic is allowed.  This is partially handled by
1003          * the Priority 0 ACL flows added earlier, but we also need to
1004          * commit IP flows.  This is because, while the initiator's
1005          * direction may not have any stateful rules, the server's may
1006          * and then its return traffic would not have an associated
1007          * conntrack entry and would return "+invalid". */
1008         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1009                       "ct_commit; next;");
1010         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1011                       "ct_commit; next;");
1012
1013         /* Ingress and Egress ACL Table (Priority 65535).
1014          *
1015          * Always drop traffic that's in an invalid state.  This is
1016          * enforced at a higher priority than ACLs can be defined. */
1017         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1018                       "ct.inv", "drop;");
1019         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1020                       "ct.inv", "drop;");
1021
1022         /* Ingress and Egress ACL Table (Priority 65535).
1023          *
1024          * Always allow traffic that is established to a committed
1025          * conntrack entry.  This is enforced at a higher priority than
1026          * ACLs can be defined. */
1027         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1028                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1029                       "next;");
1030         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1031                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1032                       "next;");
1033
1034         /* Ingress and Egress ACL Table (Priority 65535).
1035          *
1036          * Always allow traffic that is related to an existing conntrack
1037          * entry.  This is enforced at a higher priority than ACLs can
1038          * be defined.
1039          *
1040          * NOTE: This does not support related data sessions (e.g., a
1041          * dynamically negotiated FTP data channel), but it will let
1042          * through related traffic such as an ICMP Port Unreachable
1043          * generated by a non-listening UDP port.  */
1044         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1045                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1046                       "next;");
1047         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1048                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1049                       "next;");
1050     }
1051
1052     /* Ingress or Egress ACL Table (Various priorities). */
1053     for (size_t i = 0; i < od->nbs->n_acls; i++) {
1054         struct nbrec_acl *acl = od->nbs->acls[i];
1055         bool ingress = !strcmp(acl->direction, "from-lport");
1056         enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1057
1058         if (!strcmp(acl->action, "allow")) {
1059             /* If there are any stateful flows, we must commit even "allow"
1060              * actions.  This is because, while the initiator's
1061              * direction may not have any stateful rules, the server's
1062              * may and then its return traffic would not have an
1063              * associated conntrack entry and would return "+invalid". */
1064             const char *actions = has_stateful ? "ct_commit; next;" : "next;";
1065             ovn_lflow_add(lflows, od, stage,
1066                           acl->priority + OVN_ACL_PRI_OFFSET,
1067                           acl->match, actions);
1068         } else if (!strcmp(acl->action, "allow-related")) {
1069             struct ds match = DS_EMPTY_INITIALIZER;
1070
1071             /* Commit the connection tracking entry, which allows all
1072              * other traffic related to this entry to flow due to the
1073              * 65535 priority flow defined earlier. */
1074             ds_put_format(&match, "ct.new && (%s)", acl->match);
1075             ovn_lflow_add(lflows, od, stage,
1076                           acl->priority + OVN_ACL_PRI_OFFSET,
1077                           ds_cstr(&match), "ct_commit; next;");
1078
1079             ds_destroy(&match);
1080         } else if (!strcmp(acl->action, "drop")) {
1081             ovn_lflow_add(lflows, od, stage,
1082                           acl->priority + OVN_ACL_PRI_OFFSET,
1083                           acl->match, "drop;");
1084         } else if (!strcmp(acl->action, "reject")) {
1085             /* xxx Need to support "reject". */
1086             VLOG_INFO("reject is not a supported action");
1087             ovn_lflow_add(lflows, od, stage,
1088                           acl->priority + OVN_ACL_PRI_OFFSET,
1089                           acl->match, "drop;");
1090         }
1091     }
1092 }
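/* For example (illustrative values), a northbound ACL with
 * direction "from-lport", priority 1000, action "allow-related", and
 * match "tcp.dst == 80" yields an ingress ACL-stage flow at priority 2000
 * matching "ct.new && (tcp.dst == 80)" with actions "ct_commit; next;"; the
 * priority-65535 flows above then admit the established and related return
 * traffic for that connection. */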
1093
1094 static void
1095 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1096                     struct hmap *lflows, struct hmap *mcgroups)
1097 {
1098     /* This flow table structure is documented in ovn-northd(8), so please
1099      * update ovn-northd.8.xml if you change anything. */
1100
1101     /* Build pre-ACL and ACL tables for both ingress and egress.
1102      * Ingress tables 1 and 2.  Egress tables 0 and 1. */
1103     struct ovn_datapath *od;
1104     HMAP_FOR_EACH (od, key_node, datapaths) {
1105         if (!od->nbs) {
1106             continue;
1107         }
1108
1109         build_acls(od, lflows);
1110     }
1111
1112     /* Logical switch ingress table 0: Admission control framework (priority
1113      * 100). */
1114     HMAP_FOR_EACH (od, key_node, datapaths) {
1115         if (!od->nbs) {
1116             continue;
1117         }
1118
1119         /* Logical VLANs not supported. */
1120         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "vlan.present",
1121                       "drop;");
1122
1123         /* Broadcast/multicast source address is invalid. */
1124         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC, 100, "eth.src[40]",
1125                       "drop;");
1126
1127         /* Port security flows have priority 50 (see below) and will continue
1128          * to the next table if packet source is acceptable. */
1129     }
1130
1131     /* Logical switch ingress table 0: Ingress port security (priority 50). */
1132     struct ovn_port *op;
1133     HMAP_FOR_EACH (op, key_node, ports) {
1134         if (!op->nbs) {
1135             continue;
1136         }
1137
1138         if (!lport_is_enabled(op->nbs)) {
1139             /* Drop packets from disabled logical ports (since logical flow
1140              * tables are default-drop). */
1141             continue;
1142         }
1143
1144         struct ds match = DS_EMPTY_INITIALIZER;
1145         ds_put_format(&match, "inport == %s", op->json_key);
1146         build_port_security("eth.src",
1147                             op->nbs->port_security, op->nbs->n_port_security,
1148                             &match);
1149         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC, 50,
1150                       ds_cstr(&match), "next;");
1151         ds_destroy(&match);
1152     }
1153
1154     /* Ingress table 3: Destination lookup, broadcast and multicast handling
1155      * (priority 100). */
1156     HMAP_FOR_EACH (op, key_node, ports) {
1157         if (!op->nbs) {
1158             continue;
1159         }
1160
1161         if (lport_is_enabled(op->nbs)) {
1162             ovn_multicast_add(mcgroups, &mc_flood, op);
1163         }
1164     }
1165     HMAP_FOR_EACH (od, key_node, datapaths) {
1166         if (!od->nbs) {
1167             continue;
1168         }
1169
1170         ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1171                       "outport = \""MC_FLOOD"\"; output;");
1172     }
1173
1174     /* Ingress table 3: Destination lookup, unicast handling (priority 50). */
1175     HMAP_FOR_EACH (op, key_node, ports) {
1176         if (!op->nbs) {
1177             continue;
1178         }
1179
1180         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1181             struct eth_addr mac;
1182
1183             if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1184                 struct ds match, actions;
1185
1186                 ds_init(&match);
1187                 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1188                               ETH_ADDR_ARGS(mac));
1189
1190                 ds_init(&actions);
1191                 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1192                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1193                               ds_cstr(&match), ds_cstr(&actions));
1194                 ds_destroy(&actions);
1195                 ds_destroy(&match);
1196             } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1197                 if (lport_is_enabled(op->nbs)) {
1198                     ovn_multicast_add(mcgroups, &mc_unknown, op);
1199                     op->od->has_unknown = true;
1200                 }
1201             } else {
1202                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1203
1204                 VLOG_INFO_RL(&rl,
1205                              "%s: invalid syntax '%s' in addresses column",
1206                              op->nbs->name, op->nbs->addresses[i]);
1207             }
1208         }
1209     }
1210
1211     /* Ingress table 3: Destination lookup for unknown MACs (priority 0). */
1212     HMAP_FOR_EACH (od, key_node, datapaths) {
1213         if (!od->nbs) {
1214             continue;
1215         }
1216
1217         if (od->has_unknown) {
1218             ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1219                           "outport = \""MC_UNKNOWN"\"; output;");
1220         }
1221     }
1222
1223     /* Egress table 2: Egress port security multicast/broadcast (priority
1224      * 100). */
1225     HMAP_FOR_EACH (od, key_node, datapaths) {
1226         if (!od->nbs) {
1227             continue;
1228         }
1229
1230         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC, 100, "eth.mcast",
1231                       "output;");
1232     }
1233
1234     /* Egress table 2: Egress port security (priorities 50 and 150).
1235      *
1236      * Priority 50 rules implement port security for enabled logical ports.
1237      *
1238      * Priority 150 rules drop packets to disabled logical ports, so that they
1239      * don't even receive multicast or broadcast packets. */
1240     HMAP_FOR_EACH (op, key_node, ports) {
1241         if (!op->nbs) {
1242             continue;
1243         }
1244
1245         struct ds match = DS_EMPTY_INITIALIZER;
1246         ds_put_format(&match, "outport == %s", op->json_key);
1247         if (lport_is_enabled(op->nbs)) {
1248             build_port_security("eth.dst", op->nbs->port_security,
1249                                 op->nbs->n_port_security, &match);
1250             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 50,
1251                           ds_cstr(&match), "output;");
1252         } else {
1253             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC, 150,
1254                           ds_cstr(&match), "drop;");
1255         }
1256
1257         ds_destroy(&match);
1258     }
1259 }
1260
1261 static bool
1262 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
1263 {
1264     return !lrport->enabled || *lrport->enabled;
1265 }
1266
1267 static void
1268 add_route(struct hmap *lflows, struct ovn_datapath *od,
1269           ovs_be32 network, ovs_be32 mask, ovs_be32 gateway)
1270 {
1271     char *match = xasprintf("ip4.dst == "IP_FMT"/"IP_FMT,
1272                             IP_ARGS(network), IP_ARGS(mask));
1273
1274     struct ds actions = DS_EMPTY_INITIALIZER;
1275     ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
1276     if (gateway) {
1277         ds_put_format(&actions, IP_FMT, IP_ARGS(gateway));
1278     } else {
1279         ds_put_cstr(&actions, "ip4.dst");
1280     }
1281     ds_put_cstr(&actions, "; next;");
1282
1283     /* The priority here is calculated to implement longest-prefix-match
1284      * routing. */
1285     ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_ROUTING,
1286                   count_1bits(ntohl(mask)), match, ds_cstr(&actions));
1287     ds_destroy(&actions);
1288     free(match);
1289 }
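/* For example (illustrative addresses), a connected network 192.168.1.0/24
 * produces a flow at priority 24 (count_1bits of the mask) with match
 * "ip4.dst == 192.168.1.0/255.255.255.0" and actions
 * "ip.ttl--; reg0 = ip4.dst; next;", while a default gateway of 10.0.0.1
 * produces priority 0, match "ip4.dst == 0.0.0.0/0.0.0.0", and actions
 * "ip.ttl--; reg0 = 10.0.0.1; next;", so longer prefixes always win. */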
1290
1291 static void
1292 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
1293                     struct hmap *lflows)
1294 {
1295     /* This flow table structure is documented in ovn-northd(8), so please
1296      * update ovn-northd.8.xml if you change anything. */
1297
1298     /* XXX ICMP echo reply */
1299
1300     /* Logical router ingress table 0: Admission control framework. */
1301     struct ovn_datapath *od;
1302     HMAP_FOR_EACH (od, key_node, datapaths) {
1303         if (!od->nbr) {
1304             continue;
1305         }
1306
1307         /* Logical VLANs not supported.
1308          * Broadcast/multicast source address is invalid. */
1309         ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
1310                       "vlan.present || eth.src[40]", "drop;");
1311     }
1312
1313     /* Logical router ingress table 0: match (priority 50). */
1314     struct ovn_port *op;
1315     HMAP_FOR_EACH (op, key_node, ports) {
1316         if (!op->nbr) {
1317             continue;
1318         }
1319
1320         if (!lrport_is_enabled(op->nbr)) {
1321             /* Drop packets from disabled logical ports (since logical flow
1322              * tables are default-drop). */
1323             continue;
1324         }
1325
1326         char *match = xasprintf(
1327             "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
1328             ETH_ADDR_ARGS(op->mac), op->json_key);
1329         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
1330                       match, "next;");
1331         free(match);
1332     }
1333
1334     /* Logical router ingress table 1: IP Input. */
1335     HMAP_FOR_EACH (od, key_node, datapaths) {
1336         if (!od->nbr) {
1337             continue;
1338         }
1339
1340         /* L3 admission control: drop multicast and broadcast source, localhost
1341          * source or destination, and zero network source or destination
1342          * (priority 100). */
1343         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
1344                       "ip4.mcast || "
1345                       "ip4.src == 255.255.255.255 || "
1346                       "ip4.src == 127.0.0.0/8 || "
1347                       "ip4.dst == 127.0.0.0/8 || "
1348                       "ip4.src == 0.0.0.0/8 || "
1349                       "ip4.dst == 0.0.0.0/8",
1350                       "drop;");
1351
1352         /* Drop Ethernet local broadcast.  By definition this traffic should
1353          * not be forwarded. */
1354         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1355                       "eth.bcast", "drop;");
1356
1357         /* Drop IP multicast. */
1358         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
1359                       "ip4.mcast", "drop;");
1360
1361         /* TTL discard.
1362          *
1363          * XXX Need to send ICMP time exceeded if !ip.later_frag. */
1364         char *match = xasprintf("ip4 && ip.ttl == {0, 1}");
1365         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30, match, "drop;");
1366         free(match);
1367
1368         /* Pass other traffic not already handled to the next table for
1369          * routing. */
1370         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
1371     }
1372
1373     HMAP_FOR_EACH (op, key_node, ports) {
1374         if (!op->nbr) {
1375             continue;
1376         }
1377
1378         /* L3 admission control: drop packets that originate from an IP address
1379          * owned by the router or a broadcast address known to the router
1380          * (priority 100). */
1381         char *match = xasprintf("ip4.src == {"IP_FMT", "IP_FMT"}",
1382                                 IP_ARGS(op->ip), IP_ARGS(op->bcast));
1383         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
1384                       match, "drop;");
1385         free(match);
1386
1387         /* ARP reply.  These flows reply to ARP requests for the router's own
1388          * IP address. */
1389         match = xasprintf(
1390             "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
1391             op->json_key, IP_ARGS(op->ip));
1392         char *actions = xasprintf(
1393             "eth.dst = eth.src; "
1394             "eth.src = "ETH_ADDR_FMT"; "
1395             "arp.op = 2; /* ARP reply */ "
1396             "arp.tha = arp.sha; "
1397             "arp.sha = "ETH_ADDR_FMT"; "
1398             "arp.tpa = arp.spa; "
1399             "arp.spa = "IP_FMT"; "
1400             "outport = %s; "
1401             "inport = \"\"; /* Allow sending out inport. */ "
1402             "output;",
1403             ETH_ADDR_ARGS(op->mac),
1404             ETH_ADDR_ARGS(op->mac),
1405             IP_ARGS(op->ip),
1406             op->json_key);
1407         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
1408                       match, actions);
1409         free(match);
1410         free(actions);
1411
1412         /* Drop IP traffic to this router. */
1413         match = xasprintf("ip4.dst == "IP_FMT, IP_ARGS(op->ip));
1414         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
1415                       match, "drop;");
1416         free(match);
1417     }
1418
1419     /* Logical router ingress table 2: IP Routing.
1420      *
1421      * A packet that arrives at this table is an IP packet that should be
1422      * routed to the address in ip4.dst. This table sets reg0 to the next-hop
1423      * IP address (leaving ip4.dst, the packet's final destination, unchanged)
1424      * and advances to the next table for ARP resolution. */
1425     HMAP_FOR_EACH (op, key_node, ports) {
1426         if (!op->nbr) {
1427             continue;
1428         }
1429
1430         add_route(lflows, op->od, op->network, op->mask, 0);
1431     }
1432     HMAP_FOR_EACH (od, key_node, datapaths) {
1433         if (!od->nbr) {
1434             continue;
1435         }
1436
1437         if (od->gateway) {
1438             add_route(lflows, od, 0, 0, od->gateway);
1439         }
1440     }
1441     /* XXX destination unreachable */
1442
1443     /* Logical router ingress table 3: ARP Resolution.
1444      *
1445      * Any packet that reaches this table is an IP packet whose next-hop IP
1446      * address is in reg0. (ip4.dst is the final destination.) This table
1447      * resolves the IP address in reg0 into an output port in outport and an
1448      * Ethernet address in eth.dst. */
1449     HMAP_FOR_EACH (op, key_node, ports) {
1450         if (op->nbr) {
1451             /* XXX ARP for neighboring router */
1452         } else if (op->od->n_router_ports) {
1453             for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1454                 struct eth_addr ea;
1455                 ovs_be32 ip;
1456
1457                 if (ovs_scan(op->nbs->addresses[i],
1458                              ETH_ADDR_SCAN_FMT" "IP_SCAN_FMT,
1459                              ETH_ADDR_SCAN_ARGS(ea), IP_SCAN_ARGS(&ip))) {
1460                     for (size_t j = 0; j < op->od->n_router_ports; j++) {
1461                         /* Get the Logical_Router_Port that the Logical_Port is
1462                          * connected to, as 'peer'. */
1463                         const char *peer_name = smap_get(
1464                             &op->od->router_ports[j]->nbs->options,
1465                             "router-port");
1466                         if (!peer_name) {
1467                             continue;
1468                         }
1469
1470                         struct ovn_port *peer
1471                             = ovn_port_find(ports, peer_name);
1472                         if (!peer || !peer->nbr) {
1473                             continue;
1474                         }
1475
1476                         /* Make sure that 'ip' is in 'peer''s network. */
1477                         if ((ip ^ peer->network) & peer->mask) {
1478                             continue;
1479                         }
1480
1481                         char *match = xasprintf("reg0 == "IP_FMT, IP_ARGS(ip));
1482                         char *actions = xasprintf("eth.src = "ETH_ADDR_FMT"; "
1483                                                   "eth.dst = "ETH_ADDR_FMT"; "
1484                                                   "outport = %s; "
1485                                                   "output;",
1486                                                   ETH_ADDR_ARGS(peer->mac),
1487                                                   ETH_ADDR_ARGS(ea),
1488                                                   peer->json_key);
1489                         ovn_lflow_add(lflows, peer->od,
1490                                       S_ROUTER_IN_ARP, 200, match, actions);
1491                         free(actions);
1492                         free(match);
1493                         break;
1494                     }
1495                 }
1496             }
1497         }
1498     }
1499
1500     /* Logical router egress table 0: Delivery (priority 100).
1501      *
1502      * Priority 100 rules deliver packets to enabled logical ports. */
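    /* For example, an enabled router port whose json_key is "lrp0" (a
     * hypothetical name) gets a priority-100 flow with match
     * 'outport == "lrp0"' and actions "output;". */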
1503     HMAP_FOR_EACH (op, key_node, ports) {
1504         if (!op->nbr) {
1505             continue;
1506         }
1507
1508         if (!lrport_is_enabled(op->nbr)) {
1509             /* Drop packets to disabled logical ports (since logical flow
1510              * tables are default-drop). */
1511             continue;
1512         }
1513
1514         char *match = xasprintf("outport == %s", op->json_key);
1515         ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
1516                       match, "output;");
1517         free(match);
1518     }
1519 }
1520
1521 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
1522  * constructing their contents based on the OVN_NB database. */
1523 static void
1524 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
1525              struct hmap *ports)
1526 {
1527     struct hmap lflows = HMAP_INITIALIZER(&lflows);
1528     struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
1529
1530     build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
1531     build_lrouter_flows(datapaths, ports, &lflows);
1532
1533     /* Push changes to the Logical_Flow table to database. */
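    /* This is a two-pass sync: the first pass walks the existing rows,
     * keeping each row that still matches a computed flow (and dropping that
     * flow from 'lflows'), deleting rows that do not; the second pass inserts
     * rows for the computed flows that remain. */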
1534     const struct sbrec_logical_flow *sbflow, *next_sbflow;
1535     SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
1536         struct ovn_datapath *od
1537             = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
1538         if (!od) {
1539             sbrec_logical_flow_delete(sbflow);
1540             continue;
1541         }
1542
1543         enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
1544         enum ovn_pipeline pipeline
1545             = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
1546         struct ovn_lflow *lflow = ovn_lflow_find(
1547             &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
1548             sbflow->priority, sbflow->match, sbflow->actions);
1549         if (lflow) {
1550             ovn_lflow_destroy(&lflows, lflow);
1551         } else {
1552             sbrec_logical_flow_delete(sbflow);
1553         }
1554     }
1555     struct ovn_lflow *lflow, *next_lflow;
1556     HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
1557         enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
1558         uint8_t table = ovn_stage_get_table(lflow->stage);
1559
1560         sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
1561         sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
1562         sbrec_logical_flow_set_pipeline(
1563             sbflow, pipeline == P_IN ? "ingress" : "egress");
1564         sbrec_logical_flow_set_table_id(sbflow, table);
1565         sbrec_logical_flow_set_priority(sbflow, lflow->priority);
1566         sbrec_logical_flow_set_match(sbflow, lflow->match);
1567         sbrec_logical_flow_set_actions(sbflow, lflow->actions);
1568
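        /* Record the human-readable stage name in external_ids as a
         * debugging aid for anyone reading the southbound flow table. */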
1569         const struct smap ids = SMAP_CONST1(&ids, "stage-name",
1570                                             ovn_stage_to_str(lflow->stage));
1571         sbrec_logical_flow_set_external_ids(sbflow, &ids);
1572
1573         ovn_lflow_destroy(&lflows, lflow);
1574     }
1575     hmap_destroy(&lflows);
1576
1577     /* Push changes to the Multicast_Group table to database. */
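    /* Synced the same way as Logical_Flow above: rows that still correspond
     * to a computed group are updated and kept, stale rows are deleted, and
     * new rows are inserted for the groups that remain. */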
1578     const struct sbrec_multicast_group *sbmc, *next_sbmc;
1579     SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
1580         struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
1581                                                           sbmc->datapath);
1582         if (!od) {
1583             sbrec_multicast_group_delete(sbmc);
1584             continue;
1585         }
1586
1587         struct multicast_group group = { .name = sbmc->name,
1588                                          .key = sbmc->tunnel_key };
1589         struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
1590         if (mc) {
1591             ovn_multicast_update_sbrec(mc, sbmc);
1592             ovn_multicast_destroy(&mcgroups, mc);
1593         } else {
1594             sbrec_multicast_group_delete(sbmc);
1595         }
1596     }
1597     struct ovn_multicast *mc, *next_mc;
1598     HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
1599         sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
1600         sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
1601         sbrec_multicast_group_set_name(sbmc, mc->group->name);
1602         sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
1603         ovn_multicast_update_sbrec(mc, sbmc);
1604         ovn_multicast_destroy(&mcgroups, mc);
1605     }
1606     hmap_destroy(&mcgroups);
1607 }
1608 \f
1609 static void
1610 ovnnb_db_changed(struct northd_context *ctx)
1611 {
1612     VLOG_DBG("ovn-nb db contents have changed.");
1613
1614     struct hmap datapaths, ports;
1615     build_datapaths(ctx, &datapaths);
1616     build_ports(ctx, &datapaths, &ports);
1617     build_lflows(ctx, &datapaths, &ports);
1618
1619     struct ovn_datapath *dp, *next_dp;
1620     HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
1621         ovn_datapath_destroy(&datapaths, dp);
1622     }
1623     hmap_destroy(&datapaths);
1624
1625     struct ovn_port *port, *next_port;
1626     HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
1627         ovn_port_destroy(&ports, port);
1628     }
1629     hmap_destroy(&ports);
1630 }
1631
1632 /*
1633  * The only change we get notified about is if the 'chassis' column of the
1634  * 'Port_Binding' table changes.  A nonempty column means the logical port
1635  * should be set 'up' in the northbound DB; an empty one means 'down'.
1636  */
1637 static void
1638 ovnsb_db_changed(struct northd_context *ctx)
1639 {
1640     struct hmap lports_hmap;
1641     const struct sbrec_port_binding *sb;
1642     const struct nbrec_logical_port *nb;
1643
1644     struct lport_hash_node {
1645         struct hmap_node node;
1646         const struct nbrec_logical_port *nb;
1647     } *hash_node, *hash_node_next;
1648
1649     VLOG_DBG("Recalculating port up states for ovn-nb db.");
1650
1651     hmap_init(&lports_hmap);
1652
1653     NBREC_LOGICAL_PORT_FOR_EACH (nb, ctx->ovnnb_idl) {
1654         hash_node = xzalloc(sizeof *hash_node);
1655         hash_node->nb = nb;
1656         hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
1657     }
1658
1659     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
1660         nb = NULL;
1661         HMAP_FOR_EACH_WITH_HASH (hash_node, node,
1662                                  hash_string(sb->logical_port, 0),
1663                                  &lports_hmap) {
1664             if (!strcmp(sb->logical_port, hash_node->nb->name)) {
1665                 nb = hash_node->nb;
1666                 break;
1667             }
1668         }
1669
1670         if (!nb) {
1671             /* The logical port doesn't exist for this port binding.  This can
1672              * happen under normal circumstances when ovn-northd hasn't gotten
1673              * around to pruning the Port_Binding yet. */
1674             continue;
1675         }
1676
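        /* A nonempty 'chassis' means the binding has been claimed, so the
         * logical port is up; write to the NB db only when the stored value
         * actually needs to change. */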
1677         if (sb->chassis && (!nb->up || !*nb->up)) {
1678             bool up = true;
1679             nbrec_logical_port_set_up(nb, &up, 1);
1680         } else if (!sb->chassis && (!nb->up || *nb->up)) {
1681             bool up = false;
1682             nbrec_logical_port_set_up(nb, &up, 1);
1683         }
1684     }
1685
1686     HMAP_FOR_EACH_SAFE (hash_node, hash_node_next, node, &lports_hmap) {
1687         hmap_remove(&lports_hmap, &hash_node->node);
1688         free(hash_node);
1689     }
1690     hmap_destroy(&lports_hmap);
1691 }
1692 \f
1693
1694 static char *default_db_;
1695
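/* Returns the default database socket, "unix:<ovs_rundir>/db.sock". */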
1696 static const char *
1697 default_db(void)
1698 {
1699     if (!default_db_) {
1700         default_db_ = xasprintf("unix:%s/db.sock", ovs_rundir());
1701     }
1702     return default_db_;
1703 }
1704
1705 static void
1706 parse_options(int argc, char *argv[])
1707 {
1708     enum {
1709         DAEMON_OPTION_ENUMS,
1710         VLOG_OPTION_ENUMS,
1711     };
1712     static const struct option long_options[] = {
1713         {"ovnsb-db", required_argument, NULL, 'd'},
1714         {"ovnnb-db", required_argument, NULL, 'D'},
1715         {"help", no_argument, NULL, 'h'},
1716         {"options", no_argument, NULL, 'o'},
1717         {"version", no_argument, NULL, 'V'},
1718         DAEMON_LONG_OPTIONS,
1719         VLOG_LONG_OPTIONS,
1720         STREAM_SSL_LONG_OPTIONS,
1721         {NULL, 0, NULL, 0},
1722     };
1723     char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
1724
1725     for (;;) {
1726         int c;
1727
1728         c = getopt_long(argc, argv, short_options, long_options, NULL);
1729         if (c == -1) {
1730             break;
1731         }
1732
1733         switch (c) {
1734         DAEMON_OPTION_HANDLERS;
1735         VLOG_OPTION_HANDLERS;
1736         STREAM_SSL_OPTION_HANDLERS;
1737
1738         case 'd':
1739             ovnsb_db = optarg;
1740             break;
1741
1742         case 'D':
1743             ovnnb_db = optarg;
1744             break;
1745
1746         case 'h':
1747             usage();
1748             exit(EXIT_SUCCESS);
1749
1750         case 'o':
1751             ovs_cmdl_print_options(long_options);
1752             exit(EXIT_SUCCESS);
1753
1754         case 'V':
1755             ovs_print_version(0, 0);
1756             exit(EXIT_SUCCESS);
1757
1758         default:
1759             break;
1760         }
1761     }
1762
1763     if (!ovnsb_db) {
1764         ovnsb_db = default_db();
1765     }
1766
1767     if (!ovnnb_db) {
1768         ovnnb_db = default_db();
1769     }
1770
1771     free(short_options);
1772 }
1773
1774 static void
1775 add_column_noalert(struct ovsdb_idl *idl,
1776                    const struct ovsdb_idl_column *column)
1777 {
1778     ovsdb_idl_add_column(idl, column);
1779     ovsdb_idl_omit_alert(idl, column);
1780 }
1781
1782 int
1783 main(int argc, char *argv[])
1784 {
1785     extern struct vlog_module VLM_reconnect;
1786     struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
1787     unsigned int ovnnb_seqno, ovn_seqno;
1788     int res = EXIT_SUCCESS;
1789     struct northd_context ctx = {
1790         .ovnsb_txn = NULL,
1791     };
1792     bool ovnnb_changes_pending = false;
1793     bool ovn_changes_pending = false;
1794     struct unixctl_server *unixctl;
1795     int retval;
1796     bool exiting;
1797
1798     fatal_ignore_sigpipe();
1799     set_program_name(argv[0]);
1800     service_start(&argc, &argv);
1801     vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
1802     vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
1803     parse_options(argc, argv);
1804
1805     daemonize_start(false);
1806
1807     retval = unixctl_server_create(NULL, &unixctl);
1808     if (retval) {
1809         exit(EXIT_FAILURE);
1810     }
1811     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
1812
1813     daemonize_complete();
1814
1815     nbrec_init();
1816     sbrec_init();
1817
1818     /* We want to detect all changes to the ovn-nb db. */
1819     ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
1820             &nbrec_idl_class, true, true);
1821
1822     ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
1823             &sbrec_idl_class, false, true);
1824
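    /* ovn-northd mostly writes to the OVN_SB db, so its columns are added
     * without alerts; the exception is Port_Binding's "chassis" column, whose
     * changes drive recalculating the logical port 'up' state. */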
1825     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_logical_flow);
1826     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_logical_datapath);
1827     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_pipeline);
1828     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_table_id);
1829     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_priority);
1830     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_match);
1831     add_column_noalert(ovnsb_idl, &sbrec_logical_flow_col_actions);
1832
1833     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_multicast_group);
1834     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_datapath);
1835     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_tunnel_key);
1836     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_name);
1837     add_column_noalert(ovnsb_idl, &sbrec_multicast_group_col_ports);
1838
1839     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_datapath_binding);
1840     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_tunnel_key);
1841     add_column_noalert(ovnsb_idl, &sbrec_datapath_binding_col_external_ids);
1842
1843     ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_port_binding);
1844     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_datapath);
1845     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_logical_port);
1846     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tunnel_key);
1847     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_parent_port);
1848     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_tag);
1849     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_type);
1850     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_options);
1851     add_column_noalert(ovnsb_idl, &sbrec_port_binding_col_mac);
1852     ovsdb_idl_add_column(ovnsb_idl, &sbrec_port_binding_col_chassis);
1853
1854     /*
1855      * The main loop runs the IDLs and waits for their seqnos to change,
1856      * which indicates that the contents of a db have changed.
1857      *
1858      * If the contents of the ovn-nb db change, the mappings to the ovn-sb
1859      * db must be recalculated.
1860      *
1861      * If the contents of the ovn-sb db change, it means the 'up' state of
1862      * a port may have changed, as that's the only type of change that
1863      * ovn-northd watches for.
1864      */
1865
1866     ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1867     ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1868     exiting = false;
1869     while (!exiting) {
1870         ovsdb_idl_run(ovnnb_idl);
1871         ovsdb_idl_run(ovnsb_idl);
1872         unixctl_server_run(unixctl);
1873
1874         if (!ovsdb_idl_is_alive(ovnnb_idl)) {
1875             int retval = ovsdb_idl_get_last_error(ovnnb_idl);
1876             VLOG_ERR("%s: database connection failed (%s)",
1877                     ovnnb_db, ovs_retval_to_string(retval));
1878             res = EXIT_FAILURE;
1879             break;
1880         }
1881
1882         if (!ovsdb_idl_is_alive(ovnsb_idl)) {
1883             int retval = ovsdb_idl_get_last_error(ovnsb_idl);
1884             VLOG_ERR("%s: database connection failed (%s)",
1885                     ovnsb_db, ovs_retval_to_string(retval));
1886             res = EXIT_FAILURE;
1887             break;
1888         }
1889
1890         if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
1891             ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
1892             ovnnb_changes_pending = true;
1893         }
1894
1895         if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
1896             ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
1897             ovn_changes_pending = true;
1898         }
1899
1900         /*
1901          * If there are any pending changes, we delay recalculating the
1902          * necessary updates until after an existing transaction finishes.
1903          * This avoids the possibility of rapid updates causing ovn-northd to
1904          * never be able to successfully make the corresponding updates to the
1905          * other db.  Instead, pending changes are batched up until the next
1906          * time we get a chance to calculate the new state and apply it.
1907          */
1908
1909         if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
1910             /*
1911              * The OVN-nb db contents have changed, so create a transaction for
1912              * updating the OVN-sb DB.
1913              */
1914             ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
1915             ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
1916                                       "ovn-northd: northbound db changed");
1917             ovnnb_db_changed(&ctx);
1918             ovnnb_changes_pending = false;
1919         }
1920
1921         if (ovn_changes_pending && !ctx.ovnnb_txn) {
1922             /*
1923              * The OVN-sb db contents have changed, so create a transaction for
1924              * updating the northbound DB.
1925              */
1926             ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
1927             ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
1928                                       "ovn-northd: southbound db changed");
1929             ovnsb_db_changed(&ctx);
1930             ovn_changes_pending = false;
1931         }
1932
1933         if (ctx.ovnnb_txn) {
1934             enum ovsdb_idl_txn_status txn_status;
1935             txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
1936             switch (txn_status) {
1937             case TXN_UNCOMMITTED:
1938             case TXN_INCOMPLETE:
1939                 /* Come back around and try to commit this transaction again */
1940                 break;
1941             case TXN_ABORTED:
1942             case TXN_TRY_AGAIN:
1943             case TXN_NOT_LOCKED:
1944             case TXN_ERROR:
1945                 /* Something went wrong, so try creating a new transaction. */
1946                 ovn_changes_pending = true;
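                /* Fall through to destroy the failed transaction. */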
1947             case TXN_UNCHANGED:
1948             case TXN_SUCCESS:
1949                 ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
1950                 ctx.ovnnb_txn = NULL;
1951             }
1952         }
1953
1954         if (ctx.ovnsb_txn) {
1955             enum ovsdb_idl_txn_status txn_status;
1956             txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
1957             switch (txn_status) {
1958             case TXN_UNCOMMITTED:
1959             case TXN_INCOMPLETE:
1960                 /* Come back around and try to commit this transaction again */
1961                 break;
1962             case TXN_ABORTED:
1963             case TXN_TRY_AGAIN:
1964             case TXN_NOT_LOCKED:
1965             case TXN_ERROR:
1966                 /* Something went wrong, so try creating a new transaction. */
1967                 ovnnb_changes_pending = true;
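                /* Fall through to destroy the failed transaction. */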
1968             case TXN_UNCHANGED:
1969             case TXN_SUCCESS:
1970                 ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
1971                 ctx.ovnsb_txn = NULL;
1972             }
1973         }
1974
1975         if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
1976                 ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
1977             ovsdb_idl_wait(ovnnb_idl);
1978             ovsdb_idl_wait(ovnsb_idl);
1979             if (ctx.ovnnb_txn) {
1980                 ovsdb_idl_txn_wait(ctx.ovnnb_txn);
1981             }
1982             if (ctx.ovnsb_txn) {
1983                 ovsdb_idl_txn_wait(ctx.ovnsb_txn);
1984             }
1985             unixctl_server_wait(unixctl);
1986             if (exiting) {
1987                 poll_immediate_wake();
1988             }
1989             poll_block();
1990         }
1991         if (should_service_stop()) {
1992             exiting = true;
1993         }
1994     }
1995
1996     unixctl_server_destroy(unixctl);
1997     ovsdb_idl_destroy(ovnsb_idl);
1998     ovsdb_idl_destroy(ovnnb_idl);
1999     service_stop();
2000
2001     free(default_db_);
2002
2003     exit(res);
2004 }
2005
2006 static void
2007 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
2008                 const char *argv[] OVS_UNUSED, void *exiting_)
2009 {
2010     bool *exiting = exiting_;
2011     *exiting = true;
2012
2013     unixctl_command_reply(conn, NULL);
2014 }