1 /*
2  * Licensed under the Apache License, Version 2.0 (the "License");
3  * you may not use this file except in compliance with the License.
4  * You may obtain a copy of the License at:
5  *
6  *     http://www.apache.org/licenses/LICENSE-2.0
7  *
8  * Unless required by applicable law or agreed to in writing, software
9  * distributed under the License is distributed on an "AS IS" BASIS,
10  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  * See the License for the specific language governing permissions and
12  * limitations under the License.
13  */
14
15 #include <config.h>
16
17 #include <getopt.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20
21 #include "command-line.h"
22 #include "daemon.h"
23 #include "dirs.h"
24 #include "openvswitch/dynamic-string.h"
25 #include "fatal-signal.h"
26 #include "hash.h"
27 #include "hmap.h"
28 #include "json.h"
29 #include "ovn/lib/lex.h"
30 #include "ovn/lib/ovn-nb-idl.h"
31 #include "ovn/lib/ovn-sb-idl.h"
32 #include "ovn/lib/ovn-util.h"
33 #include "packets.h"
34 #include "poll-loop.h"
35 #include "smap.h"
36 #include "sset.h"
37 #include "stream.h"
38 #include "stream-ssl.h"
39 #include "unixctl.h"
40 #include "util.h"
41 #include "uuid.h"
42 #include "openvswitch/vlog.h"
43
44 VLOG_DEFINE_THIS_MODULE(ovn_northd);
45
46 static unixctl_cb_func ovn_northd_exit;
47
48 struct northd_context {
49     struct ovsdb_idl *ovnnb_idl;
50     struct ovsdb_idl *ovnsb_idl;
51     struct ovsdb_idl_txn *ovnnb_txn;
52     struct ovsdb_idl_txn *ovnsb_txn;
53 };
54
55 static const char *ovnnb_db;
56 static const char *ovnsb_db;
57
58 static const char *default_nb_db(void);
59 static const char *default_sb_db(void);
60 \f
61 /* Pipeline stages. */
62
63 /* The two pipelines in an OVN logical flow table. */
64 enum ovn_pipeline {
65     P_IN,                       /* Ingress pipeline. */
66     P_OUT                       /* Egress pipeline. */
67 };
68
69 /* The two purposes for which ovn-northd uses OVN logical datapaths. */
70 enum ovn_datapath_type {
71     DP_SWITCH,                  /* OVN logical switch. */
72     DP_ROUTER                   /* OVN logical router. */
73 };
74
75 /* Returns an "enum ovn_stage" built from the arguments.
76  *
77  * (It's better to use ovn_stage_build() for type-safety reasons, but inline
78  * functions can't be used in enums or switch cases.) */
79 #define OVN_STAGE_BUILD(DP_TYPE, PIPELINE, TABLE) \
80     (((DP_TYPE) << 9) | ((PIPELINE) << 8) | (TABLE))
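/* For example, OVN_STAGE_BUILD(DP_ROUTER, P_OUT, 1) encodes
 * S_ROUTER_OUT_DELIVERY as (1 << 9) | (1 << 8) | 1. */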
81
82 /* A stage within an OVN logical switch or router.
83  *
84  * An "enum ovn_stage" indicates whether the stage is part of a logical switch
85  * or router, whether the stage is part of the ingress or egress pipeline, and
86  * the table within that pipeline.  The first three components are combined to
87  * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2,
88  * S_ROUTER_OUT_DELIVERY. */
89 enum ovn_stage {
90 #define PIPELINE_STAGES                                               \
91     /* Logical switch ingress stages. */                              \
92     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_L2,    0, "ls_in_port_sec_l2")     \
93     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_IP,    1, "ls_in_port_sec_ip")     \
94     PIPELINE_STAGE(SWITCH, IN,  PORT_SEC_ND,    2, "ls_in_port_sec_nd")     \
95     PIPELINE_STAGE(SWITCH, IN,  PRE_ACL,        3, "ls_in_pre_acl")      \
96     PIPELINE_STAGE(SWITCH, IN,  PRE_LB,         4, "ls_in_pre_lb")         \
97     PIPELINE_STAGE(SWITCH, IN,  PRE_STATEFUL,   5, "ls_in_pre_stateful")    \
98     PIPELINE_STAGE(SWITCH, IN,  ACL,            6, "ls_in_acl")          \
99     PIPELINE_STAGE(SWITCH, IN,  LB,             7, "ls_in_lb")           \
100     PIPELINE_STAGE(SWITCH, IN,  STATEFUL,       8, "ls_in_stateful")     \
101     PIPELINE_STAGE(SWITCH, IN,  ARP_ND_RSP,     9, "ls_in_arp_rsp")      \
102     PIPELINE_STAGE(SWITCH, IN,  L2_LKUP,       10, "ls_in_l2_lkup")      \
103                                                                       \
104     /* Logical switch egress stages. */                               \
105     PIPELINE_STAGE(SWITCH, OUT, PRE_LB,       0, "ls_out_pre_lb")     \
106     PIPELINE_STAGE(SWITCH, OUT, PRE_ACL,      1, "ls_out_pre_acl")     \
107     PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful")  \
108     PIPELINE_STAGE(SWITCH, OUT, LB,           3, "ls_out_lb")            \
109     PIPELINE_STAGE(SWITCH, OUT, ACL,          4, "ls_out_acl")            \
110     PIPELINE_STAGE(SWITCH, OUT, STATEFUL,     5, "ls_out_stateful")       \
111     PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP,  6, "ls_out_port_sec_ip")    \
112     PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2,  7, "ls_out_port_sec_l2")    \
113                                                                       \
114     /* Logical router ingress stages. */                              \
115     PIPELINE_STAGE(ROUTER, IN,  ADMISSION,   0, "lr_in_admission")    \
116     PIPELINE_STAGE(ROUTER, IN,  IP_INPUT,    1, "lr_in_ip_input")     \
117     PIPELINE_STAGE(ROUTER, IN,  UNSNAT,      2, "lr_in_unsnat")       \
118     PIPELINE_STAGE(ROUTER, IN,  DNAT,        3, "lr_in_dnat")         \
119     PIPELINE_STAGE(ROUTER, IN,  IP_ROUTING,  4, "lr_in_ip_routing")   \
120     PIPELINE_STAGE(ROUTER, IN,  ARP_RESOLVE, 5, "lr_in_arp_resolve")  \
121     PIPELINE_STAGE(ROUTER, IN,  ARP_REQUEST, 6, "lr_in_arp_request")  \
122                                                                       \
123     /* Logical router egress stages. */                               \
124     PIPELINE_STAGE(ROUTER, OUT, SNAT,      0, "lr_out_snat")          \
125     PIPELINE_STAGE(ROUTER, OUT, DELIVERY,  1, "lr_out_delivery")
126
127 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)   \
128     S_##DP_TYPE##_##PIPELINE##_##STAGE                          \
129         = OVN_STAGE_BUILD(DP_##DP_TYPE, P_##PIPELINE, TABLE),
130     PIPELINE_STAGES
131 #undef PIPELINE_STAGE
132 };
133
134 /* Due to various hard-coded priorities needed to implement ACLs, the
135  * northbound database supports a smaller range of ACL priorities than
136  * are available to logical flows.  This value is added to an ACL
137  * priority to determine the ACL's logical flow priority. */
138 #define OVN_ACL_PRI_OFFSET 1000
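/* For example, an ACL configured at priority 100 in the northbound database
 * becomes a logical flow at priority 100 + OVN_ACL_PRI_OFFSET = 1100. */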
139
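/* Register bits used to pass connection-tracking state between logical switch
 * pipeline stages.  REGBIT_CONNTRACK_DEFRAG, for example, is set by the
 * pre-ACL and pre-LB stages to tell the pre-stateful stage to send the packet
 * through ct() for defragmentation. */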
140 #define REGBIT_CONNTRACK_DEFRAG "reg0[0]"
141 #define REGBIT_CONNTRACK_COMMIT "reg0[1]"
142 #define REGBIT_CONNTRACK_NAT    "reg0[2]"
143
144 /* Returns an "enum ovn_stage" built from the arguments. */
145 static enum ovn_stage
146 ovn_stage_build(enum ovn_datapath_type dp_type, enum ovn_pipeline pipeline,
147                 uint8_t table)
148 {
149     return OVN_STAGE_BUILD(dp_type, pipeline, table);
150 }
151
152 /* Returns the pipeline to which 'stage' belongs. */
153 static enum ovn_pipeline
154 ovn_stage_get_pipeline(enum ovn_stage stage)
155 {
156     return (stage >> 8) & 1;
157 }
158
159 /* Returns the table to which 'stage' belongs. */
160 static uint8_t
161 ovn_stage_get_table(enum ovn_stage stage)
162 {
163     return stage & 0xff;
164 }
165
166 /* Returns a string name for 'stage'. */
167 static const char *
168 ovn_stage_to_str(enum ovn_stage stage)
169 {
170     switch (stage) {
171 #define PIPELINE_STAGE(DP_TYPE, PIPELINE, STAGE, TABLE, NAME)       \
172         case S_##DP_TYPE##_##PIPELINE##_##STAGE: return NAME;
173     PIPELINE_STAGES
174 #undef PIPELINE_STAGE
175         default: return "<unknown>";
176     }
177 }
178 \f
179 static void
180 usage(void)
181 {
182     printf("\
183 %s: OVN northbound management daemon\n\
184 usage: %s [OPTIONS]\n\
185 \n\
186 Options:\n\
187   --ovnnb-db=DATABASE       connect to ovn-nb database at DATABASE\n\
188                             (default: %s)\n\
189   --ovnsb-db=DATABASE       connect to ovn-sb database at DATABASE\n\
190                             (default: %s)\n\
191   -h, --help                display this help message\n\
192   -o, --options             list available options\n\
193   -V, --version             display version information\n\
194 ", program_name, program_name, default_nb_db(), default_sb_db());
195     daemon_usage();
196     vlog_usage();
197     stream_usage("database", true, true, false);
198 }
199 \f
200 struct tnlid_node {
201     struct hmap_node hmap_node;
202     uint32_t tnlid;
203 };
204
205 static void
206 destroy_tnlids(struct hmap *tnlids)
207 {
208     struct tnlid_node *node;
209     HMAP_FOR_EACH_POP (node, hmap_node, tnlids) {
210         free(node);
211     }
212     hmap_destroy(tnlids);
213 }
214
215 static void
216 add_tnlid(struct hmap *set, uint32_t tnlid)
217 {
218     struct tnlid_node *node = xmalloc(sizeof *node);
219     hmap_insert(set, &node->hmap_node, hash_int(tnlid, 0));
220     node->tnlid = tnlid;
221 }
222
223 static bool
224 tnlid_in_use(const struct hmap *set, uint32_t tnlid)
225 {
226     const struct tnlid_node *node;
227     HMAP_FOR_EACH_IN_BUCKET (node, hmap_node, hash_int(tnlid, 0), set) {
228         if (node->tnlid == tnlid) {
229             return true;
230         }
231     }
232     return false;
233 }
234
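/* Searches 'set' for an unused tunnel key, starting just after '*hint' and
 * wrapping around from 'max' back to 1.  On success, adds the key to 'set',
 * stores it in '*hint', and returns it.  Returns 0 (never a valid key) if
 * every key in the range 1...max is already in use. */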
235 static uint32_t
236 allocate_tnlid(struct hmap *set, const char *name, uint32_t max,
237                uint32_t *hint)
238 {
239     for (uint32_t tnlid = *hint + 1; tnlid != *hint;
240          tnlid = tnlid + 1 <= max ? tnlid + 1 : 1) {
241         if (!tnlid_in_use(set, tnlid)) {
242             add_tnlid(set, tnlid);
243             *hint = tnlid;
244             return tnlid;
245         }
246     }
247
248     static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
249     VLOG_WARN_RL(&rl, "all %s tunnel ids exhausted", name);
250     return 0;
251 }
252 \f
253 /* The 'key' comes from nbs->header_.uuid or nbr->header_.uuid or
254  * sb->external_ids:logical-switch. */
255 struct ovn_datapath {
256     struct hmap_node key_node;  /* Index on 'key'. */
257     struct uuid key;            /* (nbs/nbr)->header_.uuid. */
258
259     const struct nbrec_logical_switch *nbs;  /* May be NULL. */
260     const struct nbrec_logical_router *nbr;  /* May be NULL. */
261     const struct sbrec_datapath_binding *sb; /* May be NULL. */
262
263     struct ovs_list list;       /* In list of similar records. */
264
265     /* Logical switch data. */
266     struct ovn_port **router_ports;
267     size_t n_router_ports;
268
269     struct hmap port_tnlids;
270     uint32_t port_key_hint;
271
272     bool has_unknown;
273 };
274
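/* Creates a new ovn_datapath for 'key', referencing the given northbound
 * switch or router row ('nbs' or 'nbr') and southbound Datapath_Binding row
 * ('sb'), any of which may be NULL, and inserts it into 'datapaths'. */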
275 static struct ovn_datapath *
276 ovn_datapath_create(struct hmap *datapaths, const struct uuid *key,
277                     const struct nbrec_logical_switch *nbs,
278                     const struct nbrec_logical_router *nbr,
279                     const struct sbrec_datapath_binding *sb)
280 {
281     struct ovn_datapath *od = xzalloc(sizeof *od);
282     od->key = *key;
283     od->sb = sb;
284     od->nbs = nbs;
285     od->nbr = nbr;
286     hmap_init(&od->port_tnlids);
287     od->port_key_hint = 0;
288     hmap_insert(datapaths, &od->key_node, uuid_hash(&od->key));
289     return od;
290 }
291
292 static void
293 ovn_datapath_destroy(struct hmap *datapaths, struct ovn_datapath *od)
294 {
295     if (od) {
296         /* Don't remove od->list.  It is used within build_datapaths() as a
297          * private list and once we've exited that function it is not safe to
298          * use it. */
299         hmap_remove(datapaths, &od->key_node);
300         destroy_tnlids(&od->port_tnlids);
301         free(od->router_ports);
302         free(od);
303     }
304 }
305
306 static struct ovn_datapath *
307 ovn_datapath_find(struct hmap *datapaths, const struct uuid *uuid)
308 {
309     struct ovn_datapath *od;
310
311     HMAP_FOR_EACH_WITH_HASH (od, key_node, uuid_hash(uuid), datapaths) {
312         if (uuid_equals(uuid, &od->key)) {
313             return od;
314         }
315     }
316     return NULL;
317 }
318
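/* Returns the ovn_datapath in 'datapaths' whose key matches the
 * external_ids:logical-switch or external_ids:logical-router UUID of 'sb', or
 * NULL if 'sb' lacks both keys or no such datapath exists. */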
319 static struct ovn_datapath *
320 ovn_datapath_from_sbrec(struct hmap *datapaths,
321                         const struct sbrec_datapath_binding *sb)
322 {
323     struct uuid key;
324
325     if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
326         !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
327         return NULL;
328     }
329     return ovn_datapath_find(datapaths, &key);
330 }
331
332 static bool
333 lrouter_is_enabled(const struct nbrec_logical_router *lrouter)
334 {
335     return !lrouter->enabled || *lrouter->enabled;
336 }
337
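/* Pairs up northbound logical switches and routers with southbound
 * Datapath_Binding rows by UUID.  Initializes 'datapaths' with an ovn_datapath
 * for each row encountered and sorts the datapaths into three lists:
 * 'sb_only' (southbound rows with no northbound match), 'nb_only' (northbound
 * rows with no southbound match), and 'both' (matched pairs).  Southbound rows
 * that lack a usable external-ids key, or that duplicate another row's key,
 * are deleted along the way. */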
338 static void
339 join_datapaths(struct northd_context *ctx, struct hmap *datapaths,
340                struct ovs_list *sb_only, struct ovs_list *nb_only,
341                struct ovs_list *both)
342 {
343     hmap_init(datapaths);
344     ovs_list_init(sb_only);
345     ovs_list_init(nb_only);
346     ovs_list_init(both);
347
348     const struct sbrec_datapath_binding *sb, *sb_next;
349     SBREC_DATAPATH_BINDING_FOR_EACH_SAFE (sb, sb_next, ctx->ovnsb_idl) {
350         struct uuid key;
351         if (!smap_get_uuid(&sb->external_ids, "logical-switch", &key) &&
352             !smap_get_uuid(&sb->external_ids, "logical-router", &key)) {
353             ovsdb_idl_txn_add_comment(
354                 ctx->ovnsb_txn,
355                 "deleting Datapath_Binding "UUID_FMT" that lacks "
356                 "external-ids:logical-switch and "
357                 "external-ids:logical-router",
358                 UUID_ARGS(&sb->header_.uuid));
359             sbrec_datapath_binding_delete(sb);
360             continue;
361         }
362
363         if (ovn_datapath_find(datapaths, &key)) {
364             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
365             VLOG_INFO_RL(
366                 &rl, "deleting Datapath_Binding "UUID_FMT" with "
367                 "duplicate external-ids:logical-switch/router "UUID_FMT,
368                 UUID_ARGS(&sb->header_.uuid), UUID_ARGS(&key));
369             sbrec_datapath_binding_delete(sb);
370             continue;
371         }
372
373         struct ovn_datapath *od = ovn_datapath_create(datapaths, &key,
374                                                       NULL, NULL, sb);
375         ovs_list_push_back(sb_only, &od->list);
376     }
377
378     const struct nbrec_logical_switch *nbs;
379     NBREC_LOGICAL_SWITCH_FOR_EACH (nbs, ctx->ovnnb_idl) {
380         struct ovn_datapath *od = ovn_datapath_find(datapaths,
381                                                     &nbs->header_.uuid);
382         if (od) {
383             od->nbs = nbs;
384             ovs_list_remove(&od->list);
385             ovs_list_push_back(both, &od->list);
386         } else {
387             od = ovn_datapath_create(datapaths, &nbs->header_.uuid,
388                                      nbs, NULL, NULL);
389             ovs_list_push_back(nb_only, &od->list);
390         }
391     }
392
393     const struct nbrec_logical_router *nbr;
394     NBREC_LOGICAL_ROUTER_FOR_EACH (nbr, ctx->ovnnb_idl) {
395         if (!lrouter_is_enabled(nbr)) {
396             continue;
397         }
398
399         struct ovn_datapath *od = ovn_datapath_find(datapaths,
400                                                     &nbr->header_.uuid);
401         if (od) {
402             if (!od->nbs) {
403                 od->nbr = nbr;
404                 ovs_list_remove(&od->list);
405                 ovs_list_push_back(both, &od->list);
406             } else {
407                 /* Can't happen! */
408                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
409                 VLOG_WARN_RL(&rl,
410                              "duplicate UUID "UUID_FMT" in OVN_Northbound",
411                              UUID_ARGS(&nbr->header_.uuid));
412                 continue;
413             }
414         } else {
415             od = ovn_datapath_create(datapaths, &nbr->header_.uuid,
416                                      NULL, nbr, NULL);
417             ovs_list_push_back(nb_only, &od->list);
418         }
419     }
420 }
421
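/* Returns an unused datapath tunnel key in the range 1 through 2**24 - 1, or
 * 0 if the entire range is in use.  (Datapath keys are limited to 24 bits so
 * that they fit in a Geneve VNI.) */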
422 static uint32_t
423 ovn_datapath_allocate_key(struct hmap *dp_tnlids)
424 {
425     static uint32_t hint;
426     return allocate_tnlid(dp_tnlids, "datapath", (1u << 24) - 1, &hint);
427 }
428
429 /* Updates the southbound Datapath_Binding table so that it contains the
430  * logical switches and routers specified by the northbound database.
431  *
432  * Initializes 'datapaths' to contain a "struct ovn_datapath" for every logical
433  * switch and router. */
434 static void
435 build_datapaths(struct northd_context *ctx, struct hmap *datapaths)
436 {
437     struct ovs_list sb_only, nb_only, both;
438
439     join_datapaths(ctx, datapaths, &sb_only, &nb_only, &both);
440
441     if (!ovs_list_is_empty(&nb_only)) {
442         /* First index the in-use datapath tunnel IDs. */
443         struct hmap dp_tnlids = HMAP_INITIALIZER(&dp_tnlids);
444         struct ovn_datapath *od;
445         LIST_FOR_EACH (od, list, &both) {
446             add_tnlid(&dp_tnlids, od->sb->tunnel_key);
447         }
448
449         /* Add southbound record for each unmatched northbound record. */
450         LIST_FOR_EACH (od, list, &nb_only) {
451             uint32_t tunnel_key = ovn_datapath_allocate_key(&dp_tnlids);
452             if (!tunnel_key) {
453                 break;
454             }
455
456             od->sb = sbrec_datapath_binding_insert(ctx->ovnsb_txn);
457
458             char uuid_s[UUID_LEN + 1];
459             sprintf(uuid_s, UUID_FMT, UUID_ARGS(&od->key));
460             const char *key = od->nbs ? "logical-switch" : "logical-router";
461             const struct smap id = SMAP_CONST1(&id, key, uuid_s);
462             sbrec_datapath_binding_set_external_ids(od->sb, &id);
463
464             sbrec_datapath_binding_set_tunnel_key(od->sb, tunnel_key);
465         }
466         destroy_tnlids(&dp_tnlids);
467     }
468
469     /* Delete southbound records without northbound matches. */
470     struct ovn_datapath *od, *next;
471     LIST_FOR_EACH_SAFE (od, next, list, &sb_only) {
472         ovs_list_remove(&od->list);
473         sbrec_datapath_binding_delete(od->sb);
474         ovn_datapath_destroy(datapaths, od);
475     }
476 }
477 \f
478 struct ovn_port {
479     struct hmap_node key_node;  /* Index on 'key'. */
480     char *key;                  /* nbs->name, nbr->name, sb->logical_port. */
481     char *json_key;             /* 'key', quoted for use in JSON. */
482
483     const struct nbrec_logical_switch_port *nbs; /* May be NULL. */
484     const struct nbrec_logical_router_port *nbr; /* May be NULL. */
485     const struct sbrec_port_binding *sb;         /* May be NULL. */
486
487     /* Logical router port data. */
488     char *ip_s;                 /* "192.168.10.123" */
489     char *network_s;            /* "192.168.10.0" */
490     char *bcast_s;              /* "192.168.10.255" */
491     int plen;                   /* CIDR prefix: 24 */
492
493     ovs_be32 ip;                /* 192.168.10.123 */
494     ovs_be32 mask;              /* 255.255.255.0 */
495     ovs_be32 network;           /* 192.168.10.0 */
496
497     struct eth_addr mac;
498     struct ovn_port *peer;
499
500     struct ovn_datapath *od;
501
502     struct ovs_list list;       /* In list of similar records. */
503 };
504
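/* Creates a new ovn_port named 'key', referencing the given northbound switch
 * port, northbound router port, and southbound Port_Binding rows (any of which
 * may be NULL), and inserts it into 'ports'. */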
505 static struct ovn_port *
506 ovn_port_create(struct hmap *ports, const char *key,
507                 const struct nbrec_logical_switch_port *nbs,
508                 const struct nbrec_logical_router_port *nbr,
509                 const struct sbrec_port_binding *sb)
510 {
511     struct ovn_port *op = xzalloc(sizeof *op);
512
513     struct ds json_key = DS_EMPTY_INITIALIZER;
514     json_string_escape(key, &json_key);
515     op->json_key = ds_steal_cstr(&json_key);
516
517     op->key = xstrdup(key);
518     op->sb = sb;
519     op->nbs = nbs;
520     op->nbr = nbr;
521     hmap_insert(ports, &op->key_node, hash_string(op->key, 0));
522     return op;
523 }
524
525 static void
526 ovn_port_destroy(struct hmap *ports, struct ovn_port *port)
527 {
528     if (port) {
529         /* Don't remove port->list.  It is used within build_ports() as a
530          * private list and once we've exited that function it is not safe to
531          * use it. */
532         hmap_remove(ports, &port->key_node);
533         free(port->bcast_s);
534         free(port->network_s);
535         free(port->ip_s);
536         free(port->json_key);
537         free(port->key);
538         free(port);
539     }
540 }
541
542 static struct ovn_port *
543 ovn_port_find(struct hmap *ports, const char *name)
544 {
545     struct ovn_port *op;
546
547     HMAP_FOR_EACH_WITH_HASH (op, key_node, hash_string(name, 0), ports) {
548         if (!strcmp(op->key, name)) {
549             return op;
550         }
551     }
552     return NULL;
553 }
554
555 static uint32_t
556 ovn_port_allocate_key(struct ovn_datapath *od)
557 {
558     return allocate_tnlid(&od->port_tnlids, "port",
559                           (1u << 15) - 1, &od->port_key_hint);
560 }
561
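/* Pairs up northbound logical switch and router ports with southbound
 * Port_Binding rows by name, filling 'ports' and the 'sb_only', 'nb_only', and
 * 'both' lists analogously to join_datapaths().  For router ports this also
 * parses the MAC address and CIDR network, and it connects router ports and
 * switch ports of type "router" to their peers. */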
562 static void
563 join_logical_ports(struct northd_context *ctx,
564                    struct hmap *datapaths, struct hmap *ports,
565                    struct ovs_list *sb_only, struct ovs_list *nb_only,
566                    struct ovs_list *both)
567 {
568     hmap_init(ports);
569     ovs_list_init(sb_only);
570     ovs_list_init(nb_only);
571     ovs_list_init(both);
572
573     const struct sbrec_port_binding *sb;
574     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
575         struct ovn_port *op = ovn_port_create(ports, sb->logical_port,
576                                               NULL, NULL, sb);
577         ovs_list_push_back(sb_only, &op->list);
578     }
579
580     struct ovn_datapath *od;
581     HMAP_FOR_EACH (od, key_node, datapaths) {
582         if (od->nbs) {
583             for (size_t i = 0; i < od->nbs->n_ports; i++) {
584                 const struct nbrec_logical_switch_port *nbs = od->nbs->ports[i];
585                 struct ovn_port *op = ovn_port_find(ports, nbs->name);
586                 if (op) {
587                     if (op->nbs || op->nbr) {
588                         static struct vlog_rate_limit rl
589                             = VLOG_RATE_LIMIT_INIT(5, 1);
590                         VLOG_WARN_RL(&rl, "duplicate logical port %s",
591                                      nbs->name);
592                         continue;
593                     }
594                     op->nbs = nbs;
595                     ovs_list_remove(&op->list);
596                     ovs_list_push_back(both, &op->list);
597                 } else {
598                     op = ovn_port_create(ports, nbs->name, nbs, NULL, NULL);
599                     ovs_list_push_back(nb_only, &op->list);
600                 }
601
602                 op->od = od;
603             }
604         } else {
605             for (size_t i = 0; i < od->nbr->n_ports; i++) {
606                 const struct nbrec_logical_router_port *nbr
607                     = od->nbr->ports[i];
608
609                 struct eth_addr mac;
610                 if (!eth_addr_from_string(nbr->mac, &mac)) {
611                     static struct vlog_rate_limit rl
612                         = VLOG_RATE_LIMIT_INIT(5, 1);
613                     VLOG_WARN_RL(&rl, "bad 'mac' %s", nbr->mac);
614                     continue;
615                 }
616
617                 ovs_be32 ip, mask;
618                 char *error = ip_parse_masked(nbr->network, &ip, &mask);
619                 if (error || mask == OVS_BE32_MAX || !ip_is_cidr(mask)) {
620                     static struct vlog_rate_limit rl
621                         = VLOG_RATE_LIMIT_INIT(5, 1);
622                     VLOG_WARN_RL(&rl, "bad 'network' %s", nbr->network);
623                     free(error);
624                     continue;
625                 }
626
627                 struct ovn_port *op = ovn_port_find(ports, nbr->name);
628                 if (op) {
629                     if (op->nbs || op->nbr) {
630                         static struct vlog_rate_limit rl
631                             = VLOG_RATE_LIMIT_INIT(5, 1);
632                         VLOG_WARN_RL(&rl, "duplicate logical router port %s",
633                                      nbr->name);
634                         continue;
635                     }
636                     op->nbr = nbr;
637                     ovs_list_remove(&op->list);
638                     ovs_list_push_back(both, &op->list);
639                 } else {
640                     op = ovn_port_create(ports, nbr->name, NULL, nbr, NULL);
641                     ovs_list_push_back(nb_only, &op->list);
642                 }
643
644                 op->ip = ip;
645                 op->mask = mask;
646                 op->network = ip & mask;
647                 op->plen = ip_count_cidr_bits(mask);
648
649                 op->ip_s = xasprintf(IP_FMT, IP_ARGS(ip));
650                 op->network_s = xasprintf(IP_FMT, IP_ARGS(op->network));
651                 op->bcast_s = xasprintf(IP_FMT, IP_ARGS(ip | ~mask));
652
653                 op->mac = mac;
654                 op->od = od;
655             }
656         }
657     }
658
659     /* Connect logical router ports, and logical switch ports of type "router",
660      * to their peers. */
661     struct ovn_port *op;
662     HMAP_FOR_EACH (op, key_node, ports) {
663         if (op->nbs && !strcmp(op->nbs->type, "router")) {
664             const char *peer_name = smap_get(&op->nbs->options, "router-port");
665             if (!peer_name) {
666                 continue;
667             }
668
669             struct ovn_port *peer = ovn_port_find(ports, peer_name);
670             if (!peer || !peer->nbr) {
671                 continue;
672             }
673
674             peer->peer = op;
675             op->peer = peer;
676             op->od->router_ports = xrealloc(
677                 op->od->router_ports,
678                 sizeof *op->od->router_ports * (op->od->n_router_ports + 1));
679             op->od->router_ports[op->od->n_router_ports++] = op;
680         } else if (op->nbr && op->nbr->peer) {
681             op->peer = ovn_port_find(ports, op->nbr->peer);
682         }
683     }
684 }
685
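/* Copies northbound configuration for 'op' into its southbound Port_Binding
 * row: the datapath, the port type ("patch" or "gateway" for ports that
 * connect a switch and a router, otherwise the northbound type), the options
 * (including the peer port and, for gateway ports, the gateway chassis), and
 * the parent port, tag, and MAC addresses (cleared for router ports). */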
686 static void
687 ovn_port_update_sbrec(const struct ovn_port *op)
688 {
689     sbrec_port_binding_set_datapath(op->sb, op->od->sb);
690     if (op->nbr) {
691         /* If the router is an l3 gateway, it resides on a chassis
692          * and its ports are of type "gateway". */
693         const char *chassis = smap_get(&op->od->nbr->options, "chassis");
694         if (chassis) {
695             sbrec_port_binding_set_type(op->sb, "gateway");
696         } else {
697             sbrec_port_binding_set_type(op->sb, "patch");
698         }
699
700         const char *peer = op->peer ? op->peer->key : "<error>";
701         struct smap new;
702         smap_init(&new);
703         smap_add(&new, "peer", peer);
704         if (chassis) {
705             smap_add(&new, "gateway-chassis", chassis);
706         }
707         sbrec_port_binding_set_options(op->sb, &new);
708         smap_destroy(&new);
709
710         sbrec_port_binding_set_parent_port(op->sb, NULL);
711         sbrec_port_binding_set_tag(op->sb, NULL, 0);
712         sbrec_port_binding_set_mac(op->sb, NULL, 0);
713     } else {
714         if (strcmp(op->nbs->type, "router")) {
715             sbrec_port_binding_set_type(op->sb, op->nbs->type);
716             sbrec_port_binding_set_options(op->sb, &op->nbs->options);
717         } else {
718             const char *chassis = NULL;
719             if (op->peer && op->peer->od && op->peer->od->nbr) {
720                 chassis = smap_get(&op->peer->od->nbr->options, "chassis");
721             }
722
723             /* A switch port connected to a gateway router is also of
724              * type "gateway". */
725             if (chassis) {
726                 sbrec_port_binding_set_type(op->sb, "gateway");
727             } else {
728                 sbrec_port_binding_set_type(op->sb, "patch");
729             }
730
731             const char *router_port = smap_get(&op->nbs->options,
732                                                "router-port");
733             if (!router_port) {
734                 router_port = "<error>";
735             }
736             struct smap new;
737             smap_init(&new);
738             smap_add(&new, "peer", router_port);
739             if (chassis) {
740                 smap_add(&new, "gateway-chassis", chassis);
741             }
742             sbrec_port_binding_set_options(op->sb, &new);
743             smap_destroy(&new);
744         }
745         sbrec_port_binding_set_parent_port(op->sb, op->nbs->parent_name);
746         sbrec_port_binding_set_tag(op->sb, op->nbs->tag, op->nbs->n_tag);
747         sbrec_port_binding_set_mac(op->sb, (const char **) op->nbs->addresses,
748                                    op->nbs->n_addresses);
749     }
750 }
751
752 /* Updates the southbound Port_Binding table so that it contains the logical
753  * switch ports specified by the northbound database.
754  *
755  * Initializes 'ports' to contain a "struct ovn_port" for every logical port,
756  * using the "struct ovn_datapath"s in 'datapaths' to look up logical
757  * datapaths. */
758 static void
759 build_ports(struct northd_context *ctx, struct hmap *datapaths,
760             struct hmap *ports)
761 {
762     struct ovs_list sb_only, nb_only, both;
763
764     join_logical_ports(ctx, datapaths, ports, &sb_only, &nb_only, &both);
765
766     /* For logical ports that are in both databases, update the southbound
767      * record based on northbound data.  Also index the in-use tunnel_keys. */
768     struct ovn_port *op, *next;
769     LIST_FOR_EACH_SAFE (op, next, list, &both) {
770         ovn_port_update_sbrec(op);
771
772         add_tnlid(&op->od->port_tnlids, op->sb->tunnel_key);
773         if (op->sb->tunnel_key > op->od->port_key_hint) {
774             op->od->port_key_hint = op->sb->tunnel_key;
775         }
776     }
777
778     /* Add southbound record for each unmatched northbound record. */
779     LIST_FOR_EACH_SAFE (op, next, list, &nb_only) {
780         uint16_t tunnel_key = ovn_port_allocate_key(op->od);
781         if (!tunnel_key) {
782             continue;
783         }
784
785         op->sb = sbrec_port_binding_insert(ctx->ovnsb_txn);
786         ovn_port_update_sbrec(op);
787
788         sbrec_port_binding_set_logical_port(op->sb, op->key);
789         sbrec_port_binding_set_tunnel_key(op->sb, tunnel_key);
790     }
791
792     /* Delete southbound records without northbound matches. */
793     LIST_FOR_EACH_SAFE (op, next, list, &sb_only) {
794         ovs_list_remove(&op->list);
795         sbrec_port_binding_delete(op->sb);
796         ovn_port_destroy(ports, op);
797     }
798 }
799 \f
800 #define OVN_MIN_MULTICAST 32768
801 #define OVN_MAX_MULTICAST 65535
802
803 struct multicast_group {
804     const char *name;
805     uint16_t key;               /* OVN_MIN_MULTICAST...OVN_MAX_MULTICAST. */
806 };
807
808 #define MC_FLOOD "_MC_flood"
809 static const struct multicast_group mc_flood = { MC_FLOOD, 65535 };
810
811 #define MC_UNKNOWN "_MC_unknown"
812 static const struct multicast_group mc_unknown = { MC_UNKNOWN, 65534 };
813
814 static bool
815 multicast_group_equal(const struct multicast_group *a,
816                       const struct multicast_group *b)
817 {
818     return !strcmp(a->name, b->name) && a->key == b->key;
819 }
820
821 /* Multicast group entry. */
822 struct ovn_multicast {
823     struct hmap_node hmap_node; /* Index on 'datapath' and 'key'. */
824     struct ovn_datapath *datapath;
825     const struct multicast_group *group;
826
827     struct ovn_port **ports;
828     size_t n_ports, allocated_ports;
829 };
830
831 static uint32_t
832 ovn_multicast_hash(const struct ovn_datapath *datapath,
833                    const struct multicast_group *group)
834 {
835     return hash_pointer(datapath, group->key);
836 }
837
838 static struct ovn_multicast *
839 ovn_multicast_find(struct hmap *mcgroups, struct ovn_datapath *datapath,
840                    const struct multicast_group *group)
841 {
842     struct ovn_multicast *mc;
843
844     HMAP_FOR_EACH_WITH_HASH (mc, hmap_node,
845                              ovn_multicast_hash(datapath, group), mcgroups) {
846         if (mc->datapath == datapath
847             && multicast_group_equal(mc->group, group)) {
848             return mc;
849         }
850     }
851     return NULL;
852 }
853
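/* Adds 'port' to multicast group 'group' within the port's datapath, creating
 * the group first if it does not yet exist in 'mcgroups'. */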
854 static void
855 ovn_multicast_add(struct hmap *mcgroups, const struct multicast_group *group,
856                   struct ovn_port *port)
857 {
858     struct ovn_datapath *od = port->od;
859     struct ovn_multicast *mc = ovn_multicast_find(mcgroups, od, group);
860     if (!mc) {
861         mc = xmalloc(sizeof *mc);
862         hmap_insert(mcgroups, &mc->hmap_node, ovn_multicast_hash(od, group));
863         mc->datapath = od;
864         mc->group = group;
865         mc->n_ports = 0;
866         mc->allocated_ports = 4;
867         mc->ports = xmalloc(mc->allocated_ports * sizeof *mc->ports);
868     }
869     if (mc->n_ports >= mc->allocated_ports) {
870         mc->ports = x2nrealloc(mc->ports, &mc->allocated_ports,
871                                sizeof *mc->ports);
872     }
873     mc->ports[mc->n_ports++] = port;
874 }
875
876 static void
877 ovn_multicast_destroy(struct hmap *mcgroups, struct ovn_multicast *mc)
878 {
879     if (mc) {
880         hmap_remove(mcgroups, &mc->hmap_node);
881         free(mc->ports);
882         free(mc);
883     }
884 }
885
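/* Writes the set of ports in 'mc' to the southbound Multicast_Group row
 * 'sb'. */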
886 static void
887 ovn_multicast_update_sbrec(const struct ovn_multicast *mc,
888                            const struct sbrec_multicast_group *sb)
889 {
890     struct sbrec_port_binding **ports = xmalloc(mc->n_ports * sizeof *ports);
891     for (size_t i = 0; i < mc->n_ports; i++) {
892         ports[i] = CONST_CAST(struct sbrec_port_binding *, mc->ports[i]->sb);
893     }
894     sbrec_multicast_group_set_ports(sb, ports, mc->n_ports);
895     free(ports);
896 }
897 \f
898 /* Logical flow generation.
899  *
900  * This code generates the Logical_Flow table in the southbound database, as a
901  * function of most of the northbound database.
902  */
903
904 struct ovn_lflow {
905     struct hmap_node hmap_node;
906
907     struct ovn_datapath *od;
908     enum ovn_stage stage;
909     uint16_t priority;
910     char *match;
911     char *actions;
912 };
913
914 static size_t
915 ovn_lflow_hash(const struct ovn_lflow *lflow)
916 {
917     size_t hash = uuid_hash(&lflow->od->key);
918     hash = hash_2words((lflow->stage << 16) | lflow->priority, hash);
919     hash = hash_string(lflow->match, hash);
920     return hash_string(lflow->actions, hash);
921 }
922
923 static bool
924 ovn_lflow_equal(const struct ovn_lflow *a, const struct ovn_lflow *b)
925 {
926     return (a->od == b->od
927             && a->stage == b->stage
928             && a->priority == b->priority
929             && !strcmp(a->match, b->match)
930             && !strcmp(a->actions, b->actions));
931 }
932
933 static void
934 ovn_lflow_init(struct ovn_lflow *lflow, struct ovn_datapath *od,
935               enum ovn_stage stage, uint16_t priority,
936               char *match, char *actions)
937 {
938     lflow->od = od;
939     lflow->stage = stage;
940     lflow->priority = priority;
941     lflow->match = match;
942     lflow->actions = actions;
943 }
944
945 /* Adds a row with the specified contents to the Logical_Flow table. */
946 static void
947 ovn_lflow_add(struct hmap *lflow_map, struct ovn_datapath *od,
948               enum ovn_stage stage, uint16_t priority,
949               const char *match, const char *actions)
950 {
951     struct ovn_lflow *lflow = xmalloc(sizeof *lflow);
952     ovn_lflow_init(lflow, od, stage, priority,
953                    xstrdup(match), xstrdup(actions));
954     hmap_insert(lflow_map, &lflow->hmap_node, ovn_lflow_hash(lflow));
955 }
956
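/* Returns the logical flow in 'lflows' with exactly the given 'od', 'stage',
 * 'priority', 'match', and 'actions', or NULL if there is no such flow. */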
957 static struct ovn_lflow *
958 ovn_lflow_find(struct hmap *lflows, struct ovn_datapath *od,
959                enum ovn_stage stage, uint16_t priority,
960                const char *match, const char *actions)
961 {
962     struct ovn_lflow target;
963     ovn_lflow_init(&target, od, stage, priority,
964                    CONST_CAST(char *, match), CONST_CAST(char *, actions));
965
966     struct ovn_lflow *lflow;
967     HMAP_FOR_EACH_WITH_HASH (lflow, hmap_node, ovn_lflow_hash(&target),
968                              lflows) {
969         if (ovn_lflow_equal(lflow, &target)) {
970             return lflow;
971         }
972     }
973     return NULL;
974 }
975
976 static void
977 ovn_lflow_destroy(struct hmap *lflows, struct ovn_lflow *lflow)
978 {
979     if (lflow) {
980         hmap_remove(lflows, &lflow->hmap_node);
981         free(lflow->match);
982         free(lflow->actions);
983         free(lflow);
984     }
985 }
986
987 /* Appends port security constraints on L2 address field 'eth_addr_field'
988  * (e.g. "eth.src" or "eth.dst") to 'match'.  'port_security', with
989  * 'n_port_security' elements, is the collection of port_security constraints
990  * from an OVN_NB Logical_Switch_Port row. */
991 static void
992 build_port_security_l2(const char *eth_addr_field,
993                        char **port_security, size_t n_port_security,
994                        struct ds *match)
995 {
996     size_t base_len = match->length;
997     ds_put_format(match, " && %s == {", eth_addr_field);
998
999     size_t n = 0;
1000     for (size_t i = 0; i < n_port_security; i++) {
1001         struct eth_addr ea;
1002
1003         if (eth_addr_from_string(port_security[i], &ea)) {
1004             ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(ea));
1005             ds_put_char(match, ' ');
1006             n++;
1007         }
1008     }
1009     ds_chomp(match, ' ');
1010     ds_put_cstr(match, "}");
1011
1012     if (!n) {
1013         match->length = base_len;
1014     }
1015 }
1016
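/* Appends to 'match' an IPv6 ND clause that accepts ND packets whose nd.sll is
 * all-zeros or 'ea', or whose nd.tll is all-zeros or 'ea' and, when
 * 'n_ipv6_addrs' > 0, whose nd.target is the link-local address derived from
 * 'ea' or one of 'ipv6_addrs'. */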
1017 static void
1018 build_port_security_ipv6_nd_flow(
1019     struct ds *match, struct eth_addr ea, struct ipv6_netaddr *ipv6_addrs,
1020     int n_ipv6_addrs)
1021 {
1022     ds_put_format(match, " && ip6 && nd && ((nd.sll == "ETH_ADDR_FMT" || "
1023                   "nd.sll == "ETH_ADDR_FMT") || ((nd.tll == "ETH_ADDR_FMT" || "
1024                   "nd.tll == "ETH_ADDR_FMT")", ETH_ADDR_ARGS(eth_addr_zero),
1025                   ETH_ADDR_ARGS(ea), ETH_ADDR_ARGS(eth_addr_zero),
1026                   ETH_ADDR_ARGS(ea));
1027     if (!n_ipv6_addrs) {
1028         ds_put_cstr(match, "))");
1029         return;
1030     }
1031
1032     char ip6_str[INET6_ADDRSTRLEN + 1];
1033     struct in6_addr lla;
1034     in6_generate_lla(ea, &lla);
1035     memset(ip6_str, 0, sizeof(ip6_str));
1036     ipv6_string_mapped(ip6_str, &lla);
1037     ds_put_format(match, " && (nd.target == %s", ip6_str);
1038
1039     for (int i = 0; i < n_ipv6_addrs; i++) {
1040         memset(ip6_str, 0, sizeof(ip6_str));
1041         ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1042         ds_put_format(match, " || nd.target == %s", ip6_str);
1043     }
1044
1045     ds_put_format(match, ")))");
1046 }
1047
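/* Appends to 'match' an IPv6 address constraint: ip6.src (for the ingress
 * pipeline) or ip6.dst (for the egress pipeline) must be the link-local
 * address derived from 'ea' or one of 'ipv6_addrs'.  The egress match also
 * accepts multicast destinations in ff00::/8. */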
1048 static void
1049 build_port_security_ipv6_flow(
1050     enum ovn_pipeline pipeline, struct ds *match, struct eth_addr ea,
1051     struct ipv6_netaddr *ipv6_addrs, int n_ipv6_addrs)
1052 {
1053     char ip6_str[INET6_ADDRSTRLEN + 1];
1054
1055     ds_put_format(match, " && %s == {",
1056                   pipeline == P_IN ? "ip6.src" : "ip6.dst");
1057
1058     /* Allow link-local address. */
1059     struct in6_addr lla;
1060     in6_generate_lla(ea, &lla);
1061     ipv6_string_mapped(ip6_str, &lla);
1062     ds_put_format(match, "%s, ", ip6_str);
1063
1064     /* Allow ip6.dst=ff00::/8 for multicast packets */
1065     if (pipeline == P_OUT) {
1066         ds_put_cstr(match, "ff00::/8, ");
1067     }
1068     for (int i = 0; i < n_ipv6_addrs; i++) {
1069         ipv6_string_mapped(ip6_str, &ipv6_addrs[i].addr);
1070         ds_put_format(match, "%s, ", ip6_str);
1071     }
1072     /* Replace ", " by "}". */
1073     ds_chomp(match, ' ');
1074     ds_chomp(match, ',');
1075     ds_put_cstr(match, "}");
1076 }
1077
1078 /**
1079  * Build port security constraints on ARP and IPv6 ND fields
1080  * and add logical flows to S_SWITCH_IN_PORT_SEC_ND stage.
1081  *
1082  * For each port security entry of the logical port, the
1083  * following logical flows are added:
1084  *   - If the port security has no IP (both IPv4 and IPv6) or
1085  *     if it has IPv4 address(es)
1086  *      - Priority 90 flow to allow ARP packets for known MAC addresses
1087  *        in the eth.src and arp.sha fields. If the port security
1088  *        has IPv4 addresses, allow known IPv4 addresses in the arp.spa field.
1089  *
1090  *   - If the port security has no IP (both IPv4 and IPv6) or
1091  *     if it has IPv6 address(es)
1092  *     - Priority 90 flow to allow IPv6 ND packets for known MAC addresses
1093  *       in the eth.src and nd.sll/nd.tll fields. If the port security
1094  *       has IPv6 addresses, allow known IPv6 addresses in the nd.target field
1095  *       for IPv6 Neighbor Advertisement packets.
1096  *
1097  *   - Priority 80 flow to drop ARP and IPv6 ND packets.
1098  */
1099 static void
1100 build_port_security_nd(struct ovn_port *op, struct hmap *lflows)
1101 {
1102     for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1103         struct lport_addresses ps;
1104         if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
1105             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1106             VLOG_INFO_RL(&rl, "invalid syntax '%s' in port security. No MAC"
1107                          " address found", op->nbs->port_security[i]);
1108             continue;
1109         }
1110
1111         bool no_ip = !(ps.n_ipv4_addrs || ps.n_ipv6_addrs);
1112         struct ds match = DS_EMPTY_INITIALIZER;
1113
1114         if (ps.n_ipv4_addrs || no_ip) {
1115             ds_put_format(
1116                 &match, "inport == %s && eth.src == "ETH_ADDR_FMT" && arp.sha == "
1117                 ETH_ADDR_FMT, op->json_key, ETH_ADDR_ARGS(ps.ea),
1118                 ETH_ADDR_ARGS(ps.ea));
1119
1120             if (ps.n_ipv4_addrs) {
1121                 ds_put_cstr(&match, " && (");
1122                 for (size_t i = 0; i < ps.n_ipv4_addrs; i++) {
1123                     ds_put_cstr(&match, "arp.spa == ");
1124                     ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1125                     /* When the netmask is applied, if the host portion is
1126                      * non-zero, the host can only use the specified
1127                      * address in the arp.spa.  If zero, the host is allowed
1128                      * to use any address in the subnet. */
1129                     if (ps.ipv4_addrs[i].addr & ~mask) {
1130                         ds_put_format(&match, IP_FMT,
1131                                       IP_ARGS(ps.ipv4_addrs[i].addr));
1132                     } else {
1133                        ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1134                                         &match);
1135                     }
1136                     ds_put_cstr(&match, " || ");
1137                 }
1138                 ds_chomp(&match, ' ');
1139                 ds_chomp(&match, '|');
1140                 ds_chomp(&match, '|');
1141                 ds_put_cstr(&match, ")");
1142             }
1143             ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1144                           ds_cstr(&match), "next;");
1145             ds_destroy(&match);
1146         }
1147
1148         if (ps.n_ipv6_addrs || no_ip) {
1149             ds_init(&match);
1150             ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT,
1151                           op->json_key, ETH_ADDR_ARGS(ps.ea));
1152             build_port_security_ipv6_nd_flow(&match, ps.ea, ps.ipv6_addrs,
1153                                              ps.n_ipv6_addrs);
1154             ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 90,
1155                           ds_cstr(&match), "next;");
1156             ds_destroy(&match);
1157         }
1158         destroy_lport_addresses(&ps);
1159     }
1160
1161     char *match = xasprintf("inport == %s && (arp || nd)", op->json_key);
1162     ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_ND, 80,
1163                   match, "drop;");
1164     free(match);
1165 }
1166
1167 /**
1168  * Build port security constraints on IPv4 and IPv6 src and dst fields
1169  * and add logical flows to S_SWITCH_(IN/OUT)_PORT_SEC_IP stage.
1170  *
1171  * For each port security entry of the logical port, the
1172  * following logical flows are added:
1173  *   - If the port security has IPv4 addresses,
1174  *     - Priority 90 flow to allow IPv4 packets for known IPv4 addresses
1175  *
1176  *   - If the port security has IPv6 addresses,
1177  *     - Priority 90 flow to allow IPv6 packets for known IPv6 addresses
1178  *
1179  *   - If the port security has IPv4 addresses or IPv6 addresses or both
1180  *     - Priority 80 flow to drop all IPv4 and IPv6 traffic
1181  */
1182 static void
1183 build_port_security_ip(enum ovn_pipeline pipeline, struct ovn_port *op,
1184                        struct hmap *lflows)
1185 {
1186     char *port_direction;
1187     enum ovn_stage stage;
1188     if (pipeline == P_IN) {
1189         port_direction = "inport";
1190         stage = S_SWITCH_IN_PORT_SEC_IP;
1191     } else {
1192         port_direction = "outport";
1193         stage = S_SWITCH_OUT_PORT_SEC_IP;
1194     }
1195
1196     for (size_t i = 0; i < op->nbs->n_port_security; i++) {
1197         struct lport_addresses ps;
1198         if (!extract_lsp_addresses(op->nbs->port_security[i], &ps, true)) {
1199             continue;
1200         }
1201
1202         if (!(ps.n_ipv4_addrs || ps.n_ipv6_addrs)) {
1203             continue;
1204         }
1205
1206         if (ps.n_ipv4_addrs) {
1207             struct ds match = DS_EMPTY_INITIALIZER;
1208             if (pipeline == P_IN) {
1209                 /* Permit use of the unspecified address for DHCP discovery */
1210                 struct ds dhcp_match = DS_EMPTY_INITIALIZER;
1211                 ds_put_format(&dhcp_match, "inport == %s"
1212                               " && eth.src == "ETH_ADDR_FMT
1213                               " && ip4.src == 0.0.0.0"
1214                               " && ip4.dst == 255.255.255.255"
1215                               " && udp.src == 68 && udp.dst == 67", op->json_key,
1216                               ETH_ADDR_ARGS(ps.ea));
1217                 ovn_lflow_add(lflows, op->od, stage, 90,
1218                               ds_cstr(&dhcp_match), "next;");
1219                 ds_destroy(&dhcp_match);
1220                 ds_put_format(&match, "inport == %s && eth.src == "ETH_ADDR_FMT
1221                               " && ip4.src == {", op->json_key,
1222                               ETH_ADDR_ARGS(ps.ea));
1223             } else {
1224                 ds_put_format(&match, "outport == %s && eth.dst == "ETH_ADDR_FMT
1225                               " && ip4.dst == {255.255.255.255, 224.0.0.0/4, ",
1226                               op->json_key, ETH_ADDR_ARGS(ps.ea));
1227             }
1228
1229             for (int i = 0; i < ps.n_ipv4_addrs; i++) {
1230                 ovs_be32 mask = be32_prefix_mask(ps.ipv4_addrs[i].plen);
1231                 /* When the netmask is applied, if the host portion is
1232                  * non-zero, the host can only use the specified
1233                  * address.  If zero, the host is allowed to use any
1234                  * address in the subnet.
1235                  */
1236                 if (ps.ipv4_addrs[i].addr & ~mask) {
1237                     ds_put_format(&match, IP_FMT,
1238                                   IP_ARGS(ps.ipv4_addrs[i].addr));
1239                     if (pipeline == P_OUT && ps.ipv4_addrs[i].plen != 32) {
1240                         /* Host is also allowed to receive packets to the
1241                          * broadcast address in the specified subnet.
1242                          */
1243                         ds_put_format(&match, ", "IP_FMT,
1244                                       IP_ARGS(ps.ipv4_addrs[i].addr | ~mask));
1245                     }
1246                 } else {
1247                     /* host portion is zero */
1248                     ip_format_masked(ps.ipv4_addrs[i].addr & mask, mask,
1249                                      &match);
1250                 }
1251                 ds_put_cstr(&match, ", ");
1252             }
1253
1254             /* Replace ", " by "}". */
1255             ds_chomp(&match, ' ');
1256             ds_chomp(&match, ',');
1257             ds_put_cstr(&match, "}");
1258             ovn_lflow_add(lflows, op->od, stage, 90, ds_cstr(&match), "next;");
1259             ds_destroy(&match);
1260         }
1261
1262         if (ps.n_ipv6_addrs) {
1263             struct ds match = DS_EMPTY_INITIALIZER;
1264             if (pipeline == P_IN) {
1265                 /* Permit use of unspecified address for duplicate address
1266                  * detection */
1267                 struct ds dad_match = DS_EMPTY_INITIALIZER;
1268                 ds_put_format(&dad_match, "inport == %s"
1269                               " && eth.src == "ETH_ADDR_FMT
1270                               " && ip6.src == ::"
1271                               " && ip6.dst == ff02::/16"
1272                               " && icmp6.type == {131, 135, 143}", op->json_key,
1273                               ETH_ADDR_ARGS(ps.ea));
1274                 ovn_lflow_add(lflows, op->od, stage, 90,
1275                               ds_cstr(&dad_match), "next;");
1276                 ds_destroy(&dad_match);
1277             }
1278             ds_put_format(&match, "%s == %s && %s == "ETH_ADDR_FMT"",
1279                           port_direction, op->json_key,
1280                           pipeline == P_IN ? "eth.src" : "eth.dst",
1281                           ETH_ADDR_ARGS(ps.ea));
1282             build_port_security_ipv6_flow(pipeline, &match, ps.ea,
1283                                           ps.ipv6_addrs, ps.n_ipv6_addrs);
1284             ovn_lflow_add(lflows, op->od, stage, 90,
1285                           ds_cstr(&match), "next;");
1286             ds_destroy(&match);
1287         }
1288
1289         destroy_lport_addresses(&ps);
1290
1291         char *match = xasprintf(
1292             "%s == %s && %s == "ETH_ADDR_FMT" && ip", port_direction,
1293             op->json_key, pipeline == P_IN ? "eth.src" : "eth.dst",
1294             ETH_ADDR_ARGS(ps.ea));
1295         ovn_lflow_add(lflows, op->od, stage, 80, match, "drop;");
1296         free(match);
1297     }
1298
1299 }
1300
1301 static bool
1302 lsp_is_enabled(const struct nbrec_logical_switch_port *lsp)
1303 {
1304     return !lsp->enabled || *lsp->enabled;
1305 }
1306
1307 static bool
1308 lsp_is_up(const struct nbrec_logical_switch_port *lsp)
1309 {
1310     return !lsp->up || *lsp->up;
1311 }
1312
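/* Returns true if any ACL on 'od' uses the "allow-related" action, that is, if
 * the datapath needs stateful (conntrack-based) ACL processing. */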
1313 static bool
1314 has_stateful_acl(struct ovn_datapath *od)
1315 {
1316     for (size_t i = 0; i < od->nbs->n_acls; i++) {
1317         struct nbrec_acl *acl = od->nbs->acls[i];
1318         if (!strcmp(acl->action, "allow-related")) {
1319             return true;
1320         }
1321     }
1322
1323     return false;
1324 }
1325
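/* Builds the ingress and egress pre-ACL stages.  By default packets just
 * advance to the next table.  If the datapath has stateful ACLs, IP packets
 * are flagged with REGBIT_CONNTRACK_DEFRAG so that the pre-stateful stage
 * sends them to conntrack, except for ND packets and packets entering or
 * leaving through a router port, which bypass conntrack. */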
1326 static void
1327 build_pre_acls(struct ovn_datapath *od, struct hmap *lflows,
1328                struct hmap *ports)
1329 {
1330     bool has_stateful = has_stateful_acl(od);
1331     struct ovn_port *op;
1332
1333     /* Ingress and Egress Pre-ACL Table (Priority 0): Packets are
1334      * allowed by default. */
1335     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 0, "1", "next;");
1336     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 0, "1", "next;");
1337
1338     /* If there are any stateful ACL rules in this datapath, we must
1339      * send all IP packets through the conntrack action, which handles
1340      * defragmentation, in order to match L4 headers. */
1341     if (has_stateful) {
1342         HMAP_FOR_EACH (op, key_node, ports) {
1343             if (op->od == od && !strcmp(op->nbs->type, "router")) {
1344                 /* Can't use ct() for router ports. Consider the
1345                  * following configuration: lp1(10.0.0.2) on
1346                  * hostA--ls1--lr0--ls2--lp2(10.0.1.2) on hostB, For a
1347                  * ping from lp1 to lp2, First, the response will go
1348                  * through ct() with a zone for lp2 in the ls2 ingress
1349                  * pipeline on hostB.  That ct zone knows about this
1350                  * connection. Next, it goes through ct() with the zone
1351                  * for the router port in the egress pipeline of ls2 on
1352                  * hostB.  This zone does not know about the connection,
1353                  * as the icmp request went through the logical router
1354                  * on hostA, not hostB. This would only work with
1355                  * distributed conntrack state across all chassis. */
1356                 struct ds match_in = DS_EMPTY_INITIALIZER;
1357                 struct ds match_out = DS_EMPTY_INITIALIZER;
1358
1359                 ds_put_format(&match_in, "ip && inport == %s", op->json_key);
1360                 ds_put_format(&match_out, "ip && outport == %s", op->json_key);
1361                 ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110,
1362                               ds_cstr(&match_in), "next;");
1363                 ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110,
1364                               ds_cstr(&match_out), "next;");
1365
1366                 ds_destroy(&match_in);
1367                 ds_destroy(&match_out);
1368             }
1369         }
1370         /* Ingress and Egress Pre-ACL Table (Priority 110).
1371          *
1372          * Do not send ND packets to conntrack. */
1373         ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 110, "nd", "next;");
1374         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 110, "nd", "next;");
1375
1376         /* Ingress and Egress Pre-ACL Table (Priority 100).
1377          *
1378          * Regardless of whether the ACL is "from-lport" or "to-lport",
1379          * we need rules in both the ingress and egress table, because
1380          * the return traffic needs to be followed.
1381          *
1382          * 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table send
1383          * it to conntrack for tracking and defragmentation. */
1384         ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_ACL, 100, "ip",
1385                       REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1386         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_ACL, 100, "ip",
1387                       REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1388     }
1389 }
1390
1391 /* For a 'key' of the form "IP:port" or just "IP", sets 'port' and
1392  * 'ip_address'.  The caller must free() the memory allocated for
1393  * 'ip_address'. */
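/* For example, the key "10.0.0.4:8080" yields the IP address "10.0.0.4" and
 * port 8080, while the key "10.0.0.4" alone yields port 0.  On a parse error,
 * '*ip_address' is left NULL. */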
1394 static void
1395 ip_address_and_port_from_lb_key(const char *key, char **ip_address,
1396                                 uint16_t *port)
1397 {
1398     char *ip_str, *start, *next;
1399     *ip_address = NULL;
1400     *port = 0;
1401
1402     next = start = xstrdup(key);
1403     ip_str = strsep(&next, ":");
1404     if (!ip_str || !ip_str[0]) {
1405         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1406         VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1407         free(start);
1408         return;
1409     }
1410
1411     ovs_be32 ip, mask;
1412     char *error = ip_parse_masked(ip_str, &ip, &mask);
1413     if (error || mask != OVS_BE32_MAX) {
1414         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1415         VLOG_WARN_RL(&rl, "bad ip address for load balancer key %s", key);
1416         free(start);
1417         free(error);
1418         return;
1419     }
1420
1421     int l4_port = 0;
1422     if (next && next[0]) {
1423         if (!str_to_int(next, 0, &l4_port) || l4_port < 0 || l4_port > 65535) {
1424             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
1425             VLOG_WARN_RL(&rl, "bad ip port for load balancer key %s", key);
1426             free(start);
1427             return;
1428         }
1429     }
1430
1431     *port = l4_port;
1432     *ip_address = xstrdup(ip_str);
1433     free(start);
1434 }
1435
1436 static void
1437 build_pre_lb(struct ovn_datapath *od, struct hmap *lflows)
1438 {
1439     /* Allow all packets to go to next tables by default. */
1440     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB, 0, "1", "next;");
1441     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB, 0, "1", "next;");
1442
1443     struct sset all_ips = SSET_INITIALIZER(&all_ips);
1444     if (od->nbs->load_balancer) {
1445         struct nbrec_load_balancer *lb = od->nbs->load_balancer;
1446         struct smap *vips = &lb->vips;
1447         struct smap_node *node;
1448         bool vip_configured = false;
1449
1450         SMAP_FOR_EACH (node, vips) {
1451             vip_configured = true;
1452
1453             /* node->key contains IP:port or just IP. */
1454             char *ip_address = NULL;
1455             uint16_t port;
1456             ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1457             if (!ip_address) {
1458                 continue;
1459             }
1460
1461             if (!sset_contains(&all_ips, ip_address)) {
1462                 sset_add(&all_ips, ip_address);
1463             }
1464
1465             free(ip_address);
1466
1467             /* Ignore L4 port information in the key because fragmented
1468              * packets may not have L4 information.  The pre-stateful table
1469              * will send the packet through the ct() action to defragment;
1470              * the stateful table will eventually look at the L4 information. */
1471         }
1472
1473         /* 'REGBIT_CONNTRACK_DEFRAG' is set to let the pre-stateful table
1474          * send the packet to conntrack for defragmentation. */
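             /* E.g. (illustrative), a configured VIP key of "10.0.0.10:80"
              * results in a priority-100 flow in S_SWITCH_IN_PRE_LB matching
              * "ip && ip4.dst == 10.0.0.10" that sets REGBIT_CONNTRACK_DEFRAG
              * and continues with "next;". */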
1475         const char *ip_address;
1476         SSET_FOR_EACH (ip_address, &all_ips) {
1477             char *match = xasprintf("ip && ip4.dst == %s", ip_address);
1478             ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_LB,
1479                           100, match, REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1480             free(match);
1481         }
1482
1483         sset_destroy(&all_ips);
1484
1485         if (vip_configured) {
1486             ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_LB,
1487                           100, "ip", REGBIT_CONNTRACK_DEFRAG" = 1; next;");
1488         }
1489     }
1490 }
1491
1492 static void
1493 build_pre_stateful(struct ovn_datapath *od, struct hmap *lflows)
1494 {
1495     /* Ingress and Egress pre-stateful Table (Priority 0): Packets are
1496      * allowed by default. */
1497     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 0, "1", "next;");
1498     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 0, "1", "next;");
1499
1500     /* If REGBIT_CONNTRACK_DEFRAG is set as 1, then the packets should be
1501      * sent to conntrack for tracking and defragmentation. */
1502     ovn_lflow_add(lflows, od, S_SWITCH_IN_PRE_STATEFUL, 100,
1503                   REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1504     ovn_lflow_add(lflows, od, S_SWITCH_OUT_PRE_STATEFUL, 100,
1505                   REGBIT_CONNTRACK_DEFRAG" == 1", "ct_next;");
1506 }
1507
1508 static void
1509 build_acls(struct ovn_datapath *od, struct hmap *lflows)
1510 {
1511     bool has_stateful = has_stateful_acl(od);
1512
1513     /* Ingress and Egress ACL Table (Priority 0): Packets are allowed by
1514      * default.  A related rule at priority 1 is added below if there
1515      * are any stateful ACLs in this datapath. */
1516     ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 0, "1", "next;");
1517     ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 0, "1", "next;");
1518
1519     if (has_stateful) {
1520         /* Ingress and Egress ACL Table (Priority 1).
1521          *
1522          * By default, traffic is allowed.  This is partially handled by
1523          * the Priority 0 ACL flows added earlier, but we also need to
1524          * commit IP flows.  This is because, while the initiator's
1525          * direction may not have any stateful rules, the server's side may,
1526          * and then its return traffic would not have an associated
1527          * conntrack entry and would be treated as "+invalid". */
1528         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, 1, "ip",
1529                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
1530         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, 1, "ip",
1531                       REGBIT_CONNTRACK_COMMIT" = 1; next;");
1532
1533         /* Ingress and Egress ACL Table (Priority 65535).
1534          *
1535          * Always drop traffic that's in an invalid state.  This is
1536          * enforced at a higher priority than ACLs can be defined. */
1537         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1538                       "ct.inv", "drop;");
1539         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1540                       "ct.inv", "drop;");
1541
1542         /* Ingress and Egress ACL Table (Priority 65535).
1543          *
1544          * Always allow traffic that is established to a committed
1545          * conntrack entry.  This is enforced at a higher priority than
1546          * ACLs can be defined. */
1547         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1548                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1549                       "next;");
1550         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1551                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1552                       "next;");
1553
1554         /* Ingress and Egress ACL Table (Priority 65535).
1555          *
1556          * Always allow traffic that is related to an existing conntrack
1557          * entry.  This is enforced at a higher priority than ACLs can
1558          * be defined.
1559          *
1560          * NOTE: This does not support related data sessions (e.g.,
1561          * a dynamically negotiated FTP data channel), but it does allow
1562          * related traffic, such as an ICMP Port Unreachable generated
1563          * from a non-listening UDP port, to pass through.  */
1564         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX,
1565                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1566                       "next;");
1567         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX,
1568                       "!ct.est && ct.rel && !ct.new && !ct.inv",
1569                       "next;");
1570
1571         /* Ingress and Egress ACL Table (Priority 65535).
1572          *
1573          * Do not send ND packets to conntrack. */
1574         ovn_lflow_add(lflows, od, S_SWITCH_IN_ACL, UINT16_MAX, "nd", "next;");
1575         ovn_lflow_add(lflows, od, S_SWITCH_OUT_ACL, UINT16_MAX, "nd", "next;");
1576     }
1577
1578     /* Ingress or Egress ACL Table (Various priorities). */
1579     for (size_t i = 0; i < od->nbs->n_acls; i++) {
1580         struct nbrec_acl *acl = od->nbs->acls[i];
1581         bool ingress = !strcmp(acl->direction, "from-lport");
1582         enum ovn_stage stage = ingress ? S_SWITCH_IN_ACL : S_SWITCH_OUT_ACL;
1583
1584         if (!strcmp(acl->action, "allow")) {
1585             /* If there are any stateful flows, we must commit even "allow"
1586              * actions.  This is because, while the initiator's
1587              * direction may not have any stateful rules, the server's
1588              * side may, and then its return traffic would not have an
1589              * associated conntrack entry and would be treated as "+invalid". */
1590             const char *actions = has_stateful
1591                                     ? REGBIT_CONNTRACK_COMMIT" = 1; next;"
1592                                     : "next;";
1593             ovn_lflow_add(lflows, od, stage,
1594                           acl->priority + OVN_ACL_PRI_OFFSET,
1595                           acl->match, actions);
1596         } else if (!strcmp(acl->action, "allow-related")) {
1597             struct ds match = DS_EMPTY_INITIALIZER;
1598
1599             /* Commit the connection tracking entry, which allows all
1600              * other traffic related to this entry to flow due to the
1601              * 65535 priority flow defined earlier. */
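                 /* E.g. (illustrative), an "allow-related" ACL whose match is
                  * "tcp.dst == 22" yields a flow matching
                  * "ct.new && (tcp.dst == 22)" at the ACL's priority plus
                  * OVN_ACL_PRI_OFFSET, with REGBIT_CONNTRACK_COMMIT set so that
                  * replies and related traffic are admitted by the
                  * priority-65535 flows above. */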
1602             ds_put_format(&match, "ct.new && (%s)", acl->match);
1603             ovn_lflow_add(lflows, od, stage,
1604                           acl->priority + OVN_ACL_PRI_OFFSET,
1605                           ds_cstr(&match),
1606                           REGBIT_CONNTRACK_COMMIT" = 1; next;");
1607
1608             ds_destroy(&match);
1609         } else if (!strcmp(acl->action, "drop")) {
1610             ovn_lflow_add(lflows, od, stage,
1611                           acl->priority + OVN_ACL_PRI_OFFSET,
1612                           acl->match, "drop;");
1613         } else if (!strcmp(acl->action, "reject")) {
1614             /* XXX Need to support "reject"; fall back to "drop" for now. */
1615             VLOG_INFO("reject is not a supported action");
1616             ovn_lflow_add(lflows, od, stage,
1617                           acl->priority + OVN_ACL_PRI_OFFSET,
1618                           acl->match, "drop;");
1619         }
1620     }
1621 }
1622
1623 static void
1624 build_lb(struct ovn_datapath *od, struct hmap *lflows)
1625 {
1626     /* Ingress and Egress LB Table (Priority 0): Packets are allowed by
1627      * default.  */
1628     ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, 0, "1", "next;");
1629     ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, 0, "1", "next;");
1630
1631     if (od->nbs->load_balancer) {
1632         /* Ingress and Egress LB Table (Priority 65535).
1633          *
1634          * Send established traffic through conntrack just for NAT. */
1635         ovn_lflow_add(lflows, od, S_SWITCH_IN_LB, UINT16_MAX,
1636                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1637                       REGBIT_CONNTRACK_NAT" = 1; next;");
1638         ovn_lflow_add(lflows, od, S_SWITCH_OUT_LB, UINT16_MAX,
1639                       "ct.est && !ct.rel && !ct.new && !ct.inv",
1640                       REGBIT_CONNTRACK_NAT" = 1; next;");
1641     }
1642 }
1643
1644 static void
1645 build_stateful(struct ovn_datapath *od, struct hmap *lflows)
1646 {
1647     /* Ingress and Egress stateful Table (Priority 0): Packets are
1648      * allowed by default. */
1649     ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 0, "1", "next;");
1650     ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 0, "1", "next;");
1651
1652     /* If REGBIT_CONNTRACK_COMMIT is set as 1, then the packets should be
1653      * committed to conntrack. */
1654     ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
1655                   REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit; next;");
1656     ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
1657                   REGBIT_CONNTRACK_COMMIT" == 1", "ct_commit; next;");
1658
1659     /* If REGBIT_CONNTRACK_NAT is set as 1, then packets should just be sent
1660      * through nat (without committing).
1661      *
1662      * REGBIT_CONNTRACK_COMMIT is set for new connections and
1663      * REGBIT_CONNTRACK_NAT is set for established connections. So they
1664      * don't overlap.
1665      */
1666     ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL, 100,
1667                   REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1668     ovn_lflow_add(lflows, od, S_SWITCH_OUT_STATEFUL, 100,
1669                   REGBIT_CONNTRACK_NAT" == 1", "ct_lb;");
1670
1671     /* Load balancing rules for new connections get committed to the
1672      * conntrack table.  So even if REGBIT_CONNTRACK_COMMIT was set in a
1673      * previous table, the higher priority load balancing rules below also
1674      * commit the connection, and it is okay if we do not hit the above
1675      * match on REGBIT_CONNTRACK_COMMIT. */
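         /* E.g. (illustrative), a VIP key of "10.0.0.10:80" whose value is
          * "10.0.0.2:80,10.0.0.3:80" yields a priority-120 flow in
          * S_SWITCH_IN_STATEFUL matching
          * "ct.new && ip && ip4.dst == 10.0.0.10 && tcp && tcp.dst == 80"
          * with the action "ct_lb(10.0.0.2:80,10.0.0.3:80);". */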
1676     if (od->nbs->load_balancer) {
1677         struct nbrec_load_balancer *lb = od->nbs->load_balancer;
1678         struct smap *vips = &lb->vips;
1679         struct smap_node *node;
1680
1681         SMAP_FOR_EACH (node, vips) {
1682             uint16_t port = 0;
1683
1684             /* node->key contains IP:port or just IP. */
1685             char *ip_address = NULL;
1686             ip_address_and_port_from_lb_key(node->key, &ip_address, &port);
1687             if (!ip_address) {
1688                 continue;
1689             }
1690
1691             /* New connections in Ingress table. */
1692             char *action = xasprintf("ct_lb(%s);", node->value);
1693             struct ds match = DS_EMPTY_INITIALIZER;
1694             ds_put_format(&match, "ct.new && ip && ip4.dst == %s", ip_address);
1695             if (port) {
1696                 if (lb->protocol && !strcmp(lb->protocol, "udp")) {
1697                     ds_put_format(&match, " && udp && udp.dst == %d", port);
1698                 } else {
1699                     ds_put_format(&match, " && tcp && tcp.dst == %d", port);
1700                 }
1701                 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1702                               120, ds_cstr(&match), action);
1703             } else {
1704                 ovn_lflow_add(lflows, od, S_SWITCH_IN_STATEFUL,
1705                               110, ds_cstr(&match), action);
1706             }
1707
1708             ds_destroy(&match);
1709             free(action);
                  free(ip_address);
1710         }
1711     }
1712 }
1713
1714 static void
1715 build_lswitch_flows(struct hmap *datapaths, struct hmap *ports,
1716                     struct hmap *lflows, struct hmap *mcgroups)
1717 {
1718     /* This flow table structure is documented in ovn-northd(8), so please
1719      * update ovn-northd.8.xml if you change anything. */
1720
1721     struct ds match = DS_EMPTY_INITIALIZER;
1722     struct ds actions = DS_EMPTY_INITIALIZER;
1723
1724     /* Build pre-ACL and ACL tables for both ingress and egress.
1725      * Ingress tables 3 and 4.  Egress tables 0 and 1. */
1726     struct ovn_datapath *od;
1727     HMAP_FOR_EACH (od, key_node, datapaths) {
1728         if (!od->nbs) {
1729             continue;
1730         }
1731
1732         build_pre_acls(od, lflows, ports);
1733         build_pre_lb(od, lflows);
1734         build_pre_stateful(od, lflows);
1735         build_acls(od, lflows);
1736         build_lb(od, lflows);
1737         build_stateful(od, lflows);
1738     }
1739
1740     /* Logical switch ingress table 0: Admission control framework (priority
1741      * 100). */
1742     HMAP_FOR_EACH (od, key_node, datapaths) {
1743         if (!od->nbs) {
1744             continue;
1745         }
1746
1747         /* Logical VLANs not supported. */
1748         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "vlan.present",
1749                       "drop;");
1750
1751         /* Broadcast/multicast source address is invalid. */
1752         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_L2, 100, "eth.src[40]",
1753                       "drop;");
1754
1755         /* Port security flows have priority 50 (see below) and will continue
1756          * to the next table if packet source is acceptable. */
1757     }
1758
1759     /* Logical switch ingress table 0: Ingress port security - L2
1760      *  (priority 50).
1761      *  Ingress table 1: Ingress port security - IP (priority 90 and 80)
1762      *  Ingress table 2: Ingress port security - ND (priority 90 and 80)
1763      */
1764     struct ovn_port *op;
1765     HMAP_FOR_EACH (op, key_node, ports) {
1766         if (!op->nbs) {
1767             continue;
1768         }
1769
1770         if (!lsp_is_enabled(op->nbs)) {
1771             /* Drop packets from disabled logical ports (since logical flow
1772              * tables are default-drop). */
1773             continue;
1774         }
1775
1776         ds_clear(&match);
1777         ds_put_format(&match, "inport == %s", op->json_key);
1778         build_port_security_l2(
1779             "eth.src", op->nbs->port_security, op->nbs->n_port_security,
1780             &match);
1781         ovn_lflow_add(lflows, op->od, S_SWITCH_IN_PORT_SEC_L2, 50,
1782                       ds_cstr(&match), "next;");
1783
1784         if (op->nbs->n_port_security) {
1785             build_port_security_ip(P_IN, op, lflows);
1786             build_port_security_nd(op, lflows);
1787         }
1788     }
1789
1790     /* Ingress tables 1 and 2: Port security - IP and ND; by default, go to
1791      * the next table (priority 0). */
1792     HMAP_FOR_EACH (od, key_node, datapaths) {
1793         if (!od->nbs) {
1794             continue;
1795         }
1796
1797         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_ND, 0, "1", "next;");
1798         ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;");
1799     }
1800
1801     /* Ingress table 9: ARP/ND responder, skip requests coming from localnet
1802      * ports (priority 100). */
1803     HMAP_FOR_EACH (op, key_node, ports) {
1804         if (!op->nbs) {
1805             continue;
1806         }
1807
1808         if (!strcmp(op->nbs->type, "localnet")) {
1809             ds_clear(&match);
1810             ds_put_format(&match, "inport == %s", op->json_key);
1811             ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 100,
1812                           ds_cstr(&match), "next;");
1813         }
1814     }
1815
1816     /* Ingress table 9: ARP/ND responder, reply for known IPs.
1817      * (priority 50). */
1818     HMAP_FOR_EACH (op, key_node, ports) {
1819         if (!op->nbs) {
1820             continue;
1821         }
1822
1823         /*
1824          * Add ARP/ND reply flows if either
1825          *  - the port is up, or
1826          *  - the port type is "router".
1827          */
1828         if (!lsp_is_up(op->nbs) && strcmp(op->nbs->type, "router")) {
1829             continue;
1830         }
1831
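             /* E.g. (illustrative), an addresses entry of
              * "00:00:00:00:00:01 192.168.1.5" on such a port produces a
              * priority-50 flow matching "arp.tpa == 192.168.1.5 && arp.op == 1"
              * whose actions rewrite the request into an ARP reply from
              * 00:00:00:00:00:01 and send it back out the ingress port. */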
1832         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1833             struct lport_addresses laddrs;
1834             if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
1835                                        true)) {
1836                 continue;
1837             }
1838             for (size_t j = 0; j < laddrs.n_ipv4_addrs; j++) {
1839                 ds_clear(&match);
1840                 ds_put_format(&match, "arp.tpa == "IP_FMT" && arp.op == 1",
1841                               IP_ARGS(laddrs.ipv4_addrs[j].addr));
1842                 ds_clear(&actions);
1843                 ds_put_format(&actions,
1844                     "eth.dst = eth.src; "
1845                     "eth.src = "ETH_ADDR_FMT"; "
1846                     "arp.op = 2; /* ARP reply */ "
1847                     "arp.tha = arp.sha; "
1848                     "arp.sha = "ETH_ADDR_FMT"; "
1849                     "arp.tpa = arp.spa; "
1850                     "arp.spa = "IP_FMT"; "
1851                     "outport = inport; "
1852                     "inport = \"\"; /* Allow sending out inport. */ "
1853                     "output;",
1854                     ETH_ADDR_ARGS(laddrs.ea),
1855                     ETH_ADDR_ARGS(laddrs.ea),
1856                     IP_ARGS(laddrs.ipv4_addrs[j].addr));
1857                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
1858                               ds_cstr(&match), ds_cstr(&actions));
1859             }
1860
1861             if (laddrs.n_ipv6_addrs > 0) {
1862                 char ip6_str[INET6_ADDRSTRLEN + 1];
1863                 ds_clear(&match);
1864                 ds_put_cstr(&match, "icmp6 && icmp6.type == 135 && ");
1865                 if (laddrs.n_ipv6_addrs == 1) {
1866                     ipv6_string_mapped(ip6_str,
1867                                        &(laddrs.ipv6_addrs[0].addr));
1868                     ds_put_format(&match, "nd.target == %s", ip6_str);
1869                 } else {
1870                     ds_put_cstr(&match, "(");
1871                     for (size_t j = 0; j < laddrs.n_ipv6_addrs; j++) {
1872                         ipv6_string_mapped(ip6_str,
1873                                            &(laddrs.ipv6_addrs[j].addr));
1874                         ds_put_format(&match, "nd.target == %s || ", ip6_str);
1875                     }
1876                     ds_chomp(&match, ' ');
1877                     ds_chomp(&match, '|');
1878                     ds_chomp(&match, '|');
1879                     ds_chomp(&match, ' ');
1880                     ds_put_cstr(&match, ")");
1881                 }
1882                 ds_clear(&actions);
1883                 ds_put_format(&actions,
1884                     "na { eth.src = "ETH_ADDR_FMT"; "
1885                     "nd.tll = "ETH_ADDR_FMT"; "
1886                     "outport = inport; "
1887                     "inport = \"\"; /* Allow sending out inport. */ "
1888                     "output; };",
1889                     ETH_ADDR_ARGS(laddrs.ea),
1890                     ETH_ADDR_ARGS(laddrs.ea));
1891
1892                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_ARP_ND_RSP, 50,
1893                               ds_cstr(&match), ds_cstr(&actions));
1894
1895             }
1896
1897             destroy_lport_addresses(&laddrs);
1898         }
1899     }
1900
1901     /* Ingress table 9: ARP/ND responder; by default, go to the next table
1902      * (priority 0). */
1903     HMAP_FOR_EACH (od, key_node, datapaths) {
1904         if (!od->nbs) {
1905             continue;
1906         }
1907
1908         ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;");
1909     }
1910
1911     /* Ingress table 10: Destination lookup, broadcast and multicast handling
1912      * (priority 100). */
1913     HMAP_FOR_EACH (op, key_node, ports) {
1914         if (!op->nbs) {
1915             continue;
1916         }
1917
1918         if (lsp_is_enabled(op->nbs)) {
1919             ovn_multicast_add(mcgroups, &mc_flood, op);
1920         }
1921     }
1922     HMAP_FOR_EACH (od, key_node, datapaths) {
1923         if (!od->nbs) {
1924             continue;
1925         }
1926
1927         ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 100, "eth.mcast",
1928                       "outport = \""MC_FLOOD"\"; output;");
1929     }
1930
1931     /* Ingress table 10: Destination lookup, unicast handling (priority 50). */
1932     HMAP_FOR_EACH (op, key_node, ports) {
1933         if (!op->nbs) {
1934             continue;
1935         }
1936
1937         for (size_t i = 0; i < op->nbs->n_addresses; i++) {
1938             struct eth_addr mac;
1939
1940             if (eth_addr_from_string(op->nbs->addresses[i], &mac)) {
1941                 ds_clear(&match);
1942                 ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
1943                               ETH_ADDR_ARGS(mac));
1944
1945                 ds_clear(&actions);
1946                 ds_put_format(&actions, "outport = %s; output;", op->json_key);
1947                 ovn_lflow_add(lflows, op->od, S_SWITCH_IN_L2_LKUP, 50,
1948                               ds_cstr(&match), ds_cstr(&actions));
1949             } else if (!strcmp(op->nbs->addresses[i], "unknown")) {
1950                 if (lsp_is_enabled(op->nbs)) {
1951                     ovn_multicast_add(mcgroups, &mc_unknown, op);
1952                     op->od->has_unknown = true;
1953                 }
1954             } else {
1955                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
1956
1957                 VLOG_INFO_RL(&rl,
1958                              "%s: invalid syntax '%s' in addresses column",
1959                              op->nbs->name, op->nbs->addresses[i]);
1960             }
1961         }
1962     }
1963
1964     /* Ingress table 10: Destination lookup for unknown MACs (priority 0). */
1965     HMAP_FOR_EACH (od, key_node, datapaths) {
1966         if (!od->nbs) {
1967             continue;
1968         }
1969
1970         if (od->has_unknown) {
1971             ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 0, "1",
1972                           "outport = \""MC_UNKNOWN"\"; output;");
1973         }
1974     }
1975
1976     /* Egress table 6: Egress port security - IP (priority 0)
1977      * Egress table 7: Egress port security L2 - multicast/broadcast
1978      *                 (priority 100). */
1979     HMAP_FOR_EACH (od, key_node, datapaths) {
1980         if (!od->nbs) {
1981             continue;
1982         }
1983
1984         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_IP, 0, "1", "next;");
1985         ovn_lflow_add(lflows, od, S_SWITCH_OUT_PORT_SEC_L2, 100, "eth.mcast",
1986                       "output;");
1987     }
1988
1989     /* Egress table 6: Egress port security - IP (priorities 90 and 80)
1990      * if port security is enabled.
1991      *
1992      * Egress table 7: Egress port security - L2 (priorities 50 and 150).
1993      *
1994      * Priority 50 rules implement port security for enabled logical ports.
1995      *
1996      * Priority 150 rules drop packets to disabled logical ports, so that they
1997      * don't even receive multicast or broadcast packets. */
1998     HMAP_FOR_EACH (op, key_node, ports) {
1999         if (!op->nbs) {
2000             continue;
2001         }
2002
2003         ds_clear(&match);
2004         ds_put_format(&match, "outport == %s", op->json_key);
2005         if (lsp_is_enabled(op->nbs)) {
2006             build_port_security_l2("eth.dst", op->nbs->port_security,
2007                                    op->nbs->n_port_security, &match);
2008             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 50,
2009                           ds_cstr(&match), "output;");
2010         } else {
2011             ovn_lflow_add(lflows, op->od, S_SWITCH_OUT_PORT_SEC_L2, 150,
2012                           ds_cstr(&match), "drop;");
2013         }
2014
2015         if (op->nbs->n_port_security) {
2016             build_port_security_ip(P_OUT, op, lflows);
2017         }
2018     }
2019
2020     ds_destroy(&match);
2021     ds_destroy(&actions);
2022 }
2023
2024 static bool
2025 lrport_is_enabled(const struct nbrec_logical_router_port *lrport)
2026 {
2027     return !lrport->enabled || *lrport->enabled;
2028 }
2029
2030 static void
2031 add_route(struct hmap *lflows, const struct ovn_port *op,
2032           const char *network_s, int plen, const char *gateway)
2033 {
2034     char *match = xasprintf("ip4.dst == %s/%d", network_s, plen);
2035
2036     struct ds actions = DS_EMPTY_INITIALIZER;
2037     ds_put_cstr(&actions, "ip.ttl--; reg0 = ");
2038     if (gateway) {
2039         ds_put_cstr(&actions, gateway);
2040     } else {
2041         ds_put_cstr(&actions, "ip4.dst");
2042     }
2043     ds_put_format(&actions,
2044                   "; "
2045                   "reg1 = %s; "
2046                   "eth.src = "ETH_ADDR_FMT"; "
2047                   "outport = %s; "
2048                   "next;",
2049                   op->ip_s, ETH_ADDR_ARGS(op->mac), op->json_key);
2050
2051     /* The priority here is calculated to implement longest-prefix-match
2052      * routing. */
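         /* E.g., a route for 192.168.1.0/24 (illustrative) is installed at
          * priority 24 and therefore takes precedence over a default
          * 0.0.0.0/0 route, which lands at priority 0. */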
2053     ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, plen, match,
2054                   ds_cstr(&actions));
2055     ds_destroy(&actions);
2056     free(match);
2057 }
2058
2059 static void
2060 build_static_route_flow(struct hmap *lflows, struct ovn_datapath *od,
2061                         struct hmap *ports,
2062                         const struct nbrec_logical_router_static_route *route)
2063 {
2064     ovs_be32 prefix, next_hop, mask;
2065
2066     /* Verify that the next hop is an IP address with a 32-bit mask. */
2067     char *error = ip_parse_masked(route->nexthop, &next_hop, &mask);
2068     if (error || mask != OVS_BE32_MAX) {
2069         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2070         VLOG_WARN_RL(&rl, "bad next hop ip address %s", route->nexthop);
2071         free(error);
2072         return;
2073     }
2074
2075     /* Verify that ip prefix is a valid CIDR address. */
2076     error = ip_parse_masked(route->ip_prefix, &prefix, &mask);
2077     if (error || !ip_is_cidr(mask)) {
2078         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2079         VLOG_WARN_RL(&rl, "bad 'ip_prefix' in static routes %s",
2080                      route->ip_prefix);
2081         free(error);
2082         return;
2083     }
2084
2085     /* Find the outgoing port. */
2086     struct ovn_port *out_port = NULL;
2087     if (route->output_port) {
2088         out_port = ovn_port_find(ports, route->output_port);
2089         if (!out_port) {
2090             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2091             VLOG_WARN_RL(&rl, "Bad out port %s for static route %s",
2092                          route->output_port, route->ip_prefix);
2093             return;
2094         }
2095     } else {
2096         /* output_port is not specified, find the
2097          * router port matching the next hop. */
2098         int i;
2099         for (i = 0; i < od->nbr->n_ports; i++) {
2100             struct nbrec_logical_router_port *lrp = od->nbr->ports[i];
2101             out_port = ovn_port_find(ports, lrp->name);
2102             if (!out_port) {
2103                 /* This should not happen. */
2104                 continue;
2105             }
2106
2107             if (out_port->network
2108                 && !((out_port->network ^ next_hop) & out_port->mask)) {
2109                 /* There should be only 1 interface that matches the next hop.
2110                  * Otherwise, it's a configuration error, because the subnets
2111                  * of a router's interfaces should NOT overlap. */
2112                 break;
2113             }
2114         }
2115         if (i == od->nbr->n_ports) {
2116             /* There is no matched out port. */
2117             static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2118             VLOG_WARN_RL(&rl, "No path for static route %s; next hop %s",
2119                          route->ip_prefix, route->nexthop);
2120             return;
2121         }
2122     }
2123
2124     char *prefix_s = xasprintf(IP_FMT, IP_ARGS(prefix));
2125     add_route(lflows, out_port, prefix_s, ip_count_cidr_bits(mask),
2126               route->nexthop);
2127     free(prefix_s);
2128 }
2129
2130 static void
2131 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
2132                     struct hmap *lflows)
2133 {
2134     /* This flow table structure is documented in ovn-northd(8), so please
2135      * update ovn-northd.8.xml if you change anything. */
2136
2137     struct ds match = DS_EMPTY_INITIALIZER;
2138     struct ds actions = DS_EMPTY_INITIALIZER;
2139
2140     /* Logical router ingress table 0: Admission control framework. */
2141     struct ovn_datapath *od;
2142     HMAP_FOR_EACH (od, key_node, datapaths) {
2143         if (!od->nbr) {
2144             continue;
2145         }
2146
2147         /* Logical VLANs not supported.
2148          * Broadcast/multicast source address is invalid. */
2149         ovn_lflow_add(lflows, od, S_ROUTER_IN_ADMISSION, 100,
2150                       "vlan.present || eth.src[40]", "drop;");
2151     }
2152
2153     /* Logical router ingress table 0: match (priority 50). */
2154     struct ovn_port *op;
2155     HMAP_FOR_EACH (op, key_node, ports) {
2156         if (!op->nbr) {
2157             continue;
2158         }
2159
2160         if (!lrport_is_enabled(op->nbr)) {
2161             /* Drop packets from disabled logical ports (since logical flow
2162              * tables are default-drop). */
2163             continue;
2164         }
2165
2166         ds_clear(&match);
2167         ds_put_format(&match,
2168             "(eth.mcast || eth.dst == "ETH_ADDR_FMT") && inport == %s",
2169             ETH_ADDR_ARGS(op->mac), op->json_key);
2170         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_ADMISSION, 50,
2171                       ds_cstr(&match), "next;");
2172     }
2173
2174     /* Logical router ingress table 1: IP Input. */
2175     HMAP_FOR_EACH (od, key_node, datapaths) {
2176         if (!od->nbr) {
2177             continue;
2178         }
2179
2180         /* L3 admission control: drop multicast and broadcast source, localhost
2181          * source or destination, and zero network source or destination
2182          * (priority 100). */
2183         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 100,
2184                       "ip4.mcast || "
2185                       "ip4.src == 255.255.255.255 || "
2186                       "ip4.src == 127.0.0.0/8 || "
2187                       "ip4.dst == 127.0.0.0/8 || "
2188                       "ip4.src == 0.0.0.0/8 || "
2189                       "ip4.dst == 0.0.0.0/8",
2190                       "drop;");
2191
2192         /* ARP reply handling.  Use ARP replies to populate the logical
2193          * router's ARP table. */
2194         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 90, "arp.op == 2",
2195                       "put_arp(inport, arp.spa, arp.sha);");
2196
2197         /* Drop Ethernet local broadcast.  By definition this traffic should
2198          * not be forwarded. */
2199         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2200                       "eth.bcast", "drop;");
2201
2202         /* Drop IP multicast. */
2203         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 50,
2204                       "ip4.mcast", "drop;");
2205
2206         /* TTL discard.
2207          *
2208          * XXX Need to send ICMP time exceeded if !ip.later_frag. */
2209         ds_clear(&match);
2210         ds_put_cstr(&match, "ip4 && ip.ttl == {0, 1}");
2211         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 30,
2212                       ds_cstr(&match), "drop;");
2213
2214         /* Pass other traffic not already handled to the next table for
2215          * routing. */
2216         ovn_lflow_add(lflows, od, S_ROUTER_IN_IP_INPUT, 0, "1", "next;");
2217     }
2218
2219     HMAP_FOR_EACH (op, key_node, ports) {
2220         if (!op->nbr) {
2221             continue;
2222         }
2223
2224         /* L3 admission control: drop packets that originate from an IP address
2225          * owned by the router or a broadcast address known to the router
2226          * (priority 100). */
2227         ds_clear(&match);
2228         ds_put_format(&match, "ip4.src == {%s, %s}", op->ip_s, op->bcast_s);
2229         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 100,
2230                       ds_cstr(&match), "drop;");
2231
2232         /* ICMP echo reply.  These flows reply to ICMP echo requests
2233          * received for the router's IP address. Since packets only
2234          * get here as part of the logical router datapath, the inport
2235          * (i.e. the incoming locally attached net) does not matter.
2236          * The ip.ttl also does not matter (RFC1812 section 4.2.2.9). */
2237         ds_clear(&match);
2238         ds_put_format(&match,
2239                       "ip4.dst == %s && icmp4.type == 8 && icmp4.code == 0",
2240                       op->ip_s);
2241         ds_clear(&actions);
2242         ds_put_format(&actions,
2243             "ip4.dst = ip4.src; "
2244             "ip4.src = %s; "
2245             "ip.ttl = 255; "
2246             "icmp4.type = 0; "
2247             "inport = \"\"; /* Allow sending out inport. */ "
2248             "next; ",
2249             op->ip_s);
2250         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2251                       ds_cstr(&match), ds_cstr(&actions));
2252
2253         /* ARP reply.  These flows reply to ARP requests for the router's own
2254          * IP address. */
2255         ds_clear(&match);
2256         ds_put_format(&match, "inport == %s && arp.tpa == %s && arp.op == 1",
2257                       op->json_key, op->ip_s);
2258         ds_clear(&actions);
2259         ds_put_format(&actions,
2260             "eth.dst = eth.src; "
2261             "eth.src = "ETH_ADDR_FMT"; "
2262             "arp.op = 2; /* ARP reply */ "
2263             "arp.tha = arp.sha; "
2264             "arp.sha = "ETH_ADDR_FMT"; "
2265             "arp.tpa = arp.spa; "
2266             "arp.spa = %s; "
2267             "outport = %s; "
2268             "inport = \"\"; /* Allow sending out inport. */ "
2269             "output;",
2270             ETH_ADDR_ARGS(op->mac),
2271             ETH_ADDR_ARGS(op->mac),
2272             op->ip_s,
2273             op->json_key);
2274         ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2275                       ds_cstr(&match), ds_cstr(&actions));
2276
2277         /* ARP handling for external IP addresses.
2278          *
2279          * DNAT IP addresses are external IP addresses that need ARP
2280          * handling. */
2281         for (int i = 0; i < op->od->nbr->n_nat; i++) {
2282             const struct nbrec_nat *nat;
2283
2284             nat = op->od->nbr->nat[i];
2285
2286             if (!strcmp(nat->type, "snat")) {
2287                 continue;
2288             }
2289
2290             ovs_be32 ip;
2291             if (!ip_parse(nat->external_ip, &ip) || !ip) {
2292                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2293                 VLOG_WARN_RL(&rl, "bad ip address %s in dnat configuration "
2294                              "for router %s", nat->external_ip, op->key);
2295                 continue;
2296             }
2297
2298             ds_clear(&match);
2299             ds_put_format(&match,
2300                           "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1",
2301                           op->json_key, IP_ARGS(ip));
2302             ds_clear(&actions);
2303             ds_put_format(&actions,
2304                 "eth.dst = eth.src; "
2305                 "eth.src = "ETH_ADDR_FMT"; "
2306                 "arp.op = 2; /* ARP reply */ "
2307                 "arp.tha = arp.sha; "
2308                 "arp.sha = "ETH_ADDR_FMT"; "
2309                 "arp.tpa = arp.spa; "
2310                 "arp.spa = "IP_FMT"; "
2311                 "outport = %s; "
2312                 "inport = \"\"; /* Allow sending out inport. */ "
2313                 "output;",
2314                 ETH_ADDR_ARGS(op->mac),
2315                 ETH_ADDR_ARGS(op->mac),
2316                 IP_ARGS(ip),
2317                 op->json_key);
2318             ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 90,
2319                           ds_cstr(&match), ds_cstr(&actions));
2320         }
2321
2322         /* Drop IP traffic to this router, unless the router's IP is used
2323          * as an SNAT IP. */
2324         bool snat_ip_is_router_ip = false;
2325         for (int i = 0; i < op->od->nbr->n_nat; i++) {
2326             const struct nbrec_nat *nat;
2327             ovs_be32 ip;
2328
2329             nat = op->od->nbr->nat[i];
2330             if (strcmp(nat->type, "snat")) {
2331                 continue;
2332             }
2333
2334             if (!ip_parse(nat->external_ip, &ip) || !ip) {
2335                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2336                 VLOG_WARN_RL(&rl, "bad ip address %s in snat configuration "
2337                              "for router %s", nat->external_ip, op->key);
2338                 continue;
2339             }
2340
2341             if (ip == op->ip) {
2342                 snat_ip_is_router_ip = true;
2343                 break;
2344             }
2345         }
2346
2347         if (!snat_ip_is_router_ip) {
2348             ds_clear(&match);
2349             ds_put_format(&match, "ip4.dst == %s", op->ip_s);
2350             ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_INPUT, 60,
2351                           ds_cstr(&match), "drop;");
2352         }
2353     }
2354
2355     /* NAT in Gateway routers. */
2356     HMAP_FOR_EACH (od, key_node, datapaths) {
2357         if (!od->nbr) {
2358             continue;
2359         }
2360
2361         /* Packets are allowed by default. */
2362         ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 0, "1", "next;");
2363         ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT, 0, "1", "next;");
2364         ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 0, "1", "next;");
2365
2366         /* NAT rules are only valid on Gateway routers. */
2367         if (!smap_get(&od->nbr->options, "chassis")) {
2368             continue;
2369         }
2370
2371         for (int i = 0; i < od->nbr->n_nat; i++) {
2372             const struct nbrec_nat *nat;
2373
2374             nat = od->nbr->nat[i];
2375
2376             ovs_be32 ip, mask;
2377
2378             char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
2379             if (error || mask != OVS_BE32_MAX) {
2380                 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
2381                 VLOG_WARN_RL(&rl, "bad external ip %s for nat",
2382                              nat->external_ip);
2383                 free(error);
2384                 continue;
2385             }
2386
2387             /* Check the validity of nat->logical_ip. 'logical_ip' can
2388              * be a subnet when the type is "snat". */
2389             error = ip_parse_masked(nat->logical_ip, &ip, &mask);
2390             if (!strcmp(nat->type, "snat")) {
2391                 if (error) {
2392                     static struct vlog_rate_limit rl =
2393                         VLOG_RATE_LIMIT_INIT(5, 1);
2394                     VLOG_WARN_RL(&rl, "bad ip network or ip %s for snat "
2395                                  "in router "UUID_FMT"",
2396                                  nat->logical_ip, UUID_ARGS(&od->key));
2397                     free(error);
2398                     continue;
2399                 }
2400             } else {
2401                 if (error || mask != OVS_BE32_MAX) {
2402                     static struct vlog_rate_limit rl =
2403                         VLOG_RATE_LIMIT_INIT(5, 1);
2404                     VLOG_WARN_RL(&rl, "bad ip %s for dnat in router "
2405                         ""UUID_FMT"", nat->logical_ip, UUID_ARGS(&od->key));
2406                     free(error);
2407                     continue;
2408                 }
2409             }
2410
2411             /* Ingress UNSNAT table: This handles reverse traffic for already
2412              * established connections, i.e., SNAT has already been done in
2413              * the egress pipeline and the packet has now entered the ingress
2414              * pipeline as part of a reply.  We undo the SNAT here.
2415              *
2416              * Undoing SNAT has to happen before DNAT processing.  This is
2417              * because when the packet was DNATed in the ingress pipeline, it
2418              * did not know about the possibility of eventual additional SNAT
2419              * in the egress pipeline. */
2420             if (!strcmp(nat->type, "snat")
2421                 || !strcmp(nat->type, "dnat_and_snat")) {
2422                 ds_clear(&match);
2423                 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2424                 ovn_lflow_add(lflows, od, S_ROUTER_IN_UNSNAT, 100,
2425                               ds_cstr(&match), "ct_snat; next;");
2426             }
2427
2428             /* Ingress DNAT table: Packets enter the pipeline with a
2429              * destination IP address that needs to be DNATted from an
2430              * external IP address to a logical IP address. */
2431             if (!strcmp(nat->type, "dnat")
2432                 || !strcmp(nat->type, "dnat_and_snat")) {
2433                 /* This handles the packet on its way from the initiator to
2434                  * the destination.  We need to zero the inport because the
2435                  * router can send the packet back through the same interface. */
2436                 ds_clear(&match);
2437                 ds_put_format(&match, "ip && ip4.dst == %s", nat->external_ip);
2438                 ds_clear(&actions);
2439                 ds_put_format(&actions, "inport = \"\"; ct_dnat(%s);",
2440                               nat->logical_ip);
2441                 ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 100,
2442                               ds_cstr(&match), ds_cstr(&actions));
2443             }
2444
2445             /* Egress SNAT table: Packets enter the egress pipeline with a
2446              * source IP address that needs to be SNATted to an external IP
2447              * address. */
2448             if (!strcmp(nat->type, "snat")
2449                 || !strcmp(nat->type, "dnat_and_snat")) {
2450                 ds_clear(&match);
2451                 ds_put_format(&match, "ip && ip4.src == %s", nat->logical_ip);
2452                 ds_clear(&actions);
2453                 ds_put_format(&actions, "ct_snat(%s);", nat->external_ip);
2454
2455                 /* The priority here is calculated such that the
2456                  * nat->logical_ip with the longest mask gets a higher
2457                  * priority. */
2458                 ovn_lflow_add(lflows, od, S_ROUTER_OUT_SNAT,
2459                               count_1bits(ntohl(mask)) + 1,
2460                               ds_cstr(&match), ds_cstr(&actions));
2461             }
2462         }
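             /* E.g. (illustrative), a "dnat_and_snat" entry with external_ip
              * 172.16.0.10 and logical_ip 10.0.0.5 yields three flows: one in
              * S_ROUTER_IN_UNSNAT matching "ip && ip4.dst == 172.16.0.10" with
              * action "ct_snat; next;", one in S_ROUTER_IN_DNAT matching the
              * same destination with action "inport = \"\"; ct_dnat(10.0.0.5);",
              * and one in S_ROUTER_OUT_SNAT matching "ip && ip4.src == 10.0.0.5"
              * with action "ct_snat(172.16.0.10);". */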
2463
2464         /* Re-circulate every packet through the DNAT zone.
2465          * This helps with two things.
2466          *
2467          * 1. Any packet that needs to be unDNATed in the reverse
2468          * direction gets unDNATed.  Ideally this could be done in
2469          * the egress pipeline.  But since the gateway router
2470          * does not have any feature that depends on the source
2471          * IP address being an external IP address for IP routing,
2472          * we can do it here, saving a future re-circulation.
2473          *
2474          * 2. Any packet that was sent through the SNAT zone in the
2475          * previous table automatically gets re-circulated to get
2476          * back the new destination IP address that is needed for
2477          * routing in the OpenFlow pipeline. */
2478         ovn_lflow_add(lflows, od, S_ROUTER_IN_DNAT, 50,
2479                       "ip", "inport = \"\"; ct_dnat;");
2480     }
2481
2482     /* Logical router ingress table 4: IP Routing.
2483      *
2484      * A packet that arrives at this table is an IP packet that should be
2485      * routed to the address in ip4.dst. This table sets outport to the correct
2486      * output port, eth.src to the output port's MAC address, and reg0 to the
2487      * next-hop IP address (leaving ip4.dst, the packet's final destination,
2488      * unchanged), and advances to the next table for ARP resolution. */
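         /* E.g., a router port with address 192.168.1.1/24 (illustrative) gets
          * a connected route below matching "ip4.dst == 192.168.1.0/24" whose
          * actions decrement ip.ttl and set reg0 = ip4.dst, reg1 = 192.168.1.1,
          * eth.src to the port's MAC address, and outport to the port itself. */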
2489     HMAP_FOR_EACH (op, key_node, ports) {
2490         if (!op->nbr) {
2491             continue;
2492         }
2493
2494         add_route(lflows, op, op->network_s, op->plen, NULL);
2495     }
2496     HMAP_FOR_EACH (od, key_node, datapaths) {
2497         if (!od->nbr) {
2498             continue;
2499         }
2500
2501         /* Convert the static routes to flows. */
2502         for (int i = 0; i < od->nbr->n_static_routes; i++) {
2503             const struct nbrec_logical_router_static_route *route;
2504
2505             route = od->nbr->static_routes[i];
2506             build_static_route_flow(lflows, od, ports, route);
2507         }
2508     }
2509     /* XXX destination unreachable */
2510
2511     /* Logical router ingress table 5: ARP Resolution.
2512      *
2513      * Any packet that reaches this table is an IP packet whose next-hop IP
2514      * address is in reg0. (ip4.dst is the final destination.) This table
2515      * resolves the IP address in reg0 into an output port in outport and an
2516      * Ethernet address in eth.dst. */
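         /* E.g., when reg0 holds the address of a VM on a connected logical
          * switch, the priority-100 flows added below set eth.dst to that VM's
          * MAC address (taken from its logical switch port) and advance to the
          * next table; otherwise the priority-0 flow falls back to
          * "get_arp(outport, reg0)". */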
2517     HMAP_FOR_EACH (op, key_node, ports) {
2518         if (op->nbr) {
2519             /* This is a logical router port.  If the next-hop IP address in
2520              * 'reg0' matches the IP address of this router port, then the
2521              * packet is intended to eventually be sent to this logical port.
2522              * Set the destination MAC address using this port's MAC address.
2523              *
2524              * The packet is still in peer's logical pipeline. So the match
2525              * should be on peer's outport. */
2526             if (op->nbr->peer) {
2527                 struct ovn_port *peer = ovn_port_find(ports, op->nbr->peer);
2528                 if (!peer) {
2529                     continue;
2530                 }
2531
2532                 if (!peer->ip || !op->ip) {
2533                     continue;
2534                 }
2535                 ds_clear(&match);
2536                 ds_put_format(&match, "outport == %s && reg0 == %s",
2537                               peer->json_key, op->ip_s);
2538                 ds_clear(&actions);
2539                 ds_put_format(&actions, "eth.dst = "ETH_ADDR_FMT"; next;",
2540                               ETH_ADDR_ARGS(op->mac));
2541                 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2542                               100, ds_cstr(&match), ds_cstr(&actions));
2543             }
2544         } else if (op->od->n_router_ports && strcmp(op->nbs->type, "router")) {
2545             /* This is a logical switch port that backs a VM or a container.
2546              * Extract its addresses. For each of the addresses, go through all
2547              * the router ports attached to the switch (to which this port
2548              * connects) and if the address in question is reachable from the
2549              * router port, add an ARP entry in that router's pipeline. */
2550
2551             for (size_t i = 0; i < op->nbs->n_addresses; i++) {
2552                 struct lport_addresses laddrs;
2553                 if (!extract_lsp_addresses(op->nbs->addresses[i], &laddrs,
2554                                            false)) {
2555                     continue;
2556                 }
2557
2558                 for (size_t k = 0; k < laddrs.n_ipv4_addrs; k++) {
2559                     ovs_be32 ip = laddrs.ipv4_addrs[k].addr;
2560                     for (size_t j = 0; j < op->od->n_router_ports; j++) {
2561                         /* Get the Logical_Router_Port that the
2562                          * Logical_Switch_Port is connected to, as
2563                          * 'peer'. */
2564                         const char *peer_name = smap_get(
2565                             &op->od->router_ports[j]->nbs->options,
2566                             "router-port");
2567                         if (!peer_name) {
2568                             continue;
2569                         }
2570
2571                         struct ovn_port *peer
2572                             = ovn_port_find(ports, peer_name);
2573                         if (!peer || !peer->nbr) {
2574                             continue;
2575                         }
2576
2577                         /* Make sure that 'ip' is in 'peer''s network. */
2578                         if ((ip ^ peer->network) & peer->mask) {
2579                             continue;
2580                         }
2581
2582                         ds_clear(&match);
2583                         ds_put_format(&match, "outport == %s && reg0 == "IP_FMT,
2584                                       peer->json_key, IP_ARGS(ip));
2585                         ds_clear(&actions);
2586                         ds_put_format(&actions,
2587                                       "eth.dst = "ETH_ADDR_FMT"; next;",
2588                                       ETH_ADDR_ARGS(laddrs.ea));
2589                         ovn_lflow_add(lflows, peer->od,
2590                                       S_ROUTER_IN_ARP_RESOLVE, 100,
2591                                       ds_cstr(&match), ds_cstr(&actions));
2592                         break;
2593                     }
2594                 }
2595
2596                 destroy_lport_addresses(&laddrs);
2597             }
2598         } else if (!strcmp(op->nbs->type, "router")) {
2599             /* This is a logical switch port that connects to a router. */
2600
2601             /* The peer of this switch port is the router port for which
2602              * we need to add logical flows such that it can resolve
2603              * ARP entries for all the other router ports connected to
2604              * the switch in question. */
2605
2606             const char *peer_name = smap_get(&op->nbs->options,
2607                                              "router-port");
2608             if (!peer_name) {
2609                 continue;
2610             }
2611
2612             struct ovn_port *peer = ovn_port_find(ports, peer_name);
2613             if (!peer || !peer->nbr || !peer->ip) {
2614                 continue;
2615             }
2616
2617             for (size_t j = 0; j < op->od->n_router_ports; j++) {
2618                 const char *router_port_name = smap_get(
2619                                     &op->od->router_ports[j]->nbs->options,
2620                                     "router-port");
2621                 struct ovn_port *router_port = ovn_port_find(ports,
2622                                                              router_port_name);
2623                 if (!router_port || !router_port->nbr || !router_port->ip) {
2624                     continue;
2625                 }
2626
2627                 /* Skip the router port under consideration. */
2628                 if (router_port == peer) {
2629                     continue;
2630                 }
2631
2632                 if (!router_port->ip) {
2633                     continue;
2634                 }
2635                 ds_clear(&match);
2636                 ds_put_format(&match, "outport == %s && reg0 == %s",
2637                               peer->json_key, router_port->ip_s);
2638                 ds_clear(&actions);
2639                 ds_put_format(&actions, "eth.dst = "ETH_ADDR_FMT"; next;",
2640                               ETH_ADDR_ARGS(router_port->mac));
2641                 ovn_lflow_add(lflows, peer->od, S_ROUTER_IN_ARP_RESOLVE,
2642                               100, ds_cstr(&match), ds_cstr(&actions));
2643             }
2644         }
2645     }
2646
2647     HMAP_FOR_EACH (od, key_node, datapaths) {
2648         if (!od->nbr) {
2649             continue;
2650         }
2651
2652         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_RESOLVE, 0, "1",
2653                       "get_arp(outport, reg0); next;");
2654     }
2655
2656     /* Logical router ingress table 6: ARP request.
2657      *
2658      * In the common case where the Ethernet destination has been resolved,
2659      * this table outputs the packet (priority 0).  Otherwise, it composes
2660      * and sends an ARP request (priority 100). */
2661     HMAP_FOR_EACH (od, key_node, datapaths) {
2662         if (!od->nbr) {
2663             continue;
2664         }
2665
2666         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 100,
2667                       "eth.dst == 00:00:00:00:00:00",
2668                       "arp { "
2669                       "eth.dst = ff:ff:ff:ff:ff:ff; "
2670                       "arp.spa = reg1; "
2671                       "arp.op = 1; " /* ARP request */
2672                       "output; "
2673                       "};");
2674         ovn_lflow_add(lflows, od, S_ROUTER_IN_ARP_REQUEST, 0, "1", "output;");
2675     }
2676
2677     /* Logical router egress table 1: Delivery (priority 100).
2678      *
2679      * Priority 100 rules deliver packets to enabled logical ports. */
2680     HMAP_FOR_EACH (op, key_node, ports) {
2681         if (!op->nbr) {
2682             continue;
2683         }
2684
2685         if (!lrport_is_enabled(op->nbr)) {
2686             /* Drop packets to disabled logical ports (since logical flow
2687              * tables are default-drop). */
2688             continue;
2689         }
2690
2691         ds_clear(&match);
2692         ds_put_format(&match, "outport == %s", op->json_key);
2693         ovn_lflow_add(lflows, op->od, S_ROUTER_OUT_DELIVERY, 100,
2694                       ds_cstr(&match), "output;");
2695     }
2696
2697     ds_destroy(&match);
2698     ds_destroy(&actions);
2699 }
2700
2701 /* Updates the Logical_Flow and Multicast_Group tables in the OVN_SB database,
2702  * constructing their contents based on the OVN_NB database. */
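     /* The update is computed as a diff: desired flows are built into
      * 'lflows'; each existing Logical_Flow row that matches a desired flow is
      * kept and that desired flow is dropped from 'lflows'; stale rows are
      * deleted; and whatever remains in 'lflows' is inserted as new rows.  The
      * Multicast_Group table is synced the same way. */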
2703 static void
2704 build_lflows(struct northd_context *ctx, struct hmap *datapaths,
2705              struct hmap *ports)
2706 {
2707     struct hmap lflows = HMAP_INITIALIZER(&lflows);
2708     struct hmap mcgroups = HMAP_INITIALIZER(&mcgroups);
2709
2710     build_lswitch_flows(datapaths, ports, &lflows, &mcgroups);
2711     build_lrouter_flows(datapaths, ports, &lflows);
2712
2713     /* Push changes to the Logical_Flow table to database. */
2714     const struct sbrec_logical_flow *sbflow, *next_sbflow;
2715     SBREC_LOGICAL_FLOW_FOR_EACH_SAFE (sbflow, next_sbflow, ctx->ovnsb_idl) {
2716         struct ovn_datapath *od
2717             = ovn_datapath_from_sbrec(datapaths, sbflow->logical_datapath);
2718         if (!od) {
2719             sbrec_logical_flow_delete(sbflow);
2720             continue;
2721         }
2722
2723         enum ovn_datapath_type dp_type = od->nbs ? DP_SWITCH : DP_ROUTER;
2724         enum ovn_pipeline pipeline
2725             = !strcmp(sbflow->pipeline, "ingress") ? P_IN : P_OUT;
2726         struct ovn_lflow *lflow = ovn_lflow_find(
2727             &lflows, od, ovn_stage_build(dp_type, pipeline, sbflow->table_id),
2728             sbflow->priority, sbflow->match, sbflow->actions);
2729         if (lflow) {
2730             ovn_lflow_destroy(&lflows, lflow);
2731         } else {
2732             sbrec_logical_flow_delete(sbflow);
2733         }
2734     }
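         /* The flows remaining in 'lflows' have no southbound counterpart;
          * insert them as new rows. */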
2735     struct ovn_lflow *lflow, *next_lflow;
2736     HMAP_FOR_EACH_SAFE (lflow, next_lflow, hmap_node, &lflows) {
2737         enum ovn_pipeline pipeline = ovn_stage_get_pipeline(lflow->stage);
2738         uint8_t table = ovn_stage_get_table(lflow->stage);
2739
2740         sbflow = sbrec_logical_flow_insert(ctx->ovnsb_txn);
2741         sbrec_logical_flow_set_logical_datapath(sbflow, lflow->od->sb);
2742         sbrec_logical_flow_set_pipeline(
2743             sbflow, pipeline == P_IN ? "ingress" : "egress");
2744         sbrec_logical_flow_set_table_id(sbflow, table);
2745         sbrec_logical_flow_set_priority(sbflow, lflow->priority);
2746         sbrec_logical_flow_set_match(sbflow, lflow->match);
2747         sbrec_logical_flow_set_actions(sbflow, lflow->actions);
2748
2749         const struct smap ids = SMAP_CONST1(&ids, "stage-name",
2750                                             ovn_stage_to_str(lflow->stage));
2751         sbrec_logical_flow_set_external_ids(sbflow, &ids);
2752
2753         ovn_lflow_destroy(&lflows, lflow);
2754     }
2755     hmap_destroy(&lflows);
2756
2757     /* Push changes to the Multicast_Group table to database. */
2758     const struct sbrec_multicast_group *sbmc, *next_sbmc;
2759     SBREC_MULTICAST_GROUP_FOR_EACH_SAFE (sbmc, next_sbmc, ctx->ovnsb_idl) {
2760         struct ovn_datapath *od = ovn_datapath_from_sbrec(datapaths,
2761                                                           sbmc->datapath);
2762         if (!od) {
2763             sbrec_multicast_group_delete(sbmc);
2764             continue;
2765         }
2766
2767         struct multicast_group group = { .name = sbmc->name,
2768                                          .key = sbmc->tunnel_key };
2769         struct ovn_multicast *mc = ovn_multicast_find(&mcgroups, od, &group);
2770         if (mc) {
2771             ovn_multicast_update_sbrec(mc, sbmc);
2772             ovn_multicast_destroy(&mcgroups, mc);
2773         } else {
2774             sbrec_multicast_group_delete(sbmc);
2775         }
2776     }
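         /* The multicast groups remaining in 'mcgroups' have no southbound
          * counterpart; insert them as new rows. */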
2777     struct ovn_multicast *mc, *next_mc;
2778     HMAP_FOR_EACH_SAFE (mc, next_mc, hmap_node, &mcgroups) {
2779         sbmc = sbrec_multicast_group_insert(ctx->ovnsb_txn);
2780         sbrec_multicast_group_set_datapath(sbmc, mc->datapath->sb);
2781         sbrec_multicast_group_set_name(sbmc, mc->group->name);
2782         sbrec_multicast_group_set_tunnel_key(sbmc, mc->group->key);
2783         ovn_multicast_update_sbrec(mc, sbmc);
2784         ovn_multicast_destroy(&mcgroups, mc);
2785     }
2786     hmap_destroy(&mcgroups);
2787 }
2788
2789 /* OVN_Northbound and OVN_Southbound have an identical Address_Set table.
2790  * We always update OVN_Southbound to match the current data in
2791  * OVN_Northbound, so that the address sets used in Logical_Flows in
2792  * OVN_Southbound refer to the proper sets of addresses. */
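     /* This is a straightforward reconcile pass: the existing OVN_SB
      * Address_Sets are indexed by name, each OVN_NB Address_Set then updates
      * or creates its OVN_SB counterpart, and any OVN_SB rows left unmatched
      * at the end are deleted. */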
2793 static void
2794 sync_address_sets(struct northd_context *ctx)
2795 {
2796     struct shash sb_address_sets = SHASH_INITIALIZER(&sb_address_sets);
2797
2798     const struct sbrec_address_set *sb_address_set;
2799     SBREC_ADDRESS_SET_FOR_EACH (sb_address_set, ctx->ovnsb_idl) {
2800         shash_add(&sb_address_sets, sb_address_set->name, sb_address_set);
2801     }
2802
2803     const struct nbrec_address_set *nb_address_set;
2804     NBREC_ADDRESS_SET_FOR_EACH (nb_address_set, ctx->ovnnb_idl) {
2805         sb_address_set = shash_find_and_delete(&sb_address_sets,
2806                                                nb_address_set->name);
2807         if (!sb_address_set) {
2808             sb_address_set = sbrec_address_set_insert(ctx->ovnsb_txn);
2809             sbrec_address_set_set_name(sb_address_set, nb_address_set->name);
2810         }
2811
2812         sbrec_address_set_set_addresses(sb_address_set,
2813                 /* "char **" is not compatible with "const char **" */
2814                 (const char **) nb_address_set->addresses,
2815                 nb_address_set->n_addresses);
2816     }
2817
2818     struct shash_node *node, *next;
2819     SHASH_FOR_EACH_SAFE (node, next, &sb_address_sets) {
2820         sbrec_address_set_delete(node->data);
2821         shash_delete(&sb_address_sets, node);
2822     }
2823     shash_destroy(&sb_address_sets);
2824 }
2825 \f
2826 static void
2827 ovnnb_db_run(struct northd_context *ctx)
2828 {
2829     if (!ctx->ovnsb_txn) {
2830         return;
2831     }
2832     struct hmap datapaths, ports;
2833     build_datapaths(ctx, &datapaths);
2834     build_ports(ctx, &datapaths, &ports);
2835     build_lflows(ctx, &datapaths, &ports);
2836
2837     sync_address_sets(ctx);
2838
2839     struct ovn_datapath *dp, *next_dp;
2840     HMAP_FOR_EACH_SAFE (dp, next_dp, key_node, &datapaths) {
2841         ovn_datapath_destroy(&datapaths, dp);
2842     }
2843     hmap_destroy(&datapaths);
2844
2845     struct ovn_port *port, *next_port;
2846     HMAP_FOR_EACH_SAFE (port, next_port, key_node, &ports) {
2847         ovn_port_destroy(&ports, port);
2848     }
2849     hmap_destroy(&ports);
2850 }
2851
2852 /*
2853  * The only change we get notified about is if the 'chassis' column of the
2854  * 'Port_Binding' table changes.  When that column is set, we mark the
2855  * corresponding logical port 'up' in the northbound DB; when cleared, 'down'.
2856  */
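     /* To do that efficiently, the northbound logical switch ports are first
      * indexed by name in a temporary hash map, and each southbound
      * Port_Binding is then looked up by its 'logical_port' name to decide
      * whether 'up' needs to change. */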
2857 static void
2858 ovnsb_db_run(struct northd_context *ctx)
2859 {
2860     if (!ctx->ovnnb_txn) {
2861         return;
2862     }
2863     struct hmap lports_hmap;
2864     const struct sbrec_port_binding *sb;
2865     const struct nbrec_logical_switch_port *nb;
2866
2867     struct lport_hash_node {
2868         struct hmap_node node;
2869         const struct nbrec_logical_switch_port *nb;
2870     } *hash_node;
2871
2872     hmap_init(&lports_hmap);
2873
2874     NBREC_LOGICAL_SWITCH_PORT_FOR_EACH (nb, ctx->ovnnb_idl) {
2875         hash_node = xzalloc(sizeof *hash_node);
2876         hash_node->nb = nb;
2877         hmap_insert(&lports_hmap, &hash_node->node, hash_string(nb->name, 0));
2878     }
2879
2880     SBREC_PORT_BINDING_FOR_EACH (sb, ctx->ovnsb_idl) {
2881         nb = NULL;
2882         HMAP_FOR_EACH_WITH_HASH (hash_node, node,
2883                                  hash_string(sb->logical_port, 0),
2884                                  &lports_hmap) {
2885             if (!strcmp(sb->logical_port, hash_node->nb->name)) {
2886                 nb = hash_node->nb;
2887                 break;
2888             }
2889         }
2890
2891         if (!nb) {
2892             /* The logical port doesn't exist for this port binding.  This can
2893              * happen under normal circumstances when ovn-northd hasn't gotten
2894              * around to pruning the Port_Binding yet. */
2895             continue;
2896         }
2897
2898         if (sb->chassis && (!nb->up || !*nb->up)) {
2899             bool up = true;
2900             nbrec_logical_switch_port_set_up(nb, &up, 1);
2901         } else if (!sb->chassis && (!nb->up || *nb->up)) {
2902             bool up = false;
2903             nbrec_logical_switch_port_set_up(nb, &up, 1);
2904         }
2905     }
2906
2907     HMAP_FOR_EACH_POP (hash_node, node, &lports_hmap) {
2908         free(hash_node);
2909     }
2910     hmap_destroy(&lports_hmap);
2911 }
2912 \f
2913
2914 static char *default_nb_db_;
2915
2916 static const char *
2917 default_nb_db(void)
2918 {
2919     if (!default_nb_db_) {
2920         default_nb_db_ = xasprintf("unix:%s/ovnnb_db.sock", ovs_rundir());
2921     }
2922     return default_nb_db_;
2923 }
2924
2925 static char *default_sb_db_;
2926
2927 static const char *
2928 default_sb_db(void)
2929 {
2930     if (!default_sb_db_) {
2931         default_sb_db_ = xasprintf("unix:%s/ovnsb_db.sock", ovs_rundir());
2932     }
2933     return default_sb_db_;
2934 }
2935
2936 static void
2937 parse_options(int argc, char *argv[])
2938 {
2939     enum {
2940         DAEMON_OPTION_ENUMS,
2941         VLOG_OPTION_ENUMS,
2942     };
2943     static const struct option long_options[] = {
2944         {"ovnsb-db", required_argument, NULL, 'd'},
2945         {"ovnnb-db", required_argument, NULL, 'D'},
2946         {"help", no_argument, NULL, 'h'},
2947         {"options", no_argument, NULL, 'o'},
2948         {"version", no_argument, NULL, 'V'},
2949         DAEMON_LONG_OPTIONS,
2950         VLOG_LONG_OPTIONS,
2951         STREAM_SSL_LONG_OPTIONS,
2952         {NULL, 0, NULL, 0},
2953     };
2954     char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
2955
2956     for (;;) {
2957         int c;
2958
2959         c = getopt_long(argc, argv, short_options, long_options, NULL);
2960         if (c == -1) {
2961             break;
2962         }
2963
2964         switch (c) {
2965         DAEMON_OPTION_HANDLERS;
2966         VLOG_OPTION_HANDLERS;
2967         STREAM_SSL_OPTION_HANDLERS;
2968
2969         case 'd':
2970             ovnsb_db = optarg;
2971             break;
2972
2973         case 'D':
2974             ovnnb_db = optarg;
2975             break;
2976
2977         case 'h':
2978             usage();
2979             exit(EXIT_SUCCESS);
2980
2981         case 'o':
2982             ovs_cmdl_print_options(long_options);
2983             exit(EXIT_SUCCESS);
2984
2985         case 'V':
2986             ovs_print_version(0, 0);
2987             exit(EXIT_SUCCESS);
2988
2989         default:
2990             break;
2991         }
2992     }
2993
2994     if (!ovnsb_db) {
2995         ovnsb_db = default_sb_db();
2996     }
2997
2998     if (!ovnnb_db) {
2999         ovnnb_db = default_nb_db();
3000     }
3001
3002     free(short_options);
3003 }
3004
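     /* Registers 'column' with 'idl' but suppresses change alerts for it;
      * this is used for the columns that ovn-northd itself writes and does
      * not need to be woken up for. */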
3005 static void
3006 add_column_noalert(struct ovsdb_idl *idl,
3007                    const struct ovsdb_idl_column *column)
3008 {
3009     ovsdb_idl_add_column(idl, column);
3010     ovsdb_idl_omit_alert(idl, column);
3011 }
3012
3013 int
3014 main(int argc, char *argv[])
3015 {
3016     int res = EXIT_SUCCESS;
3017     struct unixctl_server *unixctl;
3018     int retval;
3019     bool exiting;
3020
3021     fatal_ignore_sigpipe();
3022     set_program_name(argv[0]);
3023     service_start(&argc, &argv);
3024     parse_options(argc, argv);
3025
3026     daemonize_start(false);
3027
3028     retval = unixctl_server_create(NULL, &unixctl);
3029     if (retval) {
3030         exit(EXIT_FAILURE);
3031     }
3032     unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
3033
3034     daemonize_complete();
3035
3036     nbrec_init();
3037     sbrec_init();
3038
3039     /* We want to detect all changes to the ovn-nb db. */
3040     struct ovsdb_idl_loop ovnnb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3041         ovsdb_idl_create(ovnnb_db, &nbrec_idl_class, true, true));
3042
3043     struct ovsdb_idl_loop ovnsb_idl_loop = OVSDB_IDL_LOOP_INITIALIZER(
3044         ovsdb_idl_create(ovnsb_db, &sbrec_idl_class, false, true));
3045
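         /* The ovn-sb IDL, in contrast, monitors only the tables and columns
          * registered below, and alerts are suppressed for the columns that
          * ovn-northd itself maintains. */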
3046     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_logical_flow);
3047     add_column_noalert(ovnsb_idl_loop.idl,
3048                        &sbrec_logical_flow_col_logical_datapath);
3049     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_pipeline);
3050     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_table_id);
3051     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_priority);
3052     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_match);
3053     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_logical_flow_col_actions);
3054
3055     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_multicast_group);
3056     add_column_noalert(ovnsb_idl_loop.idl,
3057                        &sbrec_multicast_group_col_datapath);
3058     add_column_noalert(ovnsb_idl_loop.idl,
3059                        &sbrec_multicast_group_col_tunnel_key);
3060     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_name);
3061     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_multicast_group_col_ports);
3062
3063     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_datapath_binding);
3064     add_column_noalert(ovnsb_idl_loop.idl,
3065                        &sbrec_datapath_binding_col_tunnel_key);
3066     add_column_noalert(ovnsb_idl_loop.idl,
3067                        &sbrec_datapath_binding_col_external_ids);
3068
3069     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_port_binding);
3070     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_datapath);
3071     add_column_noalert(ovnsb_idl_loop.idl,
3072                        &sbrec_port_binding_col_logical_port);
3073     add_column_noalert(ovnsb_idl_loop.idl,
3074                        &sbrec_port_binding_col_tunnel_key);
3075     add_column_noalert(ovnsb_idl_loop.idl,
3076                        &sbrec_port_binding_col_parent_port);
3077     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_tag);
3078     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_type);
3079     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_options);
3080     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_port_binding_col_mac);
3081     ovsdb_idl_add_column(ovnsb_idl_loop.idl, &sbrec_port_binding_col_chassis);
3082
3083     ovsdb_idl_add_table(ovnsb_idl_loop.idl, &sbrec_table_address_set);
3084     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
3085     add_column_noalert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
3086
3087     /* Main loop. */
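         /* Each iteration runs both IDL loops, recomputes the southbound
          * database contents from the northbound database (ovnnb_db_run),
          * pushes logical port 'up' state back into the northbound database
          * (ovnsb_db_run), and then commits any changes and blocks until
          * there is more work to do. */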
3088     exiting = false;
3089     while (!exiting) {
3090         struct northd_context ctx = {
3091             .ovnnb_idl = ovnnb_idl_loop.idl,
3092             .ovnnb_txn = ovsdb_idl_loop_run(&ovnnb_idl_loop),
3093             .ovnsb_idl = ovnsb_idl_loop.idl,
3094             .ovnsb_txn = ovsdb_idl_loop_run(&ovnsb_idl_loop),
3095         };
3096
3097         ovnnb_db_run(&ctx);
3098         ovnsb_db_run(&ctx);
3099
3100         unixctl_server_run(unixctl);
3101         unixctl_server_wait(unixctl);
3102         if (exiting) {
3103             poll_immediate_wake();
3104         }
3105         ovsdb_idl_loop_commit_and_wait(&ovnnb_idl_loop);
3106         ovsdb_idl_loop_commit_and_wait(&ovnsb_idl_loop);
3107
3108         poll_block();
3109         if (should_service_stop()) {
3110             exiting = true;
3111         }
3112     }
3113
3114     unixctl_server_destroy(unixctl);
3115     ovsdb_idl_loop_destroy(&ovnnb_idl_loop);
3116     ovsdb_idl_loop_destroy(&ovnsb_idl_loop);
3117     service_stop();
3118
3119     free(default_nb_db_);
3120     free(default_sb_db_);
3121     exit(res);
3122 }
3123
3124 static void
3125 ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
3126                 const char *argv[] OVS_UNUSED, void *exiting_)
3127 {
3128     bool *exiting = exiting_;
3129     *exiting = true;
3130
3131     unixctl_command_reply(conn, NULL);
3132 }