Francesco Fusco ffusco@redhat.com
FUJITA Tomonori fujita.tomonori@lab.ntt.co.jp
Gaetano Catalli gaetano.catalli@gmail.com
+Gal Sagie gal.sagie@gmail.com
Geoffrey Wossum gwossum@acm.org
Gianluca Merlo gianluca.merlo@gmail.com
Giuseppe Lettieri g.lettieri@iet.unipi.it
-e 's,[@]sysconfdir[@],$(sysconfdir),g' \
-e 's,[@]bindir[@],$(bindir),g' \
-e 's,[@]sbindir[@],$(sbindir),g' \
+ -e 's,[@]abs_builddir[@],$(abs_builddir),g' \
-e 's,[@]abs_top_srcdir[@],$(abs_top_srcdir),g' \
> $@.tmp
@if head -n 1 $@.tmp | grep '#!' > /dev/null; then \
include datapath-windows/automake.mk
include datapath-windows/include/automake.mk
include windows/automake.mk
+include ovn/automake.mk
openvswitch.ko but built and loaded automatically as individual kernel
modules (vport-*.ko).
- Support for STT tunneling.
+ - ovs-sim: New developer tool for simulating multiple OVS instances.
+ See ovs-sim(1) for more information.
v2.3.0 - 14 Aug 2014
print """\
%(argv0)s: XML to nroff converter
Converts the XML format supplied as input into an nroff-formatted manpage.
-usage: %(argv0)s [OPTIONS] INPUT.XML
+usage: %(argv0)s [OPTIONS] INPUT.XML [VAR=VALUE]...
where INPUT.XML is a manpage in an OVS-specific XML format.
+Each VAR, when enclosed by "@"s in the input, is replaced by its
+corresponding VALUE, with characters &<>"' in VALUE escaped.
+
The following options are also available:
--version=VERSION use VERSION to display on document footer
-h, --help display this help message\
""" % {'argv0': argv0}
sys.exit(0)
-def manpage_to_nroff(xml_file, version=None):
- doc = xml.dom.minidom.parse(xml_file).documentElement
+def manpage_to_nroff(xml_file, subst, version=None):
+ f = open(xml_file)
+ input = []
+ for line in f:
+ for k, v in subst.iteritems():
+ line = line.replace(k, v)
+ input += [line]
+ doc = xml.dom.minidom.parseString(''.join(input)).documentElement
d = date.fromtimestamp(os.stat(xml_file).st_mtime)
if version == None:
else:
sys.exit(0)
- if len(args) != 1:
- sys.stderr.write("%s: exactly 1 non-option arguments required "
+ # INPUT.XML is mandatory; any further arguments are VAR=VALUE pairs.
+ if len(args) < 1:
+ sys.stderr.write("%s: at least 1 non-option argument required "
"(use --help for help)\n" % argv0)
sys.exit(1)
+ # Map each "@VAR@" placeholder to its VALUE, with the XML special
+ # characters &<>"' in VALUE replaced by entity references.  '&' has to be
+ # handled first so that the ampersands introduced by the later
+ # replacements are not escaped a second time.
+ subst = {}
+ for s in args[1:]:
+ var, value = s.split('=', 1)
+ value = value.replace('&', '&amp;')
+ value = value.replace('<', '&lt;')
+ value = value.replace('>', '&gt;')
+ value = value.replace('"', '&quot;')
+ value = value.replace("'", '&apos;')
+ subst['@%s@' % var] = value
+
try:
- s = manpage_to_nroff(args[0], version)
+ s = manpage_to_nroff(args[0], subst, version)
except error.Error, e:
sys.stderr.write("%s: %s\n" % (argv0, e.msg))
sys.exit(1)
ofproto/libofproto.sym
lib/libsflow.sym
lib/libopenvswitch.sym
+ ovn/lib/libovn.sym
vtep/libvtep.sym])
OVS_ENABLE_OPTION([-Wall])
AC_CONFIG_COMMANDS([include/openflow/openflow.h.stamp])
AC_CONFIG_COMMANDS([utilities/bugtool/dummy], [:])
+AC_CONFIG_COMMANDS([ovn/dummy], [:])
+AC_CONFIG_COMMANDS([ovn/utilities/dummy], [:])
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES])
flow_cache->sweep_idx = (flow_cache->sweep_idx + 1) & EM_FLOW_HASH_MASK;
}
+/* Returns true if 'dpif' is a netdev or dummy dpif, false otherwise.
+ *
+ * The test compares the class's 'open' callback with dpif_netdev_open rather
+ * than looking at the class's type name, so it holds for every class that
+ * uses that callback, whatever its 'type' string is. */
+bool
+dpif_is_netdev(const struct dpif *dpif)
+{
+ return dpif->dpif_class->open == dpif_netdev_open;
+}
+
static struct dpif_netdev *
dpif_netdev_cast(const struct dpif *dpif)
{
- ovs_assert(dpif->dpif_class->open == dpif_netdev_open);
+ ovs_assert(dpif_is_netdev(dpif));
return CONTAINER_OF(dpif, struct dpif_netdev, dpif);
}
dp_register_provider(class);
}
+/* Replaces the dpif provider registered as 'type' by one registered through
+ * dpif_dummy_register__() under the same type name.
+ *
+ * The replacement is registered only if dp_unregister_provider() returns 0
+ * for 'type'; otherwise nothing is done. */
+static void
+dpif_dummy_override(const char *type)
+{
+ if (!dp_unregister_provider(type)) {
+ dpif_dummy_register__(type);
+ }
+}
+
void
-dpif_dummy_register(bool override)
+dpif_dummy_register(enum dummy_level level)
{
- if (override) {
+ if (level == DUMMY_OVERRIDE_ALL) {
struct sset types;
const char *type;
sset_init(&types);
dp_enumerate_types(&types);
SSET_FOR_EACH (type, &types) {
- if (!dp_unregister_provider(type)) {
- dpif_dummy_register__(type);
- }
+ dpif_dummy_override(type);
}
sset_destroy(&types);
+ } else if (level == DUMMY_OVERRIDE_SYSTEM) {
+ dpif_dummy_override("system");
}
dpif_dummy_register__("dummy");
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
}
}
+bool dpif_is_netdev(const struct dpif *);
+
#define NR_QUEUE 1
#define NR_PMD_THREADS 1
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "coverage.h"
#include "dpctl.h"
#include "dp-packet.h"
+#include "dpif-netdev.h"
#include "dynamic-string.h"
#include "flow.h"
#include "netdev.h"
bool
dpif_supports_tnl_push_pop(const struct dpif *dpif)
{
- return !strcmp(dpif->dpif_class->type, "netdev") ||
- !strcmp(dpif->dpif_class->type, "dummy");
+ return dpif_is_netdev(dpif);
}
/*
- * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
*/
#include <config.h>
-
#include "dummy.h"
+#include <string.h>
+#include "util.h"
/* Enables support for "dummy" network devices and dpifs, which are useful for
* testing. A client program might call this function if it is designed
* specifically for testing or the user enables it on the command line.
*
- * If 'override' is false, then "dummy" dpif and netdev classes will be
- * created. If 'override' is true, then in addition all existing dpif and
- * netdev classes will be deleted and replaced by dummy classes.
+ * 'arg' is parsed to determine the override level (see the definition of enum
+ * dummy_level):
+ *
+ * - NULL or "": DUMMY_OVERRIDE_NONE.
+ * - "system": DUMMY_OVERRIDE_SYSTEM.
+ * - "override": DUMMY_OVERRIDE_ALL.
+ *
+ * Any other value is reported through ovs_fatal().
*
* There is no strong reason why dummy devices shouldn't always be enabled. */
void
-dummy_enable(bool override)
+dummy_enable(const char *arg)
{
- netdev_dummy_register(override);
- dpif_dummy_register(override);
+ enum dummy_level level;
+
+ if (!arg || !arg[0]) {
+ level = DUMMY_OVERRIDE_NONE;
+ } else if (!strcmp(arg, "system")) {
+ level = DUMMY_OVERRIDE_SYSTEM;
+ } else if (!strcmp(arg, "override")) {
+ level = DUMMY_OVERRIDE_ALL;
+ } else {
+ ovs_fatal(0, "%s: unknown dummy level", arg);
+ }
+
+ netdev_dummy_register(level);
+ dpif_dummy_register(level);
timeval_dummy_register();
vlandev_dummy_enable();
}
/*
- * Copyright (c) 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include <stdbool.h>
+/* Degree of dummy support.
+ *
+ * Beyond enabling support for dummies, it can be useful to replace some kinds
+ * of bridges and netdevs, or all kinds, by dummies. This enum expresses the
+ * degree to which this should happen. */
+enum dummy_level {
+ DUMMY_OVERRIDE_NONE, /* Support dummy but don't force its use. */
+ DUMMY_OVERRIDE_SYSTEM, /* Replace "system" by dummy. */
+ DUMMY_OVERRIDE_ALL, /* Replace all types by dummy. */
+};
+
/* For client programs to call directly to enable dummy support. */
-void dummy_enable(bool override);
+void dummy_enable(const char *arg);
/* Implementation details. */
-void dpif_dummy_register(bool override);
-void netdev_dummy_register(bool override);
+void dpif_dummy_register(enum dummy_level);
+void netdev_dummy_register(enum dummy_level);
void timeval_dummy_register(void);
void vlandev_dummy_enable(void);
return ok;
}
+/* Formats the null-terminated string 'in' as a JSON string into 'out', by
+ * way of json_to_ds() with flags 0.
+ *
+ * No copy of 'in' is made: a stack-allocated struct json of type JSON_STRING
+ * points at it directly, which is why its const qualifier is cast away. */
+void
+json_string_escape(const char *in, struct ds *out)
+{
+ struct json json = {
+ .type = JSON_STRING,
+ .u.string = CONST_CAST(char *, in),
+ };
+ json_to_ds(&json, 0, out);
+}
+
static void
json_parser_input_string(struct json_parser *p, const char *s)
{
/*
- * Copyright (c) 2009, 2010 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
/* JSON string formatting operations. */
bool json_string_unescape(const char *in, size_t in_len, char **outp);
+void json_string_escape(const char *in, struct ds *out);
#ifdef __cplusplus
}
case NX_LEARN_DST_OUTPUT:
if (spec->n_bits <= 16
|| is_all_zeros(value.u8, sizeof value - 2)) {
- ovs_be16 *last_be16 = &value.be16[ARRAY_SIZE(value.be16) - 1];
- ofp_port_t port = u16_to_ofp(ntohs(*last_be16));
+ ofp_port_t port = u16_to_ofp(ntohll(value.integer));
if (ofp_to_u16(port) < ofp_to_u16(OFPP_MAX)
|| port == OFPP_IN_PORT
/*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
struct flow_wildcards wc;
};
+/* Initializer for a "struct match" that matches every packet. */
+#define MATCH_CATCHALL_INITIALIZER { .flow = { .dl_type = 0 } }
+
void match_init(struct match *,
const struct flow *, const struct flow_wildcards *);
void match_wc_init(struct match *match, const struct flow *flow);
/*
- * Copyright (c) 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
pthread_once(&once, nxm_do_init);
}
+/* Consider the two value/mask pairs 'a_value/a_mask' and 'b_value/b_mask' as
+ * restrictions on a field's value. Then, this function initializes
+ * 'dst_value/dst_mask' such that it combines the restrictions of both pairs.
+ * This is not always possible, i.e. if one pair insists on a value of 0 in
+ * some bit and the other pair insists on a value of 1 in that bit. This
+ * function returns false in a case where the combined restriction is
+ * impossible (in which case 'dst_value/dst_mask' is not fully initialized),
+ * true otherwise.
+ *
+ * (As usually true for value/mask pairs in OVS, any 1-bit in a value must have
+ * a corresponding 1-bit in its mask.) */
+bool
+mf_subvalue_intersect(const union mf_subvalue *a_value,
+ const union mf_subvalue *a_mask,
+ const union mf_subvalue *b_value,
+ const union mf_subvalue *b_mask,
+ union mf_subvalue *dst_value,
+ union mf_subvalue *dst_mask)
+{
+ /* Work through the operands one 64-bit word at a time. */
+ for (int i = 0; i < ARRAY_SIZE(a_value->be64); i++) {
+ ovs_be64 av = a_value->be64[i];
+ ovs_be64 am = a_mask->be64[i];
+ ovs_be64 bv = b_value->be64[i];
+ ovs_be64 bm = b_mask->be64[i];
+ ovs_be64 *dv = &dst_value->be64[i];
+ ovs_be64 *dm = &dst_mask->be64[i];
+
+ /* Conflict: some bit that both masks constrain differs between the
+ * two values.  Words of 'dst' not yet reached are left unwritten. */
+ if ((av ^ bv) & (am & bm)) {
+ return false;
+ }
+ /* The combined restriction constrains every bit that either pair
+ * constrains.  OR-ing the values is enough because a value has no
+ * 1-bits outside its own mask. */
+ *dv = av | bv;
+ *dm = am | bm;
+ }
+ return true;
+}
+
+/* Returns the "number of bits" in 'v', that is, one more than the offset of
+ * the most significant 1-bit: 1 if only the lowest-order bit is set, 2 if the
+ * highest bit set is the second-lowest-order bit, and so on.
+ *
+ * Returns 0 if no bit in 'v' is set, because bitwise_rscan() then returns
+ * its 'end' argument, which is -1 here. */
+int
+mf_subvalue_width(const union mf_subvalue *v)
+{
+ return 1 + bitwise_rscan(v, sizeof *v, true, sizeof *v * 8 - 1, -1);
+}
+
+/* For positive 'n', shifts the bits in 'value' 'n' bits to the left, and for
+ * negative 'n', shifts the bits '-n' bits to the right.  Bits shifted out are
+ * discarded and vacated positions are filled with zeros, so a shift by the
+ * full width of 'value' or more, in either direction, leaves 'value' all
+ * zeros.  A shift of 0 leaves 'value' unchanged. */
+void
+mf_subvalue_shift(union mf_subvalue *value, int n)
+{
+ if (n) {
+ /* Width in bits, kept as a signed int so that the range checks below
+ * compare like with like instead of relying on unsigned wraparound
+ * of a negative 'n'. */
+ const int width = 8 * sizeof *value;
+ union mf_subvalue tmp;
+
+ memset(&tmp, 0, sizeof tmp);
+ if (n > 0 && n < width) {
+ bitwise_copy(value, sizeof *value, 0,
+ &tmp, sizeof tmp, n,
+ width - n);
+ } else if (n < 0 && n > -width) {
+ bitwise_copy(value, sizeof *value, -n,
+ &tmp, sizeof tmp, 0,
+ width + n);
+ }
+ *value = tmp;
+ }
+}
+
/* Returns true if 'wc' wildcards all the bits in field 'mf', false if 'wc'
* specifies at least one bit in the field.
*
mf_set(field, &value, &mask, match);
}
+/* 'v' and 'm' correspond to values of 'field'. This function copies them into
+ * 'match' in the corresponding positions.
+ *
+ * Only the 'field->n_bits' low-order bits of 'v' and 'm' are used. They
+ * overwrite the field's current value and mask, which are first read from
+ * 'match' and then written back with mf_set(). */
+void
+mf_mask_subfield(const struct mf_field *field,
+ const union mf_subvalue *v,
+ const union mf_subvalue *m,
+ struct match *match)
+{
+ union mf_value value, mask;
+
+ mf_get(field, match, &value, &mask);
+ bitwise_copy(v, sizeof *v, 0, &value, field->n_bytes, 0, field->n_bits);
+ bitwise_copy(m, sizeof *m, 0, &mask, field->n_bytes, 0, field->n_bits);
+ mf_set(field, &value, &mask, match);
+}
+
/* Initializes 'x' to the value of 'sf' within 'flow'. 'sf' must be valid for
* reading 'flow', e.g. as checked by mf_check_src(). */
void
* value" contains NXM_OF_VLAN_TCI[0..11], then one could access the
* corresponding data in value.be16[7] as the bits in the mask htons(0xfff). */
union mf_subvalue {
+ /* Access to full data. */
uint8_t u8[16];
ovs_be16 be16[8];
ovs_be32 be32[4];
ovs_be64 be64[2];
+
+ /* Convenient access to just least-significant bits in various forms. */
+ struct {
+ ovs_be64 dummy_integer;
+ ovs_be64 integer;
+ };
+ struct {
+ uint8_t dummy_mac[10];
+ uint8_t mac[6];
+ };
+ struct {
+ ovs_be32 dummy_ipv4[3];
+ ovs_be32 ipv4;
+ };
+ struct in6_addr ipv6;
};
BUILD_ASSERT_DECL(sizeof(union mf_value) == sizeof (union mf_subvalue));
+bool mf_subvalue_intersect(const union mf_subvalue *a_value,
+ const union mf_subvalue *a_mask,
+ const union mf_subvalue *b_value,
+ const union mf_subvalue *b_mask,
+ union mf_subvalue *dst_value,
+ union mf_subvalue *dst_mask);
+int mf_subvalue_width(const union mf_subvalue *);
+void mf_subvalue_shift(union mf_subvalue *, int n);
+
/* An array of fields with values */
struct field_array {
struct mf_bitmap used;
const union mf_subvalue *, struct flow *);
void mf_write_subfield(const struct mf_subfield *, const union mf_subvalue *,
struct match *);
+void mf_mask_subfield(const struct mf_field *,
+ const union mf_subvalue *value,
+ const union mf_subvalue *mask,
+ struct match *);
void mf_read_subfield(const struct mf_subfield *, const struct flow *,
union mf_subvalue *);
*address = netdev->address;
*netmask = netdev->netmask;
ovs_mutex_unlock(&netdev->mutex);
- return 0;
+
+ return address->s_addr ? 0 : EADDRNOTAVAIL;
}
static int
dummy_packet_conn_send(&dev->conn, buffer, size);
+ /* Reply to ARP requests for 'dev''s assigned IP address. */
+ if (dev->address.s_addr) {
+ struct dp_packet packet;
+ struct flow flow;
+
+ dp_packet_use_const(&packet, buffer, size);
+ flow_extract(&packet, &flow);
+ if (flow.dl_type == htons(ETH_TYPE_ARP)
+ && flow.nw_proto == ARP_OP_REQUEST
+ && flow.nw_dst == dev->address.s_addr) {
+ struct dp_packet *reply = dp_packet_new(0);
+ compose_arp(reply, ARP_OP_REPLY, dev->hwaddr, flow.dl_src,
+ false, flow.nw_dst, flow.nw_src);
+ netdev_dummy_queue_packet(dev, reply);
+ }
+ }
+
if (dev->tx_pcap) {
struct dp_packet packet;
}
+/* Replaces the netdev provider registered as 'type' by a heap-allocated copy
+ * of 'dummy_class' whose type name is a copy of 'type'.
+ *
+ * Nothing is registered unless netdev_unregister_provider() returns 0 for
+ * 'type'.  If registering the copy fails, the error is logged and the copy
+ * and its type name are freed. */
+static void
+netdev_dummy_override(const char *type)
+{
+ if (!netdev_unregister_provider(type)) {
+ struct netdev_class *class;
+ int error;
+
+ class = xmemdup(&dummy_class, sizeof dummy_class);
+ class->type = xstrdup(type);
+ error = netdev_register_provider(class);
+ if (error) {
+ VLOG_ERR("%s: failed to register netdev provider (%s)",
+ type, ovs_strerror(error));
+ free(CONST_CAST(char *, class->type));
+ free(class);
+ }
+ }
+}
+
void
-netdev_dummy_register(bool override)
+netdev_dummy_register(enum dummy_level level)
{
unixctl_command_register("netdev-dummy/receive", "name packet|flow...",
2, INT_MAX, netdev_dummy_receive, NULL);
"[netdev] ipaddr/mask-prefix-len", 2, 2,
netdev_dummy_ip4addr, NULL);
-
- if (override) {
+ if (level == DUMMY_OVERRIDE_ALL) {
struct sset types;
const char *type;
sset_init(&types);
netdev_enumerate_types(&types);
SSET_FOR_EACH (type, &types) {
- if (!strcmp(type, "patch")) {
- continue;
- }
- if (!netdev_unregister_provider(type)) {
- struct netdev_class *class;
- int error;
-
- class = xmemdup(&dummy_class, sizeof dummy_class);
- class->type = xstrdup(type);
- error = netdev_register_provider(class);
- if (error) {
- VLOG_ERR("%s: failed to register netdev provider (%s)",
- type, ovs_strerror(error));
- free(CONST_CAST(char *, class->type));
- free(class);
- }
+ if (strcmp(type, "patch")) {
+ netdev_dummy_override(type);
}
}
sset_destroy(&types);
+ } else if (level == DUMMY_OVERRIDE_SYSTEM) {
+ netdev_dummy_override("system");
}
netdev_register_provider(&dummy_class);
struct netdev_registered_class *rc;
int error;
+ netdev_initialize();
+
ovs_mutex_lock(&netdev_class_mutex);
rc = netdev_lookup_class(type);
if (!rc) {
/*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#define ARP_PACKET_SIZE (2 + ETH_HEADER_LEN + VLAN_HEADER_LEN + \
ARP_ETH_HEADER_LEN)
+/* Clears 'b' and replaces its contents by an ARP frame with the specified
+ * 'arp_op', 'arp_sha', 'arp_tha', 'arp_spa', and 'arp_tpa'. The outer
+ * Ethernet frame is initialized with Ethernet source 'arp_sha' and destination
+ * 'arp_tha', except that destination ff:ff:ff:ff:ff:ff is used instead if
+ * 'broadcast' is true. */
void
-compose_arp(struct dp_packet *b, const uint8_t eth_src[ETH_ADDR_LEN],
- ovs_be32 ip_src, ovs_be32 ip_dst)
+compose_arp(struct dp_packet *b, uint16_t arp_op,
+ const uint8_t arp_sha[ETH_ADDR_LEN],
+ const uint8_t arp_tha[ETH_ADDR_LEN], bool broadcast,
+ ovs_be32 arp_spa, ovs_be32 arp_tpa)
{
struct eth_header *eth;
struct arp_eth_header *arp;
dp_packet_reserve(b, 2 + VLAN_HEADER_LEN);
eth = dp_packet_put_uninit(b, sizeof *eth);
- memcpy(eth->eth_dst, eth_addr_broadcast, ETH_ADDR_LEN);
- memcpy(eth->eth_src, eth_src, ETH_ADDR_LEN);
+ memcpy(eth->eth_dst, broadcast ? eth_addr_broadcast : arp_tha,
+ ETH_ADDR_LEN);
+ memcpy(eth->eth_src, arp_sha, ETH_ADDR_LEN);
eth->eth_type = htons(ETH_TYPE_ARP);
arp = dp_packet_put_uninit(b, sizeof *arp);
arp->ar_pro = htons(ARP_PRO_IP);
arp->ar_hln = sizeof arp->ar_sha;
arp->ar_pln = sizeof arp->ar_spa;
- arp->ar_op = htons(ARP_OP_REQUEST);
- memcpy(arp->ar_sha, eth_src, ETH_ADDR_LEN);
- memset(arp->ar_tha, 0, ETH_ADDR_LEN);
+ arp->ar_op = htons(arp_op);
+ memcpy(arp->ar_sha, arp_sha, ETH_ADDR_LEN);
+ memcpy(arp->ar_tha, arp_tha, ETH_ADDR_LEN);
- put_16aligned_be32(&arp->ar_spa, ip_src);
- put_16aligned_be32(&arp->ar_tpa, ip_dst);
+ put_16aligned_be32(&arp->ar_spa, arp_spa);
+ put_16aligned_be32(&arp->ar_tpa, arp_tpa);
dp_packet_reset_offsets(b);
dp_packet_set_l3(b, arp);
/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
static const uint8_t eth_addr_broadcast[ETH_ADDR_LEN] OVS_UNUSED
= { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+static const uint8_t eth_addr_zero[ETH_ADDR_LEN] OVS_UNUSED
+ = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+
static const uint8_t eth_addr_stp[ETH_ADDR_LEN] OVS_UNUSED
= { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x00 };
void packet_format_tcp_flags(struct ds *, uint16_t);
const char *packet_tcp_flag_to_string(uint32_t flag);
-void compose_arp(struct dp_packet *b, const uint8_t eth_src[ETH_ADDR_LEN],
- ovs_be32 ip_src, ovs_be32 ip_dst);
+void compose_arp(struct dp_packet *, uint16_t arp_op,
+ const uint8_t arp_sha[ETH_ADDR_LEN],
+ const uint8_t arp_tha[ETH_ADDR_LEN], bool broadcast,
+ ovs_be32 arp_spa, ovs_be32 arp_tpa);
uint32_t packet_csum_pseudoheader(const struct ip_header *);
#endif /* packets.h */
state_transition(struct rconn *rc, enum state state)
OVS_REQUIRES(rc->mutex)
{
- rc->seqno += (rc->state == S_ACTIVE) != (state == S_ACTIVE);
+ rc->seqno += is_connected_state(rc->state) != is_connected_state(state);
if (is_connected_state(state) && !is_connected_state(rc->state)) {
rc->probably_admitted = false;
}
return true;
}
-/* Scans the bits in 'p' that have bit offsets 'start' through 'end'
- * (inclusive) for the first bit with value 'target'. If one is found, returns
- * its offset, otherwise 'end'. 'p' is 'len' bytes long.
+/* Scans the bits in 'p' that have bit offsets 'start' (inclusive) through
+ * 'end' (exclusive) for the first bit with value 'target'. If one is found,
+ * returns its offset, otherwise 'end'. 'p' is 'len' bytes long.
*
* If you consider all of 'p' to be a single unsigned integer in network byte
* order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
* start <= end
*/
unsigned int
-bitwise_scan(const void *p_, unsigned int len, bool target, unsigned int start,
+bitwise_scan(const void *p, unsigned int len, bool target, unsigned int start,
unsigned int end)
{
- const uint8_t *p = p_;
unsigned int ofs;
for (ofs = start; ofs < end; ofs++) {
- bool bit = (p[len - (ofs / 8 + 1)] & (1u << (ofs % 8))) != 0;
- if (bit == target) {
+ if (bitwise_get_bit(p, len, ofs) == target) {
break;
}
}
return ofs;
}
+/* Scans the bits in 'p' that have bit offsets 'start' (inclusive) through
+ * 'end' (exclusive) for the first bit with value 'target', in reverse order.
+ * If one is found, returns its offset, otherwise 'end'. 'p' is 'len' bytes
+ * long.
+ *
+ * If you consider all of 'p' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in p[len - 1], bit 1 is the bit with value 2, bit 2 is the bit
+ * with value 4, ..., bit 8 is the bit with value 1 in p[len - 2], and so on.
+ *
+ * To scan an entire bit array in reverse order, specify start == len * 8 - 1
+ * and end == -1, in which case the return value is nonnegative if successful
+ * and -1 if no 'target' match is found.
+ *
+ * Required invariant:
+ * start >= end
+ */
+int
+bitwise_rscan(const void *p, unsigned int len, bool target, int start, int end)
+{
+ int ofs;
+
+ for (ofs = start; ofs > end; ofs--) {
+ if (bitwise_get_bit(p, len, ofs) == target) {
+ break;
+ }
+ }
+ return ofs;
+}
/* Copies the 'n_bits' low-order bits of 'value' into the 'n_bits' bits
* starting at bit 'dst_ofs' in 'dst', which is 'dst_len' bytes long.
n_bits);
return ntohll(value);
}
+
+/* Returns the value of the bit with offset 'ofs' in 'src', which is 'len'
+ * bytes long.
+ *
+ * If you consider all of 'src' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in src[len - 1], bit 1 is the bit with value 2, bit 2 is the
+ * bit with value 4, ..., bit 8 is the bit with value 1 in src[len - 2], and so
+ * on.
+ *
+ * Required invariants:
+ * ofs < len * 8
+ */
+bool
+bitwise_get_bit(const void *src_, unsigned int len, unsigned int ofs)
+{
+ const uint8_t *src = src_;
+
+ return (src[len - (ofs / 8 + 1)] & (1u << (ofs % 8))) != 0;
+}
+
+/* Sets the bit with offset 'ofs' in 'dst', which is 'len' bytes long, to 0.
+ *
+ * If you consider all of 'dst' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in dst[len - 1], bit 1 is the bit with value 2, bit 2 is the
+ * bit with value 4, ..., bit 8 is the bit with value 1 in dst[len - 2], and so
+ * on.
+ *
+ * Required invariants:
+ * ofs < len * 8
+ */
+void
+bitwise_put0(void *dst_, unsigned int len, unsigned int ofs)
+{
+ uint8_t *dst = dst_;
+
+ dst[len - (ofs / 8 + 1)] &= ~(1u << (ofs % 8));
+}
+
+/* Sets the bit with offset 'ofs' in 'dst', which is 'len' bytes long, to 1.
+ *
+ * If you consider all of 'dst' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in dst[len - 1], bit 1 is the bit with value 2, bit 2 is the
+ * bit with value 4, ..., bit 8 is the bit with value 1 in dst[len - 2], and so
+ * on.
+ *
+ * Required invariants:
+ * ofs < len * 8
+ */
+void
+bitwise_put1(void *dst_, unsigned int len, unsigned int ofs)
+{
+ uint8_t *dst = dst_;
+
+ dst[len - (ofs / 8 + 1)] |= 1u << (ofs % 8);
+}
+
+/* Sets the bit with offset 'ofs' in 'dst', which is 'len' bytes long, to 'b'.
+ *
+ * If you consider all of 'dst' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in dst[len - 1], bit 1 is the bit with value 2, bit 2 is the
+ * bit with value 4, ..., bit 8 is the bit with value 1 in dst[len - 2], and so
+ * on.
+ *
+ * Required invariants:
+ * ofs < len * 8
+ */
+void
+bitwise_put_bit(void *dst, unsigned int len, unsigned int ofs, bool b)
+{
+ if (b) {
+ bitwise_put1(dst, len, ofs);
+ } else {
+ bitwise_put0(dst, len, ofs);
+ }
+}
+
+/* Flips the bit with offset 'ofs' in 'dst', which is 'len' bytes long.
+ *
+ * If you consider all of 'dst' to be a single unsigned integer in network byte
+ * order, then bit N is the bit with value 2**N. That is, bit 0 is the bit
+ * with value 1 in dst[len - 1], bit 1 is the bit with value 2, bit 2 is the
+ * bit with value 4, ..., bit 8 is the bit with value 1 in dst[len - 2], and so
+ * on.
+ *
+ * Required invariants:
+ * ofs < len * 8
+ */
+void
+bitwise_toggle_bit(void *dst_, unsigned int len, unsigned int ofs)
+{
+ uint8_t *dst = dst_;
+
+ dst[len - (ofs / 8 + 1)] ^= 1u << (ofs % 8);
+}
\f
/* ovs_scan */
unsigned int n_bits);
unsigned int bitwise_scan(const void *, unsigned int len,
bool target, unsigned int start, unsigned int end);
+int bitwise_rscan(const void *, unsigned int len, bool target,
+ int start, int end);
void bitwise_put(uint64_t value,
void *dst, unsigned int dst_len, unsigned int dst_ofs,
unsigned int n_bits);
uint64_t bitwise_get(const void *src, unsigned int src_len,
unsigned int src_ofs, unsigned int n_bits);
+bool bitwise_get_bit(const void *src, unsigned int len, unsigned int ofs);
+void bitwise_put0(void *dst, unsigned int len, unsigned int ofs);
+void bitwise_put1(void *dst, unsigned int len, unsigned int ofs);
+void bitwise_put_bit(void *dst, unsigned int len, unsigned int ofs, bool);
+void bitwise_toggle_bit(void *dst, unsigned int len, unsigned int ofs);
/* Returns non-zero if the parameters have equal value. */
static inline int
static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
struct xlate_ctx *);
static void xlate_normal(struct xlate_ctx *);
-static inline void xlate_report(struct xlate_ctx *, const char *);
+static inline void xlate_report(struct xlate_ctx *, const char *, ...)
+ OVS_PRINTF_FORMAT(2, 3);
static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
uint8_t table_id, bool may_packet_in,
bool honor_table_miss);
static void xlate_xcfg_free(struct xlate_cfg *);
+/* If 'ctx->xin' has a report hook installed, passes it the printf-style
+ * 'format' and the arguments that follow, together with the current
+ * recursion depth 'ctx->recurse'.  Does nothing otherwise. */
static inline void
-xlate_report(struct xlate_ctx *ctx, const char *s)
+xlate_report(struct xlate_ctx *ctx, const char *format, ...)
{
if (OVS_UNLIKELY(ctx->xin->report_hook)) {
- ctx->xin->report_hook(ctx->xin, s, ctx->recurse);
+ va_list args;
+
+ va_start(args, format);
+ ctx->xin->report_hook(ctx->xin, ctx->recurse, format, args);
+ va_end(args);
}
}
struct dp_packet packet;
dp_packet_init(&packet, 0);
- compose_arp(&packet, eth_src, ip_src, ip_dst);
+ compose_arp(&packet, ARP_OP_REQUEST,
+ eth_src, eth_addr_zero, true, ip_src, ip_dst);
xlate_flood_packet(xbridge, &packet);
dp_packet_uninit(&packet);
}
static int
-build_tunnel_send(const struct xlate_ctx *ctx, const struct xport *xport,
+build_tunnel_send(struct xlate_ctx *ctx, const struct xport *xport,
const struct flow *flow, odp_port_t tunnel_odp_port)
{
struct ovs_action_push_tnl tnl_push_data;
err = tnl_route_lookup_flow(flow, &d_ip, &out_dev);
if (err) {
+ xlate_report(ctx, "native tunnel routing failed");
return err;
}
+ xlate_report(ctx, "tunneling to "IP_FMT" via %s",
+ IP_ARGS(d_ip), netdev_get_name(out_dev->netdev));
/* Use mac addr of bridge port of the peer. */
err = netdev_get_etheraddr(out_dev->netdev, smac);
if (err) {
+ xlate_report(ctx, "tunnel output device lacks Ethernet address");
return err;
}
err = netdev_get_in4(out_dev->netdev, (struct in_addr *) &s_ip, NULL);
if (err) {
+ xlate_report(ctx, "tunnel output device lacks IPv4 address");
return err;
}
err = tnl_arp_lookup(out_dev->xbridge->name, d_ip, dmac);
if (err) {
+ xlate_report(ctx, "ARP cache miss for "IP_FMT" on bridge %s, "
+ "sending ARP request",
+ IP_ARGS(d_ip), out_dev->xbridge->name);
tnl_send_arp_request(out_dev, smac, s_ip, d_ip);
return err;
}
sizeof entry->u.tnl_arp_cache.br_name);
entry->u.tnl_arp_cache.d_ip = d_ip;
}
+
+ xlate_report(ctx, "tunneling from "ETH_ADDR_FMT" "IP_FMT
+ " to "ETH_ADDR_FMT" "IP_FMT,
+ ETH_ADDR_ARGS(smac), IP_ARGS(s_ip),
+ ETH_ADDR_ARGS(dmac), IP_ARGS(d_ip));
err = tnl_port_build_header(xport->ofport, flow,
dmac, smac, s_ip, &tnl_push_data);
if (err) {
}
out_port = odp_port;
if (ovs_native_tunneling_is_on(ctx->xbridge->ofproto)) {
+ xlate_report(ctx, "output to native tunnel");
tnl_push_pop_send = true;
} else {
+ xlate_report(ctx, "output to kernel tunnel");
commit_odp_tunnel_action(flow, &ctx->base_flow,
ctx->xout->odp_actions);
flow->tunnel = flow_tnl; /* Restore tunnel metadata */
-/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* significant decision, e.g. to explain why OFPP_NORMAL translation
* dropped a packet. 'recurse' is the resubmit recursion depth at time of
* invocation. */
- void (*report_hook)(struct xlate_in *, const char *s, int recurse);
+ void (*report_hook)(struct xlate_in *, int recurse,
+ const char *format, va_list args);
/* If nonnull, flow translation credits the specified statistics to each
* rule reached through a resubmit or OFPP_TABLE action.
ds_put_char(result, '\n');
}
-static void trace_report(struct xlate_in *xin, const char *s, int recurse);
+static void trace_report(struct xlate_in *, int recurse,
+ const char *format, ...)
+ OVS_PRINTF_FORMAT(3, 4);
+static void trace_report_valist(struct xlate_in *, int recurse,
+ const char *format, va_list args)
+ OVS_PRINTF_FORMAT(3, 0);
static void
trace_resubmit(struct xlate_in *xin, struct rule_dpif *rule, int recurse)
if (!recurse) {
if (rule == xin->ofproto->miss_rule) {
- trace_report(xin, "No match, flow generates \"packet in\"s.",
- recurse);
+ trace_report(xin, recurse,
+ "No match, flow generates \"packet in\"s.");
} else if (rule == xin->ofproto->no_packet_in_rule) {
- trace_report(xin, "No match, packets dropped because "
- "OFPPC_NO_PACKET_IN is set on in_port.", recurse);
+ trace_report(xin, recurse, "No match, packets dropped because "
+ "OFPPC_NO_PACKET_IN is set on in_port.");
} else if (rule == xin->ofproto->drop_frags_rule) {
- trace_report(xin, "Packets dropped because they are IP "
+ trace_report(xin, recurse, "Packets dropped because they are IP "
"fragments and the fragment handling mode is "
- "\"drop\".", recurse);
+ "\"drop\".");
}
}
}
+/* Adds one line to the trace result that contains 'xin': 'recurse' tab
+ * characters, then the printf-style message given by 'format' and 'args',
+ * then a new-line. */
static void
-trace_report(struct xlate_in *xin, const char *s, int recurse)
+trace_report_valist(struct xlate_in *xin, int recurse,
+ const char *format, va_list args)
{
struct trace_ctx *trace = CONTAINER_OF(xin, struct trace_ctx, xin);
struct ds *result = trace->result;
ds_put_char_multiple(result, '\t', recurse);
- ds_put_cstr(result, s);
+ ds_put_format_valist(result, format, args);
ds_put_char(result, '\n');
}
+static void
+trace_report(struct xlate_in *xin, int recurse, const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ trace_report_valist(xin, recurse, format, args);
+ va_end(args);
+}
+
/* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following
* forms are supported:
*
trace.xin.ofpacts = ofpacts;
trace.xin.ofpacts_len = ofpacts_len;
trace.xin.resubmit_hook = trace_resubmit;
- trace.xin.report_hook = trace_report;
+ trace.xin.report_hook = trace_report_valist;
xlate_actions(&trace.xin, &trace.xout);
--- /dev/null
+/ovn-architecture.7
+/ovn-nb.5
+/ovn-nb.gv
+/ovn-nb.pic
+/ovn-sb.5
+/ovn-sb.gv
+/ovn-sb.pic
+/ovn-nbctl
+/ovn-nbctl.8
--- /dev/null
+Integration of Containers with OVN and OpenStack
+------------------------------------------------
+
+Isolation between containers is weaker than isolation between VMs, so
+some environments deploy containers for different tenants in separate
+VMs as an additional security measure. This document describes creation of
+containers inside VMs and how they can be made part of the logical networks
+securely. The created logical network can include VMs, containers and
+physical machines as endpoints. To better understand the proposed integration
+of containers with OVN and OpenStack, this document describes the end to end
+workflow with an example.
+
+* An OpenStack tenant creates a VM (say VM-A) with a single network interface
+that belongs to a management logical network. The VM is meant to host
+containers. OpenStack Nova chooses the hypervisor on which VM-A is created.
+
+* A Neutron port may have been created in advance and passed in to Nova
+with the request to create a new VM. If not, Nova will issue a request
+to Neutron to create a new port. The ID of the logical port from
+Neutron will also be used as the vif-id for the virtual network
+interface (VIF) of VM-A.
+
+* When VM-A is created on a hypervisor, its VIF gets added to the
+Open vSwitch integration bridge. This creates a row in the Interface table
+of the Open_vSwitch database. As explained in the [IntegrationGuide.md],
+the vif-id associated with the VM network interface gets added in the
+external_ids:iface-id column of the newly created row in the Interface table.
+
+* Since VM-A belongs to a logical network, it gets an IP address. This IP
+address is used to spawn containers (either manually or through container
+orchestration systems) inside that VM and to monitor the health of the
+created containers.
+
+* The vif-id associated with the VM's network interface can be obtained by
+making a call to Neutron using tenant credentials.
+
+* This flow assumes a component called a "container network plugin".
+If you take Docker as an example for containers, you could envision
+the plugin to be either a wrapper around Docker or a feature of Docker itself
+that understands how to perform part of this workflow to get a container
+connected to a logical network managed by Neutron. The rest of the flow
+refers to this logical component that does not yet exist as the
+"container network plugin".
+
+* All the calls to Neutron will need tenant credentials. These calls can
+either be made from inside the tenant VM as part of a container network plugin
+or from outside the tenant VM (if the tenant is not comfortable using temporary
+Keystone tokens from inside the tenant VMs). For simplicity, this document
+explains the work flow using the former method.
+
+* The container hosting VM will need Open vSwitch installed in it. The only
+work for Open vSwitch inside the VM is to tag network traffic coming from
+containers.
+
+* When a container needs to be created inside the VM with a container network
+interface that is expected to be attached to a particular logical switch, the
+network plugin in that VM chooses any unused VLAN (This VLAN tag only needs to
+be unique inside that VM. This limits the number of container interfaces to
+4096 inside a single VM). This VLAN tag is stripped out in the hypervisor
+by OVN and is only useful as a context (or metadata) for OVN.
+
+* The container network plugin then makes a call to Neutron to create a
+logical port. In addition to all the inputs that a call to create a port
+in Neutron currently needs, it sends the vif-id and the VLAN tag as
+inputs.
+
+* Neutron in turn will verify that the vif-id belongs to the tenant in question
+and then uses the OVN specific plugin to create a new row in the Logical_Port
+table of the OVN Northbound Database. Neutron responds back with an
+IP address and MAC address for that network interface. So Neutron becomes
+the IPAM system and provides unique IP and MAC addresses across VMs and
+containers in the same logical network.
+
+* The Neutron API call above to create a logical port for the container
+could add a relatively significant amount of time to container creation.
+However, an optimization is possible here. Logical ports could be
+created in advance and reused by the container system doing container
+orchestration. Additional Neutron API calls would only be needed if the
+port needs to be attached to a different logical network.
+
+* When a container is eventually deleted, the network plugin in that VM
+may make a call to Neutron to delete that port. Neutron in turn will
+delete the entry in the Logical_Port table of the OVN Northbound Database.
+
+As an example, consider Docker containers. Since Docker currently does not
+have a network plugin feature, this example uses a hypothetical wrapper
+around Docker to make calls to Neutron.
+
+* Create a Logical switch, e.g.:
+
+```
+% ovn-docker --cred=cca86bd13a564ac2a63ddf14bf45d37f create network LS1
+```
+
+The above command will make a call to Neutron with the credentials to create
+a logical switch. The above is optional if the logical switch has already
+been created from outside the VM.
+
+* List networks available to the tenant.
+
+```
+% ovn-docker --cred=cca86bd13a564ac2a63ddf14bf45d37f list networks
+```
+
+* Create a container and attach an interface to the previously created switch
+as a logical port.
+
+```
+% ovn-docker --cred=cca86bd13a564ac2a63ddf14bf45d37f --vif-id=$VIF_ID \
+--network=LS1 run -d --net=none ubuntu:14.04 /bin/sh -c \
+"while true; do echo hello world; sleep 1; done"
+```
+
+The above command will make a call to Neutron with all the inputs it currently
+needs to create a logical port. In addition, it passes the $VIF_ID and an
+unused VLAN. Neutron will add that information in OVN and return back
+a MAC address and IP address for that interface. ovn-docker will then create
+a veth pair, insert one end inside the container as 'eth0' and the other end
+as a port of a local OVS bridge as an access port of the chosen VLAN.
+
+[IntegrationGuide.md]:IntegrationGuide.md
--- /dev/null
+* ovn-controller
+
+*** Determine how to split logical pipeline across physical nodes.
+
+ From the original OVN architecture document:
+
+ The pipeline processing is split between the ingress and egress
+ transport nodes. In particular, the logical egress processing may
+ occur at either hypervisor. Processing the logical egress on the
+ ingress hypervisor requires more state about the egress vif's
+ policies, but reduces traffic on the wire that would eventually be
+ dropped. Whereas, processing on the egress hypervisor can reduce
+ broadcast traffic on the wire by doing local replication. We
+ initially plan to process logical egress on the egress hypervisor
+ so that less state needs to be replicated. However, we may change
+ this behavior once we gain some experience writing the logical
+ flows.
+
+ The split of pipeline processing will influence how tunnel keys
+ are encoded.
+
+** ovn-controller parameters and configuration.
+
+*** SSL configuration.
+
+ Can probably get this from Open_vSwitch database.
+
+* ovsdb-server
+
+ ovsdb-server should have adequate features for OVN but it probably
+ needs work for scale and possibly for availability as deployments
+ grow. Here are some thoughts.
+
+ Andy Zhou is looking at these issues.
+
+*** Reducing amount of data sent to clients.
+
+ Currently, whenever a row monitored by a client changes,
+ ovsdb-server sends the client every monitored column in the row,
+ even if only one column changes. It might be valuable to reduce
+ this to only the columns that change.
+
+ Also, whenever a column changes, ovsdb-server sends the entire
+ contents of the column. It might be valuable, for columns that
+ are sets or maps, to send only added or removed values or
+ key-value pairs.
+
+ Currently, clients monitor the entire contents of a table. It
+ might make sense to allow clients to monitor only rows that
+ satisfy specific criteria, e.g. to allow an ovn-controller to
+ receive only Pipeline rows for logical networks on its hypervisor.
+
+*** Reducing redundant data and code within ovsdb-server.
+
+ Currently, ovsdb-server separately composes database update
+ information to send to each of its clients. This is fine for a
+ small number of clients, but it wastes time and memory when
+ hundreds of clients all want the same updates (as will be the
+ case in OVN).
+
+ (This is somewhat opposed to the idea of letting a client monitor
+ only some rows in a table, since that would increase the diversity
+ among clients.)
+
+*** Multithreading.
+
+ If it turns out that other changes don't let ovsdb-server scale
+ adequately, we can multithread ovsdb-server. Initially one might
+ only break protocol handling into separate threads, leaving the
+ actual database work serialized through a lock.
+
+** Increasing availability.
+
+ Database availability might become an issue. The OVN system
+ shouldn't grind to a halt if the database becomes unavailable, but
+ it would become impossible to bring VIFs up or down, etc.
+
+ My current thought on how to increase availability is to add
+ clustering to ovsdb-server, probably via the Raft consensus
+ algorithm. As an experiment, I wrote an implementation of Raft
+ for Open vSwitch that you can clone from:
+
+ https://github.com/blp/ovs-reviews.git raft
+
+** Reducing startup time.
+
+ As-is, if ovsdb-server restarts, every client will fetch a fresh
+ copy of the part of the database that it cares about. With
+ hundreds of clients, this could cause heavy CPU load on
+ ovsdb-server and use excessive network bandwidth. It would be
+ better to allow incremental updates even across connection loss.
+ One way might be to use "Difference Digests" as described in
+ Epstein et al., "What's the Difference? Efficient Set
+ Reconciliation Without Prior Context". (I'm not yet aware of
+ previous non-academic use of this technique.)
--- /dev/null
+# OVN southbound schema and IDL
+EXTRA_DIST += ovn/ovn-sb.ovsschema
+pkgdata_DATA += ovn/ovn-sb.ovsschema
+
+# OVN southbound E-R diagram
+#
+# If "python" or "dot" is not available, then we do not add graphical diagram
+# to the documentation.
+if HAVE_PYTHON
+if HAVE_DOT
+ovn/ovn-sb.gv: ovsdb/ovsdb-dot.in ovn/ovn-sb.ovsschema
+ $(AM_V_GEN)$(OVSDB_DOT) --no-arrows $(srcdir)/ovn/ovn-sb.ovsschema > $@
+ovn/ovn-sb.pic: ovn/ovn-sb.gv ovsdb/dot2pic
+ $(AM_V_GEN)(dot -T plain < ovn/ovn-sb.gv | $(PERL) $(srcdir)/ovsdb/dot2pic -f 3) > $@.tmp && \
+ mv $@.tmp $@
+OVN_SB_PIC = ovn/ovn-sb.pic
+OVN_SB_DOT_DIAGRAM_ARG = --er-diagram=$(OVN_SB_PIC)
+DISTCLEANFILES += ovn/ovn-sb.gv ovn/ovn-sb.pic
+endif
+endif
+
+# OVN southbound schema documentation
+EXTRA_DIST += ovn/ovn-sb.xml
+DISTCLEANFILES += ovn/ovn-sb.5
+man_MANS += ovn/ovn-sb.5
+ovn/ovn-sb.5: \
+ ovsdb/ovsdb-doc ovn/ovn-sb.xml ovn/ovn-sb.ovsschema $(OVN_SB_PIC)
+ $(AM_V_GEN)$(OVSDB_DOC) \
+ $(OVN_SB_DOT_DIAGRAM_ARG) \
+ --version=$(VERSION) \
+ $(srcdir)/ovn/ovn-sb.ovsschema \
+ $(srcdir)/ovn/ovn-sb.xml > $@.tmp && \
+ mv $@.tmp $@
+
+# OVN northbound schema and IDL
+EXTRA_DIST += ovn/ovn-nb.ovsschema
+pkgdata_DATA += ovn/ovn-nb.ovsschema
+
+# OVN northbound E-R diagram
+#
+# If "python" or "dot" is not available, then we do not add graphical diagram
+# to the documentation.
+if HAVE_PYTHON
+if HAVE_DOT
+ovn/ovn-nb.gv: ovsdb/ovsdb-dot.in ovn/ovn-nb.ovsschema
+ $(AM_V_GEN)$(OVSDB_DOT) --no-arrows $(srcdir)/ovn/ovn-nb.ovsschema > $@
+ovn/ovn-nb.pic: ovn/ovn-nb.gv ovsdb/dot2pic
+ $(AM_V_GEN)(dot -T plain < ovn/ovn-nb.gv | $(PERL) $(srcdir)/ovsdb/dot2pic -f 3) > $@.tmp && \
+ mv $@.tmp $@
+OVN_NB_PIC = ovn/ovn-nb.pic
+OVN_NB_DOT_DIAGRAM_ARG = --er-diagram=$(OVN_NB_PIC)
+DISTCLEANFILES += ovn/ovn-nb.gv ovn/ovn-nb.pic
+endif
+endif
+
+# OVN northbound schema documentation
+EXTRA_DIST += ovn/ovn-nb.xml
+DISTCLEANFILES += ovn/ovn-nb.5
+man_MANS += ovn/ovn-nb.5
+ovn/ovn-nb.5: \
+ ovsdb/ovsdb-doc ovn/ovn-nb.xml ovn/ovn-nb.ovsschema $(OVN_NB_PIC)
+ $(AM_V_GEN)$(OVSDB_DOC) \
+ $(OVN_NB_DOT_DIAGRAM_ARG) \
+ --version=$(VERSION) \
+ $(srcdir)/ovn/ovn-nb.ovsschema \
+ $(srcdir)/ovn/ovn-nb.xml > $@.tmp && \
+ mv $@.tmp $@
+
+man_MANS += ovn/ovn-architecture.7 ovn/ovn-nbctl.8
+EXTRA_DIST += ovn/ovn-architecture.7.xml ovn/ovn-nbctl.8.xml
+
+# Generic rule that turns an XML manpage source FOO.xml into the nroff
+# manpage FOO.  The VAR='VALUE' arguments are passed to xml2nroff after the
+# input file name.  Output goes to a temporary file that is renamed only
+# after xml2nroff succeeds, so a failed run never leaves a partial manpage.
+SUFFIXES += .xml
+%: %.xml
+	$(AM_V_GEN)$(run_python) $(srcdir)/build-aux/xml2nroff $< > $@.tmp \
+		--version=$(VERSION) \
+		PKIDIR='$(PKIDIR)' \
+		LOGDIR='$(LOGDIR)' \
+		DBDIR='$(DBDIR)' \
+		PERL='$(PERL)' \
+		PYTHON='$(PYTHON)' \
+		RUNDIR='$(RUNDIR)' \
+		VERSION='$(VERSION)' \
+		localstatedir='$(localstatedir)' \
+		pkgdatadir='$(pkgdatadir)' \
+		sysconfdir='$(sysconfdir)' \
+		bindir='$(bindir)' \
+		sbindir='$(sbindir)'
+	$(AM_V_at)mv $@.tmp $@
+
+EXTRA_DIST += \
+ ovn/TODO \
+ ovn/CONTAINERS.OpenStack.md
+
+# ovn-nbctl
+bin_PROGRAMS += ovn/ovn-nbctl
+ovn_ovn_nbctl_SOURCES = ovn/ovn-nbctl.c
+ovn_ovn_nbctl_LDADD = ovn/lib/libovn.la ovsdb/libovsdb.la lib/libopenvswitch.la
+
+include ovn/controller/automake.mk
+include ovn/lib/automake.mk
+include ovn/northd/automake.mk
+include ovn/utilities/automake.mk
--- /dev/null
+/ovn-controller
+/ovn-controller.8
--- /dev/null
+bin_PROGRAMS += ovn/controller/ovn-controller
+ovn_controller_ovn_controller_SOURCES = \
+ ovn/controller/binding.c \
+ ovn/controller/binding.h \
+ ovn/controller/chassis.c \
+ ovn/controller/chassis.h \
+ ovn/controller/ofctrl.c \
+ ovn/controller/ofctrl.h \
+ ovn/controller/ovn-controller.c \
+ ovn/controller/ovn-controller.h \
+ ovn/controller/pipeline.c \
+ ovn/controller/pipeline.h \
+ ovn/controller/physical.c \
+ ovn/controller/physical.h
+ovn_controller_ovn_controller_LDADD = ovn/lib/libovn.la lib/libopenvswitch.la
+man_MANS += ovn/controller/ovn-controller.8
+EXTRA_DIST += ovn/controller/ovn-controller.8.xml
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "binding.h"
+
+#include "lib/sset.h"
+#include "lib/util.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "ovn-controller.h"
+
+VLOG_DEFINE_THIS_MODULE(binding);
+
+/* Registers with the OVS database IDL in 'ctx' the tables and columns that
+ * the binding code reads: 'bridges' from Open_vSwitch, 'name' and 'ports'
+ * from Bridge, 'name' and 'interfaces' from Port, and 'name' and
+ * 'external_ids' from Interface. */
+void
+binding_init(struct controller_ctx *ctx)
+{
+    struct ovsdb_idl *ovs_idl = ctx->ovs_idl;
+
+    /* Open_vSwitch table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_open_vswitch);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_open_vswitch_col_bridges);
+
+    /* Bridge table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_name);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
+
+    /* Port table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
+
+    /* Interface table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_external_ids);
+}
+
+/* Adds to 'lports' the "iface-id" value found in the external_ids of each
+ * interface attached to the integration bridge 'ctx->br_int'.
+ *
+ * The port whose name equals 'ctx->br_int_name' is skipped, and so is any
+ * interface that has no "iface-id" key. */
+static void
+get_local_iface_ids(struct controller_ctx *ctx, struct sset *lports)
+{
+    const struct ovsrec_bridge *br_int = ctx->br_int;
+    size_t p;
+
+    for (p = 0; p < br_int->n_ports; p++) {
+        const struct ovsrec_port *port = br_int->ports[p];
+        size_t k;
+
+        if (strcmp(port->name, ctx->br_int_name) == 0) {
+            continue;
+        }
+
+        for (k = 0; k < port->n_interfaces; k++) {
+            const char *id = smap_get(&port->interfaces[k]->external_ids,
+                                      "iface-id");
+
+            if (id) {
+                sset_add(lports, id);
+            }
+        }
+    }
+}
+
+/* Brings the 'chassis' column of the southbound Binding table in line with
+ * the logical ports present on this hypervisor.
+ *
+ * A binding counts as local when its 'logical_port' is one of the "iface-id"s
+ * collected by get_local_iface_ids(), or when it has a nonempty 'parent_port'
+ * that is.  Local bindings are pointed at this chassis; bindings that point
+ * at this chassis but are not local are cleared.  Everything is committed in
+ * one blocking transaction.
+ *
+ * Does nothing if no Chassis row named 'ctx->chassis_id' exists. */
+void
+binding_run(struct controller_ctx *ctx)
+{
+    const struct sbrec_chassis *chassis_rec
+        = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
+    if (!chassis_rec) {
+        return;
+    }
+
+    /* 'unclaimed' shrinks as bindings are matched against it; 'local_ids'
+     * keeps the complete set for the 'parent_port' lookups. */
+    struct sset unclaimed, local_ids;
+    sset_init(&unclaimed);
+    sset_init(&local_ids);
+    get_local_iface_ids(ctx, &unclaimed);
+    sset_clone(&local_ids, &unclaimed);
+
+    struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
+    ovsdb_idl_txn_add_comment(txn,
+                              "ovn-controller: updating bindings for '%s'",
+                              ctx->chassis_id);
+
+    const struct sbrec_binding *binding_rec;
+    SBREC_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
+        /* The find-and-delete must run for every row, so do it first and
+         * consult 'parent_port' only when it fails. */
+        bool is_local = sset_find_and_delete(&unclaimed,
+                                             binding_rec->logical_port);
+        if (!is_local && binding_rec->parent_port
+            && binding_rec->parent_port[0]) {
+            is_local = sset_contains(&local_ids, binding_rec->parent_port);
+        }
+
+        if (is_local) {
+            if (binding_rec->chassis != chassis_rec) {
+                if (binding_rec->chassis) {
+                    VLOG_INFO("Changing chassis for lport %s from %s to %s",
+                              binding_rec->logical_port,
+                              binding_rec->chassis->name,
+                              chassis_rec->name);
+                }
+                sbrec_binding_set_chassis(binding_rec, chassis_rec);
+            }
+        } else if (binding_rec->chassis == chassis_rec) {
+            sbrec_binding_set_chassis(binding_rec, NULL);
+        }
+    }
+
+    int status = ovsdb_idl_txn_commit_block(txn);
+    if (status == TXN_ERROR) {
+        VLOG_INFO("Problem committing binding information: %s",
+                  ovsdb_idl_txn_status_to_string(status));
+    }
+    ovsdb_idl_txn_destroy(txn);
+
+    /* Whatever is still in 'unclaimed' had no Binding row at all. */
+    const char *leftover;
+    SSET_FOR_EACH (leftover, &unclaimed) {
+        VLOG_DBG("No binding record for lport %s", leftover);
+    }
+    sset_destroy(&unclaimed);
+    sset_destroy(&local_ids);
+}
+
+/* Clears the 'chassis' column of every southbound Binding row that points at
+ * this chassis.  The transaction is rebuilt and committed again until the
+ * commit reports TXN_SUCCESS or TXN_UNCHANGED.
+ *
+ * Asserts that 'ctx->ovnsb_idl' is nonnull.  Does nothing if no Chassis row
+ * named 'ctx->chassis_id' exists. */
+void
+binding_destroy(struct controller_ctx *ctx)
+{
+    const struct sbrec_chassis *chassis_rec;
+    int status;
+
+    ovs_assert(ctx->ovnsb_idl);
+
+    chassis_rec = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
+    if (!chassis_rec) {
+        return;
+    }
+
+    do {
+        struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
+        const struct sbrec_binding *binding_rec;
+
+        ovsdb_idl_txn_add_comment(txn,
+                              "ovn-controller: removing all bindings for '%s'",
+                              ctx->chassis_id);
+
+        SBREC_BINDING_FOR_EACH(binding_rec, ctx->ovnsb_idl) {
+            if (binding_rec->chassis != chassis_rec) {
+                continue;
+            }
+            sbrec_binding_set_chassis(binding_rec, NULL);
+        }
+
+        status = ovsdb_idl_txn_commit_block(txn);
+        if (status == TXN_ERROR) {
+            VLOG_INFO("Problem removing bindings: %s",
+                      ovsdb_idl_txn_status_to_string(status));
+        }
+
+        ovsdb_idl_txn_destroy(txn);
+    } while (status != TXN_SUCCESS && status != TXN_UNCHANGED);
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef OVN_BINDING_H
+#define OVN_BINDING_H 1
+
+struct controller_ctx;
+
+/* Registers the OVS database tables and columns that the binding code
+ * reads. */
+void binding_init(struct controller_ctx *ctx);
+
+/* Updates the southbound Binding table to reflect the logical ports present
+ * on this chassis. */
+void binding_run(struct controller_ctx *ctx);
+
+/* Clears this chassis from every southbound Binding row that refers to
+ * it. */
+void binding_destroy(struct controller_ctx *ctx);
+
+#endif /* ovn/controller/binding.h */
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "chassis.h"
+
+#include "lib/hash.h"
+#include "lib/poll-loop.h"
+#include "lib/sset.h"
+#include "lib/util.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "ovn-controller.h"
+
+VLOG_DEFINE_THIS_MODULE(chassis);
+
+/* Registers with the OVS database IDL in 'ctx' the tables and columns that
+ * the chassis code reads: 'external_ids' from Open_vSwitch, 'ports' from
+ * Bridge, 'name', 'interfaces' and 'external_ids' from Port, and 'name',
+ * 'type' and 'options' from Interface. */
+void
+chassis_init(struct controller_ctx *ctx)
+{
+    struct ovsdb_idl *ovs_idl = ctx->ovs_idl;
+
+    /* Open_vSwitch table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_open_vswitch);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_open_vswitch_col_external_ids);
+
+    /* Bridge table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_bridge);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_bridge_col_ports);
+
+    /* Port table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_port);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_name);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_interfaces);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_port_col_external_ids);
+
+    /* Interface table. */
+    ovsdb_idl_add_table(ovs_idl, &ovsrec_table_interface);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_name);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_type);
+    ovsdb_idl_add_column(ovs_idl, &ovsrec_interface_col_options);
+}
+
+/* Makes the southbound Chassis row named 'ctx->chassis_id' advertise the
+ * encapsulation given by the "ovn-encap-type" and "ovn-encap-ip" keys in the
+ * external_ids of the local Open_vSwitch row.
+ *
+ * Returns without writing anything if there is no Open_vSwitch row, if either
+ * key is missing, or if one of the chassis's existing encaps already has that
+ * type and IP.  Otherwise it inserts the Chassis row if there is none and
+ * replaces the row's encaps with a single new Encap row, using a blocking
+ * transaction.  If the commit result is neither TXN_SUCCESS nor
+ * TXN_UNCHANGED, the problem is logged and poll_immediate_wake() is
+ * called. */
+static void
+register_chassis(struct controller_ctx *ctx)
+{
+    /* False until a call either finds a matching encap or attempts to write
+     * one; while false, each nonmatching existing encap draws a warning. */
+    static bool inited = false;
+
+    const struct sbrec_chassis *chassis_rec
+        = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
+
+    /* xxx Need to support more than one encap.  Also need to support
+     * xxx encap options. */
+    const struct ovsrec_open_vswitch *cfg
+        = ovsrec_open_vswitch_first(ctx->ovs_idl);
+    if (!cfg) {
+        VLOG_INFO("No Open_vSwitch row defined.");
+        return;
+    }
+
+    const char *encap_type = smap_get(&cfg->external_ids, "ovn-encap-type");
+    const char *encap_ip = smap_get(&cfg->external_ids, "ovn-encap-ip");
+    if (!(encap_type && encap_ip)) {
+        VLOG_INFO("Need to specify an encap type and ip");
+        return;
+    }
+
+    if (chassis_rec) {
+        size_t i;
+
+        for (i = 0; i < chassis_rec->n_encaps; i++) {
+            const struct sbrec_encap *cur = chassis_rec->encaps[i];
+
+            if (!strcmp(cur->type, encap_type)
+                && !strcmp(cur->ip, encap_ip)) {
+                /* Nothing changed. */
+                inited = true;
+                return;
+            }
+
+            if (!inited) {
+                VLOG_WARN("Chassis config changing on startup, make sure "
+                          "multiple chassis are not configured : %s/%s->%s/%s",
+                          cur->type, cur->ip, encap_type, encap_ip);
+            }
+        }
+    }
+
+    struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
+    ovsdb_idl_txn_add_comment(txn,
+                              "ovn-controller: registering chassis '%s'",
+                              ctx->chassis_id);
+
+    if (!chassis_rec) {
+        chassis_rec = sbrec_chassis_insert(txn);
+        sbrec_chassis_set_name(chassis_rec, ctx->chassis_id);
+    }
+
+    struct sbrec_encap *encap_rec = sbrec_encap_insert(txn);
+    sbrec_encap_set_type(encap_rec, encap_type);
+    sbrec_encap_set_ip(encap_rec, encap_ip);
+    sbrec_chassis_set_encaps(chassis_rec, &encap_rec, 1);
+
+    int status = ovsdb_idl_txn_commit_block(txn);
+    if (!(status == TXN_SUCCESS || status == TXN_UNCHANGED)) {
+        VLOG_INFO("Problem registering chassis: %s",
+                  ovsdb_idl_txn_status_to_string(status));
+        poll_immediate_wake();
+    }
+    ovsdb_idl_txn_destroy(txn);
+
+    inited = true;
+}
+
+/* Enough context to create a new tunnel, using tunnel_add().
+ *
+ * update_encaps() fills one of these in, passes it to tunnel_add() once for
+ * each remote chassis, and finally deletes the ports still listed in
+ * 'tunnel_hmap'. */
+struct tunnel_ctx {
+ /* Contains "struct port_hash_node"s, one for each existing OVS port
+ * that has an "ovn-chassis-id" key in its external_ids. Used to figure
+ * out what existing tunnels should be deleted: tunnel_add() removes the
+ * node for each tunnel that is still wanted, so after all the remote
+ * chassis have been processed, any ports remaining in 'tunnel_hmap'
+ * must be deleted from the database. */
+ struct hmap tunnel_hmap;
+
+ /* Names of all ports in all bridges, to allow checking uniqueness when
+ * adding a new tunnel. */
+ struct sset port_names;
+
+ struct ovsdb_idl_txn *ovs_txn; /* Transaction that new rows are added to. */
+ const struct ovsrec_bridge *br_int; /* Bridge that receives new tunnels. */
+};
+
+struct port_hash_node { /* An existing OVS port that has "ovn-chassis-id". */
+ struct hmap_node node; /* In a tunnel_ctx's 'tunnel_hmap'. */
+ const struct ovsrec_port *port; /* The port itself. */
+ const struct ovsrec_bridge *bridge; /* Bridge whose 'ports' lists 'port'. */
+};
+
+/* Returns a hash of the triple that identifies a tunnel: the remote
+ * 'chassis_id', the encapsulation 'type', and the remote 'ip'.  The strings
+ * are hashed in that order, each result seeding the next hash. */
+static size_t
+port_hash(const char *chassis_id, const char *type, const char *ip)
+{
+    return hash_string(ip, hash_string(type, hash_string(chassis_id, 0)));
+}
+
+/* Returns the hash under which 'port' is filed in a tunnel_ctx's
+ * 'tunnel_hmap': port_hash() of its "ovn-chassis-id" external-id, the type
+ * of its first interface, and that interface's "remote_ip" option.
+ *
+ * Returns 0 if 'port' has no "ovn-chassis-id", no interfaces, or no
+ * "remote_ip" option.  tunnel_add() never matches such a port, so it stays
+ * in the map and is treated as a stale tunnel. */
+static size_t
+port_hash_rec(const struct ovsrec_port *port)
+{
+    const char *chassis_id, *ip;
+    const struct ovsrec_interface *iface;
+
+    chassis_id = smap_get(&port->external_ids, "ovn-chassis-id");
+
+    if (!chassis_id || !port->n_interfaces) {
+        /* This should not happen for an OVN-created port. */
+        return 0;
+    }
+
+    iface = port->interfaces[0];
+    ip = smap_get(&iface->options, "remote_ip");
+    if (!ip) {
+        /* The option may be absent (tunnel_add() checks for that too), and
+         * a null pointer must not be passed on to be hashed as a string. */
+        return 0;
+    }
+
+    return port_hash(chassis_id, iface->type, ip);
+}
+
+/* Returns a newly allocated port name of the form "ovn-<chassis>-<n>" that is
+ * not in 'tc->port_names', where <chassis> is at most the first six
+ * characters of 'chassis_id' and <n> is the lowest hexadecimal number below
+ * UINT16_MAX that yields an unused name.  The caller must free the result.
+ *
+ * Returns NULL if every candidate name is already taken. */
+static char *
+tunnel_create_name(struct tunnel_ctx *tc, const char *chassis_id)
+{
+    int suffix = 0;
+
+    while (suffix < UINT16_MAX) {
+        char *candidate = xasprintf("ovn-%.6s-%x", chassis_id, suffix);
+
+        if (!sset_contains(&tc->port_names, candidate)) {
+            return candidate;
+        }
+
+        free(candidate);
+        suffix++;
+    }
+
+    return NULL;
+}
+
+
+/* Ensures that a tunnel port to remote chassis 'new_chassis_id' using
+ * 'encap' exists.
+ *
+ * If 'tc->tunnel_hmap' already holds a port with the same chassis ID,
+ * interface type and remote IP, its node is removed from the map and freed,
+ * and nothing else changes.  Otherwise a new Interface row and Port row are
+ * added to 'tc->ovs_txn', the port is appended to the ports of 'tc->br_int',
+ * and its name is recorded in 'tc->port_names'.
+ *
+ * Logs a warning and adds nothing if no unused port name is available. */
+static void
+tunnel_add(struct tunnel_ctx *tc, const char *new_chassis_id,
+           const struct sbrec_encap *encap)
+{
+    size_t hash = port_hash(new_chassis_id, encap->type, encap->ip);
+    struct port_hash_node *existing;
+
+    /* Look for an existing OVS port that already implements this tunnel. */
+    HMAP_FOR_EACH_WITH_HASH (existing, node, hash, &tc->tunnel_hmap) {
+        const struct ovsrec_port *old_port = existing->port;
+        const char *old_chassis_id = smap_get(&old_port->external_ids,
+                                              "ovn-chassis-id");
+        const struct ovsrec_interface *old_iface;
+        const char *old_ip;
+
+        if (!old_chassis_id || !old_port->n_interfaces) {
+            continue;
+        }
+
+        old_iface = old_port->interfaces[0];
+        old_ip = smap_get(&old_iface->options, "remote_ip");
+        if (!old_ip
+            || strcmp(new_chassis_id, old_chassis_id)
+            || strcmp(encap->type, old_iface->type)
+            || strcmp(encap->ip, old_ip)) {
+            continue;
+        }
+
+        /* Still wanted: take it off the list of deletion candidates. */
+        hmap_remove(&tc->tunnel_hmap, &existing->node);
+        free(existing);
+        return;
+    }
+
+    /* No such port, so add one. */
+    char *name = tunnel_create_name(tc, new_chassis_id);
+    if (!name) {
+        VLOG_WARN("Unable to allocate unique name for '%s' tunnel",
+                  new_chassis_id);
+        return;
+    }
+
+    struct smap options = SMAP_INITIALIZER(&options);
+    struct ovsrec_interface *iface = ovsrec_interface_insert(tc->ovs_txn);
+    ovsrec_interface_set_name(iface, name);
+    ovsrec_interface_set_type(iface, encap->type);
+    smap_add(&options, "remote_ip", encap->ip);
+    smap_add(&options, "key", "flow");
+    ovsrec_interface_set_options(iface, &options);
+    smap_destroy(&options);
+
+    struct smap external_ids = SMAP_INITIALIZER(&external_ids);
+    struct ovsrec_port *port = ovsrec_port_insert(tc->ovs_txn);
+    ovsrec_port_set_name(port, name);
+    ovsrec_port_set_interfaces(port, &iface, 1);
+    smap_add(&external_ids, "ovn-chassis-id", new_chassis_id);
+    ovsrec_port_set_external_ids(port, &external_ids);
+    smap_destroy(&external_ids);
+
+    /* The bridge's new port list is its current list plus the new port. */
+    size_t n_old = tc->br_int->n_ports;
+    struct ovsrec_port **ports = xmalloc(sizeof *tc->br_int->ports
+                                         * (n_old + 1));
+    size_t i;
+    for (i = 0; i < n_old; i++) {
+        ports[i] = tc->br_int->ports[i];
+    }
+    ports[n_old] = port;
+    ovsrec_bridge_verify_ports(tc->br_int);
+    ovsrec_bridge_set_ports(tc->br_int, ports, n_old + 1);
+
+    sset_add(&tc->port_names, name);
+    free(name);
+    free(ports);
+}
+
+/* Rewrites the 'ports' column of 'br' so that it no longer lists 'port'.
+ * The remaining ports keep their relative order. */
+static void
+bridge_delete_port(const struct ovsrec_bridge *br,
+                   const struct ovsrec_port *port)
+{
+    struct ovsrec_port **kept = xmalloc(sizeof *br->ports * br->n_ports);
+    size_t n_kept = 0;
+    size_t i;
+
+    for (i = 0; i < br->n_ports; i++) {
+        if (br->ports[i] == port) {
+            continue;
+        }
+        kept[n_kept++] = br->ports[i];
+    }
+
+    ovsrec_bridge_verify_ports(br);
+    ovsrec_bridge_set_ports(br, kept, n_kept);
+    free(kept);
+}
+
+/* Returns the first encap of 'chassis_rec' whose type is "geneve" or "stt",
+ * or NULL if it has none.
+ *
+ * For hypervisors, only Geneve and STT encapsulations are supported.  Sets
+ * are returned alphabetically, so "geneve" is preferred over "stt". */
+static struct sbrec_encap *
+preferred_encap(const struct sbrec_chassis *chassis_rec)
+{
+    size_t i;
+
+    for (i = 0; i < chassis_rec->n_encaps; i++) {
+        struct sbrec_encap *encap = chassis_rec->encaps[i];
+
+        if (strcmp(encap->type, "geneve") == 0
+            || strcmp(encap->type, "stt") == 0) {
+            return encap;
+        }
+    }
+
+    return NULL;
+}
+
+/* Reconciles the OVS tunnel ports with the chassis listed in the southbound
+ * database, in one blocking OVS transaction:
+ *
+ *   1. Records the name of every port in every bridge, and indexes each
+ *      port that has an "ovn-chassis-id" external-id as an existing tunnel.
+ *
+ *   2. Calls tunnel_add() with the preferred encap of every chassis other
+ *      than 'ctx->chassis_id'.
+ *
+ *   3. Deletes from its bridge each indexed tunnel that step 2 did not
+ *      claim.
+ *
+ * If the commit result is neither TXN_SUCCESS nor TXN_UNCHANGED, the problem
+ * is logged and poll_immediate_wake() is called. */
+static void
+update_encaps(struct controller_ctx *ctx)
+{
+    struct tunnel_ctx tc = {
+        .tunnel_hmap = HMAP_INITIALIZER(&tc.tunnel_hmap),
+        .port_names = SSET_INITIALIZER(&tc.port_names),
+        .br_int = ctx->br_int
+    };
+
+    tc.ovs_txn = ovsdb_idl_txn_create(ctx->ovs_idl);
+    ovsdb_idl_txn_add_comment(tc.ovs_txn,
+                              "ovn-controller: modifying OVS tunnels '%s'",
+                              ctx->chassis_id);
+
+    /* Step 1: collect port names and existing OVN tunnels. */
+    const struct ovsrec_bridge *br;
+    OVSREC_BRIDGE_FOR_EACH(br, ctx->ovs_idl) {
+        size_t i;
+
+        for (i = 0; i < br->n_ports; i++) {
+            const struct ovsrec_port *port = br->ports[i];
+            struct port_hash_node *entry;
+
+            sset_add(&tc.port_names, port->name);
+
+            if (!smap_get(&port->external_ids, "ovn-chassis-id")) {
+                continue;
+            }
+
+            entry = xzalloc(sizeof *entry);
+            entry->bridge = br;
+            entry->port = port;
+            hmap_insert(&tc.tunnel_hmap, &entry->node, port_hash_rec(port));
+        }
+    }
+
+    /* Step 2: create tunnels to the other chassis. */
+    const struct sbrec_chassis *chassis_rec;
+    SBREC_CHASSIS_FOR_EACH(chassis_rec, ctx->ovnsb_idl) {
+        const struct sbrec_encap *encap;
+
+        if (!strcmp(chassis_rec->name, ctx->chassis_id)) {
+            /* This is the local chassis. */
+            continue;
+        }
+
+        encap = preferred_encap(chassis_rec);
+        if (encap) {
+            tunnel_add(&tc, chassis_rec->name, encap);
+        } else {
+            VLOG_INFO("No supported encaps for '%s'", chassis_rec->name);
+        }
+    }
+
+    /* Step 3: delete any existing OVN tunnels that were not still around. */
+    struct port_hash_node *stale, *next_stale;
+    HMAP_FOR_EACH_SAFE (stale, next_stale, node, &tc.tunnel_hmap) {
+        hmap_remove(&tc.tunnel_hmap, &stale->node);
+        bridge_delete_port(stale->bridge, stale->port);
+        free(stale);
+    }
+    hmap_destroy(&tc.tunnel_hmap);
+    sset_destroy(&tc.port_names);
+
+    int status = ovsdb_idl_txn_commit_block(tc.ovs_txn);
+    if (!(status == TXN_SUCCESS || status == TXN_UNCHANGED)) {
+        VLOG_INFO("Problem modifying OVS tunnels: %s",
+                  ovsdb_idl_txn_status_to_string(status));
+        poll_immediate_wake();
+    }
+    ovsdb_idl_txn_destroy(tc.ovs_txn);
+}
+
+void
+chassis_run(struct controller_ctx *ctx)
+{
+ register_chassis(ctx); /* Record this chassis's encap in the southbound DB. */
+ update_encaps(ctx); /* Then sync OVS tunnel ports with the other chassis. */
+}
+
+/* Undoes the work of chassis_run(), in two stages that are each retried
+ * until the commit reports TXN_SUCCESS or TXN_UNCHANGED:
+ *
+ *   1. Deletes the southbound Chassis row named 'ctx->chassis_id'.  This
+ *      stage ends early if no such row is found.
+ *
+ *   2. Removes from the integration bridge every port that has an
+ *      "ovn-chassis-id" external-id, that is, every OVN-created tunnel.
+ *
+ * Asserts that 'ctx->ovnsb_idl' is nonnull. */
+void
+chassis_destroy(struct controller_ctx *ctx)
+{
+    int status = TXN_TRY_AGAIN;
+
+    ovs_assert(ctx->ovnsb_idl);
+
+    /* Stage 1: unregister the chassis. */
+    do {
+        const struct sbrec_chassis *chassis_rec
+            = get_chassis_by_name(ctx->ovnsb_idl, ctx->chassis_id);
+        if (!chassis_rec) {
+            break;
+        }
+
+        struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(ctx->ovnsb_idl);
+        ovsdb_idl_txn_add_comment(txn,
+                                  "ovn-controller: unregistering chassis '%s'",
+                                  ctx->chassis_id);
+        sbrec_chassis_delete(chassis_rec);
+
+        status = ovsdb_idl_txn_commit_block(txn);
+        if (status == TXN_ERROR) {
+            VLOG_INFO("Problem unregistering chassis: %s",
+                      ovsdb_idl_txn_status_to_string(status));
+        }
+        ovsdb_idl_txn_destroy(txn);
+    } while (status != TXN_SUCCESS && status != TXN_UNCHANGED);
+
+    /* Stage 2: delete all the OVN-created tunnels from the integration
+     * bridge. */
+    do {
+        const struct ovsrec_bridge *br_int = ctx->br_int;
+        struct ovsdb_idl_txn *txn = ovsdb_idl_txn_create(ctx->ovs_idl);
+        struct ovsrec_port **kept;
+        size_t n_kept = 0;
+        size_t i;
+
+        ovsdb_idl_txn_add_comment(txn,
+                                  "ovn-controller: destroying tunnels");
+
+        /* Keep only the ports that do not carry "ovn-chassis-id". */
+        kept = xmalloc(sizeof *br_int->ports * br_int->n_ports);
+        for (i = 0; i < br_int->n_ports; i++) {
+            if (smap_get(&br_int->ports[i]->external_ids,
+                         "ovn-chassis-id")) {
+                continue;
+            }
+            kept[n_kept++] = br_int->ports[i];
+        }
+        ovsrec_bridge_verify_ports(br_int);
+        ovsrec_bridge_set_ports(br_int, kept, n_kept);
+        free(kept);
+
+        status = ovsdb_idl_txn_commit_block(txn);
+        if (status == TXN_ERROR) {
+            VLOG_INFO("Problem destroying tunnels: %s",
+                      ovsdb_idl_txn_status_to_string(status));
+        }
+        ovsdb_idl_txn_destroy(txn);
+    } while (status != TXN_SUCCESS && status != TXN_UNCHANGED);
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_CHASSIS_H
+#define OVN_CHASSIS_H 1
+
+struct controller_ctx;
+
+void chassis_init(struct controller_ctx *);
+void chassis_run(struct controller_ctx *);
+void chassis_destroy(struct controller_ctx *);
+
+#endif /* ovn/chassis.h */
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "ofctrl.h"
+#include "dirs.h"
+#include "dynamic-string.h"
+#include "hmap.h"
+#include "match.h"
+#include "ofp-actions.h"
+#include "ofp-msgs.h"
+#include "ofp-print.h"
+#include "ofp-util.h"
+#include "ofpbuf.h"
+#include "openflow/openflow.h"
+#include "openvswitch/vlog.h"
+#include "ovn-controller.h"
+#include "rconn.h"
+#include "socket-util.h"
+
+VLOG_DEFINE_THIS_MODULE(ofctrl);
+
+/* An OpenFlow flow.
+ *
+ * The "key" members identify a flow (two flows with equal table_id,
+ * priority and match are the same flow as far as ovn_flow_lookup() is
+ * concerned); the "data" members are the actions attached to it. */
+struct ovn_flow {
+ /* Key. */
+ struct hmap_node hmap_node; /* In 'desired_flows' or 'installed_flows'. */
+ uint8_t table_id; /* OpenFlow table number. */
+ uint16_t priority; /* Flow priority. */
+ struct match match; /* Fields matched by the flow. */
+
+ /* Data. */
+ struct ofpact *ofpacts; /* Actions; freed by ovn_flow_destroy(). */
+ size_t ofpacts_len; /* Size of 'ofpacts', in bytes. */
+};
+
+static uint32_t ovn_flow_hash(const struct ovn_flow *);
+static struct ovn_flow *ovn_flow_lookup(struct hmap *flow_table,
+ const struct ovn_flow *target);
+static char *ovn_flow_to_string(const struct ovn_flow *);
+static void ovn_flow_log(const struct ovn_flow *, const char *action);
+static void ovn_flow_destroy(struct ovn_flow *);
+
+/* OpenFlow connection to the switch. */
+static struct rconn *swconn;
+
+/* Last seen sequence number for 'swconn'. When this differs from
+ * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */
+static unsigned int seqno;
+
+/* Counter for in-flight OpenFlow messages on 'swconn'. We only send a new
+ * round of flow table modifications to the switch when the counter falls to
+ * zero, to avoid unbounded buffering. */
+static struct rconn_packet_counter *tx_counter;
+
+/* Flow tables. Each holds "struct ovn_flow"s.
+ *
+ * 'desired_flows' is the flow table that we want the switch to have.
+ * 'installed_flows' is the flow table currently installed in the switch. */
+static struct hmap desired_flows;
+static struct hmap installed_flows;
+
+static void ovn_flow_table_clear(struct hmap *flow_table);
+static void ovn_flow_table_destroy(struct hmap *flow_table);
+
+static void ofctrl_update_flows(void);
+static void ofctrl_recv(const struct ofpbuf *msg);
+
+/* Initializes the module's global state: creates the 'swconn' rconn
+ * (restricted to OpenFlow 1.3 via the 1 << OFP13_VERSION version bitmap),
+ * the in-flight message counter, and the two empty flow tables.  The
+ * connection target is not set here; ofctrl_run() does that.
+ *
+ * NOTE(review): the first two rconn_create() arguments (5, 0) are
+ * presumably the probe interval and max backoff -- confirm against
+ * rconn_create()'s declaration. */
+void
+ofctrl_init(void)
+{
+ swconn = rconn_create(5, 0, DSCP_DEFAULT, 1 << OFP13_VERSION);
+ tx_counter = rconn_packet_counter_create();
+ hmap_init(&desired_flows);
+ hmap_init(&installed_flows);
+}
+
+/* Drives the OpenFlow connection to the integration bridge for one main
+ * loop iteration.
+ *
+ * Call this in the main loop after anything that updates the flow table
+ * (e.g. after calls to ofctrl_clear_flows() and ofctrl_add_flow()).
+ *
+ * The function (re)targets 'swconn' at the management socket of
+ * 'ctx->br_int_name', runs the connection, pushes flow table changes to the
+ * switch if no previously queued messages are still in flight, and then
+ * processes at most 50 received messages. */
+void
+ofctrl_run(struct controller_ctx *ctx)
+{
+    char *mgmt_socket = xasprintf("unix:%s/%s.mgmt", ovs_rundir(),
+                                  ctx->br_int_name);
+    if (strcmp(mgmt_socket, rconn_get_target(swconn)) != 0) {
+        /* The desired target differs from the current one: reconnect. */
+        rconn_connect(swconn, mgmt_socket, mgmt_socket);
+    }
+    free(mgmt_socket);
+
+    rconn_run(swconn);
+    if (!rconn_is_connected(swconn)) {
+        return;
+    }
+
+    /* Only start a new round of flow_mods once the previous round has been
+     * fully transmitted. */
+    if (rconn_packet_counter_n_packets(tx_counter) == 0) {
+        ofctrl_update_flows();
+    }
+
+    /* Bound the number of messages handled per call. */
+    int budget = 50;
+    while (budget-- > 0) {
+        struct ofpbuf *received = rconn_recv(swconn);
+        if (received == NULL) {
+            break;
+        }
+
+        ofctrl_recv(received);
+        ofpbuf_delete(received);
+    }
+}
+
+/* Registers wakeup conditions for 'swconn' with the poll loop: both for
+ * the connection's own processing and for incoming messages. */
+void
+ofctrl_wait(void)
+{
+ rconn_run_wait(swconn);
+ rconn_recv_wait(swconn);
+}
+
+/* Releases everything ofctrl_init() created: the switch connection, both
+ * flow tables (including every flow still in them), and the in-flight
+ * message counter. */
+void
+ofctrl_destroy(void)
+{
+ rconn_destroy(swconn);
+ ovn_flow_table_destroy(&installed_flows);
+ ovn_flow_table_destroy(&desired_flows);
+ rconn_packet_counter_destroy(tx_counter);
+}
+\f
+/* Queues 'msg' for transmission on 'swconn', accounting for it in
+ * 'tx_counter' so that ofctrl_run() can tell when it is still in flight. */
+static void
+queue_msg(struct ofpbuf *msg)
+{
+ rconn_send(swconn, msg, tx_counter);
+}
+
+/* Handles one OpenFlow message 'msg' received from the switch.
+ *
+ * Echo requests are answered with an echo reply.  Echo replies, packet-ins,
+ * port status and flow-removed messages are silently ignored.  Every other
+ * message type (and any unknown type) is unexpected here; it is only
+ * logged, rate-limited, when debug logging is enabled.  Messages whose type
+ * cannot be decoded are dropped without logging.
+ *
+ * 'msg' is not modified and remains owned by the caller. */
+static void
+ofctrl_recv(const struct ofpbuf *msg)
+{
+ enum ofptype type;
+ struct ofpbuf b;
+
+ /* Decode the type from a shallow copy so that 'msg' itself is left
+  * untouched. */
+ b = *msg;
+ if (ofptype_pull(&type, &b)) {
+ return;
+ }
+
+ switch (type) {
+ case OFPTYPE_ECHO_REQUEST:
+ queue_msg(make_echo_reply(msg->data));
+ break;
+
+ case OFPTYPE_ECHO_REPLY:
+ case OFPTYPE_PACKET_IN:
+ case OFPTYPE_PORT_STATUS:
+ case OFPTYPE_FLOW_REMOVED:
+ /* Nothing to do. */
+ break;
+
+ case OFPTYPE_HELLO:
+ case OFPTYPE_ERROR:
+ case OFPTYPE_FEATURES_REQUEST:
+ case OFPTYPE_FEATURES_REPLY:
+ case OFPTYPE_GET_CONFIG_REQUEST:
+ case OFPTYPE_GET_CONFIG_REPLY:
+ case OFPTYPE_SET_CONFIG:
+ case OFPTYPE_PACKET_OUT:
+ case OFPTYPE_FLOW_MOD:
+ case OFPTYPE_GROUP_MOD:
+ case OFPTYPE_PORT_MOD:
+ case OFPTYPE_TABLE_MOD:
+ case OFPTYPE_BARRIER_REQUEST:
+ case OFPTYPE_BARRIER_REPLY:
+ case OFPTYPE_QUEUE_GET_CONFIG_REQUEST:
+ case OFPTYPE_QUEUE_GET_CONFIG_REPLY:
+ case OFPTYPE_DESC_STATS_REQUEST:
+ case OFPTYPE_DESC_STATS_REPLY:
+ case OFPTYPE_FLOW_STATS_REQUEST:
+ case OFPTYPE_FLOW_STATS_REPLY:
+ case OFPTYPE_AGGREGATE_STATS_REQUEST:
+ case OFPTYPE_AGGREGATE_STATS_REPLY:
+ case OFPTYPE_TABLE_STATS_REQUEST:
+ case OFPTYPE_TABLE_STATS_REPLY:
+ case OFPTYPE_PORT_STATS_REQUEST:
+ case OFPTYPE_PORT_STATS_REPLY:
+ case OFPTYPE_QUEUE_STATS_REQUEST:
+ case OFPTYPE_QUEUE_STATS_REPLY:
+ case OFPTYPE_PORT_DESC_STATS_REQUEST:
+ case OFPTYPE_PORT_DESC_STATS_REPLY:
+ case OFPTYPE_ROLE_REQUEST:
+ case OFPTYPE_ROLE_REPLY:
+ case OFPTYPE_ROLE_STATUS:
+ case OFPTYPE_SET_FLOW_FORMAT:
+ case OFPTYPE_FLOW_MOD_TABLE_ID:
+ case OFPTYPE_SET_PACKET_IN_FORMAT:
+ case OFPTYPE_FLOW_AGE:
+ case OFPTYPE_SET_CONTROLLER_ID:
+ case OFPTYPE_FLOW_MONITOR_STATS_REQUEST:
+ case OFPTYPE_FLOW_MONITOR_STATS_REPLY:
+ case OFPTYPE_FLOW_MONITOR_CANCEL:
+ case OFPTYPE_FLOW_MONITOR_PAUSED:
+ case OFPTYPE_FLOW_MONITOR_RESUMED:
+ case OFPTYPE_GET_ASYNC_REQUEST:
+ case OFPTYPE_GET_ASYNC_REPLY:
+ case OFPTYPE_SET_ASYNC_CONFIG:
+ case OFPTYPE_METER_MOD:
+ case OFPTYPE_GROUP_STATS_REQUEST:
+ case OFPTYPE_GROUP_STATS_REPLY:
+ case OFPTYPE_GROUP_DESC_STATS_REQUEST:
+ case OFPTYPE_GROUP_DESC_STATS_REPLY:
+ case OFPTYPE_GROUP_FEATURES_STATS_REQUEST:
+ case OFPTYPE_GROUP_FEATURES_STATS_REPLY:
+ case OFPTYPE_METER_STATS_REQUEST:
+ case OFPTYPE_METER_STATS_REPLY:
+ case OFPTYPE_METER_CONFIG_STATS_REQUEST:
+ case OFPTYPE_METER_CONFIG_STATS_REPLY:
+ case OFPTYPE_METER_FEATURES_STATS_REQUEST:
+ case OFPTYPE_METER_FEATURES_STATS_REPLY:
+ case OFPTYPE_TABLE_FEATURES_STATS_REQUEST:
+ case OFPTYPE_TABLE_FEATURES_STATS_REPLY:
+ case OFPTYPE_BUNDLE_CONTROL:
+ case OFPTYPE_BUNDLE_ADD_MESSAGE:
+ default:
+ /* Messages that are generally unexpected. */
+ if (VLOG_IS_DBG_ENABLED()) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);
+
+ char *s = ofp_to_string(msg->data, msg->size, 2);
+ VLOG_DBG_RL(&rl, "OpenFlow packet ignored: %s", s);
+ free(s);
+ }
+ }
+}
+\f
+/* Flow table interface to the rest of ovn-controller. */
+
+/* Clears the table of flows desired to be in the switch. Call this before
+ * adding the desired flows (with ofctrl_add_flow()).
+ *
+ * Every flow currently in 'desired_flows' is removed and freed; the table
+ * itself stays initialized and can be refilled. Nothing is sent to the
+ * switch by this call. */
+void
+ofctrl_clear_flows(void)
+{
+ ovn_flow_table_clear(&desired_flows);
+}
+
+/* Adds a flow with the specified 'match' and 'actions' to the OpenFlow table
+ * numbered 'table_id' with the given 'priority'.  The caller retains
+ * ownership of 'match' and 'actions'; both are copied.
+ *
+ * If the desired flow table already contains a flow with the same table,
+ * priority and match, the new flow is dropped and the drop is logged at
+ * INFO level (rate-limited).
+ *
+ * This just assembles the desired flow table in memory.  Nothing is actually
+ * sent to the switch until a later call to ofctrl_run(). */
+void
+ofctrl_add_flow(uint8_t table_id, uint16_t priority,
+                const struct match *match, const struct ofpbuf *actions)
+{
+    struct ovn_flow *f = xmalloc(sizeof *f);
+    f->table_id = table_id;
+    f->priority = priority;
+    f->match = *match;
+    f->ofpacts = xmemdup(actions->data, actions->size);
+    f->ofpacts_len = actions->size;
+
+    /* ovn_flow_lookup() selects the hash bucket from
+     * 'target->hmap_node.hash', which xmalloc() leaves uninitialized, so
+     * the hash has to be filled in before 'f' can be used as a lookup
+     * target.  Otherwise duplicate detection reads garbage. */
+    f->hmap_node.hash = ovn_flow_hash(f);
+
+    if (ovn_flow_lookup(&desired_flows, f)) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+        if (!VLOG_DROP_INFO(&rl)) {
+            char *s = ovn_flow_to_string(f);
+            VLOG_INFO("dropping duplicate flow: %s", s);
+            free(s);
+        }
+
+        ovn_flow_destroy(f);
+        return;
+    }
+
+    hmap_insert(&desired_flows, &f->hmap_node, f->hmap_node.hash);
+}
+\f
+/* ovn_flow. */
+
+/* Computes the hash of the key members of 'f': its table, priority and
+ * match.  The actions do not contribute to the hash. */
+static uint32_t
+ovn_flow_hash(const struct ovn_flow *f)
+{
+    uint32_t table_and_priority = (f->table_id << 16) | f->priority;
+    uint32_t match_bits = match_hash(&f->match, 0);
+
+    return hash_2words(table_and_priority, match_bits);
+}
+
+/* Searches 'flow_table' for a flow with the same key (table, priority and
+ * match) as 'target'.  Returns that flow, or NULL if there is none.
+ *
+ * Only the bucket chain for 'target->hmap_node.hash' is examined, so that
+ * member must hold the hash of 'target''s key when this is called. */
+static struct ovn_flow *
+ovn_flow_lookup(struct hmap *flow_table, const struct ovn_flow *target)
+{
+    struct ovn_flow *candidate;
+
+    HMAP_FOR_EACH_WITH_HASH (candidate, hmap_node, target->hmap_node.hash,
+                             flow_table) {
+        if (candidate->table_id != target->table_id
+            || candidate->priority != target->priority) {
+            continue;
+        }
+        if (match_equal(&candidate->match, &target->match)) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/* Formats 'f' as "table_id=..., priority=..., <match>, actions=<actions>".
+ * Returns a newly allocated string that the caller must free(). */
+static char *
+ovn_flow_to_string(const struct ovn_flow *f)
+{
+    struct ds text = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&text, "table_id=%"PRIu8", ", f->table_id);
+    ds_put_format(&text, "priority=%"PRIu16", ", f->priority);
+    match_format(&f->match, &text, OFP_DEFAULT_PRIORITY);
+
+    ds_put_cstr(&text, ", actions=");
+    ofpacts_format(f->ofpacts, f->ofpacts_len, &text);
+
+    return ds_steal_cstr(&text);
+}
+
+/* Logs "<action> flow: <f>" at debug level.  The flow is only formatted
+ * when debug logging is enabled for this module. */
+static void
+ovn_flow_log(const struct ovn_flow *f, const char *action)
+{
+    if (!VLOG_IS_DBG_ENABLED()) {
+        return;
+    }
+
+    char *flow_text = ovn_flow_to_string(f);
+    VLOG_DBG("%s flow: %s", action, flow_text);
+    free(flow_text);
+}
+
+/* Frees 'f' together with its actions.  A null 'f' is a no-op.  The caller
+ * must already have removed 'f' from any flow table that contained it. */
+static void
+ovn_flow_destroy(struct ovn_flow *f)
+{
+    if (!f) {
+        return;
+    }
+
+    free(f->ofpacts);
+    free(f);
+}
+\f
+/* Flow tables of struct ovn_flow. */
+
+/* Removes and frees every flow in 'flow_table', leaving an empty but still
+ * initialized table. */
+static void
+ovn_flow_table_clear(struct hmap *flow_table)
+{
+    struct ovn_flow *flow, *next_flow;
+
+    HMAP_FOR_EACH_SAFE (flow, next_flow, hmap_node, flow_table) {
+        hmap_remove(flow_table, &flow->hmap_node);
+        ovn_flow_destroy(flow);
+    }
+}
+/* Frees every flow in 'flow_table' and then destroys the table itself. */
+static void
+ovn_flow_table_destroy(struct hmap *flow_table)
+{
+ ovn_flow_table_clear(flow_table);
+ hmap_destroy(flow_table);
+}
+\f
+/* Flow table update. */
+
+/* Encodes 'fm' as an OpenFlow 1.3 (OXM) flow_mod and queues it for
+ * transmission to the switch.
+ *
+ * Overwrites 'fm->buffer_id', 'fm->out_port' and 'fm->out_group' with the
+ * "none"/"any" values, so callers only need to fill in the remaining
+ * members. */
+static void
+queue_flow_mod(struct ofputil_flow_mod *fm)
+{
+ fm->buffer_id = UINT32_MAX;
+ fm->out_port = OFPP_ANY;
+ fm->out_group = OFPG_ANY;
+ queue_msg(ofputil_encode_flow_mod(fm, OFPUTIL_P_OF13_OXM));
+}
+
+/* Brings the switch's flow table in line with 'desired_flows' by queueing
+ * the necessary flow_mods, and updates 'installed_flows' to reflect what
+ * was sent.
+ *
+ * On return 'desired_flows' is empty: each desired flow has either been
+ * moved into 'installed_flows' (newly added flows) or destroyed (flows that
+ * were already installed).  Actions of updated flows are transferred from
+ * the desired flow to the installed flow rather than copied. */
+static void
+ofctrl_update_flows(void)
+{
+ /* If we've (re)connected, don't make any assumptions about the flows in
+ * the switch: delete all of them. (We'll immediately repopulate it
+ * below.) */
+ if (seqno != rconn_get_connection_seqno(swconn)) {
+ seqno = rconn_get_connection_seqno(swconn);
+
+ /* Send a flow_mod to delete all flows. */
+ struct ofputil_flow_mod fm = {
+ .match = MATCH_CATCHALL_INITIALIZER,
+ .table_id = OFPTT_ALL,
+ .command = OFPFC_DELETE,
+ };
+ queue_flow_mod(&fm);
+ VLOG_DBG("clearing all flows");
+
+ /* Clear installed_flows, to match the state of the switch. */
+ ovn_flow_table_clear(&installed_flows);
+ }
+
+ /* Iterate through all of the installed flows. If any of them are no
+ * longer desired, delete them; if any of them should have different
+ * actions, update them. */
+ struct ovn_flow *i, *next;
+ HMAP_FOR_EACH_SAFE (i, next, hmap_node, &installed_flows) {
+ struct ovn_flow *d = ovn_flow_lookup(&desired_flows, i);
+ if (!d) {
+ /* Installed flow is no longer desirable. Delete it from the
+ * switch and from installed_flows. */
+ struct ofputil_flow_mod fm;
+ memset(&fm, 0, sizeof fm);
+ fm.match = i->match;
+ fm.priority = i->priority;
+ fm.table_id = i->table_id;
+ fm.command = OFPFC_DELETE_STRICT;
+ queue_flow_mod(&fm);
+ ovn_flow_log(i, "removing");
+
+ hmap_remove(&installed_flows, &i->hmap_node);
+ ovn_flow_destroy(i);
+ } else {
+ if (!ofpacts_equal(i->ofpacts, i->ofpacts_len,
+ d->ofpacts, d->ofpacts_len)) {
+ /* Update actions in installed flow. */
+ struct ofputil_flow_mod fm;
+ memset(&fm, 0, sizeof fm);
+ fm.match = i->match;
+ fm.priority = i->priority;
+ fm.table_id = i->table_id;
+ fm.ofpacts = d->ofpacts;
+ fm.ofpacts_len = d->ofpacts_len;
+ fm.command = OFPFC_MODIFY_STRICT;
+ queue_flow_mod(&fm);
+ /* Note: 'i' still holds the old actions at this point, so the
+  * debug log shows the actions being replaced. */
+ ovn_flow_log(i, "updating");
+
+ /* Replace 'i''s actions by 'd''s. */
+ free(i->ofpacts);
+ i->ofpacts = d->ofpacts;
+ i->ofpacts_len = d->ofpacts_len;
+ d->ofpacts = NULL;
+ d->ofpacts_len = 0;
+ }
+
+ /* 'd' is now accounted for by 'i'; drop it so that only flows still
+  * to be added remain in desired_flows. */
+ hmap_remove(&desired_flows, &d->hmap_node);
+ ovn_flow_destroy(d);
+ }
+ }
+
+ /* The previous loop removed from desired_flows all of the flows that are
+ * already installed. Thus, any flows remaining in desired_flows need to
+ * be added to the flow table. */
+ struct ovn_flow *d;
+ HMAP_FOR_EACH_SAFE (d, next, hmap_node, &desired_flows) {
+ /* Send flow_mod to add flow. */
+ struct ofputil_flow_mod fm;
+ memset(&fm, 0, sizeof fm);
+ fm.match = d->match;
+ fm.priority = d->priority;
+ fm.table_id = d->table_id;
+ fm.ofpacts = d->ofpacts;
+ fm.ofpacts_len = d->ofpacts_len;
+ fm.command = OFPFC_ADD;
+ queue_flow_mod(&fm);
+ ovn_flow_log(d, "adding");
+
+ /* Move 'd' from desired_flows to installed_flows. */
+ hmap_remove(&desired_flows, &d->hmap_node);
+ hmap_insert(&installed_flows, &d->hmap_node, d->hmap_node.hash);
+ }
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef OFCTRL_H
+#define OFCTRL_H 1
+
+#include <stdint.h>
+
+struct controller_ctx;
+struct match;
+struct ofpbuf;
+
+/* Interface for OVN main loop. */
+void ofctrl_init(void);
+void ofctrl_run(struct controller_ctx *);
+void ofctrl_wait(void);
+void ofctrl_destroy(void);
+
+/* Flow table interface to the rest of ovn-controller. */
+void ofctrl_clear_flows(void);
+void ofctrl_add_flow(uint8_t table_id, uint16_t priority,
+ const struct match *, const struct ofpbuf *ofpacts);
+
+#endif /* ovn/ofctrl.h */
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovn-controller" section="8" title="ovn-controller">
+ <h1>Name</h1>
+ <p>ovn-controller -- Open Virtual Network local controller</p>
+
+ <h1>Synopsis</h1>
+ <p><code>ovn-controller</code> [<var>options</var>] [<var>ovs-database</var>]</p>
+
+ <h1>Description</h1>
+ <p>
+ <code>ovn-controller</code> is the local controller daemon for
+ OVN, the Open Virtual Network. It connects up to the OVN
+ Southbound database (see <code>ovn-sb</code>(5)) over the OVSDB
+ protocol, and down to the Open vSwitch database (see
+ <code>ovs-vswitchd.conf.db</code>(5)) over the OVSDB protocol and
+ to <code>ovs-vswitchd</code>(8) via OpenFlow. Each hypervisor and
+ software gateway in an OVN deployment runs its own independent
+ copy of <code>ovn-controller</code>; thus,
+ <code>ovn-controller</code>'s downward connections are
+ machine-local and do not run over a physical network.
+ </p>
+
+ <h1>Configuration</h1>
+ <p>
+ <code>ovn-controller</code> retrieves most of its configuration
+ information from the local Open vSwitch's ovsdb-server instance.
+ The default is the <code>db.sock</code> in local Open vSwitch's
+ "run" directory. <var>ovs-database</var> must take one of the
+ following forms:
+ </p>
+ <ul>
+ <li>
+ <p>
+ <code>ssl:<var>ip</var>:<var>port</var></code>
+ </p>
+ <p>
+ The specified SSL <var>port</var> on the host at the given
+ <var>ip</var>, which must be expressed as an IP address (not a DNS
+ name) in IPv4 or IPv6 address format. If <var>ip</var> is an IPv6
+ address, then wrap <var>ip</var> with square brackets, e.g.:
+ <code>ssl:[::1]:6640</code>. The <code>--private-key</code>,
+ <code>--certificate</code>, and <code>--ca-cert</code> options are
+ mandatory when this form is used.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>tcp:<var>ip</var>:<var>port</var></code>
+ </p>
+ <p>
+ Connect to the given TCP <var>port</var> on <var>ip</var>, where
+ <var>ip</var> can be IPv4 or IPv6 address. If <var>ip</var> is an
+ IPv6 address, then wrap <var>ip</var> with square brackets, e.g.:
+ <code>tcp:[::1]:6640</code>.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>unix:<var>file</var></code>
+ </p>
+ <p>
+ On POSIX, connect to the Unix domain server socket named
+ <var>file</var>.
+ </p>
+ <p>
+ On Windows, connect to a localhost TCP port whose value is written
+ in <var>file</var>.
+ </p>
+ </li>
+ </ul>
+ <p>
+ <code>ovn-controller</code> assumes it gets configuration
+ information from the following keys in the <code>Open_vSwitch</code>
+ table of the local OVS instance:
+ <ul>
+ <li>
+ <p>
+ <code>external_ids:system-id</code> specifies the chassis
+ name to use in the Chassis table.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>external_ids:ovn-bridge</code> specifies the
+ integration bridge to which logical ports are attached.
+ The default is <code>br-int</code>.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>external_ids:ovn-remote</code> specifies the OVN
+ database that this system should connect to for its
+ configuration.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>external_ids:ovn-encap-type</code> specifies the
+ encapsulation type that a chassis should use to connect to
+ this node. Supported tunnel types for connecting
+ hypervisors are <code>geneve</code> and <code>stt</code>.
+ Gateways may use <code>geneve</code>, <code>vxlan</code>,
+ or <code>stt</code>.
+ </p>
+ </li>
+ <li>
+ <p>
+ <code>external_ids:ovn-encap-ip</code> specifies the IP
+ address that a chassis should use to connect to this node
+ using encapsulation type specified by
+ <code>external_ids:ovn-encap-type</code>.
+ </p>
+ </li>
+ </ul>
+ <p>
+ Currently, <code>ovn-controller</code> does not support changing
+ the chassis name, integration bridge, or OVN database mid-run.
+ If these values need to change, the daemon must be restarted.
+ This behavior should be changed.
+ </p>
+ </p>
+</manpage>
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "ovn-controller.h"
+
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "command-line.h"
+#include "compiler.h"
+#include "daemon.h"
+#include "dirs.h"
+#include "openvswitch/vconn.h"
+#include "openvswitch/vlog.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "poll-loop.h"
+#include "fatal-signal.h"
+#include "lib/vswitch-idl.h"
+#include "smap.h"
+#include "stream.h"
+#include "stream-ssl.h"
+#include "unixctl.h"
+#include "util.h"
+
+#include "ofctrl.h"
+#include "binding.h"
+#include "chassis.h"
+#include "physical.h"
+#include "pipeline.h"
+
+VLOG_DEFINE_THIS_MODULE(main);
+
+static unixctl_cb_func ovn_controller_exit;
+
+#define DEFAULT_BRIDGE_NAME "br-int"
+
+static void parse_options(int argc, char *argv[]);
+OVS_NO_RETURN static void usage(void);
+
+static char *ovs_remote;
+static char *ovnsb_remote;
+
+
+/* Blocks, running 'idl' and the poll loop, until 'idl' has connected to its
+ * database server at least once. */
+static void
+get_initial_snapshot(struct ovsdb_idl *idl)
+{
+    for (;;) {
+        ovsdb_idl_run(idl);
+        if (ovsdb_idl_has_ever_connected(idl)) {
+            break;
+        }
+
+        ovsdb_idl_wait(idl);
+        poll_block();
+    }
+}
+
+/* Returns the Bridge record named 'name' from the OVS database replica in
+ * 'ctx->ovs_idl', or NULL if no bridge has that name. */
+static const struct ovsrec_bridge *
+get_bridge(struct controller_ctx *ctx, const char *name)
+{
+    const struct ovsrec_bridge *candidate;
+
+    OVSREC_BRIDGE_FOR_EACH(candidate, ctx->ovs_idl) {
+        if (strcmp(candidate->name, name) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+/* Retrieve the OVN integration bridge from the "external-ids:ovn-bridge"
+ * key, the remote location from the "external-ids:ovn-remote" key, and
+ * the chassis name from the "external-ids:system-id" key in the
+ * Open_vSwitch table of the OVS database instance.
+ *
+ * Blocks (running the OVS IDL and the poll loop) until the integration
+ * bridge exists and both "ovn-remote" and "system-id" are set.  On return,
+ * 'ctx->br_int_name', 'ctx->chassis_id' and the global 'ovnsb_remote' hold
+ * newly allocated copies of the values, which the caller must free.  Exits
+ * the process if there is no Open_vSwitch row at all.
+ *
+ * xxx ovn-controller does not support changing any of these mid-run,
+ * xxx but that should be addressed later. */
+static void
+get_core_config(struct controller_ctx *ctx)
+{
+    const struct ovsrec_open_vswitch *cfg;
+
+    cfg = ovsrec_open_vswitch_first(ctx->ovs_idl);
+    if (!cfg) {
+        VLOG_ERR("No Open_vSwitch row defined.");
+        ovsdb_idl_destroy(ctx->ovs_idl);
+        exit(EXIT_FAILURE);
+    }
+
+    while (1) {
+        const struct ovsrec_bridge *br_int;
+        const char *remote, *system_id, *br_int_name;
+
+        ovsdb_idl_run(ctx->ovs_idl);
+
+        br_int_name = smap_get(&cfg->external_ids, "ovn-bridge");
+        if (!br_int_name) {
+            br_int_name = DEFAULT_BRIDGE_NAME;
+        }
+
+        br_int = get_bridge(ctx, br_int_name);
+        if (!br_int) {
+            VLOG_INFO("Integration bridge '%s' does not exist. Waiting...",
+                      br_int_name);
+            goto try_again;
+        }
+
+        remote = smap_get(&cfg->external_ids, "ovn-remote");
+        if (!remote) {
+            VLOG_INFO("OVN OVSDB remote not specified. Waiting...");
+            goto try_again;
+        }
+
+        system_id = smap_get(&cfg->external_ids, "system-id");
+        if (!system_id) {
+            VLOG_INFO("system-id not specified. Waiting...");
+            goto try_again;
+        }
+
+        /* Everything is available.  Copy the strings only now, so that
+         * iterations that have to wait and retry do not leak a copy of the
+         * bridge name each time around. */
+        ctx->br_int_name = xstrdup(br_int_name);
+        ovnsb_remote = xstrdup(remote);
+        ctx->chassis_id = xstrdup(system_id);
+        return;
+
+try_again:
+        ovsdb_idl_wait(ctx->ovs_idl);
+        poll_block();
+    }
+}
+
+/* ovn-controller entry point.
+ *
+ * Parses options, daemonizes, connects to the local OVS database, waits for
+ * the core configuration (integration bridge, OVN remote, system-id),
+ * connects to the OVN southbound database and then runs the main loop until
+ * an "exit" unixctl command is received or an unrecoverable condition is
+ * detected (integration bridge removed, or either database connection
+ * dead).
+ *
+ * Exits with EXIT_SUCCESS after a requested exit and EXIT_FAILURE after an
+ * unrecoverable condition. */
+int
+main(int argc, char *argv[])
+{
+    struct unixctl_server *unixctl;
+    struct controller_ctx ctx = { .chassis_id = NULL };
+    bool exiting = false;
+    int retval;
+
+    ovs_cmdl_proctitle_init(argc, argv);
+    set_program_name(argv[0]);
+    parse_options(argc, argv);
+    fatal_ignore_sigpipe();
+
+    daemonize_start();
+
+    retval = unixctl_server_create(NULL, &unixctl);
+    if (retval) {
+        exit(EXIT_FAILURE);
+    }
+    unixctl_command_register("exit", "", 0, 0, ovn_controller_exit, &exiting);
+
+    daemonize_complete();
+
+    ovsrec_init();
+    sbrec_init();
+
+    ofctrl_init();
+
+    /* Connect to OVS OVSDB instance.  We do not monitor all tables by
+     * default, so modules must register their interest explicitly. */
+    ctx.ovs_idl = ovsdb_idl_create(ovs_remote, &ovsrec_idl_class, false, true);
+
+    /* Register interest in "external_ids" column in "Open_vSwitch" table,
+     * since we'll need to get the OVN OVSDB remote. */
+    ovsdb_idl_add_table(ctx.ovs_idl, &ovsrec_table_open_vswitch);
+    ovsdb_idl_add_column(ctx.ovs_idl, &ovsrec_open_vswitch_col_external_ids);
+
+    chassis_init(&ctx);
+    binding_init(&ctx);
+    physical_init(&ctx);
+    pipeline_init();
+
+    get_initial_snapshot(ctx.ovs_idl);
+
+    get_core_config(&ctx);
+
+    ctx.ovnsb_idl = ovsdb_idl_create(ovnsb_remote, &sbrec_idl_class,
+                                     true, true);
+    get_initial_snapshot(ctx.ovnsb_idl);
+
+    /* From here on 'retval' is the process exit status. */
+    retval = EXIT_SUCCESS;
+    while (!exiting) {
+        ovsdb_idl_run(ctx.ovs_idl);
+        ovsdb_idl_run(ctx.ovnsb_idl);
+
+        /* xxx If run into any surprising changes, we exit.  We should
+         * xxx handle this more gracefully. */
+        ctx.br_int = get_bridge(&ctx, ctx.br_int_name);
+        if (!ctx.br_int) {
+            VLOG_ERR("Integration bridge '%s' disappeared",
+                     ctx.br_int_name);
+            retval = EXIT_FAILURE;
+            break;
+        }
+
+        /* The connection error gets its own variable: reusing the name
+         * 'retval' in an inner scope would shadow the exit status and the
+         * failure would be reported as a successful exit. */
+        if (!ovsdb_idl_is_alive(ctx.ovnsb_idl)) {
+            int error = ovsdb_idl_get_last_error(ctx.ovnsb_idl);
+            VLOG_ERR("%s: database connection failed (%s)",
+                     ovnsb_remote, ovs_retval_to_string(error));
+            retval = EXIT_FAILURE;
+            break;
+        }
+
+        if (!ovsdb_idl_is_alive(ctx.ovs_idl)) {
+            int error = ovsdb_idl_get_last_error(ctx.ovs_idl);
+            VLOG_ERR("%s: database connection failed (%s)",
+                     ovs_remote, ovs_retval_to_string(error));
+            retval = EXIT_FAILURE;
+            break;
+        }
+
+        ofctrl_clear_flows();
+
+        chassis_run(&ctx);
+        binding_run(&ctx);
+        pipeline_run(&ctx);
+        physical_run(&ctx);
+        ofctrl_run(&ctx);
+        unixctl_server_run(unixctl);
+
+        unixctl_server_wait(unixctl);
+        if (exiting) {
+            /* Do not block in poll_block() once exit was requested. */
+            poll_immediate_wake();
+        }
+
+        ovsdb_idl_wait(ctx.ovs_idl);
+        ovsdb_idl_wait(ctx.ovnsb_idl);
+        ofctrl_wait();
+        poll_block();
+    }
+
+    unixctl_server_destroy(unixctl);
+    pipeline_destroy(&ctx);
+    ofctrl_destroy();
+    binding_destroy(&ctx);
+    chassis_destroy(&ctx);
+
+    ovsdb_idl_destroy(ctx.ovs_idl);
+    ovsdb_idl_destroy(ctx.ovnsb_idl);
+
+    free(ctx.br_int_name);
+    free(ctx.chassis_id);
+    free(ovnsb_remote);
+    free(ovs_remote);
+
+    exit(retval);
+}
+
+/* Parses the command line in 'argc'/'argv'.
+ *
+ * Handles --help, --version, --peer-ca-cert and the option groups supplied
+ * by the VLOG, DAEMON and STREAM_SSL macros. After option processing, sets
+ * the global 'ovs_remote' (newly allocated) to the single positional
+ * argument if one was given, or to "unix:<rundir>/db.sock" if none was.
+ * More than one positional argument is a fatal error; an unrecognized
+ * option exits with EXIT_FAILURE. */
+static void
+parse_options(int argc, char *argv[])
+{
+ enum {
+ OPT_PEER_CA_CERT = UCHAR_MAX + 1,
+ VLOG_OPTION_ENUMS,
+ DAEMON_OPTION_ENUMS
+ };
+
+ static struct option long_options[] = {
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
+ VLOG_LONG_OPTIONS,
+ DAEMON_LONG_OPTIONS,
+ STREAM_SSL_LONG_OPTIONS,
+ {"peer-ca-cert", required_argument, NULL, OPT_PEER_CA_CERT},
+ {NULL, 0, NULL, 0}
+ };
+ char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, short_options, long_options, NULL);
+ if (c == -1) {
+ break;
+ }
+
+ switch (c) {
+ case 'h':
+ /* usage() does not return (it is declared OVS_NO_RETURN). */
+ usage();
+
+ case 'V':
+ ovs_print_version(OFP13_VERSION, OFP13_VERSION);
+ exit(EXIT_SUCCESS);
+
+ VLOG_OPTION_HANDLERS
+ DAEMON_OPTION_HANDLERS
+ STREAM_SSL_OPTION_HANDLERS
+
+ case OPT_PEER_CA_CERT:
+ stream_ssl_set_peer_ca_cert_file(optarg);
+ break;
+
+ case '?':
+ exit(EXIT_FAILURE);
+
+ default:
+ abort();
+ }
+ }
+ free(short_options);
+
+ /* Skip past the options; what remains are positional arguments. */
+ argc -= optind;
+ argv += optind;
+
+ if (argc == 0) {
+ ovs_remote = xasprintf("unix:%s/db.sock", ovs_rundir());
+ } else if (argc == 1) {
+ ovs_remote = xstrdup(argv[0]);
+ } else {
+ VLOG_FATAL("exactly zero or one non-option argument required; "
+ "use --help for usage");
+ }
+}
+
+/* Prints the command-line help text to stdout and exits successfully.
+ * Does not return. */
+static void
+usage(void)
+{
+    /* "usage:" takes a colon, matching the "%s: OVN controller" line above
+     * it and the usual help-text convention. */
+    printf("%s: OVN controller\n"
+           "usage: %s [OPTIONS] [OVS-DATABASE]\n"
+           "where OVS-DATABASE is a socket on which the OVS OVSDB server is listening.\n",
+           program_name, program_name);
+    stream_usage("OVS-DATABASE", true, false, false);
+    daemon_usage();
+    vlog_usage();
+    printf("\nOther options:\n"
+           " -h, --help display this help message\n"
+           " -V, --version display version information\n");
+    exit(EXIT_SUCCESS);
+}
+
+/* unixctl callback for the "exit" command. 'exiting_' is the aux pointer
+ * given at registration and points to the main loop's bool exit flag; this
+ * sets the flag and sends an empty reply on 'conn'. */
+static void
+ovn_controller_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
+ const char *argv[] OVS_UNUSED, void *exiting_)
+{
+ bool *exiting = exiting_;
+ *exiting = true;
+
+ unixctl_command_reply(conn, NULL);
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef OVN_CONTROLLER_H
+#define OVN_CONTROLLER_H 1
+
+#include "ovn/lib/ovn-sb-idl.h"
+
+/* State shared between the ovn-controller main loop and its modules. */
+struct controller_ctx {
+ char *chassis_id; /* ID for this chassis. */
+ char *br_int_name; /* Name of local integration bridge. */
+ struct ovsdb_idl *ovnsb_idl; /* IDL for the OVN southbound database. */
+ struct ovsdb_idl *ovs_idl; /* IDL for the local OVS database. */
+
+ /* Bridge record for 'br_int_name'. The main loop looks it up again on
+  * every iteration; it is null if the bridge could not be found. */
+ const struct ovsrec_bridge *br_int;
+};
+
+/* Returns the Chassis record in 'ovnsb_idl' whose name equals 'chassis_id',
+ * or NULL if there is no such record. */
+static inline const struct sbrec_chassis *
+get_chassis_by_name(struct ovsdb_idl *ovnsb_idl, char *chassis_id)
+{
+    const struct sbrec_chassis *candidate;
+
+    SBREC_CHASSIS_FOR_EACH(candidate, ovnsb_idl) {
+        if (strcmp(candidate->name, chassis_id) == 0) {
+            return candidate;
+        }
+    }
+    return NULL;
+}
+
+#endif /* ovn/ovn-controller.h */
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "physical.h"
+#include "match.h"
+#include "ofctrl.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+#include "ovn-controller.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "pipeline.h"
+#include "simap.h"
+#include "vswitch-idl.h"
+
+/* Registers with 'ctx->ovs_idl' the OVS database tables and columns that
+ * physical_run() reads: Bridge (ports), Port (name, interfaces,
+ * external_ids) and Interface (name, ofport, external_ids). */
+void
+physical_init(struct controller_ctx *ctx)
+{
+ ovsdb_idl_add_table(ctx->ovs_idl, &ovsrec_table_bridge);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_bridge_col_ports);
+
+ ovsdb_idl_add_table(ctx->ovs_idl, &ovsrec_table_port);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_port_col_name);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_port_col_interfaces);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_port_col_external_ids);
+
+ ovsdb_idl_add_table(ctx->ovs_idl, &ovsrec_table_interface);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_interface_col_name);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_interface_col_ofport);
+ ovsdb_idl_add_column(ctx->ovs_idl, &ovsrec_interface_col_external_ids);
+}
+
+/* Builds the OpenFlow flows that translate between physical and logical
+ * ports.
+ *
+ * First walks the ports of 'ctx->br_int' to build two maps to OpenFlow port
+ * numbers: one keyed on the "iface-id" of local interfaces, the other keyed
+ * on the "ovn-chassis-id" of ports that carry that key. Then, for each row in
+ * the southbound Binding table, adds flows to OpenFlow tables 0 and 64
+ * through ofctrl_add_flow(). */
+void
+physical_run(struct controller_ctx *ctx)
+{
+ struct simap lport_to_ofport = SIMAP_INITIALIZER(&lport_to_ofport);
+ struct simap chassis_to_ofport = SIMAP_INITIALIZER(&chassis_to_ofport);
+ for (int i = 0; i < ctx->br_int->n_ports; i++) {
+ const struct ovsrec_port *port_rec = ctx->br_int->ports[i];
+ /* Skip the port that has the same name as the integration bridge. */
+ if (!strcmp(port_rec->name, ctx->br_int_name)) {
+ continue;
+ }
+
+ /* Skip a port whose "ovn-chassis-id" names our own chassis. */
+ const char *chassis_id = smap_get(&port_rec->external_ids,
+ "ovn-chassis-id");
+ if (chassis_id && !strcmp(chassis_id, ctx->chassis_id)) {
+ continue;
+ }
+
+ for (int j = 0; j < port_rec->n_interfaces; j++) {
+ const struct ovsrec_interface *iface_rec = port_rec->interfaces[j];
+
+ /* Get OpenFlow port number. */
+ if (!iface_rec->n_ofport) {
+ continue;
+ }
+ int64_t ofport = iface_rec->ofport[0];
+ if (ofport < 1 || ofport > ofp_to_u16(OFPP_MAX)) {
+ continue;
+ }
+
+ /* Record as chassis or local logical port. */
+ if (chassis_id) {
+ /* Only the first interface with a usable ofport is recorded for
+ * a port that has a chassis ID. */
+ simap_put(&chassis_to_ofport, chassis_id, ofport);
+ break;
+ } else {
+ const char *iface_id = smap_get(&iface_rec->external_ids,
+ "iface-id");
+ if (iface_id) {
+ simap_put(&lport_to_ofport, iface_id, ofport);
+ }
+ }
+ }
+ }
+
+ struct ofpbuf ofpacts;
+ ofpbuf_init(&ofpacts, 0);
+
+ /* Set up flows in table 0 for physical-to-logical translation and in table
+ * 64 for logical-to-physical translation. */
+ const struct sbrec_binding *binding;
+ SBREC_BINDING_FOR_EACH (binding, ctx->ovnsb_idl) {
+ /* Find the OpenFlow port for the logical port, as 'ofport'. If it's
+ * on a remote chassis, this is the OpenFlow port for the tunnel to
+ * that chassis (and set 'local' to false). Otherwise, if it's on the
+ * chassis we're managing, this is the OpenFlow port for the vif itself
+ * (and set 'local' to true). When 'parent_port' is set for a binding,
+ * it implies a container sitting inside a VM reachable via a 'tag'.
+ */
+
+ int tag = 0;
+ ofp_port_t ofport;
+ if (binding->parent_port) {
+ ofport = u16_to_ofp(simap_get(&lport_to_ofport,
+ binding->parent_port));
+ if (ofport && binding->tag) {
+ tag = *binding->tag;
+ }
+ } else {
+ ofport = u16_to_ofp(simap_get(&lport_to_ofport,
+ binding->logical_port));
+ }
+
+ /* A zero 'ofport' means the lookup in 'lport_to_ofport' found nothing,
+ * so fall back to the port recorded for the binding's chassis. */
+ bool local = ofport != 0;
+ if (!local) {
+ if (!binding->chassis) {
+ continue;
+ }
+ ofport = u16_to_ofp(simap_get(&chassis_to_ofport,
+ binding->chassis->name));
+ if (!ofport) {
+ continue;
+ }
+ }
+
+ /* Translate the logical datapath into the form we use in
+ * MFF_METADATA. */
+ uint32_t ldp = ldp_to_integer(&binding->logical_datapath);
+ if (!ldp) {
+ continue;
+ }
+
+ struct match match;
+ if (local) {
+ /*
+ * Packets that arrive from a vif can belong to a VM or
+ * to a container located inside that VM. Packets that
+ * arrive from containers have a tag (vlan) associated with them.
+ *
+ * Table 0, Priority 150 and 100.
+ * ==============================
+ * Priority 150 is for traffic belonging to containers. For such
+ * traffic, match on the tags and then strip the tag.
+ * Priority 100 is for traffic belonging to VMs.
+ *
+ * For both types of traffic: set MFF_LOG_INPORT to the
+ * logical input port, MFF_METADATA to the logical datapath, and
+ * resubmit into the logical pipeline starting at table 16. */
+ match_init_catchall(&match);
+ ofpbuf_clear(&ofpacts);
+ match_set_in_port(&match, ofport);
+ if (tag) {
+ match_set_dl_vlan(&match, htons(tag));
+ }
+
+ /* Set MFF_METADATA. */
+ struct ofpact_set_field *sf = ofpact_put_SET_FIELD(&ofpacts);
+ sf->field = mf_from_id(MFF_METADATA);
+ sf->value.be64 = htonll(ldp);
+ sf->mask.be64 = OVS_BE64_MAX;
+
+ /* Set MFF_LOG_INPORT. */
+ sf = ofpact_put_SET_FIELD(&ofpacts);
+ sf->field = mf_from_id(MFF_LOG_INPORT);
+ sf->value.be32 = htonl(binding->tunnel_key);
+ sf->mask.be32 = OVS_BE32_MAX;
+
+ /* Strip vlans. */
+ if (tag) {
+ ofpact_put_STRIP_VLAN(&ofpacts);
+ }
+
+ /* Resubmit to first logical pipeline table. */
+ struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&ofpacts);
+ resubmit->in_port = OFPP_IN_PORT;
+ resubmit->table_id = 16;
+ ofctrl_add_flow(0, tag ? 150 : 100, &match, &ofpacts);
+
+ /* Table 0, Priority 50.
+ * =====================
+ *
+ * For packets that arrive from a remote node destined to this
+ * local vif: deliver directly to the vif. If the destination
+ * is a container sitting behind a vif, tag the packets. */
+ match_init_catchall(&match);
+ ofpbuf_clear(&ofpacts);
+ match_set_tun_id(&match, htonll(binding->tunnel_key));
+ if (tag) {
+ struct ofpact_vlan_vid *vlan_vid;
+ vlan_vid = ofpact_put_SET_VLAN_VID(&ofpacts);
+ vlan_vid->vlan_vid = tag;
+ vlan_vid->push_vlan_if_needed = true;
+ }
+ ofpact_put_OUTPUT(&ofpacts)->port = ofport;
+ ofctrl_add_flow(0, 50, &match, &ofpacts);
+ }
+
+ /* Table 64, Priority 100.
+ * =======================
+ *
+ * Drop packets whose logical inport and outport are the same. */
+ match_init_catchall(&match);
+ ofpbuf_clear(&ofpacts);
+ match_set_reg(&match, MFF_LOG_INPORT - MFF_REG0, binding->tunnel_key);
+ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, binding->tunnel_key);
+ ofctrl_add_flow(64, 100, &match, &ofpacts);
+
+ /* Table 64, Priority 50.
+ * ======================
+ *
+ * For packets to remote machines, send them over a tunnel to the
+ * remote chassis.
+ *
+ * For packets to local vifs, deliver them directly. */
+ match_init_catchall(&match);
+ ofpbuf_clear(&ofpacts);
+ match_set_reg(&match, MFF_LOG_OUTPORT - MFF_REG0, binding->tunnel_key);
+ if (!local) {
+ /* Set MFF_TUN_ID. */
+ struct ofpact_set_field *sf = ofpact_put_SET_FIELD(&ofpacts);
+ sf->field = mf_from_id(MFF_TUN_ID);
+ sf->value.be64 = htonll(binding->tunnel_key);
+ sf->mask.be64 = OVS_BE64_MAX;
+ }
+ if (tag) {
+ /* For containers sitting behind a local vif, tag the packets
+ * before delivering them. Since there is a possibility of
+ * packets needing to hair-pin back into the same vif from
+ * which it came, make the in_port as zero. */
+ struct ofpact_vlan_vid *vlan_vid;
+ vlan_vid = ofpact_put_SET_VLAN_VID(&ofpacts);
+ vlan_vid->vlan_vid = tag;
+ vlan_vid->push_vlan_if_needed = true;
+
+ struct ofpact_set_field *sf = ofpact_put_SET_FIELD(&ofpacts);
+ sf->field = mf_from_id(MFF_IN_PORT);
+ sf->value.be16 = 0;
+ sf->mask.be16 = OVS_BE16_MAX;
+ }
+ ofpact_put_OUTPUT(&ofpacts)->port = ofport;
+ ofctrl_add_flow(64, 50, &match, &ofpacts);
+ }
+
+ /* Release the scratch action buffer and both lookup maps. */
+ ofpbuf_uninit(&ofpacts);
+ simap_destroy(&lport_to_ofport);
+ simap_destroy(&chassis_to_ofport);
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_PHYSICAL_H
+#define OVN_PHYSICAL_H 1
+
+/* Logical/Physical Translation
+ * ============================
+ *
+ * This module implements physical-to-logical and logical-to-physical
+ * translation as separate OpenFlow tables that run before and after,
+ * respectively, the logical pipeline OpenFlow tables.
+ */
+
+struct controller_ctx;
+
+/* Registers the Open vSwitch database tables and columns that this module
+ * asks the IDL to replicate. */
+void physical_init(struct controller_ctx *);
+/* Adds the physical-to-logical (table 0) and logical-to-physical (table 64)
+ * OpenFlow flows for the current Binding table contents. */
+void physical_run(struct controller_ctx *);
+
+#endif /* ovn/physical.h */
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "pipeline.h"
+#include "dynamic-string.h"
+#include "ofctrl.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+#include "openvswitch/vlog.h"
+#include "ovn/controller/ovn-controller.h"
+#include "ovn/lib/actions.h"
+#include "ovn/lib/expr.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "simap.h"
+
+VLOG_DEFINE_THIS_MODULE(pipeline);
+\f
+/* Symbol table. */
+
+/* Contains "struct expr_symbol"s for fields supported by OVN pipeline. */
+static struct shash symtab;
+
+/* Populates 'symtab' with the fields, subfields, and predicates that may
+ * appear in OVN match expressions and actions.
+ *
+ * Every predicate and subfield below is added after the symbols that its
+ * expansion or prerequisite names. NOTE(review): presumably the
+ * expr_symtab_add_*() functions require that; confirm before reordering. */
+static void
+symtab_init(void)
+{
+ shash_init(&symtab);
+
+ /* Reserve a pair of registers for the logical inport and outport. A full
+ * 32-bit register each is bigger than we need, but the expression code
+ * doesn't yet support string fields that occupy less than a full OXM. */
+ expr_symtab_add_string(&symtab, "inport", MFF_LOG_INPORT, NULL);
+ expr_symtab_add_string(&symtab, "outport", MFF_LOG_OUTPORT, NULL);
+
+ /* Registers. We omit the registers that would otherwise overlap the
+ * reserved fields. */
+ for (enum mf_field_id id = MFF_REG0; id < MFF_REG0 + FLOW_N_REGS; id++) {
+ if (id != MFF_LOG_INPORT && id != MFF_LOG_OUTPORT) {
+ char name[8];
+
+ snprintf(name, sizeof name, "reg%d", id - MFF_REG0);
+ expr_symtab_add_field(&symtab, name, id, NULL, false);
+ }
+ }
+
+ /* Data fields. */
+ expr_symtab_add_field(&symtab, "eth.src", MFF_ETH_SRC, NULL, false);
+ expr_symtab_add_field(&symtab, "eth.dst", MFF_ETH_DST, NULL, false);
+ expr_symtab_add_field(&symtab, "eth.type", MFF_ETH_TYPE, NULL, true);
+
+ /* VLAN. */
+ expr_symtab_add_field(&symtab, "vlan.tci", MFF_VLAN_TCI, NULL, false);
+ expr_symtab_add_predicate(&symtab, "vlan.present", "vlan.tci[12]");
+ expr_symtab_add_subfield(&symtab, "vlan.pcp", "vlan.present",
+ "vlan.tci[13..15]");
+ expr_symtab_add_subfield(&symtab, "vlan.vid", "vlan.present",
+ "vlan.tci[0..11]");
+
+ /* IP (fields common to IPv4 and IPv6). */
+ expr_symtab_add_predicate(&symtab, "ip4", "eth.type == 0x800");
+ expr_symtab_add_predicate(&symtab, "ip6", "eth.type == 0x86dd");
+ expr_symtab_add_predicate(&symtab, "ip", "ip4 || ip6");
+ expr_symtab_add_field(&symtab, "ip.proto", MFF_IP_PROTO, "ip", true);
+ expr_symtab_add_field(&symtab, "ip.dscp", MFF_IP_DSCP, "ip", false);
+ expr_symtab_add_field(&symtab, "ip.ecn", MFF_IP_ECN, "ip", false);
+ expr_symtab_add_field(&symtab, "ip.ttl", MFF_IP_TTL, "ip", false);
+
+ /* IPv4 and ICMPv4. */
+ expr_symtab_add_field(&symtab, "ip4.src", MFF_IPV4_SRC, "ip4", false);
+ expr_symtab_add_field(&symtab, "ip4.dst", MFF_IPV4_DST, "ip4", false);
+
+ expr_symtab_add_predicate(&symtab, "icmp4", "ip4 && ip.proto == 1");
+ expr_symtab_add_field(&symtab, "icmp4.type", MFF_ICMPV4_TYPE, "icmp4",
+ false);
+ expr_symtab_add_field(&symtab, "icmp4.code", MFF_ICMPV4_CODE, "icmp4",
+ false);
+
+ /* IPv6 and ICMPv6. */
+ expr_symtab_add_field(&symtab, "ip6.src", MFF_IPV6_SRC, "ip6", false);
+ expr_symtab_add_field(&symtab, "ip6.dst", MFF_IPV6_DST, "ip6", false);
+ expr_symtab_add_field(&symtab, "ip6.label", MFF_IPV6_LABEL, "ip6", false);
+
+ expr_symtab_add_predicate(&symtab, "icmp6", "ip6 && ip.proto == 58");
+ expr_symtab_add_field(&symtab, "icmp6.type", MFF_ICMPV6_TYPE, "icmp6",
+ true);
+ expr_symtab_add_field(&symtab, "icmp6.code", MFF_ICMPV6_CODE, "icmp6",
+ true);
+
+ expr_symtab_add_predicate(&symtab, "icmp", "icmp4 || icmp6");
+
+ /* IP fragments. */
+ expr_symtab_add_field(&symtab, "ip.frag", MFF_IP_FRAG, "ip", false);
+ expr_symtab_add_predicate(&symtab, "ip.is_frag", "ip.frag[0]");
+ expr_symtab_add_predicate(&symtab, "ip.later_frag", "ip.frag[1]");
+ expr_symtab_add_predicate(&symtab, "ip.first_frag",
+ "ip.is_frag && !ip.later_frag");
+
+ /* ARP. */
+ expr_symtab_add_predicate(&symtab, "arp", "eth.type == 0x806");
+ expr_symtab_add_field(&symtab, "arp.op", MFF_ARP_OP, "arp", false);
+ expr_symtab_add_field(&symtab, "arp.spa", MFF_ARP_SPA, "arp", false);
+ expr_symtab_add_field(&symtab, "arp.sha", MFF_ARP_SHA, "arp", false);
+ expr_symtab_add_field(&symtab, "arp.tpa", MFF_ARP_TPA, "arp", false);
+ expr_symtab_add_field(&symtab, "arp.tha", MFF_ARP_THA, "arp", false);
+
+ /* IPv6 neighbor discovery (ICMPv6 types 135 and 136). */
+ expr_symtab_add_predicate(&symtab, "nd",
+ "icmp6.type == {135, 136} && icmp6.code == 0");
+ expr_symtab_add_field(&symtab, "nd.target", MFF_ND_TARGET, "nd", false);
+ expr_symtab_add_field(&symtab, "nd.sll", MFF_ND_SLL,
+ "nd && icmp6.type == 135", false);
+ expr_symtab_add_field(&symtab, "nd.tll", MFF_ND_TLL,
+ "nd && icmp6.type == 136", false);
+
+ /* TCP. */
+ expr_symtab_add_predicate(&symtab, "tcp", "ip.proto == 6");
+ expr_symtab_add_field(&symtab, "tcp.src", MFF_TCP_SRC, "tcp", false);
+ expr_symtab_add_field(&symtab, "tcp.dst", MFF_TCP_DST, "tcp", false);
+ expr_symtab_add_field(&symtab, "tcp.flags", MFF_TCP_FLAGS, "tcp", false);
+
+ /* UDP. */
+ expr_symtab_add_predicate(&symtab, "udp", "ip.proto == 17");
+ expr_symtab_add_field(&symtab, "udp.src", MFF_UDP_SRC, "udp", false);
+ expr_symtab_add_field(&symtab, "udp.dst", MFF_UDP_DST, "udp", false);
+
+ /* SCTP. */
+ expr_symtab_add_predicate(&symtab, "sctp", "ip.proto == 132");
+ expr_symtab_add_field(&symtab, "sctp.src", MFF_SCTP_SRC, "sctp", false);
+ expr_symtab_add_field(&symtab, "sctp.dst", MFF_SCTP_DST, "sctp", false);
+}
+\f
+/* Logical datapaths and logical port numbers. */
+
+/* A logical datapath.
+ *
+ * 'uuid' is the UUID that represents the logical datapath in the OVN_SB
+ * database.
+ *
+ * 'integer' represents the logical datapath as an integer value that is unique
+ * only within the local hypervisor. Because of its size, this value is more
+ * practical for use in an OpenFlow flow table than a UUID.
+ *
+ * 'ports' maps 'logical_port' names to 'tunnel_key' values in the OVN_SB
+ * Binding table within the logical datapath. */
+struct logical_datapath {
+ struct hmap_node hmap_node; /* In 'logical_datapaths', hashed on 'uuid'. */
+ struct uuid uuid; /* The logical_datapath's UUID. */
+ uint32_t integer; /* Locally unique among logical datapaths; never 0. */
+ struct simap ports; /* Logical port name to tunnel key. */
+};
+
+/* Contains "struct logical_datapath"s, each inserted under the uuid_hash() of
+ * its 'uuid'. */
+static struct hmap logical_datapaths = HMAP_INITIALIZER(&logical_datapaths);
+
+/* Searches 'logical_datapaths' for the entry whose UUID equals 'uuid'.
+ * Returns that entry, or NULL when there is none. */
+static struct logical_datapath *
+ldp_lookup(const struct uuid *uuid)
+{
+    const size_t hash = uuid_hash(uuid);
+    struct logical_datapath *candidate;
+
+    HMAP_FOR_EACH_IN_BUCKET (candidate, hmap_node, hash,
+                             &logical_datapaths) {
+        if (!uuid_equals(&candidate->uuid, uuid)) {
+            continue;
+        }
+        return candidate;
+    }
+    return NULL;
+}
+
+/* Maps the logical datapath UUID 'logical_datapath' to its locally assigned
+ * integer. Returns 0 when no logical datapath with that UUID is known. */
+uint32_t
+ldp_to_integer(const struct uuid *logical_datapath)
+{
+    const struct logical_datapath *found = ldp_lookup(logical_datapath);
+
+    if (!found) {
+        return 0;
+    }
+    return found->integer;
+}
+
+/* Allocates a logical_datapath for 'uuid', gives it the next unused integer
+ * and an empty port map, adds it to 'logical_datapaths', and returns it. */
+static struct logical_datapath *
+ldp_create(const struct uuid *uuid)
+{
+    static uint32_t next_id = 1;
+
+    /* Wrap-around of the integer space is not handled; 0 would mean that
+     * the counter overflowed. */
+    ovs_assert(next_id);
+
+    struct logical_datapath *fresh = xmalloc(sizeof *fresh);
+
+    fresh->uuid = *uuid;
+    fresh->integer = next_id++;
+    simap_init(&fresh->ports);
+    hmap_insert(&logical_datapaths, &fresh->hmap_node, uuid_hash(uuid));
+
+    return fresh;
+}
+
+/* Unlinks 'ldp' from 'logical_datapaths' and frees it along with its port
+ * map. */
+static void
+ldp_free(struct logical_datapath *ldp)
+{
+    hmap_remove(&logical_datapaths, &ldp->hmap_node);
+    simap_destroy(&ldp->ports);
+    free(ldp);
+}
+
+/* Rebuilds the table of logical datapaths from the Binding table in the
+ * southbound database: every datapath ends up with exactly the ports that
+ * are currently bound to it, and datapaths without any port are dropped. */
+static void
+ldp_run(struct controller_ctx *ctx)
+{
+    struct logical_datapath *dp, *next_dp;
+    const struct sbrec_binding *row;
+
+    /* Forget the port mappings collected on the previous run. */
+    HMAP_FOR_EACH (dp, hmap_node, &logical_datapaths) {
+        simap_clear(&dp->ports);
+    }
+
+    /* Record each binding under its datapath, creating the datapath on
+     * first use. */
+    SBREC_BINDING_FOR_EACH (row, ctx->ovnsb_idl) {
+        struct logical_datapath *owner = ldp_lookup(&row->logical_datapath);
+
+        if (owner == NULL) {
+            owner = ldp_create(&row->logical_datapath);
+        }
+        simap_put(&owner->ports, row->logical_port, row->tunnel_key);
+    }
+
+    /* Discard datapaths that no binding refers to any longer. */
+    HMAP_FOR_EACH_SAFE (dp, next_dp, hmap_node, &logical_datapaths) {
+        if (!simap_is_empty(&dp->ports)) {
+            continue;
+        }
+        ldp_free(dp);
+    }
+}
+
+/* Frees every logical datapath, leaving 'logical_datapaths' empty. */
+static void
+ldp_destroy(void)
+{
+    struct logical_datapath *victim, *following;
+
+    HMAP_FOR_EACH_SAFE (victim, following, hmap_node, &logical_datapaths) {
+        /* Same steps as ldp_free(). */
+        simap_destroy(&victim->ports);
+        hmap_remove(&logical_datapaths, &victim->hmap_node);
+        free(victim);
+    }
+}
+\f
+/* Initializes this module by building the symbol table that pipeline_run()
+ * passes to the action and expression parsers. */
+void
+pipeline_init(void)
+{
+ symtab_init();
+}
+
+/* Translates logical flows in the Pipeline table in the OVN_SB database
+ * into OpenFlow flows.
+ *
+ * We put the Pipeline flows into OpenFlow tables 16 through 47 (inclusive).
+ *
+ * For each Pipeline row whose logical datapath has at least one bound port,
+ * this parses the row's actions and match, and adds one OpenFlow flow per
+ * resulting match via ofctrl_add_flow(). Rows whose actions or match fail
+ * to parse are logged (rate-limited) and skipped. */
+void
+pipeline_run(struct controller_ctx *ctx)
+{
+    /* Offset added to the conjunction IDs produced for each row, so that
+     * IDs from different rows do not collide. */
+    uint32_t conj_id_ofs = 1;
+
+    ldp_run(ctx);
+
+    const struct sbrec_pipeline *pipeline;
+    SBREC_PIPELINE_FOR_EACH (pipeline, ctx->ovnsb_idl) {
+        /* Find the "struct logical_datapath" associated with this Pipeline
+         * row.  If there's no such struct, that must be because no logical
+         * ports are bound to that logical datapath, so there's no point in
+         * maintaining any flows for it anyway, so skip it. */
+        const struct logical_datapath *ldp;
+        ldp = ldp_lookup(&pipeline->logical_datapath);
+        if (!ldp) {
+            continue;
+        }
+
+        /* Translate OVN actions into OpenFlow actions. */
+        uint64_t ofpacts_stub[64 / 8];
+        struct ofpbuf ofpacts;
+        struct expr *prereqs;
+        uint8_t next_table_id;
+        char *error;
+
+        ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
+        /* The last logical table (31) has no successor, so "next" is
+         * disabled there by passing 0. */
+        next_table_id = pipeline->table_id < 31 ? pipeline->table_id + 17 : 0;
+        error = actions_parse_string(pipeline->actions, &symtab, &ldp->ports,
+                                     next_table_id, &ofpacts, &prereqs);
+        if (error) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "error parsing actions \"%s\": %s",
+                         pipeline->actions, error);
+            /* The parser may have grown 'ofpacts' beyond its stack stub
+             * before failing, so it must be released here too. */
+            ofpbuf_uninit(&ofpacts);
+            free(error);
+            continue;
+        }
+
+        /* Translate OVN match into table of OpenFlow matches. */
+        struct hmap matches;
+        struct expr *expr;
+
+        expr = expr_parse_string(pipeline->match, &symtab, &error);
+        if (!error) {
+            if (prereqs) {
+                /* 'prereqs' is now part of 'expr'. */
+                expr = expr_combine(EXPR_T_AND, expr, prereqs);
+                prereqs = NULL;
+            }
+            expr = expr_annotate(expr, &symtab, &error);
+        }
+        if (error) {
+            static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+            VLOG_WARN_RL(&rl, "error parsing match \"%s\": %s",
+                         pipeline->match, error);
+            expr_destroy(prereqs);
+            ofpbuf_uninit(&ofpacts);
+            free(error);
+            continue;
+        }
+
+        expr = expr_simplify(expr);
+        expr = expr_normalize(expr);
+        uint32_t n_conjs = expr_to_matches(expr, &ldp->ports, &matches);
+        expr_destroy(expr);
+
+        /* Prepare the OpenFlow matches for adding to the flow table. */
+        struct expr_match *m;
+        HMAP_FOR_EACH (m, hmap_node, &matches) {
+            match_set_metadata(&m->match, htonll(ldp->integer));
+            if (m->match.wc.masks.conj_id) {
+                m->match.flow.conj_id += conj_id_ofs;
+            }
+            if (!m->n) {
+                ofctrl_add_flow(pipeline->table_id + 16, pipeline->priority,
+                                &m->match, &ofpacts);
+            } else {
+                /* This match is one clause of a conjunction: its actions
+                 * are "conjunction" actions rather than the row's own. */
+                uint64_t conj_stubs[64 / 8];
+                struct ofpbuf conj;
+
+                ofpbuf_use_stub(&conj, conj_stubs, sizeof conj_stubs);
+                for (int i = 0; i < m->n; i++) {
+                    const struct cls_conjunction *src = &m->conjunctions[i];
+                    struct ofpact_conjunction *dst;
+
+                    dst = ofpact_put_CONJUNCTION(&conj);
+                    dst->id = src->id + conj_id_ofs;
+                    dst->clause = src->clause;
+                    dst->n_clauses = src->n_clauses;
+                }
+                ofctrl_add_flow(pipeline->table_id + 16, pipeline->priority,
+                                &m->match, &conj);
+                ofpbuf_uninit(&conj);
+            }
+        }
+
+        /* Clean up. */
+        expr_matches_destroy(&matches);
+        ofpbuf_uninit(&ofpacts);
+        conj_id_ofs += n_conjs;
+    }
+}
+
+/* Releases the module's global state: the symbol table built by
+ * pipeline_init() and every logical datapath created by pipeline_run().
+ * 'ctx' is unused. */
+void
+pipeline_destroy(struct controller_ctx *ctx OVS_UNUSED)
+{
+ expr_symtab_destroy(&symtab);
+ ldp_destroy();
+}
--- /dev/null
+/* Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef OVN_PIPELINE_H
+#define OVN_PIPELINE_H 1
+
+/* Pipeline table translation to OpenFlow
+ * ======================================
+ *
+ * The Pipeline table obtained from the OVN_Southbound database works in terms
+ * of logical entities, that is, logical flows among logical datapaths and
+ * logical ports. This code translates these logical flows into OpenFlow flows
+ * that, again, work in terms of logical entities implemented through OpenFlow
+ * extensions (e.g. registers represent the logical input and output ports).
+ *
+ * Physical-to-logical and logical-to-physical translation are implemented in
+ * physical.[ch] as separate OpenFlow tables that run before and after,
+ * respectively, the logical pipeline OpenFlow tables.
+ */
+
+#include <stdint.h>
+
+struct controller_ctx;
+struct uuid;
+
+/* Logical ports.
+ *
+ * These two registers are reserved for the logical pipeline's input and
+ * output port numbers. */
+#define MFF_LOG_INPORT MFF_REG6 /* Logical input port. */
+#define MFF_LOG_OUTPORT MFF_REG7 /* Logical output port. */
+
+/* Module lifecycle: pipeline_init() once at startup, pipeline_run() to
+ * translate the current Pipeline table into OpenFlow flows, and
+ * pipeline_destroy() to release the module's state. */
+void pipeline_init(void);
+void pipeline_run(struct controller_ctx *);
+void pipeline_destroy(struct controller_ctx *);
+
+/* Returns the locally assigned integer for 'logical_datapath', or 0 if that
+ * logical datapath is not known. */
+uint32_t ldp_to_integer(const struct uuid *logical_datapath);
+
+#endif /* ovn/pipeline.h */
--- /dev/null
+/ovn-nb-idl.c
+/ovn-nb-idl.h
+/ovn-nb-idl.ovsidl
+/ovn-sb-idl.c
+/ovn-sb-idl.h
+/ovn-sb-idl.ovsidl
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "actions.h"
+#include <stdarg.h>
+#include <stdbool.h>
+#include "compiler.h"
+#include "dynamic-string.h"
+#include "expr.h"
+#include "lex.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+
+/* Context maintained during actions_parse().
+ *
+ * actions_parse() fills in the "Input" members from its arguments, the parse
+ * helpers update "State" and "Output" as they consume tokens. */
+struct action_context {
+ /* Input. */
+ struct lexer *lexer; /* Lexer for pulling more tokens. */
+ const struct shash *symtab; /* Symbol table. */
+ uint8_t next_table_id; /* OpenFlow table for 'next' to resubmit; 0 disables 'next'. */
+ const struct simap *ports; /* Map from port name to number. */
+
+ /* State. */
+ char *error; /* Error, if any, otherwise NULL. */
+
+ /* Output. */
+ struct ofpbuf *ofpacts; /* Actions. */
+ struct expr *prereqs; /* Prerequisites to apply to match. */
+};
+
+/* Shared first step of the error reporters. Returns true if the caller
+ * should not record its own message: either because 'ctx' already holds an
+ * error, or because the current token is a lexer error, in which case the
+ * lexer's message is stored in 'ctx->error' instead. Returns false if the
+ * caller should go ahead and record its message. */
+static bool
+action_error_handle_common(struct action_context *ctx)
+{
+    /* An earlier error wins; a cascade of follow-on errors is unlikely to
+     * be useful. */
+    if (ctx->error) {
+        return true;
+    }
+
+    if (ctx->lexer->token.type != LEX_T_ERROR) {
+        return false;
+    }
+
+    /* Nothing at the action level accepts an error token, so any parse
+     * error reported now would be meaningless. Report the lexical error
+     * in its place. */
+    ctx->error = xstrdup(ctx->lexer->token.s);
+    return true;
+}
+
+/* Records a printf-style formatted error in 'ctx', unless an error is
+ * already recorded or the lexer reported one (see
+ * action_error_handle_common()). */
+static void OVS_PRINTF_FORMAT(2, 3)
+action_error(struct action_context *ctx, const char *message, ...)
+{
+    if (!action_error_handle_common(ctx)) {
+        va_list ap;
+
+        va_start(ap, message);
+        ctx->error = xvasprintf(message, ap);
+        va_end(ap);
+    }
+}
+
+/* Records in 'ctx' a message of the form "Syntax error [at ...] [MESSAGE].",
+ * where the location is taken from the lexer's current token and MESSAGE is
+ * the optional printf-style 'message' (which may be NULL). Does nothing if
+ * an error is already recorded or the lexer reported one. */
+static void OVS_PRINTF_FORMAT(2, 3)
+action_syntax_error(struct action_context *ctx, const char *message, ...)
+{
+    if (action_error_handle_common(ctx)) {
+        return;
+    }
+
+    const struct lexer *lex = ctx->lexer;
+    struct ds text;
+
+    ds_init(&text);
+    ds_put_cstr(&text, "Syntax error");
+
+    /* Say where the problem is, when that is known. */
+    if (lex->token.type == LEX_T_END) {
+        ds_put_cstr(&text, " at end of input");
+    } else if (lex->start) {
+        int token_len = (int) (lex->input - lex->start);
+
+        ds_put_format(&text, " at `%.*s'", token_len, lex->start);
+    }
+
+    /* Append the caller's explanation, if there is one. */
+    if (message) {
+        va_list ap;
+
+        ds_put_char(&text, ' ');
+        va_start(ap, message);
+        ds_put_format_valist(&text, message, ap);
+        va_end(ap);
+    }
+    ds_put_char(&text, '.');
+
+    ctx->error = ds_steal_cstr(&text);
+}
+
+/* Parses a field assignment at the lexer's current position, appending the
+ * resulting actions to 'ctx->ofpacts' and AND-ing the assignment's
+ * prerequisites into 'ctx->prereqs'. On failure, records the error in
+ * 'ctx'. */
+static void
+parse_set_action(struct action_context *ctx)
+{
+    struct expr *assignment_prereqs;
+    char *problem = expr_parse_assignment(ctx->lexer, ctx->symtab, ctx->ports,
+                                          ctx->ofpacts, &assignment_prereqs);
+
+    if (!problem) {
+        ctx->prereqs = expr_combine(EXPR_T_AND, ctx->prereqs,
+                                    assignment_prereqs);
+    } else {
+        action_error(ctx, "%s", problem);
+        free(problem);
+    }
+}
+
+/* Appends to 'ctx->ofpacts' a resubmit action to OpenFlow table 'table_id'
+ * that uses OFPP_IN_PORT as its input port. */
+static void
+emit_resubmit(struct action_context *ctx, uint8_t table_id)
+{
+    struct ofpact_resubmit *act;
+
+    act = ofpact_put_RESUBMIT(ctx->ofpacts);
+    act->table_id = table_id;
+    act->in_port = OFPP_IN_PORT;
+}
+
+/* Parses a complete action list from 'ctx->lexer' until end of input or the
+ * first error. The list is either the lone action "drop;" or a sequence of
+ * ';'-terminated field assignments, "next", and "output" actions. Errors
+ * are recorded in 'ctx->error'. */
+static void
+parse_actions(struct action_context *ctx)
+{
+    struct lexer *lex = ctx->lexer;
+
+    /* "drop;" by itself is a valid (empty) set of actions, but it can't be
+     * combined with other actions because that doesn't make sense. */
+    if (lex->token.type == LEX_T_ID
+        && !strcmp(lex->token.s, "drop")
+        && lexer_lookahead(lex) == LEX_T_SEMICOLON) {
+        lexer_get(lex);     /* Consume "drop". */
+        lexer_get(lex);     /* Consume ";". */
+        if (lex->token.type != LEX_T_END) {
+            action_syntax_error(ctx, "expecting end of input");
+        }
+        return;
+    }
+
+    for (;;) {
+        if (lex->token.type == LEX_T_END) {
+            return;
+        }
+        if (lex->token.type != LEX_T_ID) {
+            /* Every action starts with an identifier. */
+            action_syntax_error(ctx, NULL);
+            return;
+        }
+
+        enum lex_type after = lexer_lookahead(lex);
+        if (after == LEX_T_EQUALS || after == LEX_T_LSQUARE) {
+            /* "field = ..." or "field[...] = ...". */
+            parse_set_action(ctx);
+        } else if (lexer_match_id(lex, "next")) {
+            if (!ctx->next_table_id) {
+                action_error(ctx, "\"next\" action not allowed here.");
+            } else {
+                emit_resubmit(ctx, ctx->next_table_id);
+            }
+        } else if (lexer_match_id(lex, "output")) {
+            /* Table 64 does logical-to-physical translation. */
+            emit_resubmit(ctx, 64);
+        } else {
+            action_syntax_error(ctx, "expecting action");
+        }
+
+        /* Each action ends in ';'. If an error is already recorded, this
+         * second report is suppressed. */
+        if (!lexer_match(lex, LEX_T_SEMICOLON)) {
+            action_syntax_error(ctx, "expecting ';'");
+        }
+        if (ctx->error) {
+            return;
+        }
+    }
+}
+
+/* Parses OVN actions from 'lexer', in the format described for the "actions"
+ * column in the Pipeline table in ovn-sb(5), and appends the parsed actions
+ * to 'ofpacts' as "struct ofpact"s.
+ *
+ * 'symtab' is the table of "struct expr_symbol"s to support (as one would
+ * provide to expr_parse()).
+ *
+ * 'ports' must be a map from strings (presumably names of ports) to integers
+ * (as one would provide to expr_to_matches()). Strings used in the actions
+ * that are not in 'ports' are translated to zero.
+ *
+ * 'next_table_id' is the OpenFlow table to which the "next" action
+ * resubmits; pass 0 to disable "next".
+ *
+ * Some actions add extra requirements (prerequisites) to the flow's match.
+ * If so, '*prereqsp' receives them; otherwise '*prereqsp' is set to NULL.
+ * The caller owns '*prereqsp' and must eventually free it.
+ *
+ * Returns NULL on success, otherwise a malloc()'d error message that the
+ * caller must free. On failure, 'ofpacts' is restored to its original size
+ * and '*prereqsp' is set to NULL, but some tokens may have been consumed from
+ * 'lexer'.
+ */
+char * OVS_WARN_UNUSED_RESULT
+actions_parse(struct lexer *lexer, const struct shash *symtab,
+              const struct simap *ports, uint8_t next_table_id,
+              struct ofpbuf *ofpacts, struct expr **prereqsp)
+{
+    /* Remembered so that a failed parse can discard whatever it appended. */
+    const size_t rollback_size = ofpacts->size;
+
+    struct action_context ctx = {
+        .lexer = lexer,
+        .symtab = symtab,
+        .next_table_id = next_table_id,
+        .ports = ports,
+        .error = NULL,
+        .ofpacts = ofpacts,
+        .prereqs = NULL,
+    };
+
+    parse_actions(&ctx);
+
+    if (ctx.error) {
+        ofpacts->size = rollback_size;
+        expr_destroy(ctx.prereqs);
+        ctx.prereqs = NULL;
+    }
+    *prereqsp = ctx.prereqs;
+    return ctx.error;
+}
+
+/* Convenience wrapper around actions_parse() that reads the actions from the
+ * string 's' rather than from a caller-supplied lexer. The return value and
+ * the treatment of 'ofpacts' and '*prereqsp' are those of actions_parse(). */
+char * OVS_WARN_UNUSED_RESULT
+actions_parse_string(const char *s, const struct shash *symtab,
+                     const struct simap *ports, uint8_t next_table_id,
+                     struct ofpbuf *ofpacts, struct expr **prereqsp)
+{
+    struct lexer lex;
+
+    lexer_init(&lex, s);
+    lexer_get(&lex);            /* Load the first token. */
+
+    char *problem = actions_parse(&lex, symtab, ports, next_table_id,
+                                  ofpacts, prereqsp);
+
+    lexer_destroy(&lex);
+    return problem;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_ACTIONS_H
+#define OVN_ACTIONS_H 1
+
+#include <stdint.h>
+#include "compiler.h"
+
+struct expr;
+struct lexer;
+struct ofpbuf;
+struct shash;
+struct simap;
+
+/* Parses OVN actions from a lexer, appending the result to 'ofpacts' and
+ * storing any match prerequisites in '*prereqsp'. Returns NULL on success,
+ * otherwise a malloc()'d error message that the caller must free. */
+char *actions_parse(struct lexer *, const struct shash *symtab,
+                    const struct simap *ports, uint8_t next_table_id,
+                    struct ofpbuf *ofpacts, struct expr **prereqsp)
+    OVS_WARN_UNUSED_RESULT;
+/* Like actions_parse(), but the actions are taken from the string 's'. */
+char *actions_parse_string(const char *s, const struct shash *symtab,
+                           const struct simap *ports, uint8_t next_table_id,
+                           struct ofpbuf *ofpacts, struct expr **prereqsp)
+    OVS_WARN_UNUSED_RESULT;
+
+#endif /* ovn/actions.h */
--- /dev/null
+# libovn: libtool library built from the OVN sources listed below and linked
+# with the version script ovn/lib/libovn.sym.
+lib_LTLIBRARIES += ovn/lib/libovn.la
+ovn_lib_libovn_la_LDFLAGS = \
+ -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) \
+ -Wl,--version-script=$(top_builddir)/ovn/lib/libovn.sym \
+ $(AM_LDFLAGS)
+ovn_lib_libovn_la_SOURCES = \
+ ovn/lib/actions.c \
+ ovn/lib/actions.h \
+ ovn/lib/expr.c \
+ ovn/lib/expr.h \
+ ovn/lib/lex.c \
+ ovn/lib/lex.h \
+ ovn/lib/ovn-nb-idl.c \
+ ovn/lib/ovn-nb-idl.h \
+ ovn/lib/ovn-sb-idl.c \
+ ovn/lib/ovn-sb-idl.h
+
+# ovn-sb IDL
+#
+# The .ovsidl file is produced by annotating ovn-sb.ovsschema with
+# ovn-sb-idl.ann; the generated .c and .h files are removed by "make clean".
+OVSIDL_BUILT += \
+ $(srcdir)/ovn/lib/ovn-sb-idl.c \
+ $(srcdir)/ovn/lib/ovn-sb-idl.h \
+ $(srcdir)/ovn/lib/ovn-sb-idl.ovsidl
+EXTRA_DIST += $(srcdir)/ovn/lib/ovn-sb-idl.ann
+OVN_SB_IDL_FILES = \
+ $(srcdir)/ovn/ovn-sb.ovsschema \
+ $(srcdir)/ovn/lib/ovn-sb-idl.ann
+$(srcdir)/ovn/lib/ovn-sb-idl.ovsidl: $(OVN_SB_IDL_FILES)
+ $(AM_V_GEN)$(OVSDB_IDLC) annotate $(OVN_SB_IDL_FILES) > $@.tmp && \
+ mv $@.tmp $@
+CLEANFILES += ovn/lib/ovn-sb-idl.c ovn/lib/ovn-sb-idl.h
+
+# ovn-nb IDL
+#
+# Same scheme as for ovn-sb above, using ovn-nb.ovsschema and
+# ovn-nb-idl.ann.
+OVSIDL_BUILT += \
+ $(srcdir)/ovn/lib/ovn-nb-idl.c \
+ $(srcdir)/ovn/lib/ovn-nb-idl.h \
+ $(srcdir)/ovn/lib/ovn-nb-idl.ovsidl
+EXTRA_DIST += $(srcdir)/ovn/lib/ovn-nb-idl.ann
+OVN_NB_IDL_FILES = \
+ $(srcdir)/ovn/ovn-nb.ovsschema \
+ $(srcdir)/ovn/lib/ovn-nb-idl.ann
+$(srcdir)/ovn/lib/ovn-nb-idl.ovsidl: $(OVN_NB_IDL_FILES)
+ $(AM_V_GEN)$(OVSDB_IDLC) annotate $(OVN_NB_IDL_FILES) > $@.tmp && \
+ mv $@.tmp $@
+CLEANFILES += ovn/lib/ovn-nb-idl.c ovn/lib/ovn-nb-idl.h
+
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "expr.h"
+#include "dynamic-string.h"
+#include "json.h"
+#include "lex.h"
+#include "match.h"
+#include "ofp-actions.h"
+#include "shash.h"
+#include "simap.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(expr);
+\f
+/* Returns a human-readable name for measurement level 'level'. Aborts if
+ * 'level' is not a valid level. */
+const char *
+expr_level_to_string(enum expr_level level)
+{
+    const char *name;
+
+    switch (level) {
+    case EXPR_L_NOMINAL:
+        name = "nominal";
+        break;
+    case EXPR_L_BOOLEAN:
+        name = "Boolean";
+        break;
+    case EXPR_L_ORDINAL:
+        name = "ordinal";
+        break;
+    default:
+        OVS_NOT_REACHED();
+    }
+    return name;
+}
+\f
+/* Relational operators. */
+
+/* Returns the source-level spelling of relational operator 'relop', e.g.
+ * "<=" for EXPR_R_LE. Aborts if 'relop' is not a valid operator. */
+const char *
+expr_relop_to_string(enum expr_relop relop)
+{
+    const char *spelling;
+
+    switch (relop) {
+    case EXPR_R_EQ:
+        spelling = "==";
+        break;
+    case EXPR_R_NE:
+        spelling = "!=";
+        break;
+    case EXPR_R_LT:
+        spelling = "<";
+        break;
+    case EXPR_R_LE:
+        spelling = "<=";
+        break;
+    case EXPR_R_GT:
+        spelling = ">";
+        break;
+    case EXPR_R_GE:
+        spelling = ">=";
+        break;
+    default:
+        OVS_NOT_REACHED();
+    }
+    return spelling;
+}
+
+/* Translates lexer token type 'type' into a relational operator.
+ *
+ * If 'type' is one of the six comparison tokens, returns true and, when
+ * 'relop' is nonnull, stores the matching operator in '*relop'.  Otherwise
+ * returns false and does not write to '*relop'. */
+bool
+expr_relop_from_token(enum lex_type type, enum expr_relop *relop)
+{
+    enum expr_relop op;
+
+    if (type == LEX_T_EQ) {
+        op = EXPR_R_EQ;
+    } else if (type == LEX_T_NE) {
+        op = EXPR_R_NE;
+    } else if (type == LEX_T_LT) {
+        op = EXPR_R_LT;
+    } else if (type == LEX_T_LE) {
+        op = EXPR_R_LE;
+    } else if (type == LEX_T_GT) {
+        op = EXPR_R_GT;
+    } else if (type == LEX_T_GE) {
+        op = EXPR_R_GE;
+    } else {
+        return false;
+    }
+
+    if (relop) {
+        *relop = op;
+    }
+    return true;
+}
+
+/* Returns the operator to use when the two operands of 'relop' swap sides.
+ *
+ * The symmetric operators == and != are returned unchanged; each ordering
+ * operator maps to its mirror image, e.g. "a <= b" is the same test as
+ * "b >= a", so EXPR_R_LE becomes EXPR_R_GE. */
+static enum expr_relop
+expr_relop_turn(enum expr_relop relop)
+{
+    switch (relop) {
+    case EXPR_R_EQ:
+    case EXPR_R_NE:
+        /* Symmetric: swapping operands changes nothing. */
+        return relop;
+
+    case EXPR_R_LT:
+        return EXPR_R_GT;
+
+    case EXPR_R_GT:
+        return EXPR_R_LT;
+
+    case EXPR_R_LE:
+        return EXPR_R_GE;
+
+    case EXPR_R_GE:
+        return EXPR_R_LE;
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Returns the logical complement of 'relop', that is, the operator that is
+ * true exactly when 'relop' is false (== and !=, < and >=, <= and >). */
+static enum expr_relop
+expr_relop_invert(enum expr_relop relop)
+{
+    if (relop == EXPR_R_EQ) {
+        return EXPR_R_NE;
+    } else if (relop == EXPR_R_NE) {
+        return EXPR_R_EQ;
+    } else if (relop == EXPR_R_LT) {
+        return EXPR_R_GE;
+    } else if (relop == EXPR_R_LE) {
+        return EXPR_R_GT;
+    } else if (relop == EXPR_R_GT) {
+        return EXPR_R_LE;
+    } else if (relop == EXPR_R_GE) {
+        return EXPR_R_LT;
+    }
+    OVS_NOT_REACHED();
+}
+\f
+/* Constructing and manipulating expressions. */
+
+/* Allocates a logical AND or OR node with an empty operand list.
+ *
+ * 'type' must be EXPR_T_AND or EXPR_T_OR.  The node does not yet satisfy
+ * the invariants for expressions: the caller must add at least two
+ * sub-expressions, none of which has type 'type'. */
+struct expr *
+expr_create_andor(enum expr_type type)
+{
+    struct expr *node = xmalloc(sizeof *node);
+
+    list_init(&node->andor);
+    node->type = type;
+    return node;
+}
+
+/* Joins 'a' and 'b' under logical operator 'type' (EXPR_T_AND or
+ * EXPR_T_OR), taking ownership of both, and returns the result:
+ *
+ *   - If either argument is NULL the other is returned as-is, so the
+ *     result need not have type 'type'.
+ *
+ *   - If one or both arguments already have type 'type', their operand
+ *     lists are merged into a single node instead of nesting, keeping 'a'
+ *     (or its operands) ahead of 'b' (or its operands).
+ *
+ *   - Otherwise a new node of 'type' is created with operands 'a', 'b'. */
+struct expr *
+expr_combine(enum expr_type type, struct expr *a, struct expr *b)
+{
+    if (!a) {
+        return b;
+    }
+    if (!b) {
+        return a;
+    }
+
+    bool a_matches = a->type == type;
+    bool b_matches = b->type == type;
+
+    if (a_matches && b_matches) {
+        /* Move all of b's operands to the end of a's list, then drop the
+         * now-empty 'b' node. */
+        list_splice(&a->andor, b->andor.next, &b->andor);
+        free(b);
+        return a;
+    }
+    if (a_matches) {
+        list_push_back(&a->andor, &b->node);
+        return a;
+    }
+    if (b_matches) {
+        list_push_front(&b->andor, &a->node);
+        return b;
+    }
+
+    struct expr *parent = expr_create_andor(type);
+    list_push_back(&parent->andor, &a->node);
+    list_push_back(&parent->andor, &b->node);
+    return parent;
+}
+
+/* Inserts 'new' among the operands of AND/OR node 'andor', just before the
+ * existing operand 'before'.
+ *
+ * If 'new' has the same type as 'andor', its operands are spliced in
+ * directly and the 'new' node itself is freed, so that no node ends up
+ * nested inside a node of the same type.  Otherwise 'new' is inserted as a
+ * single operand. */
+static void
+expr_insert_andor(struct expr *andor, struct expr *before, struct expr *new)
+{
+    if (new->type != andor->type) {
+        list_insert(&before->node, &new->node);
+        return;
+    }
+
+    list_splice(&before->node, new->andor.next, &new->andor);
+    free(new);
+}
+
+/* Allocates and returns a new constant expression of type EXPR_T_BOOLEAN
+ * whose value is 'b'. */
+struct expr *
+expr_create_boolean(bool b)
+{
+    struct expr *constant = xmalloc(sizeof *constant);
+
+    constant->boolean = b;
+    constant->type = EXPR_T_BOOLEAN;
+    return constant;
+}
+
+/* Logically negates 'expr' in place:
+ *
+ *   - A comparison gets the inverse relational operator.
+ *
+ *   - An AND or OR has every operand negated recursively and then its own
+ *     type swapped (De Morgan's laws).
+ *
+ *   - A Boolean constant is flipped. */
+static void
+expr_not(struct expr *expr)
+{
+    if (expr->type == EXPR_T_CMP) {
+        expr->cmp.relop = expr_relop_invert(expr->cmp.relop);
+    } else if (expr->type == EXPR_T_AND || expr->type == EXPR_T_OR) {
+        struct expr *operand;
+
+        LIST_FOR_EACH (operand, node, &expr->andor) {
+            expr_not(operand);
+        }
+        if (expr->type == EXPR_T_AND) {
+            expr->type = EXPR_T_OR;
+        } else {
+            expr->type = EXPR_T_AND;
+        }
+    } else if (expr->type == EXPR_T_BOOLEAN) {
+        expr->boolean = !expr->boolean;
+    } else {
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Folds Boolean constant operands out of AND/OR node 'expr', consuming
+ * 'expr' and returning the simplified expression.
+ *
+ * 'short_circuit' is the operand value that decides the whole expression
+ * by itself (expr_fix() passes false for AND and true for OR).  An operand
+ * equal to it replaces the entire expression with that constant; an
+ * operand equal to its opposite is simply dropped.  If fewer than two
+ * operands remain, the node collapses to the constant '!short_circuit'
+ * (none left) or to the sole remaining operand. */
+static struct expr *
+expr_fix_andor(struct expr *expr, bool short_circuit)
+{
+    struct expr *operand, *next;
+
+    LIST_FOR_EACH_SAFE (operand, next, node, &expr->andor) {
+        if (operand->type != EXPR_T_BOOLEAN) {
+            continue;
+        }
+        if (operand->boolean == short_circuit) {
+            expr_destroy(expr);
+            return expr_create_boolean(short_circuit);
+        }
+        list_remove(&operand->node);
+        expr_destroy(operand);
+    }
+
+    if (!list_is_short(&expr->andor)) {
+        return expr;
+    }
+
+    struct expr *result;
+    if (list_is_empty(&expr->andor)) {
+        result = expr_create_boolean(!short_circuit);
+    } else {
+        result = expr_from_node(list_front(&expr->andor));
+    }
+    /* Only the list head is released; a surviving operand lives on. */
+    free(expr);
+    return result;
+}
+
+/* Simplifies the top level of 'expr' and returns the result, consuming
+ * 'expr'.  AND and OR nodes are passed to expr_fix_andor() with the
+ * constant that short-circuits them (false for AND, true for OR);
+ * comparisons and Boolean constants are returned unchanged. */
+static struct expr *
+expr_fix(struct expr *expr)
+{
+    if (expr->type == EXPR_T_AND) {
+        return expr_fix_andor(expr, false);
+    } else if (expr->type == EXPR_T_OR) {
+        return expr_fix_andor(expr, true);
+    } else if (expr->type == EXPR_T_CMP || expr->type == EXPR_T_BOOLEAN) {
+        return expr;
+    }
+    OVS_NOT_REACHED();
+}
+\f
+/* Formatting. */
+
+/* Looks for a single contiguous run of 1-bits within the low 'width' bits
+ * of 'sv'.  If there is exactly one such run, stores its starting bit
+ * offset in '*startp' and its length in '*n_bitsp'.  Otherwise (no 1-bits,
+ * or more than one run) stores 0 in both.
+ *
+ * (Assumes bitwise_scan() returns the offset of the first bit equal to its
+ * target at or after the given start, or the end offset if there is none.) */
+static void
+find_bitwise_range(const union mf_subvalue *sv, int width,
+                   int *startp, int *n_bitsp)
+{
+    unsigned int start = bitwise_scan(sv, sizeof *sv, true, 0, width);
+    if (start >= width) {
+        /* No 1-bits at all. */
+        *startp = *n_bitsp = 0;
+        return;
+    }
+
+    unsigned int end = bitwise_scan(sv, sizeof *sv, false, start, width);
+    if (end < width
+        && bitwise_scan(sv, sizeof *sv, true, end, width) < width) {
+        /* Another 1-bit follows the first run, so it is not contiguous. */
+        *startp = *n_bitsp = 0;
+        return;
+    }
+
+    *startp = start;
+    *n_bitsp = end - start;
+}
+
+/* Appends a string form of comparison expression 'e' to 's'.
+ *
+ * Three output shapes are produced:
+ *
+ * - String symbols (width 0): name, operator, JSON-escaped string.
+ *
+ * - A one-bit mask tested with == or !=: the bare symbol name, prefixed
+ * with "!" for a negative test and suffixed with "[bit]" when the
+ * symbol is wider than one bit.
+ *
+ * - Anything else: name (plus a "[lo..hi]" or "[bit]" subfield when the
+ * mask is one run narrower than the symbol), operator, then either the
+ * value bits under the mask or, when the mask is not a single run,
+ * "value/mask". */
+static void
+expr_format_cmp(const struct expr *e, struct ds *s)
+{
+ /* The common case is numerical comparisons.
+ * Handle string comparisons as a special case. */
+ if (!e->cmp.symbol->width) {
+ ds_put_format(s, "%s %s ", e->cmp.symbol->name,
+ expr_relop_to_string(e->cmp.relop));
+ json_string_escape(e->cmp.string, s);
+ return;
+ }
+
+ /* 'ofs' and 'n' describe the mask's single run of 1-bits; 'n' is 0 if the
+ * mask has no 1-bits or more than one run. */
+ int ofs, n;
+ find_bitwise_range(&e->cmp.mask, e->cmp.symbol->width, &ofs, &n);
+ if (n == 1 && (e->cmp.relop == EXPR_R_EQ || e->cmp.relop == EXPR_R_NE)) {
+ bool positive;
+
+ /* "bit == 1" and "bit != 0" are positive; the other two are not. */
+ positive = bitwise_get_bit(&e->cmp.value, sizeof e->cmp.value, ofs);
+ positive ^= e->cmp.relop == EXPR_R_NE;
+ if (!positive) {
+ ds_put_char(s, '!');
+ }
+ ds_put_cstr(s, e->cmp.symbol->name);
+ if (e->cmp.symbol->width > 1) {
+ ds_put_format(s, "[%d]", ofs);
+ }
+ return;
+ }
+
+ ds_put_cstr(s, e->cmp.symbol->name);
+ /* Only print a subfield when the mask run is narrower than the symbol. */
+ if (n > 0 && n < e->cmp.symbol->width) {
+ if (n > 1) {
+ ds_put_format(s, "[%d..%d]", ofs, ofs + n - 1);
+ } else {
+ ds_put_format(s, "[%d]", ofs);
+ }
+ }
+
+ ds_put_format(s, " %s ", expr_relop_to_string(e->cmp.relop));
+
+ if (n) {
+ union mf_subvalue value;
+
+ /* Shift the 'n' compared bits down to bit 0 so that the printed
+ * constant is relative to the subfield, not the whole symbol. */
+ memset(&value, 0, sizeof value);
+ bitwise_copy(&e->cmp.value, sizeof e->cmp.value, ofs,
+ &value, sizeof value, 0,
+ n);
+ mf_format_subvalue(&value, s);
+ } else {
+ /* No single run of mask bits: fall back to explicit value/mask. */
+ mf_format_subvalue(&e->cmp.value, s);
+ ds_put_char(s, '/');
+ mf_format_subvalue(&e->cmp.mask, s);
+ }
+}
+
+/* Appends the operands of AND/OR expression 'e' to 's', separated by 'op'
+ * with a space on each side.  An operand that is itself an AND or OR is
+ * wrapped in parentheses. */
+static void
+expr_format_andor(const struct expr *e, const char *op, struct ds *s)
+{
+    struct expr *operand;
+    bool first = true;
+
+    LIST_FOR_EACH (operand, node, &e->andor) {
+        if (!first) {
+            ds_put_format(s, " %s ", op);
+        }
+        first = false;
+
+        bool nested = (operand->type == EXPR_T_AND
+                       || operand->type == EXPR_T_OR);
+        if (nested) {
+            ds_put_char(s, '(');
+        }
+        expr_format(operand, s);
+        if (nested) {
+            ds_put_char(s, ')');
+        }
+    }
+}
+
+/* Appends to 's' a textual rendering of 'e' that can be parsed back into
+ * an equivalent expression.  Comparisons are printed by expr_format_cmp(),
+ * AND and OR by expr_format_andor() with "&&" and "||" respectively, and
+ * Boolean constants as "1" or "0". */
+void
+expr_format(const struct expr *e, struct ds *s)
+{
+    if (e->type == EXPR_T_CMP) {
+        expr_format_cmp(e, s);
+    } else if (e->type == EXPR_T_AND) {
+        expr_format_andor(e, "&&", s);
+    } else if (e->type == EXPR_T_OR) {
+        expr_format_andor(e, "||", s);
+    } else if (e->type == EXPR_T_BOOLEAN) {
+        ds_put_char(s, e->boolean ? '1' : '0');
+    }
+}
+
+/* Writes the expr_format() rendering of 'e' to stdout as a single line. */
+void
+expr_print(const struct expr *e)
+{
+    struct ds text;
+
+    ds_init(&text);
+    expr_format(e, &text);
+    puts(ds_cstr(&text));
+    ds_destroy(&text);
+}
+\f
+/* Parsing. */
+
+/* Type of a "union expr_constant" or "struct expr_constant_set". */
+enum expr_constant_type {
+ EXPR_C_INTEGER, /* Integer, optionally with a mask. */
+ EXPR_C_STRING /* Null-terminated string. */
+};
+
+/* A string or integer constant (one must know which from context).
+ *
+ * Within a "struct expr_constant_set", the set's 'type' member says which
+ * member of this union is in use. */
+union expr_constant {
+ /* Integer constant.
+ *
+ * The width of a constant isn't always clear, e.g. if you write "1",
+ * there's no way to tell whether you mean for that to be a 1-bit constant
+ * or a 128-bit constant or somewhere in between. */
+ struct {
+ union mf_subvalue value;
+ union mf_subvalue mask; /* Only initialized if 'masked'. */
+ bool masked;
+
+ enum lex_format format; /* From the constant's lex_token. */
+ };
+
+ /* Null-terminated string constant.
+ *
+ * Heap-allocated; freed by expr_constant_set_destroy(). */
+ char *string;
+};
+
+/* A collection of "union expr_constant"s of the same type.
+ *
+ * 'values' is a heap-allocated array (grown by parse_constant()) that
+ * expr_constant_set_destroy() releases, along with each string if 'type'
+ * is EXPR_C_STRING. */
+struct expr_constant_set {
+ union expr_constant *values; /* Constants. */
+ size_t n_values; /* Number of constants. */
+ enum expr_constant_type type; /* Type of the constants. */
+ bool in_curlies; /* Whether the constants were in {}. */
+};
+
+/* A reference to a symbol or a subfield of a symbol.
+ *
+ * For string fields, ofs and n_bits are 0.
+ *
+ * For integer fields referenced without a [...] subscript, parse_field()
+ * sets ofs to 0 and n_bits to the symbol's full width. */
+struct expr_field {
+ const struct expr_symbol *symbol; /* The symbol. */
+ int ofs; /* Starting bit offset. */
+ int n_bits; /* Number of bits. */
+};
+
+/* Context maintained during expr_parse().
+ *
+ * 'error' records only the first error: once it is nonnull, expr_error()
+ * and expr_syntax_error() leave it alone. 'not' is toggled by
+ * expr_parse_not() around the operand of each `!'. */
+struct expr_context {
+ struct lexer *lexer; /* Lexer for pulling more tokens. */
+ const struct shash *symtab; /* Symbol table. */
+ char *error; /* Error, if any, otherwise NULL. */
+ bool not; /* True inside odd number of NOT operators. */
+};
+
+struct expr *expr_parse__(struct expr_context *);
+static void expr_not(struct expr *);
+static void expr_constant_set_destroy(struct expr_constant_set *);
+static bool parse_field(struct expr_context *, struct expr_field *);
+
+/* Shared first step of expr_error() and expr_syntax_error().
+ *
+ * Returns true if the caller must not record its own message, which is the
+ * case when:
+ *
+ *   - 'ctx' already holds an error; only the first one is kept, since a
+ *     cascade of follow-on errors is unlikely to be useful.
+ *
+ *   - The current token is LEX_T_ERROR.  Nothing at the expression level
+ *     accepts an error token, so any parse error raised here would be
+ *     meaningless; the lexer's own message is recorded instead.
+ *
+ * Returns false if the caller should go ahead and record its error. */
+static bool
+expr_error_handle_common(struct expr_context *ctx)
+{
+    if (ctx->error) {
+        return true;
+    }
+    if (ctx->lexer->token.type != LEX_T_ERROR) {
+        return false;
+    }
+
+    ctx->error = xstrdup(ctx->lexer->token.s);
+    return true;
+}
+
+static void OVS_PRINTF_FORMAT(2, 3)
+expr_error(struct expr_context *ctx, const char *message, ...)
+{
+ if (expr_error_handle_common(ctx)) {
+ return;
+ }
+
+ va_list args;
+ va_start(args, message);
+ ctx->error = xvasprintf(message, args);
+ va_end(args);
+}
+
+static void OVS_PRINTF_FORMAT(2, 3)
+expr_syntax_error(struct expr_context *ctx, const char *message, ...)
+{
+ if (expr_error_handle_common(ctx)) {
+ return;
+ }
+
+ struct ds s;
+
+ ds_init(&s);
+ ds_put_cstr(&s, "Syntax error ");
+ if (ctx->lexer->token.type == LEX_T_END) {
+ ds_put_cstr(&s, "at end of input ");
+ } else if (ctx->lexer->start) {
+ ds_put_format(&s, "at `%.*s' ",
+ (int) (ctx->lexer->input - ctx->lexer->start),
+ ctx->lexer->start);
+ }
+
+ va_list args;
+ va_start(args, message);
+ ds_put_format_valist(&s, message, args);
+ va_end(args);
+
+ ctx->error = ds_steal_cstr(&s);
+}
+
+/* Allocates and returns the comparison expression "f r c" for a single
+ * constant 'c'.
+ *
+ * For a string symbol (width 0), the expression gets its own copy of the
+ * string.  For an integer symbol, the low 'f->n_bits' bits of the
+ * constant's value are placed at bit offset 'f->ofs' of the expression's
+ * value; the expression's mask is the constant's mask placed the same way,
+ * or all-1s over those bits if the constant is unmasked.  All other value
+ * and mask bits stay zero. */
+static struct expr *
+make_cmp__(const struct expr_field *f, enum expr_relop r,
+           const union expr_constant *c)
+{
+    struct expr *cmp = xzalloc(sizeof *cmp);
+
+    cmp->type = EXPR_T_CMP;
+    cmp->cmp.symbol = f->symbol;
+    cmp->cmp.relop = r;
+
+    if (!f->symbol->width) {
+        cmp->cmp.string = xstrdup(c->string);
+        return cmp;
+    }
+
+    bitwise_copy(&c->value, sizeof c->value, 0,
+                 &cmp->cmp.value, sizeof cmp->cmp.value, f->ofs,
+                 f->n_bits);
+    if (!c->masked) {
+        bitwise_one(&cmp->cmp.mask, sizeof cmp->cmp.mask, f->ofs,
+                    f->n_bits);
+    } else {
+        bitwise_copy(&c->mask, sizeof c->mask, 0,
+                     &cmp->cmp.mask, sizeof cmp->cmp.mask, f->ofs,
+                     f->n_bits);
+    }
+    return cmp;
+}
+
+/* Returns the smallest field width, in bits, that integer constant 'c' can
+ * reasonably be compared against.
+ *
+ * A masked constant is as wide as its mask.  Otherwise the width depends
+ * on how the constant was written: decimal and hexadecimal constants are
+ * as wide as their value, while IPv4, IPv6 and Ethernet address constants
+ * have fixed widths of 32, 128 and 48 bits. */
+static int
+expr_constant_width(const union expr_constant *c)
+{
+    if (c->masked) {
+        return mf_subvalue_width(&c->mask);
+    }
+
+    enum lex_format format = c->format;
+    if (format == LEX_F_DECIMAL || format == LEX_F_HEXADECIMAL) {
+        return mf_subvalue_width(&c->value);
+    } else if (format == LEX_F_IPV4) {
+        return 32;
+    } else if (format == LEX_F_IPV6) {
+        return 128;
+    } else if (format == LEX_F_ETHERNET) {
+        return 48;
+    }
+    OVS_NOT_REACHED();
+}
+
+/* Checks that the constants in 'cs' may be compared against field 'f':
+ * integer fields require integer constants, none of them wider than the
+ * field's symbol, and string fields require string constants.
+ *
+ * Returns true if 'cs' is acceptable.  Otherwise records an error in 'ctx'
+ * and returns false. */
+static bool
+type_check(struct expr_context *ctx, const struct expr_field *f,
+           struct expr_constant_set *cs)
+{
+    const struct expr_symbol *symbol = f->symbol;
+    bool integer_field = symbol->width != 0;
+    enum expr_constant_type wanted = (integer_field
+                                      ? EXPR_C_INTEGER
+                                      : EXPR_C_STRING);
+
+    if (cs->type != wanted) {
+        expr_error(ctx, "%s field %s is not compatible with %s constant.",
+                   integer_field ? "Integer" : "String",
+                   symbol->name,
+                   cs->type == EXPR_C_INTEGER ? "integer" : "string");
+        return false;
+    }
+
+    if (!integer_field) {
+        return true;
+    }
+
+    for (size_t i = 0; i < cs->n_values; i++) {
+        int width = expr_constant_width(&cs->values[i]);
+
+        if (width > symbol->width) {
+            expr_error(ctx, "%d-bit constant is not compatible with "
+                       "%d-bit field %s.",
+                       width, symbol->width, symbol->name);
+            return false;
+        }
+    }
+    return true;
+}
+
+/* Builds and returns the expression that compares field 'f' against the
+ * constants in 'cs' using operator 'r', after validating the combination.
+ *
+ * 'cs' is always consumed: it is destroyed before returning, whether or
+ * not the call succeeds. On failure, records an error in 'ctx' and
+ * returns NULL.
+ *
+ * When 'cs' holds more than one value, one comparison is built per value
+ * and the comparisons are joined with OR if 'r' is EXPR_R_EQ and with AND
+ * otherwise. */
+static struct expr *
+make_cmp(struct expr_context *ctx,
+ const struct expr_field *f, enum expr_relop r,
+ struct expr_constant_set *cs)
+{
+ struct expr *e = NULL;
+
+ if (!type_check(ctx, f, cs)) {
+ goto exit;
+ }
+
+ /* Ordering operators (<, <=, >, >=) are only meaningful for a single,
+ * unmasked constant compared against an ordinal field. */
+ if (r != EXPR_R_EQ && r != EXPR_R_NE) {
+ if (cs->in_curlies) {
+ expr_error(ctx, "Only == and != operators may be used "
+ "with value sets.");
+ goto exit;
+ }
+ if (f->symbol->level == EXPR_L_NOMINAL ||
+ f->symbol->level == EXPR_L_BOOLEAN) {
+ expr_error(ctx, "Only == and != operators may be used "
+ "with %s field %s.",
+ expr_level_to_string(f->symbol->level),
+ f->symbol->name);
+ goto exit;
+ }
+ if (cs->values[0].masked) {
+ expr_error(ctx, "Only == and != operators may be used with "
+ "masked constants. Consider using subfields instead "
+ "(e.g. eth.src[0..15] > 0x1111 in place of "
+ "eth.src > 00:00:00:00:11:11/00:00:00:00:ff:ff).");
+ goto exit;
+ }
+ }
+
+ if (f->symbol->level == EXPR_L_NOMINAL) {
+ if (f->symbol->expansion) {
+ /* Nominal predicate: every value must amount to a positive test
+ * once 'r' and any enclosing `!' operators are accounted for. */
+ for (size_t i = 0; i < cs->n_values; i++) {
+ const union mf_subvalue *value = &cs->values[i].value;
+ bool positive = (value->integer & htonll(1)) != 0;
+ positive ^= r == EXPR_R_NE;
+ positive ^= ctx->not;
+ if (!positive) {
+ const char *name = f->symbol->name;
+ expr_error(ctx, "Nominal predicate %s may only be tested "
+ "positively, e.g. `%s' or `%s == 1' but not "
+ "`!%s' or `%s == 0'.",
+ name, name, name, name, name);
+ goto exit;
+ }
+ }
+ } else if (r != (ctx->not ? EXPR_R_NE : EXPR_R_EQ)) {
+ /* Nominal field: the test must come out as an equality test after
+ * enclosing `!' operators are applied. */
+ expr_error(ctx, "Nominal field %s may only be tested for "
+ "equality (taking enclosing `!' operators into "
+ "account).", f->symbol->name);
+ goto exit;
+ }
+ }
+
+ e = make_cmp__(f, r, &cs->values[0]);
+ for (size_t i = 1; i < cs->n_values; i++) {
+ e = expr_combine(r == EXPR_R_EQ ? EXPR_T_OR : EXPR_T_AND,
+ e, make_cmp__(f, r, &cs->values[i]));
+ }
+exit:
+ expr_constant_set_destroy(cs);
+ return e;
+}
+
+/* If the current token is a decimal integer whose value (as read from the
+ * token's 64-bit 'integer' member) is no greater than INT_MAX, stores it in
+ * '*value', advances the lexer past it, and returns true.  Otherwise
+ * records a syntax error in 'ctx' and returns false without consuming the
+ * token. */
+static bool
+expr_get_int(struct expr_context *ctx, int *value)
+{
+    if (ctx->lexer->token.type != LEX_T_INTEGER
+        || ctx->lexer->token.format != LEX_F_DECIMAL
+        || ntohll(ctx->lexer->token.value.integer) > INT_MAX) {
+        expr_syntax_error(ctx, "expecting small integer.");
+        return false;
+    }
+
+    *value = ntohll(ctx->lexer->token.value.integer);
+    lexer_get(ctx->lexer);
+    return true;
+}
+
+/* Parses a field reference from 'ctx''s lexer into '*f': a symbol name from
+ * the symbol table, optionally followed by a bit subscript "[n]" or a bit
+ * range "[low..high]".
+ *
+ * Returns true on success. On failure, records an error in 'ctx' and
+ * returns false, in which case '*f' may be only partially filled in. */
+static bool
+parse_field(struct expr_context *ctx, struct expr_field *f)
+{
+ const struct expr_symbol *symbol;
+
+ if (ctx->lexer->token.type != LEX_T_ID) {
+ expr_syntax_error(ctx, "expecting field name.");
+ return false;
+ }
+
+ /* An identifier that is not in the symbol table gets the same message. */
+ symbol = shash_find_data(ctx->symtab, ctx->lexer->token.s);
+ if (!symbol) {
+ expr_syntax_error(ctx, "expecting field name.");
+ return false;
+ }
+ lexer_get(ctx->lexer);
+
+ f->symbol = symbol;
+ if (lexer_match(ctx->lexer, LEX_T_LSQUARE)) {
+ int low, high;
+
+ if (!symbol->width) {
+ expr_error(ctx, "Cannot select subfield of string field %s.",
+ symbol->name);
+ return false;
+ }
+
+ if (!expr_get_int(ctx, &low)) {
+ return false;
+ }
+ if (lexer_match(ctx->lexer, LEX_T_ELLIPSIS)) {
+ if (!expr_get_int(ctx, &high)) {
+ return false;
+ }
+ } else {
+ /* "[n]" is shorthand for the one-bit range "[n..n]". */
+ high = low;
+ }
+
+ if (!lexer_match(ctx->lexer, LEX_T_RSQUARE)) {
+ expr_syntax_error(ctx, "expecting `]'.");
+ return false;
+ }
+
+ if (low > high) {
+ expr_error(ctx, "Invalid bit range %d to %d.", low, high);
+ return false;
+ } else if (high >= symbol->width) {
+ expr_error(ctx, "Cannot select bits %d to %d of %d-bit field %s.",
+ low, high, symbol->width, symbol->name);
+ return false;
+ } else if (symbol->level == EXPR_L_NOMINAL
+ && (low != 0 || high != symbol->width - 1)) {
+ /* A nominal field may only be subscripted with its full range. */
+ expr_error(ctx, "Cannot select subfield of nominal field %s.",
+ symbol->name);
+ return false;
+ }
+
+ f->ofs = low;
+ f->n_bits = high - low + 1;
+ } else {
+ /* No subscript: the reference covers the whole symbol (and both
+ * members are 0 for a string symbol, whose width is 0). */
+ f->ofs = 0;
+ f->n_bits = symbol->width;
+ }
+
+ return true;
+}
+
+/* If the current token is a relational operator, stores it in '*relop',
+ * advances the lexer past it, and returns true.  Otherwise records a
+ * syntax error in 'ctx' and returns false without consuming the token. */
+static bool
+parse_relop(struct expr_context *ctx, enum expr_relop *relop)
+{
+    if (!expr_relop_from_token(ctx->lexer->token.type, relop)) {
+        expr_syntax_error(ctx, "expecting relational operator.");
+        return false;
+    }
+
+    lexer_get(ctx->lexer);
+    return true;
+}
+
+/* Notes that the next constant to be added to 'cs' has the given 'type'.
+ *
+ * An empty set takes on 'type', and a nonempty set accepts a constant of
+ * its existing type; both cases return true.  A constant whose type
+ * differs from that of a nonempty set is a syntax error, which is recorded
+ * in 'ctx' (naming the type that was expected) before returning false. */
+static bool
+assign_constant_set_type(struct expr_context *ctx,
+                         struct expr_constant_set *cs,
+                         enum expr_constant_type type)
+{
+    if (cs->n_values && cs->type != type) {
+        expr_syntax_error(ctx, "expecting %s.",
+                          cs->type == EXPR_C_INTEGER ? "integer" : "string");
+        return false;
+    }
+
+    cs->type = type;
+    return true;
+}
+
+/* Parses one string, integer or masked-integer constant from 'ctx''s lexer
+ * and appends it to 'cs'.
+ *
+ * '*allocated_values' is the number of elements allocated in 'cs->values';
+ * the array is grown, and the count updated, when it is full.  String
+ * constants are stored as a fresh copy of the token text.
+ *
+ * Returns true on success.  On failure (the token is not a constant, or
+ * its type does not match the constants already in 'cs'), records a
+ * syntax error in 'ctx' and returns false without consuming the token. */
+static bool
+parse_constant(struct expr_context *ctx, struct expr_constant_set *cs,
+               size_t *allocated_values)
+{
+    if (cs->n_values >= *allocated_values) {
+        cs->values = x2nrealloc(cs->values, allocated_values,
+                                sizeof *cs->values);
+    }
+
+    enum lex_type token_type = ctx->lexer->token.type;
+
+    if (token_type == LEX_T_STRING) {
+        if (!assign_constant_set_type(ctx, cs, EXPR_C_STRING)) {
+            return false;
+        }
+        cs->values[cs->n_values++].string = xstrdup(ctx->lexer->token.s);
+        lexer_get(ctx->lexer);
+        return true;
+    }
+
+    if (token_type != LEX_T_INTEGER && token_type != LEX_T_MASKED_INTEGER) {
+        expr_syntax_error(ctx, "expecting constant.");
+        return false;
+    }
+
+    if (!assign_constant_set_type(ctx, cs, EXPR_C_INTEGER)) {
+        return false;
+    }
+
+    union expr_constant *constant = &cs->values[cs->n_values++];
+    constant->value = ctx->lexer->token.value;
+    constant->format = ctx->lexer->token.format;
+    constant->masked = token_type == LEX_T_MASKED_INTEGER;
+    if (constant->masked) {
+        constant->mask = ctx->lexer->token.mask;
+    }
+    lexer_get(ctx->lexer);
+    return true;
+}
+
+/* Parses either a single constant or a {}-enclosed list of constants (all
+ * integers or all strings) into 'cs', which the caller need not have
+ * initialized.  Inside braces, a comma after each constant is accepted but
+ * not required.
+ *
+ * Returns true on success, in which case the caller owns 'cs'.  Returns
+ * false on failure, in which case the contents of 'cs' have already been
+ * destroyed and are indeterminate. */
+static bool
+parse_constant_set(struct expr_context *ctx, struct expr_constant_set *cs)
+{
+    size_t allocated_values = 0;
+    bool ok;
+
+    memset(cs, 0, sizeof *cs);
+    if (!lexer_match(ctx->lexer, LEX_T_LCURLY)) {
+        ok = parse_constant(ctx, cs, &allocated_values);
+    } else {
+        cs->in_curlies = true;
+        for (;;) {
+            ok = parse_constant(ctx, cs, &allocated_values);
+            if (!ok) {
+                break;
+            }
+            lexer_match(ctx->lexer, LEX_T_COMMA);
+            if (lexer_match(ctx->lexer, LEX_T_RCURLY)) {
+                break;
+            }
+        }
+    }
+
+    if (!ok) {
+        expr_constant_set_destroy(cs);
+    }
+    return ok;
+}
+
+/* Releases the memory owned by 'cs': each string, if 'cs' holds string
+ * constants, and then the array of constants.  'cs' itself is not freed.
+ * Does nothing if 'cs' is NULL. */
+static void
+expr_constant_set_destroy(struct expr_constant_set *cs)
+{
+    if (!cs) {
+        return;
+    }
+
+    if (cs->type == EXPR_C_STRING) {
+        for (size_t i = 0; i < cs->n_values; i++) {
+            free(cs->values[i].string);
+        }
+    }
+    free(cs->values);
+}
+
+/* Parses a primary expression from 'ctx''s lexer. The accepted forms are:
+ *
+ * - A parenthesized expression.
+ *
+ * - A field, optionally followed by a relational operator and a constant
+ * or {}-enclosed set of constants. A field alone is treated as
+ * "field != 0".
+ *
+ * - The Boolean constant 0 or 1 (a lone unmasked decimal integer not
+ * followed by a relational operator).
+ *
+ * - A constant or constant set, a relational operator, and a field,
+ * optionally followed by a second operator and constant to form a
+ * range such as "1 < field <= 5".
+ *
+ * Sets '*atomic' to true if the result was a parenthesized expression, a
+ * field on its own, or a Boolean constant, and to false otherwise.
+ * expr_parse_not() requires the operand of `!' to be atomic.
+ *
+ * Returns the new expression, or NULL on failure with an error recorded in
+ * 'ctx'. */
+static struct expr *
+expr_parse_primary(struct expr_context *ctx, bool *atomic)
+{
+ *atomic = false;
+ if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
+ struct expr *e = expr_parse__(ctx);
+ if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) {
+ expr_destroy(e);
+ expr_syntax_error(ctx, "expecting `)'.");
+ return NULL;
+ }
+ *atomic = true;
+ return e;
+ }
+
+ if (ctx->lexer->token.type == LEX_T_ID) {
+ struct expr_field f;
+ enum expr_relop r;
+ struct expr_constant_set c;
+
+ if (!parse_field(ctx, &f)) {
+ return NULL;
+ }
+
+ if (!expr_relop_from_token(ctx->lexer->token.type, &r)) {
+ /* A field with no operator. A multibit field is only accepted
+ * here under an odd number of `!' operators. */
+ if (f.n_bits > 1 && !ctx->not) {
+ expr_error(ctx, "Explicit `!= 0' is required for inequality "
+ "test of multibit field against 0.");
+ return NULL;
+ }
+
+ *atomic = true;
+
+ /* Synthesize the constant 0 and build "field != 0". make_cmp()
+ * takes over 'c' and frees 'cst' along with it. */
+ union expr_constant *cst = xzalloc(sizeof *cst);
+ cst->format = LEX_F_HEXADECIMAL;
+ cst->masked = false;
+
+ c.type = EXPR_C_INTEGER;
+ c.values = cst;
+ c.n_values = 1;
+ c.in_curlies = false;
+ return make_cmp(ctx, &f, EXPR_R_NE, &c);
+ } else if (parse_relop(ctx, &r) && parse_constant_set(ctx, &c)) {
+ return make_cmp(ctx, &f, r, &c);
+ } else {
+ return NULL;
+ }
+ } else {
+ struct expr_constant_set c1;
+ if (!parse_constant_set(ctx, &c1)) {
+ return NULL;
+ }
+
+ /* A lone decimal 0 or 1 with no operator after it is a Boolean. */
+ if (!expr_relop_from_token(ctx->lexer->token.type, NULL)
+ && c1.n_values == 1
+ && c1.type == EXPR_C_INTEGER
+ && c1.values[0].format == LEX_F_DECIMAL
+ && !c1.values[0].masked
+ && !c1.in_curlies) {
+ uint64_t x = ntohll(c1.values[0].value.integer);
+ if (x <= 1) {
+ *atomic = true;
+ expr_constant_set_destroy(&c1);
+ return expr_create_boolean(x);
+ }
+ }
+
+ enum expr_relop r1;
+ struct expr_field f;
+ if (!parse_relop(ctx, &r1) || !parse_field(ctx, &f)) {
+ expr_constant_set_destroy(&c1);
+ return NULL;
+ }
+
+ /* "constant relop field": turn the operator around so that the field
+ * is on the left, as make_cmp() expects. */
+ if (!expr_relop_from_token(ctx->lexer->token.type, NULL)) {
+ return make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
+ }
+
+ enum expr_relop r2;
+ struct expr_constant_set c2;
+ if (!parse_relop(ctx, &r2) || !parse_constant_set(ctx, &c2)) {
+ expr_constant_set_destroy(&c1);
+ return NULL;
+ } else {
+ /* Reject "1 == field == 2", "1 < field > 2", and so on. */
+ if (!(((r1 == EXPR_R_LT || r1 == EXPR_R_LE) &&
+ (r2 == EXPR_R_LT || r2 == EXPR_R_LE)) ||
+ ((r1 == EXPR_R_GT || r1 == EXPR_R_GE) &&
+ (r2 == EXPR_R_GT || r2 == EXPR_R_GE)))) {
+ expr_error(ctx, "Range expressions must have the form "
+ "`x < field < y' or `x > field > y', with each "
+ "`<' optionally replaced by `<=' or `>' by `>=').");
+ expr_constant_set_destroy(&c1);
+ expr_constant_set_destroy(&c2);
+ return NULL;
+ }
+
+ /* Both make_cmp() calls run even if the first fails, so that both
+ * constant sets get destroyed. */
+ struct expr *e1 = make_cmp(ctx, &f, expr_relop_turn(r1), &c1);
+ struct expr *e2 = make_cmp(ctx, &f, r2, &c2);
+ if (ctx->error) {
+ expr_destroy(e1);
+ expr_destroy(e2);
+ return NULL;
+ }
+ return expr_combine(EXPR_T_AND, e1, e2);
+ }
+ }
+}
+
+/* Parses a primary expression optionally preceded by a single `!'.
+ *
+ * While the operand of `!' is being parsed, 'ctx->not' is inverted so that
+ * checks made on the operand can take the enclosing negation into account.
+ * The operand must be atomic (see expr_parse_primary()); otherwise an
+ * error is recorded.  On success the parsed operand is negated in place.
+ *
+ * Returns the expression, or NULL on failure. */
+static struct expr *
+expr_parse_not(struct expr_context *ctx)
+{
+    bool atomic;
+
+    if (!lexer_match(ctx->lexer, LEX_T_LOG_NOT)) {
+        return expr_parse_primary(ctx, &atomic);
+    }
+
+    ctx->not = !ctx->not;
+    struct expr *operand = expr_parse_primary(ctx, &atomic);
+    ctx->not = !ctx->not;
+
+    if (!operand) {
+        return NULL;
+    }
+    if (!atomic) {
+        expr_error(ctx, "Missing parentheses around operand of !.");
+        expr_destroy(operand);
+        return NULL;
+    }
+
+    expr_not(operand);
+    return operand;
+}
+
+/* Parses one or more operands (each as accepted by expr_parse_not()) joined
+ * by a single kind of logical operator: all "&&" or all "||". Using both
+ * kinds at the same nesting level without parentheses is an error.
+ *
+ * Returns the combined expression, or NULL on failure (the failing
+ * sub-parser is expected to have recorded the error in 'ctx'). */
+struct expr *
+expr_parse__(struct expr_context *ctx)
+{
+ struct expr *e = expr_parse_not(ctx);
+ if (!e) {
+ return NULL;
+ }
+
+ enum lex_type lex_type = ctx->lexer->token.type;
+ if (lex_type == LEX_T_LOG_AND || lex_type == LEX_T_LOG_OR) {
+ enum expr_type expr_type
+ = lex_type == LEX_T_LOG_AND ? EXPR_T_AND : EXPR_T_OR;
+
+ /* Consume the first operator; the loop condition consumes any later
+ * ones of the same kind. */
+ lexer_get(ctx->lexer);
+ do {
+ struct expr *e2 = expr_parse_not(ctx);
+ if (!e2) {
+ expr_destroy(e);
+ return NULL;
+ }
+ e = expr_combine(expr_type, e, e2);
+ } while (lexer_match(ctx->lexer, lex_type));
+ /* The loop ended on something other than 'lex_type'; if that is the
+ * other logical operator, the two kinds are being mixed. */
+ if (ctx->lexer->token.type == LEX_T_LOG_AND
+ || ctx->lexer->token.type == LEX_T_LOG_OR) {
+ expr_destroy(e);
+ expr_error(ctx,
+ "&& and || must be parenthesized when used together.");
+ return NULL;
+ }
+ }
+ return e;
+}
+
+/* Parses an expression from 'lexer', resolving names through 'symtab'.
+ *
+ * On success, returns the new expression and stores NULL in '*errorp'.  On
+ * failure, returns NULL and stores an explanatory message in '*errorp'.
+ * Exactly one of the two is ever nonnull, and the caller must eventually
+ * release it: the expression with expr_destroy(), the message with
+ * free(). */
+struct expr *
+expr_parse(struct lexer *lexer, const struct shash *symtab, char **errorp)
+{
+    struct expr_context ctx = {
+        .lexer = lexer,
+        .symtab = symtab,
+        .error = NULL,
+        .not = false,
+    };
+
+    struct expr *result = expr_parse__(&ctx);
+    *errorp = ctx.error;
+    /* Exactly one of "error" and "result" must be set. */
+    ovs_assert(!ctx.error != !result);
+    return result;
+}
+
+/* Like expr_parse(), but the expression is taken from 's'.
+ *
+ * In addition to the errors that expr_parse() reports, it is an error for
+ * any token to remain in 's' after a complete expression.
+ *
+ * On success, returns the new expression and stores NULL in '*errorp'.  On
+ * failure, returns NULL and stores an explanatory message in '*errorp'.
+ * The caller must eventually free the returned expression (with
+ * expr_destroy()) or error (with free()).  'errorp' must not be NULL. */
+struct expr *
+expr_parse_string(const char *s, const struct shash *symtab, char **errorp)
+{
+    struct lexer lexer;
+    struct expr *expr;
+
+    lexer_init(&lexer, s);
+    lexer_get(&lexer);
+    expr = expr_parse(&lexer, symtab, errorp);
+    /* Test the error that expr_parse() stored, not the 'errorp' pointer
+     * itself: the pointer is never null here, so testing it would disable
+     * the trailing-token check entirely. */
+    if (!*errorp && lexer.token.type != LEX_T_END) {
+        *errorp = xstrdup("Extra tokens at end of input.");
+        expr_destroy(expr);
+        expr = NULL;
+    }
+    lexer_destroy(&lexer);
+
+    return expr;
+}
+\f
+/* Allocates a zero-filled symbol, fills in the given attributes, and adds
+ * it to 'symtab' under 'name' (which must not already be present, per
+ * shash_add_assert()).
+ *
+ * The symbol keeps its own copies of 'name' and 'prereqs'.  A null or empty
+ * 'prereqs' is stored as NULL.  Returns the new symbol so that the caller
+ * can fill in the remaining members. */
+static struct expr_symbol *
+add_symbol(struct shash *symtab, const char *name, int width,
+           const char *prereqs, enum expr_level level,
+           bool must_crossproduct)
+{
+    struct expr_symbol *sym = xzalloc(sizeof *sym);
+
+    sym->name = xstrdup(name);
+    if (prereqs && prereqs[0]) {
+        sym->prereqs = xstrdup(prereqs);
+    }
+    sym->level = level;
+    sym->width = width;
+    sym->must_crossproduct = must_crossproduct;
+
+    shash_add_assert(symtab, sym->name, sym);
+    return sym;
+}
+
+/* Registers field 'id' in symbol table 'symtab' under the given 'name' and
+ * returns the new symbol.
+ *
+ * The symbol is as wide as the field.  Its level is ordinal if the field
+ * is fully maskable and nominal otherwise.
+ *
+ * Whenever 'name' is referenced, expression annotation (see
+ * expr_annotate()) will ensure that 'prereqs' are also true.  If
+ * 'must_crossproduct' is true, then conversion to flows will never attempt
+ * to use the field as a conjunctive match dimension (see "Crossproducting"
+ * in the large comment on struct expr_symbol in expr.h for an example).
+ *
+ * A given field 'id' must only be used for a single symbol in a symbol
+ * table.  Use subfields to duplicate or subset a field (a subfield may
+ * even include all the bits of its "parent" field). */
+struct expr_symbol *
+expr_symtab_add_field(struct shash *symtab, const char *name,
+                      enum mf_field_id id, const char *prereqs,
+                      bool must_crossproduct)
+{
+    const struct mf_field *field = mf_from_id(id);
+    enum expr_level level = (field->maskable == MFM_FULLY
+                             ? EXPR_L_ORDINAL
+                             : EXPR_L_NOMINAL);
+
+    struct expr_symbol *symbol = add_symbol(symtab, name, field->n_bits,
+                                            prereqs, level,
+                                            must_crossproduct);
+    symbol->field = field;
+    return symbol;
+}
+
+/* Parses 's' as a complete field reference (see parse_field()) using
+ * 'symtab' and stores the result in '*field'.
+ *
+ * Returns true on success, in which case '*errorp' is not written.  On
+ * failure, including the case where tokens remain after the field
+ * reference, returns false and stores an error message in '*errorp' that
+ * the caller must free. */
+static bool
+parse_field_from_string(const char *s, const struct shash *symtab,
+                        struct expr_field *field, char **errorp)
+{
+    struct lexer lexer;
+    lexer_init(&lexer, s);
+    lexer_get(&lexer);
+
+    struct expr_context ctx = {
+        .lexer = &lexer,
+        .symtab = symtab,
+        .error = NULL,
+        .not = false,
+    };
+
+    bool ok = false;
+    if (!parse_field(&ctx, field)) {
+        *errorp = ctx.error;
+    } else if (lexer.token.type != LEX_T_END) {
+        *errorp = xstrdup("Extra tokens at end of input.");
+    } else {
+        ok = true;
+    }
+
+    lexer_destroy(&lexer);
+
+    return ok;
+}
+
+/* Adds 'name' to 'symtab' as a subfield of a larger field and returns the
+ * new symbol, or NULL (after logging a warning) if 'subfield' cannot be
+ * parsed.
+ *
+ * 'subfield' must describe the subfield as a string, e.g.
+ * "vlan.tci[0..11]" for the low 12 bits of a larger field named
+ * "vlan.tci".  The new symbol is as wide as the selected bit range,
+ * inherits the parent symbol's level, and records 'subfield' as its
+ * expansion.
+ *
+ * Whenever 'name' is referenced, expression annotation (see
+ * expr_annotate()) will ensure that 'prereqs' are also true.
+ *
+ * NOTE(review): a parent whose level is not ordinal only produces a
+ * warning; the symbol is still added. */
+struct expr_symbol *
+expr_symtab_add_subfield(struct shash *symtab, const char *name,
+                         const char *prereqs, const char *subfield)
+{
+    struct expr_field f;
+    char *error;
+
+    if (!parse_field_from_string(subfield, symtab, &f, &error)) {
+        VLOG_WARN("%s: error parsing %s subfield (%s)", subfield, name, error);
+        free(error);
+        return NULL;
+    }
+
+    const struct expr_symbol *parent = f.symbol;
+    if (parent->level != EXPR_L_ORDINAL) {
+        VLOG_WARN("can't define %s as subfield of %s field %s",
+                  name, expr_level_to_string(parent->level), parent->name);
+    }
+
+    struct expr_symbol *symbol = add_symbol(symtab, name, f.n_bits, prereqs,
+                                            parent->level, false);
+    symbol->expansion = xstrdup(subfield);
+    return symbol;
+}
+
+/* Registers a string-valued symbol called 'name' in 'symtab', with the
+ * specified 'prereqs', and returns it.
+ *
+ * String symbols have width 0 and nominal level.  The symbol's 'field'
+ * member is set to the field that mf_from_id() returns for 'id'. */
+struct expr_symbol *
+expr_symtab_add_string(struct shash *symtab, const char *name,
+                       enum mf_field_id id, const char *prereqs)
+{
+    struct expr_symbol *symbol = add_symbol(symtab, name, 0, prereqs,
+                                            EXPR_L_NOMINAL, false);
+
+    symbol->field = mf_from_id(id);
+    return symbol;
+}
+
+/* Returns the measurement level of the value that 'expr' yields:
+ *
+ *   - A comparison is nominal if its symbol is nominal and Boolean
+ *     otherwise.
+ *
+ *   - An AND or OR has the lowest level found among its operands
+ *     (starting from ordinal).
+ *
+ *   - A Boolean constant is Boolean. */
+static enum expr_level
+expr_get_level(const struct expr *expr)
+{
+    if (expr->type == EXPR_T_CMP) {
+        if (expr->cmp.symbol->level == EXPR_L_NOMINAL) {
+            return EXPR_L_NOMINAL;
+        }
+        return EXPR_L_BOOLEAN;
+    } else if (expr->type == EXPR_T_AND || expr->type == EXPR_T_OR) {
+        enum expr_level lowest = EXPR_L_ORDINAL;
+        const struct expr *operand;
+
+        LIST_FOR_EACH (operand, node, &expr->andor) {
+            enum expr_level operand_level = expr_get_level(operand);
+
+            if (operand_level < lowest) {
+                lowest = operand_level;
+            }
+        }
+        return lowest;
+    } else if (expr->type == EXPR_T_BOOLEAN) {
+        return EXPR_L_BOOLEAN;
+    }
+    OVS_NOT_REACHED();
+}
+
+/* Parses 's' as an expression using 'symtab' and returns the expression's
+ * measurement level; the parsed expression itself is discarded.
+ *
+ * '*errorp' is set as by expr_parse_string().  If parsing fails, the
+ * return value is EXPR_L_NOMINAL. */
+static enum expr_level
+expr_parse_level(const char *s, const struct shash *symtab, char **errorp)
+{
+    struct expr *parsed = expr_parse_string(s, symtab, errorp);
+    if (!parsed) {
+        return EXPR_L_NOMINAL;
+    }
+
+    enum expr_level level = expr_get_level(parsed);
+    expr_destroy(parsed);
+    return level;
+}
+
+/* Adds to 'symtab' a predicate symbol called 'name' whose value is the
+ * Boolean expression 'expansion', and returns the new symbol.  For
+ * example, "ip4 && ip4.proto == 1" might be an appropriate predicate
+ * named "tcp4".
+ *
+ * The symbol is 1 bit wide, has no prerequisites of its own, and takes the
+ * measurement level of the parsed 'expansion'.  If 'expansion' does not
+ * parse, logs a warning and returns NULL without adding anything. */
+struct expr_symbol *
+expr_symtab_add_predicate(struct shash *symtab, const char *name,
+                          const char *expansion)
+{
+    char *error;
+    enum expr_level level = expr_parse_level(expansion, symtab, &error);
+
+    if (!error) {
+        struct expr_symbol *symbol = add_symbol(symtab, name, 1, NULL,
+                                                level, false);
+        symbol->expansion = xstrdup(expansion);
+        return symbol;
+    }
+
+    VLOG_WARN("%s: error parsing %s expansion (%s)",
+              expansion, name, error);
+    free(error);
+    return NULL;
+}
+
+/* Removes every symbol from 'symtab' and frees it, together with the name,
+ * prerequisite and expansion strings that the symbol owns. */
+void
+expr_symtab_destroy(struct shash *symtab)
+{
+    struct shash_node *entry, *next_entry;
+
+    SHASH_FOR_EACH_SAFE (entry, next_entry, symtab) {
+        /* Grab the symbol before its table entry goes away. */
+        struct expr_symbol *sym = entry->data;
+
+        shash_delete(symtab, entry);
+        free(sym->expansion);
+        free(sym->prereqs);
+        free(sym->name);
+        free(sym);
+    }
+}
+\f
+/* Cloning. */
+
+/* Returns a copy of comparison expression 'expr'.  For a string symbol
+ * (width 0), the copy gets its own duplicate of the string rather than
+ * sharing the original's. */
+static struct expr *
+expr_clone_cmp(struct expr *expr)
+{
+    struct expr *copy = xmemdup(expr, sizeof *expr);
+
+    if (copy->cmp.symbol->width == 0) {
+        copy->cmp.string = xstrdup(expr->cmp.string);
+    }
+    return copy;
+}
+
+/* Returns a copy of AND/OR expression 'expr': a new node of the same type
+ * whose operands are clones of the original's operands, in the same
+ * order. */
+static struct expr *
+expr_clone_andor(struct expr *expr)
+{
+    struct expr *copy = expr_create_andor(expr->type);
+    struct expr *operand;
+
+    LIST_FOR_EACH (operand, node, &expr->andor) {
+        list_push_back(&copy->andor, &expr_clone(operand)->node);
+    }
+    return copy;
+}
+
+/* Returns a deep copy of 'expr'.  Neither the returned expression nor any
+ * of its substructure is shared with 'expr', so each may be modified or
+ * destroyed independently of the other. */
+struct expr *
+expr_clone(struct expr *expr)
+{
+    if (expr->type == EXPR_T_CMP) {
+        return expr_clone_cmp(expr);
+    } else if (expr->type == EXPR_T_AND || expr->type == EXPR_T_OR) {
+        return expr_clone_andor(expr);
+    } else if (expr->type == EXPR_T_BOOLEAN) {
+        return expr_create_boolean(expr->boolean);
+    }
+    OVS_NOT_REACHED();
+}
+\f
+/* Frees 'expr' along with everything it owns: the string of a string
+ * comparison, and (recursively) every operand of an AND or OR.  Does
+ * nothing if 'expr' is NULL. */
+void
+expr_destroy(struct expr *expr)
+{
+    if (!expr) {
+        return;
+    }
+
+    if (expr->type == EXPR_T_CMP) {
+        if (!expr->cmp.symbol->width) {
+            free(expr->cmp.string);
+        }
+    } else if (expr->type == EXPR_T_AND || expr->type == EXPR_T_OR) {
+        struct expr *operand, *next_operand;
+
+        LIST_FOR_EACH_SAFE (operand, next_operand, node, &expr->andor) {
+            list_remove(&operand->node);
+            expr_destroy(operand);
+        }
+    }
+    /* A Boolean constant owns nothing beyond the node itself. */
+
+    free(expr);
+}
+\f
+/* Annotation. */
+
+/* An element in a linked list of symbols.
+ *
+ * Used to detect when a symbol is being expanded recursively, to allow
+ * flagging an error. */
+struct annotation_nesting {
+ struct ovs_list node; /* Element in the 'nesting' list. */
+ const struct expr_symbol *symbol; /* Symbol currently being expanded. */
+};
+
+struct expr *expr_annotate__(struct expr *, const struct shash *symtab,
+ struct ovs_list *nesting, char **errorp);
+
+/* Parses the expression in string 's' using 'symtab' and then annotates the
+ * result, with 'nesting' tracking the symbols currently being expanded.
+ *
+ * On success, returns the annotated expression and stores NULL in '*errorp'.
+ * If either step fails, returns NULL and stores in '*errorp' a newly
+ * allocated message that wraps the underlying error. */
+static struct expr *
+parse_and_annotate(const char *s, const struct shash *symtab,
+                   struct ovs_list *nesting, char **errorp)
+{
+    char *detail;
+    struct expr *parsed = expr_parse_string(s, symtab, &detail);
+    struct expr *result = (parsed
+                           ? expr_annotate__(parsed, symtab, nesting, &detail)
+                           : NULL);
+
+    if (result) {
+        *errorp = NULL;
+        return result;
+    }
+
+    *errorp = xasprintf("Error parsing expression `%s' encountered as "
+                        "prerequisite or predicate of initial expression: "
+                        "%s", s, detail);
+    free(detail);
+    return NULL;
+}
+
+/* Annotates the comparison 'expr', taking ownership of it.
+ *
+ * 'nesting' lists the symbols whose expansion is currently in progress.  If
+ * 'expr''s symbol is already on that list, the expansion is recursive and an
+ * error is reported.  Otherwise the symbol's prerequisites (if any) are
+ * parsed and annotated, and the symbol's expansion (if any) is applied:
+ *
+ *     - For an EXPR_L_ORDINAL symbol, the expansion names a field: 'expr' is
+ *       retargeted at that field's symbol, with its value and mask shifted
+ *       by the field's offset.
+ *
+ *     - Otherwise the expansion is an expression that replaces 'expr',
+ *       inverted with expr_not() when the comparison tests for falsehood.
+ *
+ * On success, stores NULL in '*errorp' and returns the resulting expression,
+ * ANDed with the prerequisites if there are any.  On failure, destroys
+ * 'expr', leaves an error message in '*errorp', and returns NULL. */
+static struct expr *
+expr_annotate_cmp(struct expr *expr, const struct shash *symtab,
+                  struct ovs_list *nesting, char **errorp)
+{
+    const struct expr_symbol *symbol = expr->cmp.symbol;
+    const struct annotation_nesting *iter;
+    LIST_FOR_EACH (iter, node, nesting) {
+        if (iter->symbol == symbol) {
+            *errorp = xasprintf("Recursive expansion of symbol `%s'.",
+                                symbol->name);
+            expr_destroy(expr);
+            return NULL;
+        }
+    }
+
+    /* Record that 'symbol' is being expanded for the duration of this call,
+     * so that nested expansions can detect recursion. */
+    struct annotation_nesting an;
+    an.symbol = symbol;
+    list_push_back(nesting, &an.node);
+
+    struct expr *prereqs = NULL;
+    if (symbol->prereqs) {
+        prereqs = parse_and_annotate(symbol->prereqs, symtab, nesting, errorp);
+        if (!prereqs) {
+            goto error;
+        }
+    }
+
+    if (symbol->expansion) {
+        if (symbol->level == EXPR_L_ORDINAL) {
+            struct expr_field field;
+
+            if (!parse_field_from_string(symbol->expansion, symtab,
+                                         &field, errorp)) {
+                goto error;
+            }
+
+            expr->cmp.symbol = field.symbol;
+            mf_subvalue_shift(&expr->cmp.value, field.ofs);
+            mf_subvalue_shift(&expr->cmp.mask, field.ofs);
+        } else {
+            struct expr *expansion;
+
+            expansion = parse_and_annotate(symbol->expansion, symtab,
+                                           nesting, errorp);
+            if (!expansion) {
+                goto error;
+            }
+
+            /* The comparison is "positive" if it tests the predicate for
+             * truth: either "== 1" or "!= 0". */
+            bool positive = (expr->cmp.value.integer & htonll(1)) != 0;
+            positive ^= expr->cmp.relop == EXPR_R_NE;
+            if (!positive) {
+                expr_not(expansion);
+            }
+
+            expr_destroy(expr);
+            expr = expansion;
+        }
+    }
+
+    /* Always report success explicitly.  Without this, a symbol that has
+     * neither prerequisites nor an expansion would leave '*errorp'
+     * untouched, unlike the other cases handled by expr_annotate__(). */
+    *errorp = NULL;
+    list_remove(&an.node);
+    return prereqs ? expr_combine(EXPR_T_AND, expr, prereqs) : expr;
+
+error:
+    expr_destroy(expr);
+    expr_destroy(prereqs);
+    list_remove(&an.node);
+    return NULL;
+}
+
+/* Recursive worker for expr_annotate().  Takes ownership of 'expr'.
+ *
+ * 'nesting' is the list of symbols whose expansion is in progress, used by
+ * expr_annotate_cmp() to detect recursive expansion.
+ *
+ * Returns the annotated expression, or NULL on failure, in which case 'expr'
+ * has been destroyed.  The AND, OR, and BOOLEAN cases store NULL in '*errorp'
+ * on success. */
+struct expr *
+expr_annotate__(struct expr *expr, const struct shash *symtab,
+ struct ovs_list *nesting, char **errorp)
+{
+ switch (expr->type) {
+ case EXPR_T_CMP:
+ return expr_annotate_cmp(expr, symtab, nesting, errorp);
+
+ case EXPR_T_AND:
+ case EXPR_T_OR: {
+ struct expr *sub, *next;
+
+ /* Detach each subexpression, annotate it, and put the result back in
+ * the same position (before 'next'). */
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ list_remove(&sub->node);
+ struct expr *new_sub = expr_annotate__(sub, symtab,
+ nesting, errorp);
+ if (!new_sub) {
+ /* 'sub' was already destroyed by the failed call; release the
+ * remainder of the tree. */
+ expr_destroy(expr);
+ return NULL;
+ }
+ expr_insert_andor(expr, next, new_sub);
+ }
+ *errorp = NULL;
+ return expr;
+ }
+
+ case EXPR_T_BOOLEAN:
+ *errorp = NULL;
+ return expr;
+
+ default:
+ OVS_NOT_REACHED();
+ }
+}
+
+/* "Annotates" 'expr', taking ownership of it.  Annotation:
+ *
+ *     - Applies prerequisites: each comparison whose field has a
+ *       prerequisite is ANDed with that prerequisite.
+ *
+ *     - Expands subfield symbols into references to their underlying field
+ *       symbols, suitably shifted.
+ *
+ *     - Expands predicate symbols into the expressions they stand for.
+ *
+ * Each step recurses as needed.  Symbols are looked up in 'symtab'.  On
+ * failure, 'expr' is destroyed, an error message is left in '*errorp', and
+ * NULL is returned. */
+struct expr *
+expr_annotate(struct expr *expr, const struct shash *symtab, char **errorp)
+{
+    /* No symbol is being expanded yet at the outermost level. */
+    struct ovs_list nesting;
+
+    list_init(&nesting);
+    return expr_annotate__(expr, symtab, &nesting, errorp);
+}
+\f
+/* Rewrites the != comparison 'expr' as a disjunction of single-bit ==
+ * comparisons on the same symbol: for each bit position that the scan of the
+ * mask yields within the symbol's width, one comparison requiring that bit
+ * to be the complement of the corresponding bit in 'expr''s value.
+ *
+ * Destroys 'expr' and returns the replacement.  Asserts that at least one
+ * such bit exists. */
+static struct expr *
+expr_simplify_ne(struct expr *expr)
+{
+ struct expr *new = NULL;
+ const union mf_subvalue *value = &expr->cmp.value;
+ const union mf_subvalue *mask = &expr->cmp.mask;
+ int w = expr->cmp.symbol->width;
+ int i;
+
+ for (i = 0; (i = bitwise_scan(mask, sizeof *mask, true, i, w)) < w; i++) {
+ struct expr *e;
+
+ /* Build "bit i == !value[i]" with a mask that covers only bit i. */
+ e = xzalloc(sizeof *e);
+ e->type = EXPR_T_CMP;
+ e->cmp.symbol = expr->cmp.symbol;
+ e->cmp.relop = EXPR_R_EQ;
+ bitwise_put_bit(&e->cmp.value, sizeof e->cmp.value, i,
+ !bitwise_get_bit(value, sizeof *value, i));
+ bitwise_put1(&e->cmp.mask, sizeof e->cmp.mask, i);
+
+ new = expr_combine(EXPR_T_OR, new, e);
+ }
+ ovs_assert(new);
+
+ expr_destroy(expr);
+
+ return new;
+}
+
+/* Rewrites the relational comparison 'expr' (one that is neither == nor !=)
+ * as a disjunction of == comparisons, or as Boolean false if no alternative
+ * is produced.
+ *
+ * The compared bits are the range ['start', 'end') reported by
+ * find_bitwise_range() for the mask.  For <= and >=, the first alternative is
+ * plain equality with the original value.  Then, for every bit 'z' in the
+ * range whose value bit is 1 (for < and <=) or 0 (for > and >=), an
+ * alternative is added that toggles bit 'z' and clears the value and mask
+ * bits below it, that is, bits ['start', 'z').
+ *
+ * Destroys 'expr' and returns the replacement. */
+static struct expr *
+expr_simplify_relational(struct expr *expr)
+{
+ const union mf_subvalue *value = &expr->cmp.value;
+ int start, n_bits, end;
+
+ find_bitwise_range(&expr->cmp.mask, expr->cmp.symbol->width,
+ &start, &n_bits);
+ ovs_assert(n_bits > 0);
+ end = start + n_bits;
+
+ struct expr *new;
+ if (expr->cmp.relop == EXPR_R_LE || expr->cmp.relop == EXPR_R_GE) {
+ /* The "or equal" part of the comparison. */
+ new = xmemdup(expr, sizeof *expr);
+ new->cmp.relop = EXPR_R_EQ;
+ } else {
+ new = NULL;
+ }
+
+ /* 'b' is the bit value to look for in 'value'. */
+ bool b = expr->cmp.relop == EXPR_R_LT || expr->cmp.relop == EXPR_R_LE;
+ for (int z = bitwise_scan(value, sizeof *value, b, start, end);
+ z < end;
+ z = bitwise_scan(value, sizeof *value, b, z + 1, end)) {
+ struct expr *e;
+
+ e = xmemdup(expr, sizeof *expr);
+ e->cmp.relop = EXPR_R_EQ;
+ bitwise_toggle_bit(&e->cmp.value, sizeof e->cmp.value, z);
+ bitwise_zero(&e->cmp.value, sizeof e->cmp.value, start, z - start);
+ bitwise_zero(&e->cmp.mask, sizeof e->cmp.mask, start, z - start);
+ new = expr_combine(EXPR_T_OR, new, e);
+ }
+ expr_destroy(expr);
+ return new ? new : expr_create_boolean(false);
+}
+
+/* Takes ownership of 'expr' and returns an equivalent expression in which
+ * comparisons on symbols of nonzero width test only for equality
+ * (EXPR_R_EQ).  Comparisons on zero-width symbols and Boolean literals are
+ * returned unchanged; AND and OR nodes have each subexpression simplified
+ * in place. */
+struct expr *
+expr_simplify(struct expr *expr)
+{
+    switch (expr->type) {
+    case EXPR_T_CMP:
+        if (expr->cmp.relop == EXPR_R_EQ || !expr->cmp.symbol->width) {
+            return expr;
+        } else if (expr->cmp.relop == EXPR_R_NE) {
+            return expr_simplify_ne(expr);
+        } else {
+            return expr_simplify_relational(expr);
+        }
+
+    case EXPR_T_AND:
+    case EXPR_T_OR: {
+        struct expr *child, *following;
+
+        LIST_FOR_EACH_SAFE (child, following, node, &expr->andor) {
+            list_remove(&child->node);
+            struct expr *simplified = expr_simplify(child);
+            expr_insert_andor(expr, following, simplified);
+        }
+        return expr_fix(expr);
+    }
+
+    case EXPR_T_BOOLEAN:
+        return expr;
+    }
+    OVS_NOT_REACHED();
+}
+\f
+/* Determines whether 'expr' is a "cmp": either a single comparison, or an
+ * AND/OR tree all of whose leaves are comparisons on one and the same
+ * symbol.  Returns that symbol if so, otherwise NULL (in particular for any
+ * tree that contains a Boolean literal or mixes symbols). */
+static const struct expr_symbol *
+expr_is_cmp(const struct expr *expr)
+{
+    switch (expr->type) {
+    case EXPR_T_CMP:
+        return expr->cmp.symbol;
+
+    case EXPR_T_BOOLEAN:
+        return NULL;
+
+    case EXPR_T_AND:
+    case EXPR_T_OR: {
+        const struct expr_symbol *common = NULL;
+        struct expr *child;
+
+        LIST_FOR_EACH (child, node, &expr->andor) {
+            const struct expr_symbol *s = expr_is_cmp(child);
+
+            if (!s) {
+                return NULL;
+            }
+            if (common && common != s) {
+                return NULL;
+            }
+            common = s;
+        }
+        return common;
+    }
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Sort key for one subexpression, as filled in by expr_sort() and compared
+ * by compare_expr_sort(). */
+struct expr_sort {
+ struct expr *expr; /* The subexpression itself. */
+ const struct expr_symbol *relop; /* expr_is_cmp() of 'expr'; NULL if none. */
+ enum expr_type type; /* EXPR_T_CMP if 'relop' is nonnull, else expr->type. */
+};
+
+/* qsort() comparison function for arrays of "struct expr_sort".
+ *
+ * Orders elements first by 'type'.  Elements that are comparisons on a single
+ * symbol (nonnull 'relop') are then ordered by symbol name and, for equal
+ * names, by the type of the underlying expression.  AND and OR elements are
+ * ordered by their number of subexpressions.  Anything else compares equal. */
+static int
+compare_expr_sort(const void *a_, const void *b_)
+{
+    const struct expr_sort *a = a_;
+    const struct expr_sort *b = b_;
+
+    if (a->type != b->type) {
+        return a->type < b->type ? -1 : 1;
+    } else if (a->relop) {
+        int cmp = strcmp(a->relop->name, b->relop->name);
+        if (cmp) {
+            return cmp;
+        }
+
+        /* Tie-break on the real expression types.  Both sides must be read
+         * here: taking both values from 'a' would make this always report
+         * equality. */
+        enum expr_type a_type = a->expr->type;
+        enum expr_type b_type = b->expr->type;
+        return a_type < b_type ? -1 : a_type > b_type;
+    } else if (a->type == EXPR_T_AND || a->type == EXPR_T_OR) {
+        size_t a_len = list_size(&a->expr->andor);
+        size_t b_len = list_size(&b->expr->andor);
+        return a_len < b_len ? -1 : a_len > b_len;
+    } else {
+        return 0;
+    }
+}
+
+/* Declared here because crush_and() and crush_or() call crush_cmps(), which
+ * is defined after them. */
+static struct expr *crush_cmps(struct expr *, const struct expr_symbol *);
+
+/* Crushes 'expr', an AND with at least two subexpressions, each of which is
+ * crushed in turn with crush_cmps() for 'symbol'.
+ *
+ * Plain comparisons among the crushed subexpressions are intersected into a
+ * single value/mask pair; disjunctions are kept and then multiplied out
+ * against that pair (and against each other).  Consumes 'expr' on every path
+ * and returns a newly built or reused expression: a single comparison, an OR
+ * of comparisons, or a Boolean. */
+static struct expr *
+crush_and(struct expr *expr, const struct expr_symbol *symbol)
+{
+ ovs_assert(!list_is_short(&expr->andor));
+
+ /* Accumulated intersection of all plain comparisons seen so far.  An
+ * all-zeros mask means that nothing has been constrained yet. */
+ union mf_subvalue value, mask;
+ memset(&value, 0, sizeof value);
+ memset(&mask, 0, sizeof mask);
+
+ struct expr *sub, *next = NULL;
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ list_remove(&sub->node);
+ struct expr *new = crush_cmps(sub, symbol);
+ switch (new->type) {
+ case EXPR_T_CMP:
+ if (!mf_subvalue_intersect(&value, &mask,
+ &new->cmp.value, &new->cmp.mask,
+ &value, &mask)) {
+ /* Contradictory comparisons: the whole AND is false. */
+ expr_destroy(new);
+ expr_destroy(expr);
+ return expr_create_boolean(false);
+ }
+ expr_destroy(new);
+ break;
+ case EXPR_T_AND:
+ OVS_NOT_REACHED();
+ case EXPR_T_OR:
+ /* Keep the disjunction in the list, in its original position. */
+ list_insert(&next->node, &new->node);
+ break;
+ case EXPR_T_BOOLEAN:
+ if (!new->boolean) {
+ expr_destroy(expr);
+ return new;
+ }
+ /* "true" contributes nothing to an AND. */
+ free(new);
+ break;
+ }
+ }
+ /* At this point 'expr' contains only the disjunctions that survived. */
+ if (list_is_empty(&expr->andor)) {
+ if (is_all_zeros(&mask, sizeof mask)) {
+ expr_destroy(expr);
+ return expr_create_boolean(true);
+ } else {
+ struct expr *cmp;
+ cmp = xmalloc(sizeof *cmp);
+ cmp->type = EXPR_T_CMP;
+ cmp->cmp.symbol = symbol;
+ cmp->cmp.relop = EXPR_R_EQ;
+ cmp->cmp.value = value;
+ cmp->cmp.mask = mask;
+ expr_destroy(expr);
+ return cmp;
+ }
+ } else if (list_is_short(&expr->andor)) {
+ /* Transform "a && (b || c || d)" into "ab || ac || ad" where "ab" is
+ * computed as "a && b", etc. */
+ struct expr *disjuncts = expr_from_node(list_pop_front(&expr->andor));
+ struct expr *or;
+
+ or = xmalloc(sizeof *or);
+ or->type = EXPR_T_OR;
+ list_init(&or->andor);
+
+ ovs_assert(disjuncts->type == EXPR_T_OR);
+ LIST_FOR_EACH_SAFE (sub, next, node, &disjuncts->andor) {
+ ovs_assert(sub->type == EXPR_T_CMP);
+ list_remove(&sub->node);
+ /* Intersect in place; alternatives that contradict the
+ * accumulated comparison are dropped. */
+ if (mf_subvalue_intersect(&value, &mask,
+ &sub->cmp.value, &sub->cmp.mask,
+ &sub->cmp.value, &sub->cmp.mask)) {
+ list_push_back(&or->andor, &sub->node);
+ } else {
+ free(sub);
+ }
+ }
+ free(disjuncts);
+ free(expr);
+ if (list_is_empty(&or->andor)) {
+ free(or);
+ return expr_create_boolean(false);
+ } else if (list_is_short(&or->andor)) {
+ struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
+ free(or);
+ return cmp;
+ } else {
+ return or;
+ }
+ } else {
+ /* Transform "x && (a0 || a1) && (b0 || b1) && ..." into
+ * "(xa0b0 || xa0b1 || xa1b0 || xa1b1) && ...". */
+ struct expr *as = expr_from_node(list_pop_front(&expr->andor));
+ struct expr *bs = expr_from_node(list_pop_front(&expr->andor));
+ struct expr *new = NULL;
+ struct expr *or;
+
+ or = xmalloc(sizeof *or);
+ or->type = EXPR_T_OR;
+ list_init(&or->andor);
+
+ struct expr *a;
+ LIST_FOR_EACH (a, node, &as->andor) {
+ union mf_subvalue a_value, a_mask;
+
+ ovs_assert(a->type == EXPR_T_CMP);
+ if (!mf_subvalue_intersect(&value, &mask,
+ &a->cmp.value, &a->cmp.mask,
+ &a_value, &a_mask)) {
+ continue;
+ }
+
+ struct expr *b;
+ LIST_FOR_EACH (b, node, &bs->andor) {
+ ovs_assert(b->type == EXPR_T_CMP);
+ /* 'new' is a scratch node, reused across iterations until an
+ * intersection succeeds and it is handed over to 'or'. */
+ if (!new) {
+ new = xmalloc(sizeof *new);
+ new->type = EXPR_T_CMP;
+ new->cmp.symbol = symbol;
+ new->cmp.relop = EXPR_R_EQ;
+ }
+ if (mf_subvalue_intersect(&a_value, &a_mask,
+ &b->cmp.value, &b->cmp.mask,
+ &new->cmp.value, &new->cmp.mask)) {
+ list_push_back(&or->andor, &new->node);
+ new = NULL;
+ }
+ }
+ }
+ expr_destroy(as);
+ expr_destroy(bs);
+ free(new);
+
+ /* Recombine the product with whatever disjunctions remain in 'expr'
+ * and crush again until none are left. */
+ if (list_is_empty(&or->andor)) {
+ expr_destroy(expr);
+ free(or);
+ return expr_create_boolean(false);
+ } else if (list_is_short(&or->andor)) {
+ struct expr *cmp = expr_from_node(list_pop_front(&or->andor));
+ free(or);
+ if (list_is_empty(&expr->andor)) {
+ expr_destroy(expr);
+ return crush_cmps(cmp, symbol);
+ } else {
+ return crush_cmps(expr_combine(EXPR_T_AND, cmp, expr), symbol);
+ }
+ } else if (!list_is_empty(&expr->andor)) {
+ struct expr *e = expr_combine(EXPR_T_AND, or, expr);
+ ovs_assert(!list_is_short(&e->andor));
+ return crush_cmps(e, symbol);
+ } else {
+ expr_destroy(expr);
+ return crush_cmps(or, symbol);
+ }
+ }
+}
+
+/* qsort() comparison function for arrays of pointers to comparison
+ * expressions.  Orders them by the raw bytes of their values and, for equal
+ * values, by the raw bytes of their masks. */
+static int
+compare_expr(const void *a_, const void *b_)
+{
+    const struct expr *lhs = *(const struct expr *const *) a_;
+    const struct expr *rhs = *(const struct expr *const *) b_;
+
+    int diff = memcmp(&lhs->cmp.value, &rhs->cmp.value, sizeof lhs->cmp.value);
+    if (diff) {
+        return diff;
+    }
+    return memcmp(&lhs->cmp.mask, &rhs->cmp.mask, sizeof lhs->cmp.mask);
+}
+
+/* Crushes 'expr', an OR, for 'symbol': crushes every subexpression with
+ * crush_cmps() and then removes subexpressions whose value and mask are
+ * byte-for-byte duplicates of another one.  Takes ownership of 'expr' and
+ * returns the result, which is no longer an OR if expr_fix() reduced it to
+ * something else. */
+static struct expr *
+crush_or(struct expr *expr, const struct expr_symbol *symbol)
+{
+ struct expr *sub, *next = NULL;
+
+ /* First, crush all the subexpressions. That might eliminate the
+ * OR-expression entirely; if so, return the result. */
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ list_remove(&sub->node);
+ expr_insert_andor(expr, next, crush_cmps(sub, symbol));
+ }
+ expr = expr_fix(expr);
+ if (expr->type != EXPR_T_OR) {
+ return expr;
+ }
+
+ /* Eliminate duplicates by sorting the subexpressions. */
+ size_t n = list_size(&expr->andor);
+ struct expr **subs = xmalloc(n * sizeof *subs);
+
+ size_t i = 0;
+ LIST_FOR_EACH (sub, node, &expr->andor) {
+ subs[i++] = sub;
+ }
+ ovs_assert(i == n);
+
+ qsort(subs, n, sizeof *subs, compare_expr);
+
+ /* Rebuild the list in sorted order, keeping only the first of each run of
+ * identical value/mask pairs. */
+ list_init(&expr->andor);
+ list_push_back(&expr->andor, &subs[0]->node);
+ for (i = 1; i < n; i++) {
+ struct expr *a = expr_from_node(list_back(&expr->andor));
+ struct expr *b = subs[i];
+ if (memcmp(&a->cmp.value, &b->cmp.value, sizeof a->cmp.value)
+ || memcmp(&a->cmp.mask, &b->cmp.mask, sizeof a->cmp.mask)) {
+ list_push_back(&expr->andor, &b->node);
+ } else {
+ free(b);
+ }
+ }
+ free(subs);
+ return expr_fix(expr);
+}
+
+/* Crushes 'expr', which must be a cmp in the sense of expr_is_cmp() on
+ * 'symbol', into a single comparison, a disjunction of comparisons, or a
+ * Boolean.  AND and OR nodes are delegated to crush_and() and crush_or();
+ * comparisons and Booleans are already in final form and are returned
+ * as-is. */
+static struct expr *
+crush_cmps(struct expr *expr, const struct expr_symbol *symbol)
+{
+    switch (expr->type) {
+    case EXPR_T_AND:
+        return crush_and(expr, symbol);
+
+    case EXPR_T_OR:
+        return crush_or(expr, symbol);
+
+    case EXPR_T_CMP:
+    case EXPR_T_BOOLEAN:
+        return expr;
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Sorts the subexpressions of 'expr', an AND, with compare_expr_sort() and
+ * rebuilds it.  Each run of adjacent subexpressions that are cmps on the
+ * same symbol (see expr_is_cmp()) is combined and crushed with crush_cmps()
+ * into a single term; other subexpressions are re-added unchanged.
+ *
+ * Takes ownership of 'expr'.  If any run crushes to Boolean false, the
+ * remaining subexpressions and 'expr' are destroyed and that Boolean is
+ * returned.  Runs that crush to Boolean true are dropped. */
+static struct expr *
+expr_sort(struct expr *expr)
+{
+    size_t n = list_size(&expr->andor);
+    struct expr_sort *subs = xmalloc(n * sizeof *subs);
+    struct expr *sub;
+    size_t i;
+
+    i = 0;
+    LIST_FOR_EACH (sub, node, &expr->andor) {
+        subs[i].expr = sub;
+        subs[i].relop = expr_is_cmp(sub);
+        subs[i].type = subs[i].relop ? EXPR_T_CMP : sub->type;
+        i++;
+    }
+    ovs_assert(i == n);
+
+    qsort(subs, n, sizeof *subs, compare_expr_sort);
+
+    /* Empty the list; the subexpressions now live only in 'subs' and are
+     * added back one term at a time. */
+    list_init(&expr->andor);
+    for (i = 0; i < n; ) {
+        if (subs[i].relop) {
+            /* Find the end 'j' of the run on the same symbol as subs[i]. */
+            size_t j;
+            for (j = i + 1; j < n; j++) {
+                if (subs[i].relop != subs[j].relop) {
+                    break;
+                }
+            }
+
+            struct expr *crushed;
+            if (j == i + 1) {
+                crushed = crush_cmps(subs[i].expr, subs[i].relop);
+            } else {
+                struct expr *combined = subs[i].expr;
+                for (size_t k = i + 1; k < j; k++) {
+                    combined = expr_combine(EXPR_T_AND, combined,
+                                            subs[k].expr);
+                }
+                ovs_assert(!list_is_short(&combined->andor));
+                crushed = crush_cmps(combined, subs[i].relop);
+            }
+            if (crushed->type == EXPR_T_BOOLEAN) {
+                if (!crushed->boolean) {
+                    /* The whole conjunction is false: discard everything
+                     * not yet consumed and return the Boolean. */
+                    for (size_t k = j; k < n; k++) {
+                        expr_destroy(subs[k].expr);
+                    }
+                    expr_destroy(expr);
+                    expr = crushed;
+                    break;
+                } else {
+                    free(crushed);
+                }
+            } else {
+                expr = expr_combine(EXPR_T_AND, expr, crushed);
+            }
+            i = j;
+        } else {
+            expr = expr_combine(EXPR_T_AND, expr, subs[i++].expr);
+        }
+    }
+    free(subs);
+
+    return expr;
+}
+
+/* Declared here because expr_normalize_and() and expr_normalize_or() call
+ * each other. */
+static struct expr *expr_normalize_or(struct expr *expr);
+
+/* Returns 'expr', which is an AND, reduced to OR(AND(clause)) where
+ * a clause is a cmp or a disjunction of cmps on a single field.
+ *
+ * Takes ownership of 'expr'.  The result may also be a Boolean (when
+ * expr_sort() or a contradiction between comparisons reduces it to one) or a
+ * single subexpression when only one remains. */
+static struct expr *
+expr_normalize_and(struct expr *expr)
+{
+ ovs_assert(expr->type == EXPR_T_AND);
+
+ expr = expr_sort(expr);
+ if (expr->type != EXPR_T_AND) {
+ ovs_assert(expr->type == EXPR_T_BOOLEAN);
+ return expr;
+ }
+
+ /* Merge adjacent comparisons on the same symbol: 'a' is folded into its
+ * successor 'b' and removed, or the AND is false if they contradict. */
+ struct expr *a, *b;
+ LIST_FOR_EACH_SAFE (a, b, node, &expr->andor) {
+ if (&b->node == &expr->andor
+ || a->type != EXPR_T_CMP || b->type != EXPR_T_CMP) {
+ /* Nothing to merge: 'a' is the last element, or one of the pair is
+ * not a plain comparison. */
+ } else if (a->cmp.symbol != b->cmp.symbol) {
+ continue;
+ } else if (mf_subvalue_intersect(&a->cmp.value, &a->cmp.mask,
+ &b->cmp.value, &b->cmp.mask,
+ &b->cmp.value, &b->cmp.mask)) {
+ list_remove(&a->node);
+ expr_destroy(a);
+ } else {
+ expr_destroy(expr);
+ return expr_create_boolean(false);
+ }
+ }
+ if (list_is_short(&expr->andor)) {
+ struct expr *sub = expr_from_node(list_front(&expr->andor));
+ free(expr);
+ return sub;
+ }
+
+ struct expr *sub;
+ LIST_FOR_EACH (sub, node, &expr->andor) {
+ if (sub->type == EXPR_T_CMP) {
+ continue;
+ }
+
+ ovs_assert(sub->type == EXPR_T_OR);
+ const struct expr_symbol *symbol = expr_is_cmp(sub);
+ if (!symbol || symbol->must_crossproduct) {
+ /* 'sub' cannot stay as a clause, so distribute the AND over it:
+ * build one AND per alternative 'k' of 'sub', made of clones of
+ * every other term plus 'k' in place of 'sub', then normalize the
+ * resulting OR. */
+ struct expr *or = expr_create_andor(EXPR_T_OR);
+ struct expr *k;
+
+ LIST_FOR_EACH (k, node, &sub->andor) {
+ struct expr *and = expr_create_andor(EXPR_T_AND);
+ struct expr *m;
+
+ LIST_FOR_EACH (m, node, &expr->andor) {
+ struct expr *term = m == sub ? k : m;
+ if (term->type == EXPR_T_AND) {
+ /* Splice a nested AND's terms in directly rather than
+ * nesting an AND inside an AND. */
+ struct expr *p;
+
+ LIST_FOR_EACH (p, node, &term->andor) {
+ struct expr *new = expr_clone(p);
+ list_push_back(&and->andor, &new->node);
+ }
+ } else {
+ struct expr *new = expr_clone(term);
+ list_push_back(&and->andor, &new->node);
+ }
+ }
+ list_push_back(&or->andor, &and->node);
+ }
+ expr_destroy(expr);
+ return expr_normalize_or(or);
+ }
+ }
+ return expr;
+}
+
+/* Normalizes 'expr', an OR whose subexpressions are each an AND or a plain
+ * comparison (anything else trips the assertion).  Every AND subexpression is
+ * normalized with expr_normalize_and() and the result put back in its place.
+ *
+ * Takes ownership of 'expr'.  Returns Boolean true as soon as a subexpression
+ * normalizes to true, Boolean false if no subexpressions remain, the sole
+ * subexpression if exactly one remains, and otherwise 'expr' itself. */
+static struct expr *
+expr_normalize_or(struct expr *expr)
+{
+ struct expr *sub, *next;
+
+ LIST_FOR_EACH_SAFE (sub, next, node, &expr->andor) {
+ if (sub->type == EXPR_T_AND) {
+ list_remove(&sub->node);
+
+ struct expr *new = expr_normalize_and(sub);
+ if (new->type == EXPR_T_BOOLEAN) {
+ if (new->boolean) {
+ expr_destroy(expr);
+ return new;
+ }
+ /* "false" contributes nothing to an OR; drop it. */
+ free(new);
+ } else {
+ expr_insert_andor(expr, next, new);
+ }
+ } else {
+ ovs_assert(sub->type == EXPR_T_CMP);
+ }
+ }
+ if (list_is_empty(&expr->andor)) {
+ free(expr);
+ return expr_create_boolean(false);
+ }
+ if (list_is_short(&expr->andor)) {
+ struct expr *sub = expr_from_node(list_pop_front(&expr->andor));
+ free(expr);
+ return sub;
+ }
+
+ return expr;
+}
+
+/* Takes ownership of 'expr', which is either a constant "true" or "false" or
+ * an expression built only from relationals, AND, and OR, and which must
+ * already have been simplified with expr_simplify().
+ *
+ * Returns either a constant "true" or "false" or 'expr' reduced to the form
+ * OR(AND(clause)), where a clause is a cmp or a disjunction of cmps on a
+ * single field.  That form matters because it can be converted directly to
+ * OpenFlow flows using the Open vSwitch "conjunctive match" extension. */
+struct expr *
+expr_normalize(struct expr *expr)
+{
+    switch (expr->type) {
+    case EXPR_T_AND:
+        return expr_normalize_and(expr);
+
+    case EXPR_T_OR:
+        return expr_normalize_or(expr);
+
+    case EXPR_T_CMP:
+    case EXPR_T_BOOLEAN:
+        /* Terminal nodes are already normalized. */
+        return expr;
+    }
+    OVS_NOT_REACHED();
+}
+\f
+/* Allocates and returns a new 'struct expr_match'.  Its 'match' member is a
+ * copy of '*m' if 'm' is nonnull, and otherwise a catch-all match that the
+ * caller can refine in place.
+ *
+ * A nonzero 'conj_id' gives the new expr_match exactly one conjunction, built
+ * from 'conj_id', 'clause', and 'n_clauses'.  A zero 'conj_id' yields an
+ * expr_match without conjunctions.
+ *
+ * Once the expr_match is finalized, the caller should hand it to
+ * expr_match_add() to put it into a hash table. */
+static struct expr_match *
+expr_match_new(const struct match *m, uint8_t clause, uint8_t n_clauses,
+               uint32_t conj_id)
+{
+    struct expr_match *em = xmalloc(sizeof *em);
+
+    if (!m) {
+        match_init_catchall(&em->match);
+    } else {
+        em->match = *m;
+    }
+
+    /* Start without conjunctions, then attach one if requested. */
+    em->conjunctions = NULL;
+    em->n = 0;
+    em->allocated = 0;
+    if (conj_id) {
+        em->conjunctions = xmalloc(sizeof *em->conjunctions);
+        em->conjunctions[0].id = conj_id;
+        em->conjunctions[0].clause = clause;
+        em->conjunctions[0].n_clauses = n_clauses;
+        em->n = 1;
+        em->allocated = 1;
+    }
+    return em;
+}
+
+/* Adds 'match' to hash table 'matches', which becomes the new owner of
+ * 'match'.
+ *
+ * This might actually destroy 'match' because it gets merged together with
+ * some existing conjunction: if 'matches' already holds an entry with an
+ * equal match, that entry is updated and 'match' is freed. */
+static void
+expr_match_add(struct hmap *matches, struct expr_match *match)
+{
+ uint32_t hash = match_hash(&match->match, 0);
+ struct expr_match *m;
+
+ HMAP_FOR_EACH_WITH_HASH (m, hmap_node, hash, matches) {
+ if (match_equal(&m->match, &match->match)) {
+ if (!m->n || !match->n) {
+ /* At least one side has no conjunctions, so the merged entry
+ * has none either. */
+ free(m->conjunctions);
+ m->conjunctions = NULL;
+ m->n = 0;
+ m->allocated = 0;
+ } else {
+ /* Both have conjunctions: append the new one to the existing
+ * entry, growing its array if it is full. */
+ ovs_assert(match->n == 1);
+ if (m->n >= m->allocated) {
+ m->conjunctions = x2nrealloc(m->conjunctions,
+ &m->allocated,
+ sizeof *m->conjunctions);
+ }
+ m->conjunctions[m->n++] = match->conjunctions[0];
+ }
+ free(match->conjunctions);
+ free(match);
+ return;
+ }
+ }
+
+ hmap_insert(matches, &match->hmap_node, hash);
+}
+
+/* Narrows match 'm' so that it also requires the comparison 'expr' to hold.
+ *
+ * For a symbol of nonzero width, the comparison's value and mask are applied
+ * to the symbol's field.  For a zero-width symbol, the comparison's string is
+ * looked up in 'ports' and the integer found there is written to the whole
+ * field.
+ *
+ * Returns true if 'm' was updated, false if the string could not be
+ * translated (because 'ports' is null or does not contain it), in which case
+ * 'm' is left untouched. */
+static bool
+constrain_match(const struct expr *expr, const struct simap *ports,
+                struct match *m)
+{
+    ovs_assert(expr->type == EXPR_T_CMP);
+
+    const struct expr_symbol *symbol = expr->cmp.symbol;
+    if (symbol->width) {
+        mf_mask_subfield(symbol->field, &expr->cmp.value,
+                         &expr->cmp.mask, m);
+        return true;
+    }
+
+    const struct simap_node *port = NULL;
+    if (ports) {
+        port = simap_find(ports, expr->cmp.string);
+    }
+    if (!port) {
+        return false;
+    }
+
+    struct mf_subfield whole;
+    whole.field = symbol->field;
+    whole.ofs = 0;
+    whole.n_bits = symbol->field->n_bits;
+
+    union mf_subvalue number;
+    memset(&number, 0, sizeof number);
+    number.integer = htonll(port->data);
+
+    mf_write_subfield(&whole, &number, m);
+    return true;
+}
+
+/* Adds to 'matches' one flow for each alternative of the disjunction 'or'.
+ * Each flow starts from a copy of 'm', carries the conjunction described by
+ * 'conj_id', 'clause', and 'n_clauses' (none if 'conj_id' is zero), and is
+ * then constrained by the alternative.  Alternatives that constrain_match()
+ * cannot translate using 'ports' are skipped.
+ *
+ * Returns true if at least one flow was added, false otherwise. */
+static bool
+add_disjunction(const struct expr *or, const struct simap *ports,
+                struct match *m, uint8_t clause, uint8_t n_clauses,
+                uint32_t conj_id, struct hmap *matches)
+{
+    int n_added = 0;
+    struct expr *alt;
+
+    ovs_assert(or->type == EXPR_T_OR);
+    LIST_FOR_EACH (alt, node, &or->andor) {
+        struct expr_match *em = expr_match_new(m, clause, n_clauses, conj_id);
+
+        if (!constrain_match(alt, ports, &em->match)) {
+            free(em->conjunctions);
+            free(em);
+            continue;
+        }
+        expr_match_add(matches, em);
+        n_added++;
+    }
+
+    /* A count of exactly 1 means this never needed to be a disjunction, but
+     * that should be rare enough not to matter. */
+    return n_added > 0;
+}
+
+/* Adds to 'matches' the flows for 'and', an AND whose subexpressions are each
+ * a plain comparison or an OR.
+ *
+ * All plain comparisons are folded into a single base match.  What happens
+ * next depends on the number of OR subexpressions:
+ *
+ *     - None: the base match is added as one flow.
+ *
+ *     - One: one flow per alternative of that OR, without conjunctions.
+ *
+ *     - More: '*n_conjsp' is incremented to obtain a new conjunction ID, each
+ *       OR becomes one clause of that conjunction, and a final flow matching
+ *       on the conjunction ID is added.
+ *
+ * Adds nothing if a plain comparison cannot be translated using 'ports'. */
+static void
+add_conjunction(const struct expr *and, const struct simap *ports,
+ uint32_t *n_conjsp, struct hmap *matches)
+{
+ struct match match;
+ int n_clauses = 0;
+ struct expr *sub;
+
+ match_init_catchall(&match);
+
+ ovs_assert(and->type == EXPR_T_AND);
+ /* First pass: apply the plain comparisons and count the disjunctions. */
+ LIST_FOR_EACH (sub, node, &and->andor) {
+ switch (sub->type) {
+ case EXPR_T_CMP:
+ if (!constrain_match(sub, ports, &match)) {
+ return;
+ }
+ break;
+ case EXPR_T_OR:
+ n_clauses++;
+ break;
+ case EXPR_T_AND:
+ case EXPR_T_BOOLEAN:
+ OVS_NOT_REACHED();
+ }
+ }
+
+ if (!n_clauses) {
+ expr_match_add(matches, expr_match_new(&match, 0, 0, 0));
+ } else if (n_clauses == 1) {
+ LIST_FOR_EACH (sub, node, &and->andor) {
+ if (sub->type == EXPR_T_OR) {
+ add_disjunction(sub, ports, &match, 0, 0, 0, matches);
+ }
+ }
+ } else {
+ int clause = 0;
+ (*n_conjsp)++;
+ LIST_FOR_EACH (sub, node, &and->andor) {
+ if (sub->type == EXPR_T_OR) {
+ if (!add_disjunction(sub, ports, &match, clause++,
+ n_clauses, *n_conjsp, matches)) {
+ /* This clause can't ever match, so we might as well skip
+ * adding the other clauses--the overall disjunctive flow
+ * can't ever match. Ideally we would also back out all of
+ * the clauses we already added, but that seems like a lot
+ * of trouble for a case that might never occur in
+ * practice. */
+ return;
+ }
+ }
+ }
+
+ /* Add the flow that matches on conj_id. */
+ match_set_conj_id(&match, *n_conjsp);
+ expr_match_add(matches, expr_match_new(&match, 0, 0, 0));
+ }
+}
+
+/* Adds to 'matches' a single flow, without conjunctions, that matches the
+ * comparison 'cmp'.  Adds nothing if constrain_match() cannot translate 'cmp'
+ * using 'ports'. */
+static void
+add_cmp_flow(const struct expr *cmp, const struct simap *ports,
+             struct hmap *matches)
+{
+    struct expr_match *em = expr_match_new(NULL, 0, 0, 0);
+
+    if (!constrain_match(cmp, ports, &em->match)) {
+        free(em);
+        return;
+    }
+    expr_match_add(matches, em);
+}
+
+/* Converts 'expr', which must be in the form returned by expr_normalize(), to
+ * a collection of Open vSwitch flows in 'matches', which this function
+ * initializes to an hmap of "struct expr_match" structures. Returns the
+ * number of conjunctive match IDs consumed by 'matches', which uses
+ * conjunctive match IDs beginning with 1 (the counter is incremented before
+ * each use, and expr_match_new() treats an ID of 0 as "no conjunction"); the
+ * caller must offset or remap them into the desired range as necessary.
+ *
+ * The matches inserted into 'matches' will be of three distinct kinds:
+ *
+ * - Ordinary flows. The caller should add these OpenFlow flows with
+ * its desired actions.
+ *
+ * - Conjunctive flows, distinguished by 'n > 0' in the expr_match
+ * structure. The caller should add these OpenFlow flows with the
+ * conjunction(id, k/n) actions as specified in the 'conjunctions' array,
+ * remapping the ids.
+ *
+ * - conj_id flows, distinguished by matching on the "conj_id" field. The
+ * caller should remap the conj_id and add the OpenFlow flow with its
+ * desired actions.
+ *
+ * 'ports' must be a map from strings (presumably names of ports) to integers.
+ * Any comparisons against string fields in 'expr' are translated into integers
+ * through this map. A comparison against a string that is not in 'ports' acts
+ * like a Boolean "false"; that is, it will always fail to match. For a simple
+ * expression, this means that the overall expression always fails to match,
+ * but an expression with a disjunction on the string field might still match
+ * on other port names.
+ *
+ * (This treatment of string fields might be too simplistic in general, but it
+ * seems reasonable for now when string fields are used only for ports.) */
+uint32_t
+expr_to_matches(const struct expr *expr, const struct simap *ports,
+ struct hmap *matches)
+{
+ uint32_t n_conjs = 0;
+
+ hmap_init(matches);
+ switch (expr->type) {
+ case EXPR_T_CMP:
+ add_cmp_flow(expr, ports, matches);
+ break;
+
+ case EXPR_T_AND:
+ add_conjunction(expr, ports, &n_conjs, matches);
+ break;
+
+ case EXPR_T_OR:
+ if (expr_is_cmp(expr)) {
+ /* A disjunction of comparisons on one symbol: one flow each. */
+ struct expr *sub;
+
+ LIST_FOR_EACH (sub, node, &expr->andor) {
+ add_cmp_flow(sub, ports, matches);
+ }
+ } else {
+ struct expr *sub;
+
+ LIST_FOR_EACH (sub, node, &expr->andor) {
+ if (sub->type == EXPR_T_AND) {
+ add_conjunction(sub, ports, &n_conjs, matches);
+ } else {
+ add_cmp_flow(sub, ports, matches);
+ }
+ }
+ }
+ break;
+
+ case EXPR_T_BOOLEAN:
+ if (expr->boolean) {
+ /* "true" becomes a single catch-all flow. */
+ struct expr_match *m = expr_match_new(NULL, 0, 0, 0);
+ expr_match_add(matches, m);
+ } else {
+ /* No match. */
+ }
+ break;
+ }
+ return n_conjs;
+}
+
+/* Removes and frees every 'struct expr_match' in 'matches', including each
+ * one's 'conjunctions' array, and then destroys the 'matches' hmap itself. */
+void
+expr_matches_destroy(struct hmap *matches)
+{
+    struct expr_match *em, *following;
+
+    HMAP_FOR_EACH_SAFE (em, following, hmap_node, matches) {
+        hmap_remove(matches, &em->hmap_node);
+        free(em->conjunctions);
+        free(em);
+    }
+
+    hmap_destroy(matches);
+}
+
+/* Writes to 'stream' one line for each 'struct expr_match' in 'matches': the
+ * match itself followed by its conjunctions, if any.  Writes "(no flows)" if
+ * 'matches' is empty. */
+void
+expr_matches_print(const struct hmap *matches, FILE *stream)
+{
+    if (hmap_is_empty(matches)) {
+        fputs("(no flows)\n", stream);
+        return;
+    }
+
+    const struct expr_match *em;
+    HMAP_FOR_EACH (em, hmap_node, matches) {
+        char *text = match_to_string(&em->match, OFP_DEFAULT_PRIORITY);
+        fputs(text, stream);
+        free(text);
+
+        /* The first conjunction is introduced by ':', later ones by ','. */
+        for (int i = 0; i < em->n; i++) {
+            const struct cls_conjunction *conj = &em->conjunctions[i];
+            char separator = i ? ',' : ':';
+
+            fprintf(stream, "%c conjunction(%"PRIu32", %d/%d)",
+                    separator, conj->id, conj->clause, conj->n_clauses);
+        }
+        putc('\n', stream);
+    }
+}
+\f
+/* Checks whether 'expr' honors the invariants for expressions (see the large
+ * comment above "struct expr" in expr.h).  Returns true if it does, false
+ * otherwise.  The checks made here are:
+ *
+ *     - A comparison on a symbol of nonzero width has no value bit set
+ *       outside its mask.
+ *
+ *     - An AND or OR has at least two subexpressions, none of which has the
+ *       same type as its parent, and each of which honors the invariants. */
+bool
+expr_honors_invariants(const struct expr *expr)
+{
+    switch (expr->type) {
+    case EXPR_T_CMP:
+        if (!expr->cmp.symbol->width) {
+            return true;
+        }
+        for (int i = 0; i < ARRAY_SIZE(expr->cmp.value.be64); i++) {
+            if (expr->cmp.value.be64[i] & ~expr->cmp.mask.be64[i]) {
+                return false;
+            }
+        }
+        return true;
+
+    case EXPR_T_AND:
+    case EXPR_T_OR: {
+        const struct expr *child;
+
+        if (list_is_short(&expr->andor)) {
+            return false;
+        }
+        LIST_FOR_EACH (child, node, &expr->andor) {
+            if (child->type == expr->type) {
+                return false;
+            }
+            if (!expr_honors_invariants(child)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    case EXPR_T_BOOLEAN:
+        return true;
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Returns true if every subexpression of 'expr' is a cmp in the sense of
+ * expr_is_cmp(), false otherwise. */
+static bool
+expr_is_normalized_and(const struct expr *expr)
+{
+    /* XXX should also check that no symbol is repeated. */
+    const struct expr *child;
+
+    LIST_FOR_EACH (child, node, &expr->andor) {
+        if (expr_is_cmp(child) == NULL) {
+            return false;
+        }
+    }
+    return true;
+}
+
+/* Checks whether 'expr' is in the form that expr_normalize() returns.
+ * Returns true if it is, false otherwise.
+ *
+ * Comparisons and Booleans always qualify.  An AND qualifies if all of its
+ * subexpressions are cmps.  An OR qualifies if it is itself a cmp, or if
+ * each of its subexpressions is either a cmp or an AND-style list of cmps. */
+bool
+expr_is_normalized(const struct expr *expr)
+{
+    switch (expr->type) {
+    case EXPR_T_CMP:
+    case EXPR_T_BOOLEAN:
+        return true;
+
+    case EXPR_T_AND:
+        return expr_is_normalized_and(expr);
+
+    case EXPR_T_OR: {
+        const struct expr *child;
+
+        if (expr_is_cmp(expr)) {
+            return true;
+        }
+        LIST_FOR_EACH (child, node, &expr->andor) {
+            if (expr_is_cmp(child)) {
+                continue;
+            }
+            if (!expr_is_normalized_and(child)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+\f
+/* Action parsing helper. */
+
+/* Parses an assignment of the form "field = value" from 'ctx''s lexer and
+ * appends a corresponding set-field action to 'ofpacts'.
+ *
+ * The assigned-to symbol is followed through its chain of expansions down to
+ * a symbol without one, accumulating the prerequisites of every symbol along
+ * the way.  String values are translated to integers through 'ports'.
+ *
+ * Returns the accumulated prerequisites, or NULL if there are none.  Errors
+ * are reported through 'ctx' with expr_error() or expr_syntax_error(); the
+ * prerequisites accumulated up to that point are still returned, so the
+ * caller must check 'ctx->error' and dispose of them on failure. */
+static struct expr *
+parse_assignment(struct expr_context *ctx, const struct simap *ports,
+ struct ofpbuf *ofpacts)
+{
+ struct expr *prereqs = NULL;
+
+ struct expr_field f;
+ if (!parse_field(ctx, &f)) {
+ goto exit;
+ }
+ if (!lexer_match(ctx->lexer, LEX_T_EQUALS)) {
+ expr_syntax_error(ctx, "expecting `='.");
+ goto exit;
+ }
+
+ if (f.symbol->expansion && f.symbol->level != EXPR_L_ORDINAL) {
+ expr_error(ctx, "Can't assign to predicate symbol %s.",
+ f.symbol->name);
+ goto exit;
+ }
+
+ struct expr_constant_set cs;
+ if (!parse_constant_set(ctx, &cs)) {
+ goto exit;
+ }
+
+ if (!type_check(ctx, &f, &cs)) {
+ goto exit_destroy_cs;
+ }
+ if (cs.in_curlies) {
+ expr_error(ctx, "Assignments require a single value.");
+ goto exit_destroy_cs;
+ }
+
+ /* Remember the symbol as written, for error messages, because 'f.symbol'
+ * is replaced as expansions are followed. */
+ const struct expr_symbol *orig_symbol = f.symbol;
+ union expr_constant *c = cs.values;
+ for (;;) {
+ /* Accumulate prerequisites. */
+ if (f.symbol->prereqs) {
+ struct ovs_list nesting = OVS_LIST_INITIALIZER(&nesting);
+ char *error;
+ struct expr *e;
+ e = parse_and_annotate(f.symbol->prereqs, ctx->symtab, &nesting,
+ &error);
+ if (error) {
+ expr_error(ctx, "%s", error);
+ free(error);
+ goto exit_destroy_cs;
+ }
+ prereqs = expr_combine(EXPR_T_AND, prereqs, e);
+ }
+
+ /* If there's no expansion, we're done. */
+ if (!f.symbol->expansion) {
+ break;
+ }
+
+ /* Expand. */
+ struct expr_field expansion;
+ char *error;
+ if (!parse_field_from_string(f.symbol->expansion, ctx->symtab,
+ &expansion, &error)) {
+ expr_error(ctx, "%s", error);
+ free(error);
+ goto exit_destroy_cs;
+ }
+ /* Offsets add up as each subfield is resolved inside its parent. */
+ f.symbol = expansion.symbol;
+ f.ofs += expansion.ofs;
+ }
+
+ if (!f.symbol->field->writable) {
+ expr_error(ctx, "Field %s is not modifiable.", orig_symbol->name);
+ goto exit_destroy_cs;
+ }
+
+ struct ofpact_set_field *sf = ofpact_put_SET_FIELD(ofpacts);
+ sf->field = f.symbol->field;
+ if (f.symbol->width) {
+ mf_subvalue_shift(&c->value, f.ofs);
+ if (!c->masked) {
+ /* No explicit mask: set exactly the 'f.n_bits' bits starting at
+ * offset 'f.ofs'. */
+ memset(&c->mask, 0, sizeof c->mask);
+ bitwise_one(&c->mask, sizeof c->mask, f.ofs, f.n_bits);
+ } else {
+ mf_subvalue_shift(&c->mask, f.ofs);
+ }
+
+ /* Copy the trailing 'n_bytes' bytes of the value and mask, which is
+ * where the field's data is taken from. */
+ memcpy(&sf->value, &c->value.u8[sizeof c->value - sf->field->n_bytes],
+ sf->field->n_bytes);
+ memcpy(&sf->mask, &c->mask.u8[sizeof c->mask - sf->field->n_bytes],
+ sf->field->n_bytes);
+ } else {
+ /* String field: translate the name through 'ports' and set every bit
+ * of the field. */
+ uint32_t port = simap_get(ports, c->string);
+ bitwise_put(port, &sf->value,
+ sf->field->n_bytes, 0, sf->field->n_bits);
+ bitwise_put(UINT64_MAX, &sf->mask,
+ sf->field->n_bytes, 0, sf->field->n_bits);
+ }
+
+exit_destroy_cs:
+ expr_constant_set_destroy(&cs);
+exit:
+ return prereqs;
+}
+
+/* A helper for actions_parse() that parses an OVN assignment action of the
+ * form "field = value" from 'lexer' into 'ofpacts'.  The parameters and
+ * return value match those for actions_parse().
+ *
+ * Returns the error message recorded while parsing, or NULL if there was
+ * none.  '*prereqsp' receives the assignment's prerequisites on success and
+ * NULL on failure. */
+char *
+expr_parse_assignment(struct lexer *lexer, const struct shash *symtab,
+                      const struct simap *ports,
+                      struct ofpbuf *ofpacts, struct expr **prereqsp)
+{
+    struct expr_context ctx;
+    ctx.error = NULL;
+    ctx.not = false;
+    ctx.symtab = symtab;
+    ctx.lexer = lexer;
+
+    struct expr *prereqs = parse_assignment(&ctx, ports, ofpacts);
+    if (!ctx.error) {
+        *prereqsp = prereqs;
+    } else {
+        /* Discard prerequisites accumulated before the error. */
+        expr_destroy(prereqs);
+        *prereqsp = NULL;
+    }
+    return ctx.error;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_EXPR_H
+#define OVN_EXPR_H 1
+
+/* OVN matching expression tree
+ * ============================
+ *
+ * The data structures here form an abstract expression tree for matching
+ * expressions in OVN.
+ *
+ * The abstract syntax tree representation of a matching expression is one of:
+ *
+ * - A Boolean literal ("true" or "false").
+ *
+ * - A comparison of a field (or part of a field) against a constant
+ * with one of the operators == != < <= > >=.
+ *
+ * - The logical AND or OR of two or more matching expressions.
+ *
+ * Literals and comparisons are called "terminal" nodes, logical AND and OR
+ * nodes are "nonterminal" nodes.
+ *
+ * The syntax for expressions includes a few other concepts that are not part
+ * of the abstract syntax tree. In these examples, x is a field, a, b, and c
+ * are constants, and e1 and e2 are arbitrary expressions:
+ *
+ * - Logical NOT. The parser implements NOT by inverting the sense of the
+ * operand: !(x == a) becomes x != a, !(e1 && e2) becomes !e1 || !e2, and
+ * so on.
+ *
+ * - Set membership. The parser translates x == {a, b, c} into
+ * x == a || x == b || x == c.
+ *
+ * - Reversed comparisons. The parser translates a < x into x > a.
+ *
+ * - Range expressions. The parser translates a < x < b into
+ * x > a && x < b.
+ */
+
+#include "classifier.h"
+#include "lex.h"
+#include "hmap.h"
+#include "list.h"
+#include "match.h"
+#include "meta-flow.h"
+
+struct ds;
+struct ofpbuf;
+struct shash;
+struct simap;
+
+/* "Measurement level" of a field. See "Level of Measurement" in the large
+ * comment on struct expr_symbol below for more information. */
+enum expr_level {
+ EXPR_L_NOMINAL, /* Only equality tests are supported. */
+
+ /* Boolean values are nominal, however because of their simple nature OVN
+ * can allow both equality and inequality tests on them. */
+ EXPR_L_BOOLEAN,
+
+ /* Ordinal values can at least be ordered on a scale. OVN allows equality
+ * and inequality and relational tests on ordinal values. These are the
+ * fields on which OVS allows bitwise matching. */
+ EXPR_L_ORDINAL
+};
+
+const char *expr_level_to_string(enum expr_level);
+\f
+/* A symbol.
+ *
+ *
+ * Name
+ * ====
+ *
+ * Every symbol must have a name. To be useful, the name must satisfy the
+ * lexer's syntax for an identifier.
+ *
+ *
+ * Width
+ * =====
+ *
+ * Every symbol has a width. For integer symbols, this is the number of bits
+ * in the value; for string symbols, this is 0.
+ *
+ *
+ * Types
+ * =====
+ *
+ * There are three kinds of symbols:
+ *
+ * Fields:
+ *
+ * One might, for example, define a field named "vlan.tci" to refer to
+ * MFF_VLAN_TCI. For integer fields, 'field' specifies the referent; for
+ * string fields, 'field' is NULL.
+ *
+ * 'expansion' is NULL.
+ *
+ * Integer fields can be nominal or ordinal (see below). String fields are
+ * always nominal.
+ *
+ * Subfields:
+ *
+ * 'expansion' is a string that specifies a subfield of some larger field,
+ * e.g. "vlan.tci[0..11]" for a field that represents a VLAN VID.
+ *
+ * 'field' is NULL.
+ *
+ * Only ordinal fields (see below) may have subfields, and subfields are
+ * always ordinal.
+ *
+ * Predicates:
+ *
+ * A predicate is an arbitrary Boolean expression that can be used in an
+ * expression much like a 1-bit field. 'expansion' specifies the Boolean
+ * expression, e.g. "ip4" might expand to "eth.type == 0x800". The
+ * expansion of a predicate might refer to other predicates, e.g. "icmp4"
+ * might expand to "ip4 && ip4.proto == 1".
+ *
+ * 'field' is NULL.
+ *
+ * A predicate whose expansion refers to any nominal field or predicate
+ * (see below) is nominal; other predicates have Boolean level of
+ * measurement.
+ *
+ *
+ * Level of Measurement
+ * ====================
+ *
+ * See http://en.wikipedia.org/wiki/Level_of_measurement for the statistical
+ * concept on which this classification is based. There are three levels:
+ *
+ * Ordinal:
+ *
+ * In statistics, ordinal values can be ordered on a scale. Here, we
+ * consider a field (or subfield) to be ordinal if its bits can be examined
+ * individually. This is true for the OpenFlow fields that OpenFlow or
+ * Open vSwitch makes "maskable".
+ *
+ * OVN supports all the usual arithmetic relations (== != < <= > >=) on
+ * ordinal fields and their subfields, because all of these can be
+ * implemented as collections of bitwise tests.
+ *
+ * Nominal:
+ *
+ * In statistics, nominal values cannot be usefully compared except for
+ * equality. OpenFlow port numbers, Ethernet types, and IP protocols are
+ * examples: all of these are just identifiers assigned
+ * arbitrarily with no deeper meaning. In OpenFlow and Open vSwitch, bits
+ * in these fields generally aren't individually addressable.
+ *
+ * OVN only supports arithmetic tests for equality on nominal fields,
+ * because OpenFlow and Open vSwitch provide no way for a flow to
+ * efficiently implement other comparisons on them. (A test for inequality
+ * can be sort of built out of two flows with different priorities, but OVN
+ * matching expressions always generate flows with a single priority.)
+ *
+ * String fields are always nominal.
+ *
+ * Boolean:
+ *
+ * A nominal field that has only two values, 0 and 1, is somewhat
+ * exceptional, since it is easy to support both equality and inequality
+ * tests on such a field: either one can be implemented as a test for 0 or
+ * 1.
+ *
+ * Only predicates (see above) have a Boolean level of measurement.
+ *
+ * This isn't a standard level of measurement.
+ *
+ *
+ * Prerequisites
+ * =============
+ *
+ * Any symbol can have prerequisites, which are specified as a string giving an
+ * additional expression that must be true whenever the symbol is referenced.
+ * For example, the "icmp4.type" symbol might have prerequisite "icmp4", which
+ * would cause an expression "icmp4.type == 0" to be interpreted as "icmp4.type
+ * == 0 && icmp4", which would in turn expand to "icmp4.type == 0 && eth.type
+ * == 0x800 && ip4.proto == 1" (assuming "icmp4" is a predicate defined as
+ * suggested under "Types" above).
+ *
+ *
+ * Crossproducting
+ * ===============
+ *
+ * Ordinarily OVN is willing to consider using any field as a dimension in the
+ * Open vSwitch "conjunctive match" extension (see ovs-ofctl(8)). However,
+ * some fields can't actually be used that way because they are necessary as
+ * prerequisites. For example, from an expression like "tcp.src == {1,2,3}
+ * && tcp.dst == {4,5,6}", OVN might naturally generate flows like this:
+ *
+ * conj_id=1,actions=...
+ * ip,actions=conjunction(1,1/3)
+ * ip6,actions=conjunction(1,1/3)
+ * tp_src=1,actions=conjunction(1,2/3)
+ * tp_src=2,actions=conjunction(1,2/3)
+ * tp_src=3,actions=conjunction(1,2/3)
+ * tp_dst=4,actions=conjunction(1,3/3)
+ * tp_dst=5,actions=conjunction(1,3/3)
+ * tp_dst=6,actions=conjunction(1,3/3)
+ *
+ * but that's not valid because any flow that matches on tp_src or tp_dst must
+ * also match on either ip or ip6. Thus, one would mark eth.type as "must
+ * crossproduct", to force generating flows like this:
+ *
+ * conj_id=1,actions=...
+ * ip,tp_src=1,actions=conjunction(1,1/2)
+ * ip,tp_src=2,actions=conjunction(1,1/2)
+ * ip,tp_src=3,actions=conjunction(1,1/2)
+ * ip6,tp_src=1,actions=conjunction(1,1/2)
+ * ip6,tp_src=2,actions=conjunction(1,1/2)
+ * ip6,tp_src=3,actions=conjunction(1,1/2)
+ * ip,tp_dst=4,actions=conjunction(1,2/2)
+ * ip,tp_dst=5,actions=conjunction(1,2/2)
+ * ip,tp_dst=6,actions=conjunction(1,2/2)
+ * ip6,tp_dst=4,actions=conjunction(1,2/2)
+ * ip6,tp_dst=5,actions=conjunction(1,2/2)
+ * ip6,tp_dst=6,actions=conjunction(1,2/2)
+ *
+ * which are acceptable.
+ */
+struct expr_symbol {
+ char *name; /* Symbol name; see "Name" above. */
+ int width; /* Bits in value for integer symbols, 0 for strings. */
+
+ const struct mf_field *field; /* Referent; NULL for subfields, predicates. */
+ char *expansion; /* Subfield or predicate expansion; NULL for fields. */
+
+ enum expr_level level; /* See "Level of Measurement" above. */
+
+ char *prereqs; /* Prerequisite expression; see "Prerequisites" above. */
+ bool must_crossproduct; /* See "Crossproducting" above. */
+};
+
+struct expr_symbol *expr_symtab_add_field(struct shash *symtab,
+ const char *name, enum mf_field_id,
+ const char *prereqs,
+ bool must_crossproduct);
+struct expr_symbol *expr_symtab_add_subfield(struct shash *symtab,
+ const char *name,
+ const char *prereqs,
+ const char *subfield);
+struct expr_symbol *expr_symtab_add_string(struct shash *symtab,
+ const char *name, enum mf_field_id,
+ const char *prereqs);
+struct expr_symbol *expr_symtab_add_predicate(struct shash *symtab,
+ const char *name,
+ const char *expansion);
+void expr_symtab_destroy(struct shash *symtab);
+\f
+/* Expression type. */
+enum expr_type {
+ EXPR_T_CMP, /* Compare symbol with constant. */
+ EXPR_T_AND, /* Logical AND of 2 or more subexpressions. */
+ EXPR_T_OR, /* Logical OR of 2 or more subexpressions. */
+ EXPR_T_BOOLEAN, /* True or false constant. */
+};
+
+/* Relational operator. */
+enum expr_relop {
+ EXPR_R_EQ, /* == */
+ EXPR_R_NE, /* != */
+ EXPR_R_LT, /* < */
+ EXPR_R_LE, /* <= */
+ EXPR_R_GT, /* > */
+ EXPR_R_GE, /* >= */
+};
+const char *expr_relop_to_string(enum expr_relop);
+bool expr_relop_from_token(enum lex_type type, enum expr_relop *relop);
+
+/* An abstract syntax tree for a matching expression.
+ *
+ * The expression code maintains and relies on a few important invariants:
+ *
+ * - An EXPR_T_AND or EXPR_T_OR node never has a child of the same type.
+ * (Any such children could be merged into their parent.) A node may
+ * have grandchildren of its own type.
+ *
+ * As a consequence, every nonterminal node at the same distance from the
+ * root of the tree has the same type.
+ *
+ * - EXPR_T_AND and EXPR_T_OR nodes must have at least two children.
+ *
+ * - An EXPR_T_CMP node always has a nonzero mask, and never has a 1-bit
+ * in its value in a position where the mask is a 0-bit.
+ *
+ * The expr_honors_invariants() function can check invariants. */
+struct expr {
+ struct ovs_list node; /* In parent EXPR_T_AND or EXPR_T_OR if any. */
+ enum expr_type type; /* Expression type. */
+
+ union {
+ /* EXPR_T_CMP.
+ *
+ * The symbol is on the left, e.g. "field < constant". */
+ struct {
+ const struct expr_symbol *symbol;
+ enum expr_relop relop;
+
+ union {
+ char *string;
+ struct {
+ union mf_subvalue value;
+ union mf_subvalue mask;
+ };
+ };
+ } cmp;
+
+ /* EXPR_T_AND, EXPR_T_OR. */
+ struct ovs_list andor;
+
+ /* EXPR_T_BOOLEAN. */
+ bool boolean;
+ };
+};
+
+struct expr *expr_create_boolean(bool b);
+struct expr *expr_create_andor(enum expr_type);
+struct expr *expr_combine(enum expr_type, struct expr *a, struct expr *b);
+
+/* Returns the struct expr whose 'node' member is 'node'. */
+static inline struct expr *
+expr_from_node(const struct ovs_list *node)
+{
+ return CONTAINER_OF(node, struct expr, node);
+}
+
+void expr_format(const struct expr *, struct ds *);
+void expr_print(const struct expr *);
+struct expr *expr_parse(struct lexer *, const struct shash *symtab,
+ char **errorp);
+struct expr *expr_parse_string(const char *, const struct shash *symtab,
+ char **errorp);
+
+struct expr *expr_clone(struct expr *);
+void expr_destroy(struct expr *);
+
+struct expr *expr_annotate(struct expr *, const struct shash *symtab,
+ char **errorp);
+struct expr *expr_simplify(struct expr *);
+struct expr *expr_normalize(struct expr *);
+
+bool expr_honors_invariants(const struct expr *);
+bool expr_is_simplified(const struct expr *);
+bool expr_is_normalized(const struct expr *);
+\f
+/* Converting expressions to OpenFlow flows. */
+
+/* An OpenFlow match generated from a Boolean expression. See
+ * expr_to_matches() for more information. */
+struct expr_match {
+ struct hmap_node hmap_node;
+ struct match match;
+ struct cls_conjunction *conjunctions;
+ size_t n, allocated;
+};
+
+uint32_t expr_to_matches(const struct expr *, const struct simap *ports,
+ struct hmap *matches);
+void expr_matches_destroy(struct hmap *matches);
+void expr_matches_print(const struct hmap *matches, FILE *);
+\f
+/* Action parsing helper. */
+
+char *expr_parse_assignment(struct lexer *lexer, const struct shash *symtab,
+ const struct simap *ports, struct ofpbuf *ofpacts,
+ struct expr **prereqsp);
+
+#endif /* ovn/expr.h */
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "lex.h"
+#include <ctype.h>
+#include <errno.h>
+#include <stdarg.h>
+#include "dynamic-string.h"
+#include "json.h"
+#include "util.h"
+\f
+/* Returns a human-readable name for 'format'.  Aborts if 'format' is not one
+ * of the LEX_F_* values. */
+const char *
+lex_format_to_string(enum lex_format format)
+{
+    const char *name = NULL;
+
+    switch (format) {
+    case LEX_F_DECIMAL:
+        name = "decimal";
+        break;
+    case LEX_F_HEXADECIMAL:
+        name = "hexadecimal";
+        break;
+    case LEX_F_IPV4:
+        name = "IPv4";
+        break;
+    case LEX_F_IPV6:
+        name = "IPv6";
+        break;
+    case LEX_F_ETHERNET:
+        name = "Ethernet";
+        break;
+    }
+
+    if (!name) {
+        abort();
+    }
+    return name;
+}
+\f
+/* Puts 'token' into its initial state: type LEX_T_END and no owned string. */
+void
+lex_token_init(struct lex_token *token)
+{
+    token->s = NULL;
+    token->type = LEX_T_END;
+}
+
+/* Frees the string owned by 'token' ('token->s'), if any. Does not free
+ * 'token' itself and does not reset 'token->s', so 'token' must be
+ * reinitialized before it is used again. */
+void
+lex_token_destroy(struct lex_token *token)
+{
+ free(token->s);
+}
+
+/* Swaps the entire contents of tokens 'a' and 'b', including ownership of
+ * their strings. */
+void
+lex_token_swap(struct lex_token *a, struct lex_token *b)
+{
+    struct lex_token saved_b = *b;
+
+    *b = *a;
+    *a = saved_b;
+}
+\f
+/* lex_token_format(). */
+
+/* Returns the number of leading bytes of a union mf_subvalue that precede the
+ * member used to print a value in 'format'.  lex_token_get_format() requires
+ * those bytes to be all-zero for 'format' to be usable.  LEX_F_HEXADECIMAL
+ * needs no zero bytes. */
+static size_t
+lex_token_n_zeros(enum lex_format format)
+{
+    switch (format) {
+    case LEX_F_HEXADECIMAL:
+        return 0;
+
+    case LEX_F_DECIMAL:
+        return offsetof(union mf_subvalue, integer);
+
+    case LEX_F_IPV4:
+        return offsetof(union mf_subvalue, ipv4);
+
+    case LEX_F_IPV6:
+        return offsetof(union mf_subvalue, ipv6);
+
+    case LEX_F_ETHERNET:
+        return offsetof(union mf_subvalue, mac);
+
+    default:
+        OVS_NOT_REACHED();
+    }
+}
+
+/* Returns the format in which 'token' should actually be printed.
+ *
+ * Normally that is 'token->format'.  If the value (or, for a
+ * LEX_T_MASKED_INTEGER, the mask) has nonzero bytes outside the part that
+ * 'token->format' can express, e.g. the value is wider than 32 bits but the
+ * format is LEX_F_IPV4, this falls back to LEX_F_HEXADECIMAL.  (The lexer
+ * itself won't produce such a token; this guards against tokens set up by
+ * other code.) */
+static enum lex_format
+lex_token_get_format(const struct lex_token *token)
+{
+    size_t n_zeros = lex_token_n_zeros(token->format);
+
+    if (!is_all_zeros(&token->value, n_zeros)) {
+        return LEX_F_HEXADECIMAL;
+    }
+    if (token->type == LEX_T_MASKED_INTEGER
+        && !is_all_zeros(&token->mask, n_zeros)) {
+        return LEX_F_HEXADECIMAL;
+    }
+    return token->format;
+}
+
+/* Appends 'value', rendered according to 'format', to 's'. */
+static void
+lex_token_format_value(const union mf_subvalue *value,
+                       enum lex_format format, struct ds *s)
+{
+    switch (format) {
+    case LEX_F_DECIMAL:
+        ds_put_format(s, "%"PRIu64, ntohll(value->integer));
+        return;
+
+    case LEX_F_HEXADECIMAL:
+        mf_format_subvalue(value, s);
+        return;
+
+    case LEX_F_IPV4:
+        ds_put_format(s, IP_FMT, IP_ARGS(value->ipv4));
+        return;
+
+    case LEX_F_IPV6:
+        print_ipv6_addr(s, &value->ipv6);
+        return;
+
+    case LEX_F_ETHERNET:
+        ds_put_format(s, ETH_ADDR_FMT, ETH_ADDR_ARGS(value->mac));
+        return;
+    }
+
+    OVS_NOT_REACHED();
+}
+
+/* Appends LEX_T_MASKED_INTEGER 'token' to 's' as "value/mask".
+ *
+ * The value is printed in the token's effective format (see
+ * lex_token_get_format()).  The mask is printed as a CIDR prefix length when
+ * the effective format is IPv4 or IPv6 and the mask is a CIDR mask, and
+ * otherwise in the same format as the value. */
+static void
+lex_token_format_masked_integer(const struct lex_token *token, struct ds *s)
+{
+    enum lex_format format = lex_token_get_format(token);
+    const union mf_subvalue *mask = &token->mask;
+
+    lex_token_format_value(&token->value, format, s);
+    ds_put_char(s, '/');
+
+    /* Both CIDR shortcuts must test the effective 'format', not
+     * 'token->format'.  If the value was demoted to hexadecimal, a prefix
+     * length would ignore mask bits outside the address and would pair a
+     * hexadecimal value with a decimal mask, which the lexer rejects. */
+    if (format == LEX_F_IPV4 && ip_is_cidr(mask->ipv4)) {
+        ds_put_format(s, "%d", ip_count_cidr_bits(mask->ipv4));
+    } else if (format == LEX_F_IPV6 && ipv6_is_cidr(&mask->ipv6)) {
+        ds_put_format(s, "%d", ipv6_count_cidr_bits(&mask->ipv6));
+    } else {
+        lex_token_format_value(mask, format, s);
+    }
+}
+
+/* Returns the fixed text used to print a token of 'type' that carries no
+ * auxiliary data, or NULL if 'type' has no fixed text.  LEX_T_END is printed
+ * as "$". */
+static const char *
+lex_fixed_token_text(enum lex_type type)
+{
+    switch (type) {
+    case LEX_T_END:       return "$";
+    case LEX_T_LPAREN:    return "(";
+    case LEX_T_RPAREN:    return ")";
+    case LEX_T_LCURLY:    return "{";
+    case LEX_T_RCURLY:    return "}";
+    case LEX_T_LSQUARE:   return "[";
+    case LEX_T_RSQUARE:   return "]";
+    case LEX_T_EQ:        return "==";
+    case LEX_T_NE:        return "!=";
+    case LEX_T_LT:        return "<";
+    case LEX_T_LE:        return "<=";
+    case LEX_T_GT:        return ">";
+    case LEX_T_GE:        return ">=";
+    case LEX_T_LOG_NOT:   return "!";
+    case LEX_T_LOG_AND:   return "&&";
+    case LEX_T_LOG_OR:    return "||";
+    case LEX_T_ELLIPSIS:  return "..";
+    case LEX_T_COMMA:     return ",";
+    case LEX_T_SEMICOLON: return ";";
+    case LEX_T_EQUALS:    return "=";
+
+    case LEX_T_ID:
+    case LEX_T_STRING:
+    case LEX_T_INTEGER:
+    case LEX_T_MASKED_INTEGER:
+    case LEX_T_ERROR:
+    default:
+        return NULL;
+    }
+}
+
+/* Appends to 's' a textual form of 'token' that the lexer can parse back
+ * without loss.  (The forms written for LEX_T_END and LEX_T_ERROR are the
+ * exceptions: they can't be parsed back.) */
+void
+lex_token_format(const struct lex_token *token, struct ds *s)
+{
+    /* Tokens that carry auxiliary data are handled here. */
+    switch (token->type) {
+    case LEX_T_ID:
+        ds_put_cstr(s, token->s);
+        return;
+
+    case LEX_T_STRING:
+        json_string_escape(token->s, s);
+        return;
+
+    case LEX_T_ERROR:
+        ds_put_cstr(s, "error(");
+        json_string_escape(token->s, s);
+        ds_put_char(s, ')');
+        return;
+
+    case LEX_T_INTEGER:
+        lex_token_format_value(&token->value, lex_token_get_format(token), s);
+        return;
+
+    case LEX_T_MASKED_INTEGER:
+        lex_token_format_masked_integer(token, s);
+        return;
+
+    default:
+        break;
+    }
+
+    /* Everything else has a fixed spelling. */
+    const char *text = lex_fixed_token_text(token->type);
+    if (!text) {
+        OVS_NOT_REACHED();
+    }
+    ds_put_cstr(s, text);
+}
+\f
+/* lex_token_parse(). */
+
+/* Turns 'token' into a LEX_T_ERROR token whose string is the printf-style
+ * 'message' formatted with the remaining arguments.  'token' must not
+ * already own a string. */
+static void OVS_PRINTF_FORMAT(2, 3)
+lex_error(struct lex_token *token, const char *message, ...)
+{
+    va_list args;
+
+    ovs_assert(!token->s);
+
+    va_start(args, message);
+    char *text = xvasprintf(message, args);
+    va_end(args);
+
+    token->s = text;
+    token->type = LEX_T_ERROR;
+}
+
+/* Parses the 'len' hexadecimal digits at 'start' (the text that follows a
+ * "0x" prefix) into 'token->value', with the last digit landing in the low
+ * nibble of the last byte.
+ *
+ * Digits are ORed into place, so the caller must have zeroed 'token->value'.
+ *
+ * On success, sets 'token->format' to LEX_F_HEXADECIMAL.  If a character is
+ * not a hex digit, or a nonzero digit falls beyond the width of
+ * 'token->value', 'token' becomes a LEX_T_ERROR instead.  Leading zero
+ * digits beyond that width are accepted and ignored. */
+static void
+lex_parse_hex_integer(const char *start, size_t len, struct lex_token *token)
+{
+    const size_t n_bytes = sizeof token->value.u8;
+    uint8_t *bytes = token->value.u8;
+
+    /* 'i' counts digits from the least significant (rightmost) one. */
+    for (size_t i = 0; i < len; i++) {
+        int hexit = hexit_value(start[len - 1 - i]);
+        if (hexit < 0) {
+            lex_error(token, "Invalid syntax in hexadecimal constant.");
+            return;
+        }
+
+        size_t byte = i / 2;
+        if (byte >= n_bytes) {
+            if (hexit) {
+                lex_error(token, "Hexadecimal constant requires more than "
+                          "%"PRIuSIZE" bits.", 8 * sizeof token->value.u8);
+                return;
+            }
+            /* An excess leading zero has nowhere to go: storing it would
+             * touch memory before the start of 'token->value.u8'. */
+            continue;
+        }
+        bytes[n_bytes - 1 - byte] |= i % 2 ? hexit << 4 : hexit;
+    }
+    token->format = LEX_F_HEXADECIMAL;
+}
+
+/* Initializes 'token' and parses an unmasked integer constant at 'p' into
+ * it.  Recognized forms, tried in this order, are: an Ethernet address, a
+ * decimal number, a hexadecimal number prefixed by 0x or 0X, an IPv4 address
+ * and an IPv6 address.
+ *
+ * The constant's extent is the longest run of alphanumerics, ':' and '.'
+ * (a '.' that is immediately followed by another '.' ends the run).
+ *
+ * On success 'token' is a LEX_T_INTEGER with 'value' and 'format' set; on
+ * failure it is a LEX_T_ERROR.  Returns a pointer just past the run either
+ * way. */
+static const char *
+lex_parse_integer__(const char *p, struct lex_token *token)
+{
+    lex_token_init(token);
+    token->type = LEX_T_INTEGER;
+    memset(&token->value, 0, sizeof token->value);
+
+    /* Find the end of the constant. */
+    const char *end = p;
+    for (;;) {
+        unsigned char c = *end;
+        if (!isalnum(c) && c != ':' && !(c == '.' && end[1] != '.')) {
+            break;
+        }
+        end++;
+    }
+    size_t len = end - p;
+
+    uint8_t mac[ETH_ADDR_LEN];
+    int n_scanned;
+
+    if (len == 0) {
+        lex_error(token, "Integer constant expected.");
+    } else if (len == 17
+               && ovs_scan(p, ETH_ADDR_SCAN_FMT"%n",
+                           ETH_ADDR_SCAN_ARGS(mac), &n_scanned)
+               && n_scanned == len) {
+        /* Ethernet address. */
+        token->format = LEX_F_ETHERNET;
+        memcpy(token->value.mac, mac, sizeof token->value.mac);
+    } else if (strspn(p, "0123456789") == len) {
+        /* All digits: decimal. */
+        if (len > 1 && p[0] == '0') {
+            lex_error(token, "Decimal constants must not have leading zeros.");
+        } else {
+            char *tail;
+
+            errno = 0;
+            unsigned long long int integer = strtoull(p, &tail, 10);
+            if (errno == ERANGE || tail != end) {
+                lex_error(token, "Decimal constants must be less than 2**64.");
+            } else {
+                token->format = LEX_F_DECIMAL;
+                token->value.integer = htonll(integer);
+            }
+        }
+    } else if (p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+        /* Hexadecimal. */
+        if (len <= 2) {
+            lex_error(token, "Hex digits expected following 0%c.", p[1]);
+        } else {
+            lex_parse_hex_integer(p + 2, len - 2, token);
+        }
+    } else if (len >= INET6_ADDRSTRLEN) {
+        /* Too long to be an IP address. */
+        lex_error(token, "Invalid numeric constant.");
+    } else {
+        /* inet_pton() needs a null-terminated copy of just the constant. */
+        char copy[INET6_ADDRSTRLEN];
+        struct in6_addr ipv6;
+        struct in_addr ipv4;
+
+        memcpy(copy, p, len);
+        copy[len] = '\0';
+
+        if (inet_pton(AF_INET, copy, &ipv4) == 1) {
+            token->format = LEX_F_IPV4;
+            token->value.ipv4 = ipv4.s_addr;
+        } else if (inet_pton(AF_INET6, copy, &ipv6) == 1) {
+            token->format = LEX_F_IPV6;
+            token->value.ipv6 = ipv6;
+        } else {
+            lex_error(token, "Invalid numeric constant.");
+        }
+    }
+
+    ovs_assert(token->type == LEX_T_INTEGER || token->type == LEX_T_ERROR);
+    return end;
+}
+
+/* Parses the mask part of a masked integer.  'p' points to the '/' that
+ * follows an integer constant already parsed into 'token', which must be a
+ * LEX_T_INTEGER.
+ *
+ * On success 'token' becomes a LEX_T_MASKED_INTEGER.  If the mask is not a
+ * valid constant, its format cannot be combined with the value's format, or
+ * the value has a 1-bit where the mask has a 0-bit, 'token' becomes a
+ * LEX_T_ERROR.  Returns a pointer just past the mask either way. */
+static const char *
+lex_parse_mask(const char *p, struct lex_token *token)
+{
+    struct lex_token mask;
+
+    /* Parse just past the '/' as a second integer.  Handle errors. */
+    p = lex_parse_integer__(p + 1, &mask);
+    if (mask.type == LEX_T_ERROR) {
+        /* Hand the error to the caller and discard the value. */
+        lex_token_swap(&mask, token);
+        lex_token_destroy(&mask);
+        return p;
+    }
+    ovs_assert(mask.type == LEX_T_INTEGER);
+
+    /* Now convert the value and mask into a masked integer token.
+     * We have a few special cases. */
+    token->type = LEX_T_MASKED_INTEGER;
+    memset(&token->mask, 0, sizeof token->mask);
+
+    /* Keep all 64 bits of the decimal mask.  A 32-bit variable would
+     * truncate large numbers such as 4294967328 (2**32 + 32) into
+     * something that looks like a valid prefix length. */
+    uint64_t prefix_bits = ntohll(mask.value.integer);
+    if (token->format == mask.format) {
+        /* Same format value and mask is always OK. */
+        token->mask = mask.value;
+    } else if (token->format == LEX_F_IPV4
+               && mask.format == LEX_F_DECIMAL
+               && prefix_bits <= 32) {
+        /* IPv4 address with decimal mask is a CIDR prefix. */
+        token->mask.integer = htonll(ntohl(be32_prefix_mask(prefix_bits)));
+    } else if (token->format == LEX_F_IPV6
+               && mask.format == LEX_F_DECIMAL
+               && prefix_bits <= 128) {
+        /* IPv6 address with decimal mask is a CIDR prefix. */
+        token->mask.ipv6 = ipv6_create_mask(prefix_bits);
+    } else if (token->format == LEX_F_DECIMAL
+               && mask.format == LEX_F_HEXADECIMAL
+               && token->value.integer == 0) {
+        /* Special case for e.g. 0/0x1234. */
+        token->format = LEX_F_HEXADECIMAL;
+        token->mask = mask.value;
+    } else {
+        lex_error(token, "Value and mask have incompatible formats.");
+        lex_token_destroy(&mask);
+        return p;
+    }
+
+    /* Check invariant that a 1-bit in the value corresponds to a 1-bit in the
+     * mask. */
+    for (size_t i = 0; i < ARRAY_SIZE(token->mask.be32); i++) {
+        ovs_be32 v = token->value.be32[i];
+        ovs_be32 m = token->mask.be32[i];
+
+        if (v & ~m) {
+            lex_error(token, "Value contains unmasked 1-bits.");
+            break;
+        }
+    }
+
+    /* Done! */
+    lex_token_destroy(&mask);
+    return p;
+}
+
+/* Parses an integer constant at 'p', optionally followed by "/mask", into
+ * 'token'.  Returns a pointer just past the text that was consumed. */
+static const char *
+lex_parse_integer(const char *p, struct lex_token *token)
+{
+    const char *end = lex_parse_integer__(p, token);
+
+    if (*end != '/' || token->type != LEX_T_INTEGER) {
+        return end;
+    }
+    return lex_parse_mask(end, token);
+}
+
+/* Parses a quoted string whose opening '"' is at 'p' into 'token'.
+ *
+ * A backslash makes the scan skip the character after it, so an escaped
+ * quote does not end the string.  The text between the quotes is then
+ * unescaped with json_string_unescape(), whose result decides whether
+ * 'token' becomes a LEX_T_STRING or a LEX_T_ERROR.
+ *
+ * Returns a pointer just past the closing quote.  If the input ends first,
+ * 'token' becomes a LEX_T_ERROR and the return value points to the
+ * terminating null. */
+static const char *
+lex_parse_string(const char *p, struct lex_token *token)
+{
+    const char *body = p + 1;
+    const char *q = body;
+
+    while (*q != '"') {
+        if (*q == '\0') {
+            lex_error(token, "Input ends inside quoted string.");
+            return q;
+        }
+
+        if (*q == '\\' && q[1] != '\0') {
+            q += 2;
+        } else {
+            q++;
+        }
+    }
+
+    bool ok = json_string_unescape(body, q - body, &token->s);
+    token->type = ok ? LEX_T_STRING : LEX_T_ERROR;
+    return q + 1;
+}
+
+/* Returns true if 'c' may begin an identifier: an ASCII letter, '_' or
+ * '.'. */
+static bool
+lex_is_id1(unsigned char c)
+{
+    bool lowercase = c >= 'a' && c <= 'z';
+    bool uppercase = c >= 'A' && c <= 'Z';
+
+    return lowercase || uppercase || c == '_' || c == '.';
+}
+
+/* Returns true if 'c' may appear in an identifier after its first character:
+ * anything lex_is_id1() accepts, plus the decimal digits. */
+static bool
+lex_is_idn(unsigned char c)
+{
+    if (c >= '0' && c <= '9') {
+        return true;
+    }
+    return lex_is_id1(c);
+}
+
+/* Parses the identifier that starts at 'p' into 'token' as a LEX_T_ID whose
+ * string is a copy of the identifier.  The first character is accepted
+ * without being checked.  Returns a pointer just past the identifier. */
+static const char *
+lex_parse_id(const char *p, struct lex_token *token)
+{
+    const char *end = p + 1;
+
+    while (lex_is_idn(*end)) {
+        end++;
+    }
+
+    token->s = xmemdup0(p, end - p);
+    token->type = LEX_T_ID;
+    return end;
+}
+
+/* Initializes 'token' and parses the first token from the beginning of
+ * null-terminated string 'p' into 'token'.  Stores a pointer to the start of
+ * the token (after skipping white space and comments, if any) into '*startp'.
+ * Returns the character position at which to begin parsing the next token.
+ *
+ * A "//" comment extends to the next new-line or the end of the input.  A
+ * slash-star comment must be closed before the next new-line; otherwise the
+ * token is a LEX_T_ERROR.  Other invalid input also yields a LEX_T_ERROR
+ * token whose string describes the problem. */
+const char *
+lex_token_parse(struct lex_token *token, const char *p, const char **startp)
+{
+    lex_token_init(token);
+
+next:
+    *startp = p;
+    switch (*p) {
+    case '\0':
+        token->type = LEX_T_END;
+        return p;
+
+    case ' ': case '\t': case '\n': case '\r':
+        p++;
+        goto next;
+
+    case '/':
+        p++;
+        if (*p == '/') {
+            do {
+                p++;
+            } while (*p != '\0' && *p != '\n');
+            goto next;
+        } else if (*p == '*') {
+            p++;
+            /* Every path out of this loop is a 'goto' or a 'return'. */
+            for (;;) {
+                if (*p == '*' && p[1] == '/') {
+                    p += 2;
+                    goto next;
+                } else if (*p == '\0' || *p == '\n') {
+                    lex_error(token, "`/*' without matching `*/'.");
+                    return p;
+                } else {
+                    p++;
+                }
+            }
+        } else {
+            lex_error(token,
+                      "`/' is only valid as part of `//' or `/*'.");
+        }
+        break;
+
+    case '(':
+        token->type = LEX_T_LPAREN;
+        p++;
+        break;
+
+    case ')':
+        token->type = LEX_T_RPAREN;
+        p++;
+        break;
+
+    case '{':
+        token->type = LEX_T_LCURLY;
+        p++;
+        break;
+
+    case '}':
+        token->type = LEX_T_RCURLY;
+        p++;
+        break;
+
+    case '[':
+        token->type = LEX_T_LSQUARE;
+        p++;
+        break;
+
+    case ']':
+        token->type = LEX_T_RSQUARE;
+        p++;
+        break;
+
+    case '=':
+        p++;
+        if (*p == '=') {
+            token->type = LEX_T_EQ;
+            p++;
+        } else {
+            token->type = LEX_T_EQUALS;
+        }
+        break;
+
+    case '!':
+        p++;
+        if (*p == '=') {
+            token->type = LEX_T_NE;
+            p++;
+        } else {
+            token->type = LEX_T_LOG_NOT;
+        }
+        break;
+
+    case '&':
+        p++;
+        if (*p == '&') {
+            token->type = LEX_T_LOG_AND;
+            p++;
+        } else {
+            lex_error(token, "`&' is only valid as part of `&&'.");
+        }
+        break;
+
+    case '|':
+        p++;
+        if (*p == '|') {
+            token->type = LEX_T_LOG_OR;
+            p++;
+        } else {
+            lex_error(token, "`|' is only valid as part of `||'.");
+        }
+        break;
+
+    case '<':
+        p++;
+        if (*p == '=') {
+            token->type = LEX_T_LE;
+            p++;
+        } else {
+            token->type = LEX_T_LT;
+        }
+        break;
+
+    case '>':
+        p++;
+        if (*p == '=') {
+            token->type = LEX_T_GE;
+            p++;
+        } else {
+            token->type = LEX_T_GT;
+        }
+        break;
+
+    case '.':
+        p++;
+        if (*p == '.') {
+            token->type = LEX_T_ELLIPSIS;
+            p++;
+        } else {
+            lex_error(token, "`.' is only valid as part of `..' or a number.");
+        }
+        break;
+
+    case ',':
+        p++;
+        token->type = LEX_T_COMMA;
+        break;
+
+    case ';':
+        p++;
+        token->type = LEX_T_SEMICOLON;
+        break;
+
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+    case ':':
+        p = lex_parse_integer(p, token);
+        break;
+
+    case '"':
+        p = lex_parse_string(p, token);
+        break;
+
+    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+        /* We need to distinguish an Ethernet address or IPv6 address from an
+         * identifier.  Fortunately, Ethernet addresses and IPv6 addresses that
+         * are ambiguous based on the first character, always start with hex
+         * digits followed by a colon, but identifiers never do. */
+        p = (p[strspn(p, "0123456789abcdefABCDEF")] == ':'
+             ? lex_parse_integer(p, token)
+             : lex_parse_id(p, token));
+        break;
+
+    default:
+        if (lex_is_id1(*p)) {
+            p = lex_parse_id(p, token);
+        } else {
+            /* Report the byte as an unsigned value so that bytes with the
+             * high bit set are not shown as negative numbers. */
+            unsigned char c = *p;
+
+            if (isprint(c)) {
+                lex_error(token, "Invalid character `%c' in input.", c);
+            } else {
+                /* The "0x" prefix promises hexadecimal, so print hex. */
+                lex_error(token, "Invalid byte 0x%02x in input.",
+                          (unsigned int) c);
+            }
+            p++;
+        }
+        break;
+    }
+
+    return p;
+}
+\f
+/* Prepares 'lexer' to tokenize 'input'.
+ *
+ * 'lexer' keeps a pointer into 'input' rather than a copy, so 'input' must
+ * stay valid for as long as 'lexer' is in use.  The caller retains ownership
+ * of 'input'.
+ *
+ * No token is read here: the caller must call lexer_get() to obtain the
+ * first token. */
+void
+lexer_init(struct lexer *lexer, const char *input)
+{
+    lex_token_init(&lexer->token);
+    lexer->start = NULL;
+    lexer->input = input;
+}
+
+/* Frees storage associated with 'lexer', that is, the memory owned by its
+ * current token. Does not free 'lexer' itself or its input string. */
+void
+lexer_destroy(struct lexer *lexer)
+{
+ lex_token_destroy(&lexer->token);
+}
+
+/* Advances 'lexer' by one token: discards the current token, parses the next
+ * one from the remaining input into 'lexer->token', and returns its type.
+ * The caller may inspect 'lexer->token' directly for the token's details. */
+enum lex_type
+lexer_get(struct lexer *lexer)
+{
+    struct lex_token *token = &lexer->token;
+
+    lex_token_destroy(token);
+    lexer->input = lex_token_parse(token, lexer->input, &lexer->start);
+    return token->type;
+}
+
+/* Peeks at the token that the next call to lexer_get() would fetch and
+ * returns its type.  'lexer' is not modified: the peeked token is parsed
+ * into a temporary and then discarded. */
+enum lex_type
+lexer_lookahead(const struct lexer *lexer)
+{
+    struct lex_token peeked;
+    const char *unused_start;
+
+    lex_token_parse(&peeked, lexer->input, &unused_start);
+
+    enum lex_type type = peeked.type;
+    lex_token_destroy(&peeked);
+    return type;
+}
+
+/* Consumes the current token of 'lexer' if it has the given 'type'.
+ *
+ * Returns true, after advancing to the next token, if the type matched.
+ * Returns false and leaves 'lexer' untouched otherwise. */
+bool
+lexer_match(struct lexer *lexer, enum lex_type type)
+{
+    if (lexer->token.type != type) {
+        return false;
+    }
+
+    lexer_get(lexer);
+    return true;
+}
+
+/* Consumes the current token of 'lexer' if it is exactly the identifier
+ * 'id'.
+ *
+ * Returns true, after advancing to the next token, if it matched.  Returns
+ * false and leaves 'lexer' untouched otherwise. */
+bool
+lexer_match_id(struct lexer *lexer, const char *id)
+{
+    if (lexer->token.type != LEX_T_ID || strcmp(lexer->token.s, id)) {
+        return false;
+    }
+
+    lexer_get(lexer);
+    return true;
+}
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OVN_LEX_H
+#define OVN_LEX_H 1
+
+/* OVN lexical analyzer
+ * ====================
+ *
+ * This is a simple lexical analyzer (or tokenizer) for OVN match expressions
+ * and ACLs. */
+
+#include "meta-flow.h"
+
+struct ds;
+
+/* Token type. */
+enum lex_type {
+ LEX_T_END, /* end of input */
+
+ /* Tokens with auxiliary data. */
+ LEX_T_ID, /* foo */
+ LEX_T_STRING, /* "foo" */
+ LEX_T_INTEGER, /* 12345 or 1.2.3.4 or ::1 or 01:02:03:04:05:06 */
+ LEX_T_MASKED_INTEGER, /* 12345/10 or 1.2.0.0/16 or ::2/127 or... */
+ LEX_T_ERROR, /* invalid input */
+
+ /* Bare tokens. */
+ LEX_T_LPAREN, /* ( */
+ LEX_T_RPAREN, /* ) */
+ LEX_T_LCURLY, /* { */
+ LEX_T_RCURLY, /* } */
+ LEX_T_LSQUARE, /* [ */
+ LEX_T_RSQUARE, /* ] */
+ LEX_T_EQ, /* == */
+ LEX_T_NE, /* != */
+ LEX_T_LT, /* < */
+ LEX_T_LE, /* <= */
+ LEX_T_GT, /* > */
+ LEX_T_GE, /* >= */
+ LEX_T_LOG_NOT, /* ! */
+ LEX_T_LOG_AND, /* && */
+ LEX_T_LOG_OR, /* || */
+ LEX_T_ELLIPSIS, /* .. */
+ LEX_T_COMMA, /* , */
+ LEX_T_SEMICOLON, /* ; */
+ LEX_T_EQUALS, /* = */
+};
+
+/* Subtype for LEX_T_INTEGER and LEX_T_MASKED_INTEGER tokens.
+ *
+ * These do not change the semantics of a token; instead, they determine the
+ * format used when a token is serialized back to a text form. That's
+ * important because 3232268289 is meaningless to a human whereas 192.168.128.1
+ * has some actual significance. */
+enum lex_format {
+ LEX_F_DECIMAL,
+ LEX_F_HEXADECIMAL,
+ LEX_F_IPV4,
+ LEX_F_IPV6,
+ LEX_F_ETHERNET,
+};
+const char *lex_format_to_string(enum lex_format);
+
+/* A token.
+ *
+ * 's' is owned by the token.
+ *
+ * Which members are meaningful depends on 'type'; see the per-member
+ * comments below. */
+struct lex_token {
+ enum lex_type type; /* One of LEX_*. */
+ char *s; /* LEX_T_ID, LEX_T_STRING, LEX_T_ERROR only. */
+ enum lex_format format; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */
+ union mf_subvalue value; /* LEX_T_INTEGER, LEX_T_MASKED_INTEGER only. */
+ union mf_subvalue mask; /* LEX_T_MASKED_INTEGER only. */
+};
+
+/* Token lifecycle. NOTE(review): since 's' is owned by the token,
+ * lex_token_destroy() presumably frees it -- confirm in lex.c. */
+void lex_token_init(struct lex_token *);
+void lex_token_destroy(struct lex_token *);
+void lex_token_swap(struct lex_token *, struct lex_token *);
+
+/* Conversion between tokens and text. NOTE(review): the meaning of the
+ * return value of lex_token_parse() and of '*startp' is not visible from this
+ * header; presumably they report the remaining input and the start of the
+ * parsed token -- confirm in lex.c. */
+void lex_token_format(const struct lex_token *, struct ds *);
+const char *lex_token_parse(struct lex_token *, const char *input,
+ const char **startp);
+
+/* A lexical analyzer.
+ *
+ * The lexer does not own the input string it scans; it owns only the current
+ * token. */
+struct lexer {
+ const char *input; /* Remaining input (not owned by lexer). */
+ const char *start; /* Start of current token in 'input'. */
+ struct lex_token token; /* Current token (owned by lexer). */
+};
+
+void lexer_init(struct lexer *, const char *input);
+void lexer_destroy(struct lexer *);
+
+/* NOTE(review): judging by the names, lexer_get() advances to the next token,
+ * lexer_lookahead() peeks without advancing (it takes a const lexer), and the
+ * lexer_match*() functions consume the current token only if it matches --
+ * confirm against lex.c. */
+enum lex_type lexer_get(struct lexer *);
+enum lex_type lexer_lookahead(const struct lexer *);
+bool lexer_match(struct lexer *, enum lex_type);
+bool lexer_match_id(struct lexer *, const char *id);
+
+#endif /* ovn/lex.h */
--- /dev/null
+libovn_@LT_CURRENT@ {
+global:
+ *;
+};
--- /dev/null
+# -*- python -*-
+
+# This code, when invoked by "ovsdb-idlc annotate" (by the build
+# process), annotates the OVN northbound schema (presumably
+# ovn-nb.ovsschema) with additional data that give the ovsdb-idl engine
+# information about the types involved, so that it can generate more
+# programmer-friendly data structures.
+
+s["idlPrefix"] = "nbrec_"
+s["idlHeader"] = "\"ovn/lib/ovn-nb-idl.h\""
--- /dev/null
+# -*- python -*-
+
+# This code, when invoked by "ovsdb-idlc annotate" (by the build
+# process), annotates the OVN southbound schema (presumably
+# ovn-sb.ovsschema) with additional data that give the ovsdb-idl engine
+# information about the types involved, so that it can generate more
+# programmer-friendly data structures.
+
+s["idlPrefix"] = "sbrec_"
+s["idlHeader"] = "\"ovn/lib/ovn-sb-idl.h\""
--- /dev/null
+/ovn-northd
--- /dev/null
+# ovn-northd
+#
+# Installed program built from a single source file and linked against
+# the OVN, OVSDB, and Open vSwitch libraries.
+bin_PROGRAMS += ovn/northd/ovn-northd
+ovn_northd_ovn_northd_SOURCES = ovn/northd/ovn-northd.c
+ovn_northd_ovn_northd_LDADD = \
+ ovn/lib/libovn.la \
+ ovsdb/libovsdb.la \
+ lib/libopenvswitch.la
--- /dev/null
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "command-line.h"
+#include "daemon.h"
+#include "dirs.h"
+#include "dynamic-string.h"
+#include "fatal-signal.h"
+#include "hash.h"
+#include "hmap.h"
+#include "json.h"
+#include "ovn/lib/lex.h"
+#include "ovn/lib/ovn-nb-idl.h"
+#include "ovn/lib/ovn-sb-idl.h"
+#include "poll-loop.h"
+#include "stream.h"
+#include "stream-ssl.h"
+#include "unixctl.h"
+#include "util.h"
+#include "uuid.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(ovn_northd);
+
+/* Handler for the "exit" unixctl command; defined at the end of the file. */
+static unixctl_cb_func ovn_northd_exit;
+
+/* State shared by the functions that translate between the two databases:
+ * one IDL per database plus the transaction, if any, currently open against
+ * each. A NULL transaction pointer means that no transaction is in
+ * progress for that database. */
+struct northd_context {
+ struct ovsdb_idl *ovnnb_idl;
+ struct ovsdb_idl *ovnsb_idl;
+ struct ovsdb_idl_txn *ovnnb_txn;
+ struct ovsdb_idl_txn *ovnsb_txn;
+};
+
+/* Database connection targets, taken from --ovnnb-db and --ovnsb-db or, when
+ * those options are absent, from default_db(). */
+static const char *ovnnb_db;
+static const char *ovnsb_db;
+
+static const char *default_db(void);
+
+/* Prints a help message to stdout: the program's own options (showing
+ * default_db() as the default for both database options), followed by the
+ * usage text of the daemon, vlog, and stream modules. */
+static void
+usage(void)
+{
+ printf("\
+%s: OVN northbound management daemon\n\
+usage: %s [OPTIONS]\n\
+\n\
+Options:\n\
+ --ovnnb-db=DATABASE connect to ovn-nb database at DATABASE\n\
+ (default: %s)\n\
+ --ovnsb-db=DATABASE connect to ovn-sb database at DATABASE\n\
+ (default: %s)\n\
+ -h, --help display this help message\n\
+ -o, --options list available options\n\
+ -V, --version display version information\n\
+", program_name, program_name, default_db(), default_db());
+ daemon_usage();
+ vlog_usage();
+ stream_usage("database", true, true, false);
+}
+\f
+/* qsort() comparison callback for arrays of C strings: 'a_' and 'b_' each
+ * point to a 'char *' array element, and the strings those elements point to
+ * are ordered with strcmp(). */
+static int
+compare_strings(const void *a_, const void *b_)
+{
+    const char *lhs = *(char *const *) a_;
+    const char *rhs = *(char *const *) b_;
+
+    return strcmp(lhs, rhs);
+}
+
+/*
+ * Returns true if 'binding_macs_' (with 'b_n_macs' elements) and
+ * 'lport_macs_' (with 'l_n_macs' elements) contain the same strings,
+ * regardless of order, and false otherwise.
+ *
+ * Neither input array is modified: the element pointers are copied into
+ * scratch arrays, which are sorted and then compared pairwise. This runs
+ * every time the OVN_Northbound database changes and the arrays might be
+ * long, so the cheap length comparison comes first.
+ */
+static bool
+macs_equal(char **binding_macs_, size_t b_n_macs,
+           char **lport_macs_, size_t l_n_macs)
+{
+    if (b_n_macs != l_n_macs) {
+        return false;
+    }
+
+    size_t n_bytes = b_n_macs * sizeof binding_macs_[0];
+
+    char **sorted_binding = xmalloc(n_bytes);
+    char **sorted_lport = xmalloc(n_bytes);
+
+    memcpy(sorted_binding, binding_macs_, n_bytes);
+    qsort(sorted_binding, b_n_macs, sizeof sorted_binding[0],
+          compare_strings);
+
+    memcpy(sorted_lport, lport_macs_, n_bytes);
+    qsort(sorted_lport, l_n_macs, sizeof sorted_lport[0], compare_strings);
+
+    bool equal = true;
+    for (size_t idx = 0; idx < b_n_macs; idx++) {
+        if (strcmp(sorted_binding[idx], sorted_lport[idx])) {
+            equal = false;
+            break;
+        }
+    }
+
+    free(sorted_binding);
+    free(sorted_lport);
+
+    return equal;
+}
+\f
+/* Pipeline generation.
+ *
+ * This code generates the Pipeline table in the southbound database, as a
+ * function of most of the northbound database.
+ */
+
+/* Enough context to add a Pipeline row, using pipeline_add(). */
+struct pipeline_ctx {
+ /* From northd_context. */
+ struct ovsdb_idl *ovnsb_idl; /* IDL for the southbound database. */
+ struct ovsdb_idl_txn *ovnsb_txn; /* Transaction that new rows are added to. */
+
+ /* Contains "struct pipeline_hash_node"s. Used to figure out what existing
+ * Pipeline rows should be deleted: we index all of the Pipeline rows into
+ * this data structure, then as existing rows are generated we remove them.
+ * After generating all the rows, any remaining in 'pipeline_hmap' must be
+ * deleted from the database. */
+ struct hmap pipeline_hmap;
+};
+
+/* A row in the Pipeline table, indexed by its full contents. */
+struct pipeline_hash_node {
+ struct hmap_node node; /* In a pipeline_ctx's 'pipeline_hmap'. */
+ const struct sbrec_pipeline *pipeline; /* The existing database row. */
+};
+
+/* Returns a hash over the full contents of a Pipeline row, that is, over
+ * 'logical_datapath', 'table_id', 'priority', 'match', and 'actions'. */
+static size_t
+pipeline_hash(const struct uuid *logical_datapath, uint8_t table_id,
+              uint16_t priority, const char *match, const char *actions)
+{
+    size_t basis = uuid_hash(logical_datapath);
+
+    /* 'table_id' and 'priority' are packed into one word before mixing. */
+    basis = hash_2words((table_id << 16) | priority, basis);
+    basis = hash_string(match, basis);
+    basis = hash_string(actions, basis);
+
+    return basis;
+}
+
+/* Returns the pipeline_hash() of the contents of existing row 'pipeline'. */
+static size_t
+pipeline_hash_rec(const struct sbrec_pipeline *pipeline)
+{
+    const struct uuid *datapath = &pipeline->logical_datapath;
+
+    return pipeline_hash(datapath, pipeline->table_id, pipeline->priority,
+                         pipeline->match, pipeline->actions);
+}
+
+/* Ensures that the Pipeline table contains a row with the specified contents.
+ *
+ * If 'ctx->pipeline_hmap' already indexes an identical row, that entry is
+ * removed from the map (so that the row is not deleted later) and nothing is
+ * written. Otherwise a new row is inserted through 'ctx->ovnsb_txn'. */
+static void
+pipeline_add(struct pipeline_ctx *ctx,
+             const struct nbrec_logical_switch *logical_datapath,
+             uint8_t table_id,
+             uint16_t priority,
+             const char *match,
+             const char *actions)
+{
+    const struct uuid *dp_uuid = &logical_datapath->header_.uuid;
+    size_t hash = pipeline_hash(dp_uuid, table_id, priority, match, actions);
+
+    /* Look for an existing row with exactly these contents. */
+    struct pipeline_hash_node *candidate;
+    HMAP_FOR_EACH_WITH_HASH (candidate, node, hash, &ctx->pipeline_hmap) {
+        const struct sbrec_pipeline *row = candidate->pipeline;
+
+        if (!uuid_equals(&row->logical_datapath, dp_uuid)
+            || row->table_id != table_id
+            || row->priority != priority
+            || strcmp(row->match, match)
+            || strcmp(row->actions, actions)) {
+            continue;
+        }
+
+        /* Found it: keep the row by dropping it from the deletion index. */
+        hmap_remove(&ctx->pipeline_hmap, &candidate->node);
+        free(candidate);
+        return;
+    }
+
+    /* No such Pipeline row. Add one. */
+    const struct sbrec_pipeline *new_row
+        = sbrec_pipeline_insert(ctx->ovnsb_txn);
+    sbrec_pipeline_set_logical_datapath(new_row, *dp_uuid);
+    sbrec_pipeline_set_table_id(new_row, table_id);
+    sbrec_pipeline_set_priority(new_row, priority);
+    sbrec_pipeline_set_match(new_row, match);
+    sbrec_pipeline_set_actions(new_row, actions);
+}
+
+/* Appends to 'match' a clause that restricts L2 address field
+ * 'eth_addr_field' (e.g. "eth.src" or "eth.dst") to the Ethernet addresses
+ * found in 'port_security', the 'n_port_security'-element collection of
+ * port_security constraints from an OVN_NB Logical_Port row.
+ *
+ * Elements that do not parse as Ethernet addresses are skipped. If none
+ * parses, 'match' is left at its original length. */
+static void
+build_port_security(const char *eth_addr_field,
+                    char **port_security, size_t n_port_security,
+                    struct ds *match)
+{
+    size_t saved_len = match->length;
+    size_t n_valid = 0;
+
+    ds_put_format(match, " && %s == {", eth_addr_field);
+    for (size_t idx = 0; idx < n_port_security; idx++) {
+        uint8_t addr[ETH_ADDR_LEN];
+
+        if (!eth_addr_from_string(port_security[idx], addr)) {
+            continue;
+        }
+
+        ds_put_format(match, ETH_ADDR_FMT, ETH_ADDR_ARGS(addr));
+        ds_put_char(match, ' ');
+        n_valid++;
+    }
+
+    /* Replace the separator after the last address with the closing brace. */
+    ds_chomp(match, ' ');
+    ds_put_cstr(match, "}");
+
+    if (n_valid == 0) {
+        /* Nothing usable was appended, so discard the whole clause. */
+        match->length = saved_len;
+    }
+}
+
+/* Updates the Pipeline table in the OVN_SB database, constructing its contents
+ * based on the OVN_NB database.
+ *
+ * All existing Pipeline rows are first indexed by content. Each row that the
+ * northbound configuration calls for is then passed to pipeline_add(), which
+ * either matches it against an existing row or inserts a new one. Existing
+ * rows that were never matched are deleted at the end.
+ *
+ * Logical tables generated here:
+ *
+ *     0: admission control and ingress port security
+ *     1: destination lookup (multicast/broadcast, unicast, unknown MACs)
+ *     2: ACLs
+ *     3: egress port security
+ */
+static void
+build_pipeline(struct northd_context *ctx)
+{
+    struct pipeline_ctx pc = {
+        .ovnsb_idl = ctx->ovnsb_idl,
+        .ovnsb_txn = ctx->ovnsb_txn,
+        .pipeline_hmap = HMAP_INITIALIZER(&pc.pipeline_hmap)
+    };
+
+    /* Add all the Pipeline entries currently in the southbound database to
+     * 'pc.pipeline_hmap'. We remove entries that we generate from the hmap,
+     * thus by the time we're done only entries that need to be removed
+     * remain. */
+    const struct sbrec_pipeline *pipeline;
+    SBREC_PIPELINE_FOR_EACH (pipeline, ctx->ovnsb_idl) {
+        struct pipeline_hash_node *hash_node = xzalloc(sizeof *hash_node);
+        hash_node->pipeline = pipeline;
+        hmap_insert(&pc.pipeline_hmap, &hash_node->node,
+                    pipeline_hash_rec(pipeline));
+    }
+
+    /* Table 0: Admission control framework. */
+    const struct nbrec_logical_switch *lswitch;
+    NBREC_LOGICAL_SWITCH_FOR_EACH (lswitch, ctx->ovnnb_idl) {
+        /* Logical VLANs not supported. */
+        pipeline_add(&pc, lswitch, 0, 100, "vlan.present", "drop;");
+
+        /* Broadcast/multicast source address is invalid. */
+        pipeline_add(&pc, lswitch, 0, 100, "eth.src[40]", "drop;");
+
+        /* Port security flows have priority 50 (see below) and will continue
+         * to the next table if packet source is acceptable. */
+
+        /* Otherwise drop the packet. */
+        pipeline_add(&pc, lswitch, 0, 0, "1", "drop;");
+    }
+
+    /* Table 0: Ingress port security. */
+    const struct nbrec_logical_port *lport;
+    NBREC_LOGICAL_PORT_FOR_EACH (lport, ctx->ovnnb_idl) {
+        struct ds match = DS_EMPTY_INITIALIZER;
+        ds_put_cstr(&match, "inport == ");
+        json_string_escape(lport->name, &match);
+        build_port_security("eth.src",
+                            lport->port_security, lport->n_port_security,
+                            &match);
+        pipeline_add(&pc, lport->lswitch, 0, 50, ds_cstr(&match), "next;");
+        ds_destroy(&match);
+    }
+
+    /* Table 1: Destination lookup, broadcast and multicast handling (priority
+     * 100). */
+    NBREC_LOGICAL_SWITCH_FOR_EACH (lswitch, ctx->ovnnb_idl) {
+        struct ds actions;
+
+        /* Output to every logical port that belongs to this switch. */
+        ds_init(&actions);
+        NBREC_LOGICAL_PORT_FOR_EACH (lport, ctx->ovnnb_idl) {
+            if (lport->lswitch == lswitch) {
+                ds_put_cstr(&actions, "outport = ");
+                json_string_escape(lport->name, &actions);
+                ds_put_cstr(&actions, "; next; ");
+            }
+        }
+        ds_chomp(&actions, ' ');
+
+        pipeline_add(&pc, lswitch, 1, 100, "eth.dst[40]", ds_cstr(&actions));
+        ds_destroy(&actions);
+    }
+
+    /* Table 1: Destination lookup, unicast handling (priority 50).
+     *
+     * Ports whose 'macs' column contains "unknown" are collected per logical
+     * switch in 'unknown_actions' for the priority-0 flow generated below. */
+    struct unknown_actions {
+        struct hmap_node hmap_node;
+        const struct nbrec_logical_switch *ls;
+        struct ds actions;
+    };
+    struct hmap unknown_actions = HMAP_INITIALIZER(&unknown_actions);
+    NBREC_LOGICAL_PORT_FOR_EACH (lport, ctx->ovnnb_idl) {
+        lswitch = lport->lswitch;
+        for (size_t i = 0; i < lport->n_macs; i++) {
+            uint8_t mac[ETH_ADDR_LEN];
+
+            if (eth_addr_from_string(lport->macs[i], mac)) {
+                struct ds match, actions;
+
+                /* Emit the parsed address, not the raw column text, so that
+                 * only a well-formed, canonically formatted address reaches
+                 * the match expression. This is the same formatting that
+                 * build_port_security() uses. */
+                ds_init(&match);
+                ds_put_format(&match, "eth.dst == "ETH_ADDR_FMT,
+                              ETH_ADDR_ARGS(mac));
+
+                ds_init(&actions);
+                ds_put_cstr(&actions, "outport = ");
+                json_string_escape(lport->name, &actions);
+                ds_put_cstr(&actions, "; next;");
+                pipeline_add(&pc, lswitch, 1, 50,
+                             ds_cstr(&match), ds_cstr(&actions));
+                ds_destroy(&actions);
+                ds_destroy(&match);
+            } else if (!strcmp(lport->macs[i], "unknown")) {
+                const struct uuid *uuid = &lswitch->header_.uuid;
+                struct unknown_actions *ua = NULL;
+                struct unknown_actions *iter;
+                HMAP_FOR_EACH_WITH_HASH (iter, hmap_node, uuid_hash(uuid),
+                                         &unknown_actions) {
+                    if (uuid_equals(&iter->ls->header_.uuid, uuid)) {
+                        ua = iter;
+                        break;
+                    }
+                }
+                if (!ua) {
+                    /* First "unknown" port seen on this switch. */
+                    ua = xmalloc(sizeof *ua);
+                    hmap_insert(&unknown_actions, &ua->hmap_node,
+                                uuid_hash(uuid));
+                    ua->ls = lswitch;
+                    ds_init(&ua->actions);
+                } else {
+                    ds_put_char(&ua->actions, ' ');
+                }
+
+                ds_put_cstr(&ua->actions, "outport = ");
+                json_string_escape(lport->name, &ua->actions);
+                ds_put_cstr(&ua->actions, "; next;");
+            } else {
+                static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+
+                VLOG_INFO_RL(&rl, "%s: invalid syntax '%s' in macs column",
+                             lport->name, lport->macs[i]);
+            }
+        }
+    }
+
+    /* Table 1: Destination lookup for unknown MACs (priority 0). */
+    struct unknown_actions *ua, *next_ua;
+    HMAP_FOR_EACH_SAFE (ua, next_ua, hmap_node, &unknown_actions) {
+        pipeline_add(&pc, ua->ls, 1, 0, "1", ds_cstr(&ua->actions));
+        hmap_remove(&unknown_actions, &ua->hmap_node);
+        ds_destroy(&ua->actions);
+        free(ua);
+    }
+    hmap_destroy(&unknown_actions);
+
+    /* Table 2: ACLs. */
+    const struct nbrec_acl *acl;
+    NBREC_ACL_FOR_EACH (acl, ctx->ovnnb_idl) {
+        const char *action;
+
+        /* Any action other than "allow" or "allow-related" drops. */
+        action = (!strcmp(acl->action, "allow") ||
+                  !strcmp(acl->action, "allow-related"))
+                  ? "next;" : "drop;";
+        pipeline_add(&pc, acl->lswitch, 2, acl->priority, acl->match, action);
+    }
+    NBREC_LOGICAL_SWITCH_FOR_EACH (lswitch, ctx->ovnnb_idl) {
+        /* Default: packets that match no ACL continue to the next table. */
+        pipeline_add(&pc, lswitch, 2, 0, "1", "next;");
+    }
+
+    /* Table 3: Egress port security. */
+    NBREC_LOGICAL_SWITCH_FOR_EACH (lswitch, ctx->ovnnb_idl) {
+        pipeline_add(&pc, lswitch, 3, 100, "eth.dst[40]", "output;");
+    }
+    NBREC_LOGICAL_PORT_FOR_EACH (lport, ctx->ovnnb_idl) {
+        struct ds match;
+
+        ds_init(&match);
+        ds_put_cstr(&match, "outport == ");
+        json_string_escape(lport->name, &match);
+        build_port_security("eth.dst",
+                            lport->port_security, lport->n_port_security,
+                            &match);
+
+        pipeline_add(&pc, lport->lswitch, 3, 50, ds_cstr(&match), "output;");
+
+        ds_destroy(&match);
+    }
+
+    /* Delete any existing Pipeline rows that were not re-generated. */
+    struct pipeline_hash_node *hash_node, *next_hash_node;
+    HMAP_FOR_EACH_SAFE (hash_node, next_hash_node, node, &pc.pipeline_hmap) {
+        hmap_remove(&pc.pipeline_hmap, &hash_node->node);
+        sbrec_pipeline_delete(hash_node->pipeline);
+        free(hash_node);
+    }
+    hmap_destroy(&pc.pipeline_hmap);
+}
+\f
+/* Returns true if the parent port recorded in 'binding' agrees with the
+ * parent name configured on 'lport': either both are unset, or both are set
+ * to the same string. */
+static bool
+parents_equal(const struct sbrec_binding *binding,
+              const struct nbrec_logical_port *lport)
+{
+    const char *sb_parent = binding->parent_port;
+    const char *nb_parent = lport->parent_name;
+
+    if (!sb_parent || !nb_parent) {
+        /* At least one side is unset; they agree only if both are. */
+        return !sb_parent && !nb_parent;
+    }
+
+    return !strcmp(sb_parent, nb_parent);
+}
+
+/* Returns true if 'binding' and 'lport' have the same number of tags and, if
+ * they have any, the same first tag. */
+static bool
+tags_equal(const struct sbrec_binding *binding,
+           const struct nbrec_logical_port *lport)
+{
+    if (binding->n_tag != lport->n_tag) {
+        return false;
+    }
+    if (!binding->n_tag) {
+        /* Neither side has a tag. */
+        return true;
+    }
+    return binding->tag[0] == lport->tag[0];
+}
+
+/* Index entry for one Binding row. The two hmap nodes let the same entry
+ * be linked into a map keyed on logical port name and into a map keyed on
+ * tunnel key. */
+struct binding_hash_node {
+ struct hmap_node lp_node; /* In 'lp_map', by binding->logical_port. */
+ struct hmap_node tk_node; /* In 'tk_map', by binding->tunnel_key. */
+ const struct sbrec_binding *binding;
+};
+
+/* Returns true if some binding indexed in 'tk_hmap' (through its 'tk_node')
+ * already uses 'tunnel_key', false otherwise. */
+static bool
+tunnel_key_in_use(const struct hmap *tk_hmap, uint16_t tunnel_key)
+{
+    const struct binding_hash_node *candidate;
+    bool in_use = false;
+
+    HMAP_FOR_EACH_IN_BUCKET (candidate, tk_node, hash_int(tunnel_key, 0),
+                             tk_hmap) {
+        if (candidate->binding->tunnel_key == tunnel_key) {
+            in_use = true;
+            break;
+        }
+    }
+
+    return in_use;
+}
+
+/* Chooses and returns a positive tunnel key that is not already in use in
+ * 'tk_hmap'. Returns 0 if all tunnel keys are in use.
+ *
+ * The search starts just after the key that the previous call returned
+ * (remembered in a static variable) and wraps around at the top of the 16-bit
+ * range. Key 0 is never handed out, because 0 is the "no key available"
+ * return value. */
+static uint16_t
+choose_tunnel_key(const struct hmap *tk_hmap)
+{
+    static uint16_t prev;
+
+    for (uint16_t key = prev + 1; key != prev; key++) {
+        /* Skip 0 when the counter wraps: returning it would make the caller
+         * believe that the key space is exhausted. */
+        if (key && !tunnel_key_in_use(tk_hmap, key)) {
+            prev = key;
+            return key;
+        }
+    }
+
+    /* The loop above never examines 'prev' itself. The binding that it was
+     * handed to may be gone by now, so it is the last candidate. */
+    if (prev && !tunnel_key_in_use(tk_hmap, prev)) {
+        return prev;
+    }
+
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 1);
+    VLOG_WARN_RL(&rl, "all tunnel keys exhausted");
+    return 0;
+}
+
+/*
+ * When a change has occurred in the OVN_Northbound database, we go through and
+ * make sure that the contents of the Binding table in the OVN_Southbound
+ * database are up to date with the logical ports defined in the
+ * OVN_Northbound database.
+ *
+ * Existing bindings are updated column by column, and only where the value
+ * differs. Logical ports without a binding get a new one with a freshly
+ * chosen tunnel key. Bindings whose logical port no longer exists are
+ * deleted. All writes go through 'ctx->ovnsb_txn'.
+ */
+static void
+set_bindings(struct northd_context *ctx)
+{
+    const struct sbrec_binding *binding;
+    const struct nbrec_logical_port *lport;
+
+    /*
+     * We will need to look up a binding for every logical port. We don't want
+     * to have to do an O(n) search for every binding, so start out by hashing
+     * them on the logical port.
+     *
+     * As we go through every logical port, we will update the binding if it
+     * exists or create one otherwise. When the update is done, we'll remove
+     * it from the hashmap. At the end, any bindings left in the hashmap are
+     * for logical ports that have been deleted.
+     *
+     * We index the logical_port column because that's the shared key between
+     * the OVN_NB and OVN_SB databases. We index the tunnel_key column to
+     * allow us to choose a unique tunnel key for any Binding rows we have to
+     * add.
+     */
+    struct hmap lp_hmap = HMAP_INITIALIZER(&lp_hmap);
+    struct hmap tk_hmap = HMAP_INITIALIZER(&tk_hmap);
+
+    SBREC_BINDING_FOR_EACH(binding, ctx->ovnsb_idl) {
+        struct binding_hash_node *hash_node = xzalloc(sizeof *hash_node);
+        hash_node->binding = binding;
+        hmap_insert(&lp_hmap, &hash_node->lp_node,
+                    hash_string(binding->logical_port, 0));
+        hmap_insert(&tk_hmap, &hash_node->tk_node,
+                    hash_int(binding->tunnel_key, 0));
+    }
+
+    NBREC_LOGICAL_PORT_FOR_EACH(lport, ctx->ovnnb_idl) {
+        struct binding_hash_node *hash_node;
+        binding = NULL;
+        HMAP_FOR_EACH_WITH_HASH(hash_node, lp_node,
+                                hash_string(lport->name, 0), &lp_hmap) {
+            if (!strcmp(lport->name, hash_node->binding->logical_port)) {
+                binding = hash_node->binding;
+                break;
+            }
+        }
+
+        /* A port without a logical switch gets an all-zeros datapath. */
+        struct uuid logical_datapath;
+        if (lport->lswitch) {
+            logical_datapath = lport->lswitch->header_.uuid;
+        } else {
+            uuid_zero(&logical_datapath);
+        }
+
+        if (binding) {
+            /* We found an existing binding for this logical port. Update its
+             * contents. */
+
+            /* 'hash_node' is still the entry that the lookup stopped at.
+             * Removing it from 'lp_hmap' keeps the row from being deleted
+             * below. */
+            hmap_remove(&lp_hmap, &hash_node->lp_node);
+
+            if (!macs_equal(binding->mac, binding->n_mac,
+                            lport->macs, lport->n_macs)) {
+                sbrec_binding_set_mac(binding,
+                                      (const char **) lport->macs,
+                                      lport->n_macs);
+            }
+            if (!parents_equal(binding, lport)) {
+                sbrec_binding_set_parent_port(binding, lport->parent_name);
+            }
+            if (!tags_equal(binding, lport)) {
+                sbrec_binding_set_tag(binding, lport->tag, lport->n_tag);
+            }
+            if (!uuid_equals(&binding->logical_datapath, &logical_datapath)) {
+                sbrec_binding_set_logical_datapath(binding,
+                                                   logical_datapath);
+            }
+        } else {
+            /* There is no binding for this logical port, so create one. */
+
+            uint16_t tunnel_key = choose_tunnel_key(&tk_hmap);
+            if (!tunnel_key) {
+                continue;
+            }
+
+            binding = sbrec_binding_insert(ctx->ovnsb_txn);
+            sbrec_binding_set_logical_port(binding, lport->name);
+            sbrec_binding_set_mac(binding,
+                                  (const char **) lport->macs, lport->n_macs);
+            if (lport->parent_name && lport->n_tag > 0) {
+                sbrec_binding_set_parent_port(binding, lport->parent_name);
+                sbrec_binding_set_tag(binding, lport->tag, lport->n_tag);
+            }
+
+            sbrec_binding_set_tunnel_key(binding, tunnel_key);
+            sbrec_binding_set_logical_datapath(binding, logical_datapath);
+
+            /* Add the tunnel key to the tk_hmap so that we don't try to use it
+             * for another port. (We don't want it in the lp_hmap because that
+             * would just get the Binding record deleted later.) */
+            struct binding_hash_node *new_node = xzalloc(sizeof *new_node);
+            new_node->binding = binding;
+            hmap_insert(&tk_hmap, &new_node->tk_node,
+                        hash_int(binding->tunnel_key, 0));
+        }
+    }
+
+    /* Whatever is left in 'lp_hmap' is a binding for a logical port that no
+     * longer exists. The loop removes nodes as it goes, so it uses the
+     * _SAFE iterator. The nodes are not freed here because each one is
+     * also linked into 'tk_hmap', which is drained and freed below. */
+    struct binding_hash_node *hash_node, *hash_node_next;
+    HMAP_FOR_EACH_SAFE (hash_node, hash_node_next, lp_node, &lp_hmap) {
+        hmap_remove(&lp_hmap, &hash_node->lp_node);
+        sbrec_binding_delete(hash_node->binding);
+    }
+    hmap_destroy(&lp_hmap);
+
+    /* Every node, whether pre-existing or created above, is in 'tk_hmap', so
+     * this is where all of them are freed. */
+    HMAP_FOR_EACH_SAFE (hash_node, hash_node_next, tk_node, &tk_hmap) {
+        hmap_remove(&tk_hmap, &hash_node->tk_node);
+        free(hash_node);
+    }
+    hmap_destroy(&tk_hmap);
+}
+
+/* Recomputes the southbound database contents that are derived from the
+ * northbound database: first the Binding table, then the Pipeline table.
+ * main() calls this after opening 'ctx->ovnsb_txn', which both steps write
+ * through. */
+static void
+ovnnb_db_changed(struct northd_context *ctx)
+{
+ VLOG_DBG("ovn-nb db contents have changed.");
+
+ set_bindings(ctx);
+ build_pipeline(ctx);
+}
+
+/*
+ * Propagates binding state from the southbound database to the 'up' column
+ * of the matching northbound logical ports: a port is 'up' exactly when its
+ * Binding row has a chassis. The column is written only when it is unset
+ * or holds the wrong value.
+ */
+static void
+ovnsb_db_changed(struct northd_context *ctx)
+{
+    struct lport_hash_node {
+        struct hmap_node node;
+        const struct nbrec_logical_port *lport;
+    };
+
+    struct hmap lports_hmap = HMAP_INITIALIZER(&lports_hmap);
+
+    VLOG_DBG("Recalculating port up states for ovn-nb db.");
+
+    /* Index every logical port by name. */
+    const struct nbrec_logical_port *nb_port;
+    NBREC_LOGICAL_PORT_FOR_EACH(nb_port, ctx->ovnnb_idl) {
+        struct lport_hash_node *entry = xzalloc(sizeof *entry);
+        entry->lport = nb_port;
+        hmap_insert(&lports_hmap, &entry->node,
+                    hash_string(nb_port->name, 0));
+    }
+
+    const struct sbrec_binding *binding;
+    SBREC_BINDING_FOR_EACH(binding, ctx->ovnsb_idl) {
+        const struct nbrec_logical_port *found = NULL;
+        struct lport_hash_node *entry;
+
+        HMAP_FOR_EACH_WITH_HASH(entry, node,
+                                hash_string(binding->logical_port, 0),
+                                &lports_hmap) {
+            if (!strcmp(binding->logical_port, entry->lport->name)) {
+                found = entry->lport;
+                break;
+            }
+        }
+
+        if (!found) {
+            /* The logical port doesn't exist for this binding. This can
+             * happen under normal circumstances when ovn-northd hasn't gotten
+             * around to pruning the Binding yet. */
+            continue;
+        }
+
+        bool up = binding->chassis != NULL;
+        if (!found->up || *found->up != up) {
+            nbrec_logical_port_set_up(found, &up, 1);
+        }
+    }
+
+    struct lport_hash_node *entry, *next_entry;
+    HMAP_FOR_EACH_SAFE(entry, next_entry, node, &lports_hmap) {
+        hmap_remove(&lports_hmap, &entry->node);
+        free(entry);
+    }
+    hmap_destroy(&lports_hmap);
+}
+\f
+/* Returns the default database connection target, "unix:<rundir>/db.sock".
+ * The string is built on the first call and cached for later ones; the
+ * caller must not free it. */
+static const char *
+default_db(void)
+{
+    static char *cached;
+
+    if (cached == NULL) {
+        cached = xasprintf("unix:%s/db.sock", ovs_rundir());
+    }
+
+    return cached;
+}
+
+/* Parses the command line in 'argc' and 'argv'.
+ *
+ * Sets 'ovnnb_db' and 'ovnsb_db' from --ovnnb-db and --ovnsb-db, falling back
+ * to default_db() for whichever was not given. --help, --options, and
+ * --version print their output and exit successfully. An unrecognized
+ * option or a missing option argument, which getopt_long() reports as '?',
+ * terminates the program with a failure status rather than being ignored. */
+static void
+parse_options(int argc, char *argv[])
+{
+    enum {
+        DAEMON_OPTION_ENUMS,
+        VLOG_OPTION_ENUMS,
+    };
+    static const struct option long_options[] = {
+        {"ovnsb-db", required_argument, NULL, 'd'},
+        {"ovnnb-db", required_argument, NULL, 'D'},
+        {"help", no_argument, NULL, 'h'},
+        {"options", no_argument, NULL, 'o'},
+        {"version", no_argument, NULL, 'V'},
+        DAEMON_LONG_OPTIONS,
+        VLOG_LONG_OPTIONS,
+        STREAM_SSL_LONG_OPTIONS,
+        {NULL, 0, NULL, 0},
+    };
+    char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
+
+    for (;;) {
+        int c;
+
+        c = getopt_long(argc, argv, short_options, long_options, NULL);
+        if (c == -1) {
+            break;
+        }
+
+        switch (c) {
+        DAEMON_OPTION_HANDLERS;
+        VLOG_OPTION_HANDLERS;
+        STREAM_SSL_OPTION_HANDLERS;
+
+        case 'd':
+            ovnsb_db = optarg;
+            break;
+
+        case 'D':
+            ovnnb_db = optarg;
+            break;
+
+        case 'h':
+            usage();
+            exit(EXIT_SUCCESS);
+
+        case 'o':
+            ovs_cmdl_print_options(long_options);
+            exit(EXIT_SUCCESS);
+
+        case 'V':
+            ovs_print_version(0, 0);
+            exit(EXIT_SUCCESS);
+
+        case '?':
+            /* getopt_long() has already reported the problem. Do not
+             * start the daemon with a command line that was not understood. */
+            exit(EXIT_FAILURE);
+
+        default:
+            break;
+        }
+    }
+
+    if (!ovnsb_db) {
+        ovnsb_db = default_db();
+    }
+
+    if (!ovnnb_db) {
+        ovnnb_db = default_db();
+    }
+
+    free(short_options);
+}
+
+/* Entry point of ovn-northd.
+ *
+ * Parses options, sets up the unixctl server (with an "exit" command) and one
+ * IDL for each of the northbound and southbound databases, then loops until
+ * asked to exit or until either database connection is lost:
+ *
+ * - A northbound seqno change triggers ovnnb_db_changed() inside a new
+ * southbound transaction.
+ *
+ * - A southbound seqno change triggers ovnsb_db_changed() inside a new
+ * northbound transaction.
+ *
+ * Exits with EXIT_SUCCESS after an "exit" request and with EXIT_FAILURE if
+ * the unixctl server cannot be created or a database connection fails. */
+int
+main(int argc, char *argv[])
+{
+ extern struct vlog_module VLM_reconnect;
+ struct ovsdb_idl *ovnnb_idl, *ovnsb_idl;
+ unsigned int ovnnb_seqno, ovn_seqno;
+ int res = EXIT_SUCCESS;
+ /* Members not named here, including 'ovnnb_txn', are zero-initialized. */
+ struct northd_context ctx = {
+ .ovnsb_txn = NULL,
+ };
+ bool ovnnb_changes_pending = false;
+ bool ovn_changes_pending = false;
+ struct unixctl_server *unixctl;
+ int retval;
+ bool exiting;
+
+ fatal_ignore_sigpipe();
+ set_program_name(argv[0]);
+ vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
+ vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
+ parse_options(argc, argv);
+
+ daemonize_start();
+
+ retval = unixctl_server_create(NULL, &unixctl);
+ if (retval) {
+ exit(EXIT_FAILURE);
+ }
+ /* 'exiting' is assigned false just before the main loop, which is also
+ * the first place where unixctl commands are processed. */
+ unixctl_command_register("exit", "", 0, 0, ovn_northd_exit, &exiting);
+
+ daemonize_complete();
+
+ nbrec_init();
+ sbrec_init();
+
+ /* We want to detect all changes to the ovn-nb db. */
+ ctx.ovnnb_idl = ovnnb_idl = ovsdb_idl_create(ovnnb_db,
+ &nbrec_idl_class, true, true);
+
+ /* There is only a small subset of changes to the ovn-sb db that ovn-northd
+ * has to care about, so we'll enable monitoring those directly. */
+ ctx.ovnsb_idl = ovnsb_idl = ovsdb_idl_create(ovnsb_db,
+ &sbrec_idl_class, false, true);
+ ovsdb_idl_add_table(ovnsb_idl, &sbrec_table_binding);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_logical_port);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_chassis);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_mac);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_tag);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_parent_port);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_logical_datapath);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_binding_col_tunnel_key);
+ /* The Pipeline columns are replicated so that build_pipeline() can compare
+ * against existing rows. NOTE(review): omit_alert presumably keeps
+ * changes to them from bumping the southbound seqno -- confirm against the
+ * ovsdb-idl documentation. */
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_pipeline_col_logical_datapath);
+ ovsdb_idl_omit_alert(ovnsb_idl, &sbrec_pipeline_col_logical_datapath);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_pipeline_col_table_id);
+ ovsdb_idl_omit_alert(ovnsb_idl, &sbrec_pipeline_col_table_id);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_pipeline_col_priority);
+ ovsdb_idl_omit_alert(ovnsb_idl, &sbrec_pipeline_col_priority);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_pipeline_col_match);
+ ovsdb_idl_omit_alert(ovnsb_idl, &sbrec_pipeline_col_match);
+ ovsdb_idl_add_column(ovnsb_idl, &sbrec_pipeline_col_actions);
+ ovsdb_idl_omit_alert(ovnsb_idl, &sbrec_pipeline_col_actions);
+
+ /*
+ * The loop here just runs the IDL in a loop waiting for the seqno to
+ * change, which indicates that the contents of the db have changed.
+ *
+ * If the contents of the ovn-nb db change, the mappings to the ovn-sb
+ * db must be recalculated.
+ *
+ * If the contents of the ovn-sb db change, it means the 'up' state of
+ * a port may have changed, as that's the only type of change ovn-northd is
+ * watching for.
+ */
+
+ ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
+ ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
+ exiting = false;
+ while (!exiting) {
+ ovsdb_idl_run(ovnnb_idl);
+ ovsdb_idl_run(ovnsb_idl);
+ unixctl_server_run(unixctl);
+
+ if (!ovsdb_idl_is_alive(ovnnb_idl)) {
+ /* Note: this 'retval' shadows the function-scope one. */
+ int retval = ovsdb_idl_get_last_error(ovnnb_idl);
+ VLOG_ERR("%s: database connection failed (%s)",
+ ovnnb_db, ovs_retval_to_string(retval));
+ res = EXIT_FAILURE;
+ break;
+ }
+
+ if (!ovsdb_idl_is_alive(ovnsb_idl)) {
+ /* Note: this 'retval' shadows the function-scope one. */
+ int retval = ovsdb_idl_get_last_error(ovnsb_idl);
+ VLOG_ERR("%s: database connection failed (%s)",
+ ovnsb_db, ovs_retval_to_string(retval));
+ res = EXIT_FAILURE;
+ break;
+ }
+
+ if (ovnnb_seqno != ovsdb_idl_get_seqno(ovnnb_idl)) {
+ ovnnb_seqno = ovsdb_idl_get_seqno(ovnnb_idl);
+ ovnnb_changes_pending = true;
+ }
+
+ if (ovn_seqno != ovsdb_idl_get_seqno(ovnsb_idl)) {
+ ovn_seqno = ovsdb_idl_get_seqno(ovnsb_idl);
+ ovn_changes_pending = true;
+ }
+
+ /*
+ * If there are any pending changes, we delay recalculating the
+ * necessary updates until after an existing transaction finishes.
+ * This avoids the possibility of rapid updates causing ovn-northd to
+ * never be able to successfully make the corresponding updates to the
+ * other db. Instead, pending changes are batched up until the next
+ * time we get a chance to calculate the new state and apply it.
+ */
+
+ if (ovnnb_changes_pending && !ctx.ovnsb_txn) {
+ /*
+ * The OVN-nb db contents have changed, so create a transaction for
+ * updating the OVN-sb DB.
+ */
+ ctx.ovnsb_txn = ovsdb_idl_txn_create(ctx.ovnsb_idl);
+ ovsdb_idl_txn_add_comment(ctx.ovnsb_txn,
+ "ovn-northd: northbound db changed");
+ ovnnb_db_changed(&ctx);
+ ovnnb_changes_pending = false;
+ }
+
+ if (ovn_changes_pending && !ctx.ovnnb_txn) {
+ /*
+ * The OVN-sb db contents have changed, so create a transaction for
+ * updating the northbound DB.
+ */
+ ctx.ovnnb_txn = ovsdb_idl_txn_create(ctx.ovnnb_idl);
+ ovsdb_idl_txn_add_comment(ctx.ovnnb_txn,
+ "ovn-northd: southbound db changed");
+ ovnsb_db_changed(&ctx);
+ ovn_changes_pending = false;
+ }
+
+ if (ctx.ovnnb_txn) {
+ enum ovsdb_idl_txn_status txn_status;
+ txn_status = ovsdb_idl_txn_commit(ctx.ovnnb_txn);
+ switch (txn_status) {
+ case TXN_UNCOMMITTED:
+ case TXN_INCOMPLETE:
+ /* Come back around and try to commit this transaction again */
+ break;
+ case TXN_ABORTED:
+ case TXN_TRY_AGAIN:
+ case TXN_NOT_LOCKED:
+ case TXN_ERROR:
+ /* Something went wrong, so try creating a new transaction. */
+ ovn_changes_pending = true;
+ /* Fall through: a failed transaction is destroyed just like a
+ * finished one. */
+ case TXN_UNCHANGED:
+ case TXN_SUCCESS:
+ ovsdb_idl_txn_destroy(ctx.ovnnb_txn);
+ ctx.ovnnb_txn = NULL;
+ }
+ }
+
+ if (ctx.ovnsb_txn) {
+ enum ovsdb_idl_txn_status txn_status;
+ txn_status = ovsdb_idl_txn_commit(ctx.ovnsb_txn);
+ switch (txn_status) {
+ case TXN_UNCOMMITTED:
+ case TXN_INCOMPLETE:
+ /* Come back around and try to commit this transaction again */
+ break;
+ case TXN_ABORTED:
+ case TXN_TRY_AGAIN:
+ case TXN_NOT_LOCKED:
+ case TXN_ERROR:
+ /* Something went wrong, so try creating a new transaction. */
+ ovnnb_changes_pending = true;
+ /* Fall through: a failed transaction is destroyed just like a
+ * finished one. */
+ case TXN_UNCHANGED:
+ case TXN_SUCCESS:
+ ovsdb_idl_txn_destroy(ctx.ovnsb_txn);
+ ctx.ovnsb_txn = NULL;
+ }
+ }
+
+ /* Block only if neither seqno moved during this iteration; otherwise
+ * go around again right away to pick up the new contents. */
+ if (ovnnb_seqno == ovsdb_idl_get_seqno(ovnnb_idl) &&
+ ovn_seqno == ovsdb_idl_get_seqno(ovnsb_idl)) {
+ ovsdb_idl_wait(ovnnb_idl);
+ ovsdb_idl_wait(ovnsb_idl);
+ if (ctx.ovnnb_txn) {
+ ovsdb_idl_txn_wait(ctx.ovnnb_txn);
+ }
+ if (ctx.ovnsb_txn) {
+ ovsdb_idl_txn_wait(ctx.ovnsb_txn);
+ }
+ unixctl_server_wait(unixctl);
+ if (exiting) {
+ /* An "exit" request arrived during this iteration, so do not
+ * sleep in poll_block(). */
+ poll_immediate_wake();
+ }
+ poll_block();
+ }
+ }
+
+ unixctl_server_destroy(unixctl);
+ ovsdb_idl_destroy(ovnsb_idl);
+ ovsdb_idl_destroy(ovnnb_idl);
+
+ exit(res);
+}
+
+/* unixctl callback for the "exit" command. 'exiting_' points to the bool
+ * that was registered with the command; it is set to true and then an empty
+ * reply is sent on 'conn'. */
+static void
+ovn_northd_exit(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                const char *argv[] OVS_UNUSED, void *exiting_)
+{
+    bool *exit_flag = exiting_;
+
+    *exit_flag = true;
+    unixctl_command_reply(conn, NULL);
+}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovn-architecture" section="7" title="OVN Architecture">
+ <h1>Name</h1>
+ <p>ovn-architecture -- Open Virtual Network architecture</p>
+
+ <h1>Description</h1>
+
+ <p>
+ OVN, the Open Virtual Network, is a system to support virtual network
+ abstraction. OVN complements the existing capabilities of OVS to add
+ native support for virtual network abstractions, such as virtual L2 and L3
+ overlays and security groups. Services such as DHCP are also desirable
+ features. Just like OVS, OVN's design goal is to have a production-quality
+ implementation that can operate at significant scale.
+ </p>
+
+ <p>
+ An OVN deployment consists of several components:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ A <dfn>Cloud Management System</dfn> (<dfn>CMS</dfn>), which is
+ OVN's ultimate client (via its users and administrators). OVN
+ integration requires installing a CMS-specific plugin and
+ related software (see below). OVN initially targets OpenStack
+ as CMS.
+ </p>
+
+ <p>
+ We generally speak of ``the'' CMS, but one can imagine scenarios in
+ which multiple CMSes manage different parts of an OVN deployment.
+ </p>
+ </li>
+
+ <li>
+ An OVN Database physical or virtual node (or, eventually, cluster)
+ installed in a central location.
+ </li>
+
+ <li>
+ One or more (usually many) <dfn>hypervisors</dfn>. Hypervisors must run
+ Open vSwitch and implement the interface described in
+ <code>IntegrationGuide.md</code> in the OVS source tree. Any hypervisor
+ platform supported by Open vSwitch is acceptable.
+ </li>
+
+ <li>
+ <p>
+ Zero or more <dfn>gateways</dfn>. A gateway extends a tunnel-based
+ logical network into a physical network by bidirectionally forwarding
+ packets between tunnels and a physical Ethernet port. This allows
+ non-virtualized machines to participate in logical networks. A gateway
+ may be a physical host, a virtual machine, or an ASIC-based hardware
+ switch that supports the <code>vtep</code>(5) schema. (Support for the
+ latter will come later in OVN implementation.)
+ </p>
+
+ <p>
+ Hypervisors and gateways are together called <dfn>transport nodes</dfn>
+ or <dfn>chassis</dfn>.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The diagram below shows how the major components of OVN and related
+ software interact. Starting at the top of the diagram, we have:
+ </p>
+
+ <ul>
+ <li>
+ The Cloud Management System, as defined above.
+ </li>
+
+ <li>
+ <p>
+ The <dfn>OVN/CMS Plugin</dfn> is the component of the CMS that
+ interfaces to OVN. In OpenStack, this is a Neutron plugin.
+ The plugin's main purpose is to translate the CMS's notion of logical
+ network configuration, stored in the CMS's configuration database in a
+ CMS-specific format, into an intermediate representation understood by
+ OVN.
+ </p>
+
+ <p>
+ This component is necessarily CMS-specific, so a new plugin needs to be
+ developed for each CMS that is integrated with OVN. All of the
+ components below this one in the diagram are CMS-independent.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ The <dfn>OVN Northbound Database</dfn> receives the intermediate
+ representation of logical network configuration passed down by the
+ OVN/CMS Plugin. The database schema is meant to be ``impedance
+ matched'' with the concepts used in a CMS, so that it directly supports
+ notions of logical switches, routers, ACLs, and so on. See
+ <code>ovn-nb</code>(5) for details.
+ </p>
+
+ <p>
+ The OVN Northbound Database has only two clients: the OVN/CMS Plugin
+ above it and <code>ovn-northd</code> below it.
+ </p>
+ </li>
+
+ <li>
+ <code>ovn-northd</code>(8) connects to the OVN Northbound Database
+ above it and the OVN Southbound Database below it. It translates the
+ logical network configuration in terms of conventional network
+ concepts, taken from the OVN Northbound Database, into logical
+ datapath flows in the OVN Southbound Database below it.
+ </li>
+
+ <li>
+ <p>
+ The <dfn>OVN Southbound Database</dfn> is the center of the system.
+ Its clients are <code>ovn-northd</code>(8) above it and
+ <code>ovn-controller</code>(8) on every transport node below it.
+ </p>
+
+ <p>
+ The OVN Southbound Database contains three kinds of data: <dfn>Physical
+ Network</dfn> (PN) tables that specify how to reach hypervisor and
+ other nodes, <dfn>Logical Network</dfn> (LN) tables that describe the
+ logical network in terms of ``logical datapath flows,'' and
+ <dfn>Binding</dfn> tables that link logical network components'
+ locations to the physical network. The hypervisors populate the PN and
+ Binding tables, whereas <code>ovn-northd</code>(8) populates the LN
+ tables.
+ </p>
+
+ <p>
+ OVN Southbound Database performance must scale with the number of
+ transport nodes. This will likely require some work on
+ <code>ovsdb-server</code>(1) as we encounter bottlenecks.
+ Clustering for availability may be needed.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ The remaining components are replicated onto each hypervisor:
+ </p>
+
+ <ul>
+ <li>
+ <code>ovn-controller</code>(8) is OVN's agent on each hypervisor and
+ software gateway. Northbound, it connects to the OVN Southbound
+ Database to learn about OVN configuration and status and to
+ populate the PN table and the <code>Chassis</code> column in
+ <code>Binding</code> table with the hypervisor's status.
+ Southbound, it connects to <code>ovs-vswitchd</code>(8) as an
+ OpenFlow controller, for control over network traffic, and to the
+ local <code>ovsdb-server</code>(1) to allow it to monitor and
+ control Open vSwitch configuration.
+ </li>
+
+ <li>
+ <code>ovs-vswitchd</code>(8) and <code>ovsdb-server</code>(1) are
+ conventional components of Open vSwitch.
+ </li>
+ </ul>
+
+ <pre fixed="yes">
+ CMS
+ |
+ |
+ +-----------|-----------+
+ | | |
+ | OVN/CMS Plugin |
+ | | |
+ | | |
+ | OVN Northbound DB |
+ | | |
+ | | |
+ | ovn-northd |
+ | | |
+ +-----------|-----------+
+ |
+ |
+ +-------------------+
+ | OVN Southbound DB |
+ +-------------------+
+ |
+ |
+ +------------------+------------------+
+ | | |
+ HV 1 | | HV n |
++---------------|---------------+ . +---------------|---------------+
+| | | . | | |
+| ovn-controller | . | ovn-controller |
+| | | | . | | | |
+| | | | | | | |
+| ovs-vswitchd ovsdb-server | | ovs-vswitchd ovsdb-server |
+| | | |
++-------------------------------+ +-------------------------------+
+ </pre>
+
+ <h2>Chassis Setup</h2>
+
+ <p>
+ Each chassis in an OVN deployment must be configured with an Open vSwitch
+ bridge dedicated for OVN's use, called the <dfn>integration bridge</dfn>.
+ System startup scripts create this bridge prior to starting
+ <code>ovn-controller</code>. The ports on the integration bridge include:
+ </p>
+
+ <ul>
+ <li>
+ On any chassis, tunnel ports that OVN uses to maintain logical network
+ connectivity. <code>ovn-controller</code> adds, updates, and removes
+ these tunnel ports.
+ </li>
+
+ <li>
+ On a hypervisor, any VIFs that are to be attached to logical networks.
+ The hypervisor itself, or the integration between Open vSwitch and the
+ hypervisor (described in <code>IntegrationGuide.md</code>) takes care of
+ this. (This is not part of OVN or new to OVN; this is pre-existing
+ integration work that has already been done on hypervisors that support
+ OVS.)
+ </li>
+
+ <li>
+ On a gateway, the physical port used for logical network connectivity.
+ System startup scripts add this port to the bridge prior to starting
+ <code>ovn-controller</code>. This can be a patch port to another bridge,
+ instead of a physical port, in more sophisticated setups.
+ </li>
+ </ul>
+
+ <p>
+ Other ports should not be attached to the integration bridge. In
+ particular, physical ports attached to the underlay network (as opposed to
+ gateway ports, which are physical ports attached to logical networks) must
+ not be attached to the integration bridge. Underlay physical ports should
+ instead be attached to a separate Open vSwitch bridge (they need not be
+ attached to any bridge at all, in fact).
+ </p>
+
+ <p>
+ The integration bridge should be configured as described below.
+ The effect of each of these settings is documented in
+ <code>ovs-vswitchd.conf.db</code>(5):
+ </p>
+
+ <dl>
+ <dt><code>fail-mode=secure</code></dt>
+ <dd>
+ Avoids switching packets between isolated logical networks before
+ <code>ovn-controller</code> starts up. See <code>Controller Failure
+ Settings</code> in <code>ovs-vsctl</code>(8) for more information.
+ </dd>
+
+ <dt><code>other-config:disable-in-band=true</code></dt>
+ <dd>
+ Suppresses in-band control flows for the integration bridge. It would be
+ unusual for such flows to show up anyway, because OVN uses a local
+ controller (over a Unix domain socket) instead of a remote controller.
+ It's possible, however, for some other bridge in the same system to have
+ an in-band remote controller, and in that case this suppresses the flows
+ that in-band control would ordinarily set up. See <code>In-Band
+ Control</code> in <code>DESIGN.md</code> for more information.
+ </dd>
+ </dl>
+
+ <p>
+ The customary name for the integration bridge is <code>br-int</code>, but
+ another name may be used.
+ </p>
+
+ <h2>Logical Networks</h2>
+
+ <p>
+ A <dfn>logical network</dfn> implements the same concepts as physical
+ networks, but they are insulated from the physical network with tunnels or
+ other encapsulations. This allows logical networks to have separate IP and
+ other address spaces that overlap, without conflicting, with those used for
+ physical networks. Logical network topologies can be arranged without
+ regard for the topologies of the physical networks on which they run.
+ </p>
+
+ <p>
+ Logical network concepts in OVN include:
+ </p>
+
+ <ul>
+ <li>
+ <dfn>Logical switches</dfn>, the logical version of Ethernet switches.
+ </li>
+
+ <li>
+ <dfn>Logical routers</dfn>, the logical version of IP routers. Logical
+ switches and routers can be connected into sophisticated topologies.
+ </li>
+
+ <li>
+ <dfn>Logical datapaths</dfn> are the logical version of an OpenFlow
+ switch. Logical switches and routers are both implemented as logical
+ datapaths.
+ </li>
+ </ul>
+
+ <h2>Life Cycle of a VIF</h2>
+
+ <p>
+ Tables and their schemas presented in isolation are difficult to
+ understand. Here's an example.
+ </p>
+
+ <p>
+ A VIF on a hypervisor is a virtual network interface attached either
+ to a VM or to a container running directly on that hypervisor. (This is
+ different from the interface of a container running inside a VM.)
+ </p>
+
+ <p>
+ The steps in this example refer often to details of the OVN Southbound and
+ OVN Northbound database schemas. Please see <code>ovn-sb</code>(5) and
+ <code>ovn-nb</code>(5), respectively, for the full story on these
+ databases.
+ </p>
+
+ <ol>
+ <li>
+ A VIF's life cycle begins when a CMS administrator creates a new VIF
+ using the CMS user interface or API and adds it to a switch (one
+ implemented by OVN as a logical switch). The CMS updates its own
+ configuration. This includes associating unique, persistent identifier
+ <var>vif-id</var> and Ethernet address <var>mac</var> with the VIF.
+ </li>
+
+ <li>
+ The CMS plugin updates the OVN Northbound database to include the new
+ VIF, by adding a row to the <code>Logical_Port</code> table. In the new
+ row, <code>name</code> is <var>vif-id</var>, <code>macs</code> is
+ <var>mac</var>, <code>lswitch</code> points to the OVN logical switch's
+ <code>Logical_Switch</code> record, and other columns are initialized
+ appropriately.
+ </li>
+
+ <li>
+ <code>ovn-northd</code> receives the OVN Northbound database update.
+ In turn, it makes the corresponding updates to the OVN Southbound
+ database, by adding rows to the OVN Southbound database
+ <code>Pipeline</code> table to reflect the new port, e.g. add a
+ flow to recognize that packets destined to the new port's MAC
+ address should be delivered to it, and update the flow that
+ delivers broadcast and multicast packets to include the new port.
+ It also creates a record in the <code>Binding</code> table and
+ populates all its columns except the column that identifies the
+ <code>chassis</code>.
+ </li>
+
+ <li>
+ On every hypervisor, <code>ovn-controller</code> receives the
+ <code>Pipeline</code> table updates that <code>ovn-northd</code> made
+ in the previous step. As long as the VM that owns the VIF is powered off,
+ <code>ovn-controller</code> cannot do much; it cannot, for example,
+ arrange to send packets to or receive packets from the VIF, because the
+ VIF does not actually exist anywhere.
+ </li>
+
+ <li>
+ Eventually, a user powers on the VM that owns the VIF. On the hypervisor
+ where the VM is powered on, the integration between the hypervisor and
+ Open vSwitch (described in <code>IntegrationGuide.md</code>) adds the VIF
+ to the OVN integration bridge and stores <var>vif-id</var> in
+ <code>external-ids</code>:<code>iface-id</code> to indicate that the
+ interface is an instantiation of the new VIF. (None of this code is new
+ in OVN; this is pre-existing integration work that has already been done
+ on hypervisors that support OVS.)
+ </li>
+
+ <li>
+ On the hypervisor where the VM is powered on, <code>ovn-controller</code>
+ notices <code>external-ids</code>:<code>iface-id</code> in the new
+ Interface. In response, it updates the local hypervisor's OpenFlow
+ tables so that packets to and from the VIF are properly handled.
+ Afterward, in the OVN Southbound DB, it updates the
+ <code>Binding</code> table's <code>chassis</code> column for the
+ row that links the logical port from
+ <code>external-ids</code>:<code>iface-id</code> to the hypervisor.
+ </li>
+
+ <li>
+ Some CMS systems, including OpenStack, fully start a VM only when its
+ networking is ready. To support this, <code>ovn-northd</code> notices
+ the <code>chassis</code> column updated for the row in
+ <code>Binding</code> table and pushes this upward by updating the
+ <ref column="up" table="Logical_Port" db="OVN_NB"/> column in the OVN
+ Northbound database's <ref table="Logical_Port" db="OVN_NB"/> table to
+ indicate that the VIF is now up. The CMS, if it uses this feature, can
+ then
+ react by allowing the VM's execution to proceed.
+ </li>
+
+ <li>
+ On every hypervisor but the one where the VIF resides,
+ <code>ovn-controller</code> notices the completely populated row in the
+ <code>Binding</code> table. This provides <code>ovn-controller</code>
+ the physical location of the logical port, so each instance updates the
+ OpenFlow tables of its switch (based on logical datapath flows in the OVN
+ Southbound DB <code>Pipeline</code> table) so that packets to and from the VIF can
+ be properly handled via tunnels.
+ </li>
+
+ <li>
+ Eventually, a user powers off the VM that owns the VIF. On the
+ hypervisor where the VM was powered off, the VIF is deleted from the OVN
+ integration bridge.
+ </li>
+
+ <li>
+ On the hypervisor where the VM was powered off,
+ <code>ovn-controller</code> notices that the VIF was deleted. In
+ response, it removes the <code>Chassis</code> column content in the
+ <code>Binding</code> table for the logical port.
+ </li>
+
+ <li>
+ On every hypervisor, <code>ovn-controller</code> notices the empty
+ <code>Chassis</code> column in the <code>Binding</code> table's row
+ for the logical port. This means that <code>ovn-controller</code> no
+ longer knows the physical location of the logical port, so each instance
+ updates its OpenFlow table to reflect that.
+ </li>
+
+ <li>
+ Eventually, when the VIF (or its entire VM) is no longer needed by
+ anyone, an administrator deletes the VIF using the CMS user interface or
+ API. The CMS updates its own configuration.
+ </li>
+
+ <li>
+ The CMS plugin removes the VIF from the OVN Northbound database,
+ by deleting its row in the <code>Logical_Port</code> table.
+ </li>
+
+ <li>
+ <code>ovn-northd</code> receives the OVN Northbound update and in turn
+ updates the OVN Southbound database accordingly, by removing or
+ updating the rows from the OVN Southbound database
+ <code>Pipeline</code> table and <code>Binding</code> table that
+ were related to the now-destroyed VIF.
+ </li>
+
+ <li>
+ On every hypervisor, <code>ovn-controller</code> receives the
+ <code>Pipeline</code> table updates that <code>ovn-northd</code> made
+ in the previous step. <code>ovn-controller</code> updates OpenFlow tables
+ to reflect the update, although there may not be much to do, since the VIF
+ had already become unreachable when it was removed from the
+ <code>Binding</code> table in a previous step.
+ </li>
+ </ol>
+
+ <h2>Life Cycle of a container interface inside a VM</h2>
+
+ <p>
+ OVN provides virtual network abstractions by converting information
+ written in OVN_NB database to OpenFlow flows in each hypervisor. Secure
+ virtual networking for multi-tenants can only be provided if OVN controller
+ is the only entity that can modify flows in Open vSwitch. When the
+ Open vSwitch integration bridge resides in the hypervisor, it is a
+ fair assumption to make that tenant workloads running inside VMs cannot
+ make any changes to Open vSwitch flows.
+ </p>
+
+ <p>
+ If the infrastructure provider trusts the applications inside the
+ containers not to break out and modify the Open vSwitch flows, then
+ containers can be run in hypervisors. This is also the case when
+ containers are run inside the VMs and Open vSwitch integration bridge
+ with flows added by OVN controller resides in the same VM. For both
+ the above cases, the workflow is the same as explained with an example
+ in the previous section ("Life Cycle of a VIF").
+ </p>
+
+ <p>
+ This section talks about the life cycle of a container interface (CIF)
+ when containers are created in the VMs and the Open vSwitch integration
+ bridge resides inside the hypervisor. In this case, even if a container
+ application breaks out, other tenants are not affected because the
+ containers running inside the VMs cannot modify the flows in the
+ Open vSwitch integration bridge.
+ </p>
+
+ <p>
+ When multiple containers are created inside a VM, there are multiple
+ CIFs associated with them. The network traffic associated with these
+ CIFs needs to reach the Open vSwitch integration bridge running in the
+ hypervisor for OVN to support virtual network abstractions. OVN should
+ also be able to distinguish network traffic coming from different CIFs.
+ There are two ways to distinguish network traffic of CIFs.
+ </p>
+
+ <p>
+ One way is to provide one VIF for every CIF (1:1 model). This means that
+ there could be a lot of network devices in the hypervisor. This would slow
+ down OVS because of all the additional CPU cycles needed for the management
+ of all the VIFs. It would also mean that the entity creating the
+ containers in a VM should also be able to create the corresponding VIFs in
+ the hypervisor.
+ </p>
+
+ <p>
+ The second way is to provide a single VIF for all the CIFs (1:many model).
+ OVN could then distinguish network traffic coming from different CIFs via
+ a tag written in every packet. OVN uses this mechanism and uses VLAN as
+ the tagging mechanism.
+ </p>
+
+ <ol>
+ <li>
+ A CIF's life cycle begins when a container is spawned inside a VM by
+ either the same CMS that created the VM or a tenant that owns that VM
+ or even a container Orchestration System that is different than the CMS
+ that initially created the VM. Whoever the entity is, it will need to
+ know the <var>vif-id</var> that is associated with the network interface
+ of the VM through which the container interface's network traffic is
+ expected to go. The entity that creates the container interface
+ will also need to choose an unused VLAN inside that VM.
+ </li>
+
+ <li>
+ The container spawning entity (either directly or through the CMS that
+ manages the underlying infrastructure) updates the OVN Northbound
+ database to include the new CIF, by adding a row to the
+ <code>Logical_Port</code> table. In the new row, <code>name</code> is
+ any unique identifier, <code>parent_name</code> is the <var>vif-id</var>
+ of the VM through which the CIF's network traffic is expected to go,
+ and the <code>tag</code> is the VLAN tag that identifies the
+ network traffic of that CIF.
+ </li>
+
+ <li>
+ <code>ovn-northd</code> receives the OVN Northbound database update.
+ In turn, it makes the corresponding updates to the OVN Southbound
+ database, by adding rows to the OVN Southbound database's
+ <code>Pipeline</code> table to reflect the new port and also by
+ creating a new row in the <code>Binding</code> table and
+ populating all its columns except the column that identifies the
+ <code>chassis</code>.
+ </li>
+
+ <li>
+ On every hypervisor, <code>ovn-controller</code> subscribes to the
+ changes in the <code>Binding</code> table. When a new row is created
+ by <code>ovn-northd</code> that includes a value in
+ <code>parent_port</code> column of <code>Binding</code> table, the
+ <code>ovn-controller</code> in the hypervisor whose OVN integration bridge
+ has that same value in <var>vif-id</var> in
+ <code>external-ids</code>:<code>iface-id</code>
+ updates the local hypervisor's OpenFlow tables so that packets to and
+ from the VIF with the particular VLAN <code>tag</code> are properly
+ handled. Afterward it updates the <code>chassis</code> column of
+ the <code>Binding</code> to reflect the physical location.
+ </li>
+
+ <li>
+ One can only start the application inside the container after the
+ underlying network is ready. To support this, <code>ovn-northd</code>
+ notices the updated <code>chassis</code> column in <code>Binding</code>
+ table and updates the <ref column="up" table="Logical_Port"
+ db="OVN_NB"/> column in the OVN Northbound database's
+ <ref table="Logical_Port" db="OVN_NB"/> table to indicate that the
+ CIF is now up. The entity responsible for starting the container
+ application queries this value and starts the application.
+ </li>
+
+ <li>
+ Eventually, the entity that created and started the container stops it.
+ The entity, through the CMS (or directly), deletes the CIF's row in the
+ <code>Logical_Port</code> table.
+ </li>
+
+ <li>
+ <code>ovn-northd</code> receives the OVN Northbound update and in turn
+ updates the OVN Southbound database accordingly, by removing or
+ updating the rows from the OVN Southbound database
+ <code>Pipeline</code> table that were related to the now-destroyed
+ CIF. It also deletes the row in the <code>Binding</code> table
+ for that CIF.
+ </li>
+
+ <li>
+ On every hypervisor, <code>ovn-controller</code> receives the
+ <code>Pipeline</code> table updates that <code>ovn-northd</code> made
+ in the previous step. <code>ovn-controller</code> updates OpenFlow tables
+ to reflect the update.
+ </li>
+ </ol>
+
+ <h1>Design Decisions</h1>
+
+ <h2>Supported Tunnel Encapsulations</h2>
+ <p>
+ For connecting hypervisors to each other, the only supported tunnel
+ encapsulations are Geneve and STT. Hypervisors may use VXLAN to
+ connect to gateways. We have limited support to these encapsulations
+ for the following reasons:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ They support large amounts of metadata. In addition to
+ specifying the logical switch, we will likely want to indicate
+ the logical source port and where we are in the logical
+ pipeline. Geneve supports a 24-bit VNI field and TLV-based
+ extensions. The header of STT includes a 64-bit context id.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ They use randomized UDP or TCP source ports that allow
+ efficient distribution among multiple paths in environments that
+ use ECMP in their underlay.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ NICs are available that accelerate encapsulation and decapsulation.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ Due to its flexibility, the preferred encapsulation between
+ hypervisors is Geneve. Some environments may want to use STT for
+ performance reasons until the NICs they use support hardware offload
+ of Geneve.
+ </p>
+
+ <p>
+ For connecting to gateways, the only supported tunnel encapsulations
+ are VXLAN, Geneve, and STT. While support for Geneve is becoming
+ available for TOR (top-of-rack) switches, VXLAN is far more common.
+ Currently, gateways have a feature set that matches the capabilities
+ as defined by the VTEP schema, so fewer bits of metadata are
+ necessary. In the future, gateways that do not support
+ encapsulations with large amounts of metadata may continue to have a
+ reduced feature set.
+ </p>
+</manpage>
--- /dev/null
+{
+ "name": "OVN_Northbound",
+ "tables": {
+ "Logical_Switch": {
+ "columns": {
+ "name": {"type": "string"},
+ "router_port": {"type": {"key": {"type": "uuid",
+ "refTable": "Logical_Router_Port",
+ "refType": "strong"},
+ "min": 0, "max": 1}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}}},
+ "Logical_Port": {
+ "columns": {
+ "lswitch": {"type": {"key": {"type": "uuid",
+ "refTable": "Logical_Switch",
+ "refType": "strong"}}},
+ "name": {"type": "string"},
+ "parent_name": {"type": {"key": "string", "min": 0, "max": 1}},
+ "tag": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 4095},
+ "min": 0, "max": 1}},
+ "macs": {"type": {"key": "string",
+ "min": 0,
+ "max": "unlimited"}},
+ "port_security": {"type": {"key": "string",
+ "min": 0,
+ "max": "unlimited"}},
+ "up": {"type": {"key": "boolean", "min": 0, "max": 1}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}},
+ "indexes": [["name"]]},
+ "ACL": {
+ "columns": {
+ "lswitch": {"type": {"key": {"type": "uuid",
+ "refTable": "Logical_Switch",
+ "refType": "strong"}}},
+ "priority": {"type": {"key": {"type": "integer",
+ "minInteger": 1,
+ "maxInteger": 65535}}},
+ "match": {"type": "string"},
+ "action": {"type": {"key": {"type": "string",
+ "enum": ["set", ["allow", "allow-related", "drop", "reject"]]}}},
+ "log": {"type": "boolean"},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}}},
+ "Logical_Router": {
+ "columns": {
+ "ip": {"type": "string"},
+ "default_gw": {"type": {"key": "string", "min": 0, "max": 1}},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}}},
+ "Logical_Router_Port": {
+ "columns": {
+ "router": {"type": {"key": {"type": "uuid",
+ "refTable": "Logical_Router",
+ "refType": "strong"}}},
+ "network": {"type": "string"},
+ "mac": {"type": "string"},
+ "external_ids": {
+ "type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}}}}},
+ "version": "1.0.0"}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<database name="ovn-nb" title="OVN Northbound Database">
+ <p>
+ This database is the interface between OVN and the cloud management system
+ (CMS), such as OpenStack, running above it. The CMS produces almost all of
+ the contents of the database. The <code>ovn-northd</code> program
+ monitors the database contents, transforms it, and stores it into the <ref
+ db="OVN_Southbound"/> database.
+ </p>
+
+ <p>
+ We generally speak of ``the'' CMS, but one can imagine scenarios in
+ which multiple CMSes manage different parts of an OVN deployment.
+ </p>
+
+ <h2>External IDs</h2>
+
+ <p>
+ Each of the tables in this database contains a special column, named
+ <code>external_ids</code>. This column has the same form and purpose each
+ place it appears.
+ </p>
+
+ <dl>
+ <dt><code>external_ids</code>: map of string-string pairs</dt>
+ <dd>
+ Key-value pairs for use by the CMS. The CMS might use certain pairs, for
+ example, to identify entities in its own configuration that correspond to
+ those in this database.
+ </dd>
+ </dl>
+
+ <table name="Logical_Switch" title="L2 logical switch">
+ <p>
+ Each row represents one L2 logical switch. A given switch's ports are
+ the <ref table="Logical_Port"/> rows whose <ref table="Logical_Port"
+ column="lswitch"/> column points to its row.
+ </p>
+
+ <column name="name">
+ <p>
+ A name for the logical switch. This name has no special meaning or purpose
+ other than to provide convenience for human interaction with the ovn-nb
+ database. There is no requirement for the name to be unique. The
+ logical switch's UUID should be used as the unique identifier.
+ </p>
+ </column>
+
+ <column name="router_port">
+ <p>
+ The router port to which this logical switch is connected, or empty if
+ this logical switch is not connected to any router. A switch may be
+ connected to at most one logical router, but this is not a significant
+ restriction because logical routers may be connected into arbitrary
+ topologies.
+ </p>
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+
+ <table name="Logical_Port" title="L2 logical switch port">
+ <p>
+ A port within an L2 logical switch.
+ </p>
+
+ <column name="lswitch">
+ The logical switch to which the logical port is connected.
+ </column>
+
+ <column name="name">
+ <p>
+ The logical port name.
+ </p>
+
+ <p>
+ For entities (VMs or containers) that are spawned in the hypervisor,
+ the name used here must match those used in the <ref key="iface-id"
+ table="Interface" column="external_ids" db="Open_vSwitch"/> in the
+ <ref db="Open_vSwitch"/> database's <ref table="Interface"
+ db="Open_vSwitch"/> table, because hypervisors use <ref key="iface-id"
+ table="Interface" column="external_ids" db="Open_vSwitch"/> as a lookup
+ key to identify the network interface of that entity.
+ </p>
+
+ <p>
+ For containers that are spawned inside a VM, the name can be
+ any unique identifier. In such a case, <ref column="parent_name"/>
+ must be populated.
+ </p>
+ </column>
+
+ <column name="parent_name">
+ When <ref column="name"/> identifies the interface of a container
+ spawned inside a tenant VM, this column represents the VM interface
+ through which the container interface sends its network traffic.
+ The name used here must match those used in the <ref key="iface-id"
+ table="Interface" column="external_ids" db="Open_vSwitch"/> in the
+ <ref db="Open_vSwitch"/> database's <ref table="Interface"
+ db="Open_vSwitch"/> table, because hypervisors in this case use
+ <ref key="iface-id" table="Interface" column="external_ids"
+ db="Open_vSwitch"/> as a lookup key to identify the network interface
+ of the tenant VM.
+ </column>
+
+ <column name="tag">
+ When <ref column="name"/> identifies the interface of a container
+ spawned inside a tenant VM, this column identifies the VLAN tag in
+ the network traffic associated with that container's network interface.
+ When there are multiple container interfaces inside a VM, all of
+ them send their network traffic through a single VM network interface and
+ this value helps OVN identify the correct container interface.
+ </column>
+
+ <column name="up">
+ This column is populated by <code>ovn-northd</code>, rather than by
+ the CMS plugin as is most of this database. When a logical port is bound
+ to a physical location in the OVN Southbound database <ref
+ db="OVN_Southbound" table="Binding"/> table, <code>ovn-northd</code>
+ sets this column to <code>true</code>; otherwise, or if the port
+ becomes unbound later, it sets it to <code>false</code>. This
+ allows the CMS to wait for a VM's (or container's) networking to
+ become active before it allows the VM (or container) to start.
+ </column>
+
+ <column name="macs">
+ The logical port's own Ethernet address or addresses, each in the form
+ <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
+ Like a physical Ethernet NIC, a logical port ordinarily has a single
+ fixed Ethernet address. The string <code>unknown</code> is also allowed
+ to indicate that the logical port has an unknown set of (additional)
+ source addresses.
+ </column>
+
+ <column name="port_security">
+ <p>
+ A set of L2 (Ethernet) addresses
+ from which the logical port is allowed to send packets and to which it
+ is allowed to receive packets. If this column is empty, all addresses
+ are permitted. Logical ports are always allowed to receive packets
+ addressed to multicast and broadcast addresses.
+ </p>
+
+ <p>
+ Each member of the set is an Ethernet address in the form
+ <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
+ </p>
+
+ <p>
+ This specification will be extended to support L3 port security.
+ </p>
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+
+ <table name="ACL" title="Access Control List (ACL) rule">
+ <p>
+ Each row in this table represents one ACL rule for the logical switch in
+ its <ref column="lswitch"/> column. The <ref column="action"/> column for
+ the highest-<ref column="priority"/> matching row in this table
+ determines a packet's treatment. If no row matches, packets are allowed
+ by default. (Default-deny treatment is possible: add a rule with <ref
+ column="priority"/> 1, <code>1</code> as <ref column="match"/>, and
+ <code>drop</code> as <ref column="action"/>.)
+ </p>
+
+ <column name="lswitch">
+ The switch to which the ACL rule applies. The expression in the
+ <ref column="match"/> column may match against logical ports
+ within this switch.
+ </column>
+
+ <column name="priority">
+ The ACL rule's priority. Rules with numerically higher priority take
+ precedence over those with lower. If two ACL rules with the same
+ priority both match, then the one actually applied to a packet is
+ undefined.
+ </column>
+
+ <column name="match">
+ The packets that the ACL should match, in the same expression
+ language used for the <ref column="match" table="Pipeline"
+ db="OVN_Southbound"/> column in the OVN Southbound database's <ref
+ table="Pipeline" db="OVN_Southbound"/> table. Match
+ <code>inport</code> and <code>outport</code> against names of
+ logical ports within <ref column="lswitch"/> to implement ingress
+ and egress ACLs, respectively. In logical switches connected to
+ logical routers, the special port name <code>ROUTER</code> refers
+ to the logical router port.
+ </column>
+
+ <column name="action">
+ <p>The action to take when the ACL rule matches:</p>
+
+ <ul>
+ <li>
+ <code>allow</code>: Forward the packet.
+ </li>
+
+ <li>
+ <code>allow-related</code>: Forward the packet and related traffic
+ (e.g. inbound replies to an outbound connection).
+ </li>
+
+ <li>
+ <code>drop</code>: Silently drop the packet.
+ </li>
+
+ <li>
+ <code>reject</code>: Drop the packet, replying with a RST for TCP or
+ ICMP unreachable message for other IP-based protocols.
+ </li>
+ </ul>
+
+ <p>
+ Only <code>allow</code> and <code>drop</code> are implemented:
+ <code>allow-related</code> is currently treated as <code>allow</code>,
+ and <code>reject</code> as <code>drop</code>.
+ </p>
+ </column>
+
+ <column name="log">
+ <p>
+ If set to <code>true</code>, packets that match the ACL will trigger a
+ log message on the transport node or nodes that perform ACL processing.
+ Logging may be combined with any <ref column="action"/>.
+ </p>
+
+ <p>
+ Logging is not yet implemented.
+ </p>
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+
+ <table name="Logical_Router" title="L3 logical router">
+ <p>
+ Each row represents one L3 logical router. A given router's ports are
+ the <ref table="Logical_Router_Port"/> rows whose <ref
+ table="Logical_Router_Port" column="router"/> column points to its row.
+ </p>
+
+ <column name="ip">
+ The logical router's own IP address. The logical router uses this
+ address for ICMP replies (e.g. network unreachable messages) and other
+ traffic that it originates, and it responds to traffic destined to this
+ address (e.g. ICMP echo requests).
+ </column>
+
+ <column name="default_gw">
+ IP address to use as default gateway, if any.
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+
+ <table name="Logical_Router_Port" title="L3 logical router port">
+ <p>
+ A port within an L3 logical router.
+ </p>
+
+ <p>
+ A router port is always attached to a switch port. The connection can be
+ identified by following the <ref column="router_port"
+ table="Logical_Port"/> column from an appropriate <ref
+ table="Logical_Port"/> row.
+ </p>
+
+ <column name="router">
+ The router to which the port belongs.
+ </column>
+
+ <column name="network">
+ The IP network and netmask of the network on the router port. Used for
+ routing.
+ </column>
+
+ <column name="mac">
+ The Ethernet address that belongs to this router port.
+ </column>
+
+ <group title="Common Columns">
+ <column name="external_ids">
+ See <em>External IDs</em> at the beginning of this document.
+ </column>
+ </group>
+ </table>
+</database>
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovn-nbctl" section="8" title="ovn-nbctl">
+ <h1>Name</h1>
+ <p>ovn-nbctl -- Open Virtual Network northbound db management utility</p>
+
+ <h1>Synopsis</h1>
+ <p><code>ovn-nbctl</code> [<var>options</var>] <var>command</var> [<var>arg</var>...]</p>
+
+ <h1>Description</h1>
+ <p>This utility can be used to manage the OVN northbound database.</p>
+
+ <h1>General Commands</h1>
+
+ <dl>
+ <dt><code>show [<var>lswitch</var>]</code></dt>
+ <dd>
+ Prints a brief overview of the database contents. If
+ <var>lswitch</var> is provided, only records related to that
+ logical switch are shown.
+ </dd>
+ </dl>
+
+ <h1>Logical Switch Commands</h1>
+
+ <dl>
+ <dt><code>lswitch-add</code> [<var>lswitch</var>]</dt>
+ <dd>
+ Creates a new logical switch named <var>lswitch</var>. If
+ <var>lswitch</var> is not provided, the switch will not have a
+ name so other commands must refer to this switch by its UUID.
+ Initially the switch will have no ports.
+ </dd>
+
+ <dt><code>lswitch-del</code> <var>lswitch</var></dt>
+ <dd>
+ Deletes <var>lswitch</var>.
+ </dd>
+
+ <dt><code>lswitch-list</code></dt>
+ <dd>
+ Lists all existing switches on standard output, one per line.
+ </dd>
+
+ <dt><code>lswitch-set-external-id</code> <var>lswitch</var> <var>key</var> [<var>value</var>]</dt>
+ <dd>
+ <p>Sets or clears an ``external ID'' value on <var>lswitch</var>.
+ These values are intended to identify entities external to OVN
+ with which <var>lswitch</var> is associated. The OVN Northbound
+ database schema may specify well-known <var>key</var> values,
+ but <var>key</var> and <var>value</var> are otherwise arbitrary
+ strings.</p>
+
+ <p>If <var>value</var> is specified, then <var>key</var> is set to
+ <var>value</var> for <var>lswitch</var>, overwriting any
+ previous value. If <var>value</var> is omitted, then
+ <var>key</var> is removed from <var>lswitch</var>'s set of
+ external IDs (if it was present).</p>
+ </dd>
+
+ <dt><code>lswitch-get-external-id</code> <var>lswitch</var> [<var>key</var>]</dt>
+ <dd>
+ Queries the external IDs on <var>lswitch</var>. If
+ <var>key</var> is specified, the output is the value for that
+ <var>key</var> or the empty string if <var>key</var> is unset.
+ If <var>key</var> is omitted, the output is
+ <var>key</var><code>=</code><var>value</var>, one per line, for
+ each key-value pair.
+ </dd>
+ </dl>
+
+ <h1>Logical Port Commands</h1>
+ <dl>
+ <dt><code>lport-add</code> <var>lswitch</var> <var>lport</var></dt>
+ <dd>
+ Creates on <var>lswitch</var> a new logical port named
+ <var>lport</var>.
+ </dd>
+
+ <dt><code>lport-add</code> <var>lswitch</var> <var>lport</var> <var>parent</var> <var>tag</var></dt>
+ <dd>
+ Creates on <var>lswitch</var> a logical port named <var>lport</var>
+ that is a child of <var>parent</var> that is identified with
+ <var>tag</var>. This is useful in cases such as virtualized
+ container environments where Open vSwitch does not have a direct
+ connection to the container's port and it must be shared with
+ the virtual machine's port.
+ </dd>
+
+ <dt><code>lport-del</code> <var>lport</var></dt>
+ <dd>
+ Deletes <var>lport</var>.
+ </dd>
+
+ <dt><code>lport-list</code> <var>lswitch</var></dt>
+ <dd>
+ Lists all the logical ports within <var>lswitch</var> on
+ standard output, one per line.
+ </dd>
+
+ <dt><code>lport-get-parent</code> <var>lport</var></dt>
+ <dd>
+ If set, get the parent port of <var>lport</var>. If not set, print
+ nothing.
+ </dd>
+
+ <dt><code>lport-get-tag</code> <var>lport</var></dt>
+ <dd>
+ If set, get the tag for <var>lport</var> traffic. If not set, print
+ nothing.
+ </dd>
+
+ <dt><code>lport-set-external-id</code> <var>lport</var> <var>key</var> [<var>value</var>]</dt>
+ <dd>
+ <p>Sets or clears an ``external ID'' value on <var>lport</var>.
+ These values are intended to identify entities external to OVN
+ with which <var>lport</var> is associated. The OVN Northbound
+ database schema may specify well-known <var>key</var> values,
+ but <var>key</var> and <var>value</var> are otherwise arbitrary
+ strings.</p>
+
+ <p>If <var>value</var> is specified, then <var>key</var> is set to
+ <var>value</var> for <var>lport</var>, overwriting any
+ previous value. If <var>value</var> is omitted, then
+ <var>key</var> is removed from <var>lport</var>'s set of
+ external IDs (if it was present).</p>
+ </dd>
+
+ <dt><code>lport-get-external-id</code> <var>lport</var> [<var>key</var>]</dt>
+ <dd>
+ Queries the external IDs on <var>lport</var>. If
+ <var>key</var> is specified, the output is the value for that
+ <var>key</var> or the empty string if <var>key</var> is unset.
+ If <var>key</var> is omitted, the output is
+ <var>key</var><code>=</code><var>value</var>, one per line, for
+ each key-value pair.
+ </dd>
+
+ <dt><code>lport-set-macs</code> <var>lport</var> [<var>mac</var>]...</dt>
+ <dd>
+ Sets the MACs associated with <var>lport</var> to
+ <var>mac</var>. Multiple MACs may be set by using multiple
+ <var>mac</var> arguments. If no <var>mac</var> argument is
+ given, <var>lport</var> will have no MACs associated with it.
+ </dd>
+
+ <dt><code>lport-get-macs</code> <var>lport</var></dt>
+ <dd>
+ Lists all the MACs associated with <var>lport</var> on standard
+ output, one per line.
+ </dd>
+
+ <dt><code>lport-set-port-security</code> <var>lport</var> [<var>addrs</var>]...</dt>
+ <dd>
+ <p>
+ Sets the port security addresses associated with <var>lport</var> to
+ <var>addrs</var>. Multiple sets of addresses may be set by using
+ multiple <var>addrs</var> arguments. If no <var>addrs</var> argument
+ is given, <var>lport</var> will not have port security enabled.
+ </p>
+
+ <p>
+ Port security limits the addresses from which a logical port may send
+ packets and to which it may receive packets. See the
+ <code>ovn-nb</code>(5) documentation for the <ref
+ column="port_security" table="Logical_Port"/> column in the <ref
+ table="Logical_Port"/> table for details.
+ </p>
+ </dd>
+
+ <dt><code>lport-get-port-security</code> <var>lport</var></dt>
+ <dd>
+ Lists all the port security addresses associated with <var>lport</var>
+ on standard output, one per line.
+ </dd>
+
+ <dt><code>lport-get-up</code> <var>lport</var></dt>
+ <dd>
+ Prints the state of <var>lport</var>, either <code>up</code> or
+ <code>down</code>.
+ </dd>
+
+ </dl>
+
+ <h1>Options</h1>
+
+ <dl>
+ <dt><code>-d</code> <var>database</var></dt>
+ <dt><code>--db</code> <var>database</var></dt>
+ <dd>
+ The OVSDB database remote to contact. If the <env>OVN_NB_DB</env>
+ environment variable is set, its value is used as the default.
+ Otherwise, the default is <code>unix:@RUNDIR@/db.sock</code>, but this
+ default is unlikely to be useful outside of single-machine OVN test
+ environments.
+ </dd>
+
+ <dt><code>-h</code> | <code>--help</code></dt>
+ <dt><code>-o</code> | <code>--options</code></dt>
+ <dt><code>-V</code> | <code>--version</code></dt>
+ </dl>
+
+ <h1>Logging options</h1>
+ <dl>
+ <dt><code>-v</code><var>spec</var>, <code>--verbose=</code><var>spec</var></dt>
+ <dt><code>-v</code>, <code>--verbose</code></dt>
+ <dt><code>--log-file</code>[<code>=</code><var>file</var>]</dt>
+ <dt><code>--syslog-target=</code><var>host</var><code>:</code><var>port</var></dt>
+ </dl>
+
+ <h1>PKI configuration (required to use SSL)</h1>
+ <dl>
+ <dt><code>-p</code>, <code>--private-key=</code><var>file</var> file with private key</dt>
+ <dt><code>-c</code>, <code>--certificate=</code><var>file</var> file with certificate for private key</dt>
+ <dt><code>-C</code>, <code>--ca-cert=</code><var>file</var> file with peer CA certificate</dt>
+ </dl>
+
+</manpage>
--- /dev/null
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "command-line.h"
+#include "dirs.h"
+#include "fatal-signal.h"
+#include "ovn/lib/ovn-nb-idl.h"
+#include "poll-loop.h"
+#include "process.h"
+#include "stream.h"
+#include "stream-ssl.h"
+#include "util.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(ovn_nbctl);
+
+/* State shared with every command handler; main() stores a pointer to it in
+ * the 'pvt' member of the ovs_cmdl_context passed to the handlers. */
+struct nbctl_context {
+ struct ovsdb_idl *idl; /* IDL connection created in main(). */
+ struct ovsdb_idl_txn *txn; /* Transaction that the running command fills in. */
+};
+
+/* Database remote to connect to. Set by parse_options(), either from the
+ * --db option or from default_db(). */
+static const char *db;
+
+/* Declared early because usage() prints the default before the definition. */
+static const char *default_db(void);
+
+/* Prints the help text on stdout: the command summary below, with the program
+ * name and the default database remote substituted in, followed by whatever
+ * vlog_usage() and stream_usage() print. */
+static void
+usage(void)
+{
+ printf("\
+%s: OVN northbound DB management utility\n\
+usage: %s [OPTIONS] COMMAND [ARG...]\n\
+\n\
+General commands:\n\
+ show print overview of database contents\n\
+ show LSWITCH print overview of database contents for LSWITCH\n\
+\n\
+Logical switch commands:\n\
+ lswitch-add [LSWITCH] create a logical switch named LSWITCH\n\
+ lswitch-del LSWITCH delete LSWITCH and all its ports\n\
+ lswitch-list print the names of all logical switches\n\
+ lswitch-set-external-id LSWITCH KEY [VALUE]\n\
+ set or delete an external-id on LSWITCH\n\
+ lswitch-get-external-id LSWITCH [KEY]\n\
+ list one or all external-ids on LSWITCH\n\
+\n\
+Logical port commands:\n\
+ lport-add LSWITCH LPORT add logical port LPORT on LSWITCH\n\
+ lport-add LSWITCH LPORT PARENT TAG\n\
+ add logical port LPORT on LSWITCH with PARENT\n\
+ on TAG\n\
+ lport-del LPORT delete LPORT from its attached switch\n\
+ lport-list LSWITCH print the names of all logical ports on LSWITCH\n\
+ lport-get-parent LPORT get the parent of LPORT if set\n\
+ lport-get-tag LPORT get the LPORT's tag if set\n\
+ lport-set-external-id LPORT KEY [VALUE]\n\
+ set or delete an external-id on LPORT\n\
+ lport-get-external-id LPORT [KEY]\n\
+ list one or all external-ids on LPORT\n\
+ lport-set-macs LPORT [MAC]...\n\
+ set MAC addresses for LPORT.\n\
+ lport-get-macs LPORT get a list of MAC addresses on LPORT\n\
+ lport-set-port-security LPORT [ADDRS]...\n\
+ set port security addresses for LPORT.\n\
+ lport-get-port-security LPORT get LPORT's port security addresses\n\
+ lport-get-up LPORT get state of LPORT ('up' or 'down')\n\
+\n\
+Options:\n\
+ --db=DATABASE connect to DATABASE\n\
+ (default: %s)\n\
+ -h, --help display this help message\n\
+ -o, --options list available options\n\
+ -V, --version display version information\n\
+", program_name, program_name, default_db());
+ vlog_usage();
+ stream_usage("database", true, true, false);
+}
+\f
+/* Finds the logical switch that 'id' refers to.
+ *
+ * 'id' is first tried as a UUID; if it does not parse as one, or no row has
+ * that UUID, it is compared against every switch's name instead. Returns the
+ * matching row, or NULL after logging a warning when nothing matches or when
+ * more than one switch carries the name. */
+static const struct nbrec_logical_switch *
+lswitch_by_name_or_uuid(struct nbctl_context *nb_ctx, const char *id)
+{
+    const struct nbrec_logical_switch *found = NULL;
+    const struct nbrec_logical_switch *candidate;
+    struct uuid parsed;
+    bool parsed_as_uuid = uuid_from_string(&parsed, id);
+
+    if (parsed_as_uuid) {
+        found = nbrec_logical_switch_get_for_uuid(nb_ctx->idl, &parsed);
+        if (found) {
+            return found;
+        }
+    }
+
+    /* Fall back to a name search, rejecting ambiguous names. */
+    NBREC_LOGICAL_SWITCH_FOR_EACH(candidate, nb_ctx->idl) {
+        if (!strcmp(candidate->name, id)) {
+            if (found) {
+                VLOG_WARN("There is more than one logical switch named '%s'. "
+                          "Use a UUID.", id);
+                return NULL;
+            }
+            found = candidate;
+        }
+    }
+
+    if (!found) {
+        VLOG_WARN("lswitch not found for %s: '%s'",
+                  parsed_as_uuid ? "UUID" : "name", id);
+    }
+    return found;
+}
+
+/* Prints 'lswitch' (UUID and name) followed by every logical port in
+ * 'nb_ctx''s database whose 'lswitch' column points to it. For each port the
+ * parent/tag pair and the MAC list are printed when present. */
+static void
+print_lswitch(const struct nbctl_context *nb_ctx,
+              const struct nbrec_logical_switch *lswitch)
+{
+    const struct nbrec_logical_port *lport;
+
+    printf(" lswitch "UUID_FMT" (%s)\n",
+           UUID_ARGS(&lswitch->header_.uuid), lswitch->name);
+
+    NBREC_LOGICAL_PORT_FOR_EACH(lport, nb_ctx->idl) {
+        size_t i;
+
+        if (lport->lswitch != lswitch) {
+            continue;
+        }
+
+        printf(" lport %s\n", lport->name);
+        if (lport->parent_name && lport->n_tag) {
+            /* Tags are handled as int64_t elsewhere in this file, so use the
+             * signed conversion, as do_lport_get_tag() does. */
+            printf(" parent: %s, tag:%"PRId64"\n",
+                   lport->parent_name, lport->tag[0]);
+        }
+        if (lport->n_macs) {
+            printf(" macs:");
+            for (i = 0; i < lport->n_macs; i++) {
+                printf(" %s", lport->macs[i]);
+            }
+            printf("\n");
+        }
+    }
+}
+
+/* "show [LSWITCH]": prints an overview of one logical switch when an argument
+ * is given, otherwise of every logical switch in the database. */
+static void
+do_show(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *sw;
+
+    if (ctx->argc != 2) {
+        NBREC_LOGICAL_SWITCH_FOR_EACH(sw, nb_ctx->idl) {
+            print_lswitch(nb_ctx, sw);
+        }
+        return;
+    }
+
+    sw = lswitch_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (sw) {
+        print_lswitch(nb_ctx, sw);
+    }
+}
+
+/* "lswitch-add [LSWITCH]": inserts a new logical switch row into the current
+ * transaction, naming it only when a name argument was supplied. */
+static void
+do_lswitch_add(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    struct nbrec_logical_switch *new_switch
+        = nbrec_logical_switch_insert(nb_ctx->txn);
+
+    if (ctx->argc != 2) {
+        return;
+    }
+    nbrec_logical_switch_set_name(new_switch, ctx->argv[1]);
+}
+
+/* "lswitch-del LSWITCH": deletes the named logical switch. A failed lookup
+ * has already been reported by lswitch_by_name_or_uuid(). */
+static void
+do_lswitch_del(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *victim
+        = lswitch_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (victim) {
+        nbrec_logical_switch_delete(victim);
+    }
+}
+
+/* "lswitch-list": prints one "UUID (name)" line per logical switch. */
+static void
+do_lswitch_list(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *sw;
+
+    NBREC_LOGICAL_SWITCH_FOR_EACH(sw, nb_ctx->idl) {
+        const struct uuid *uuid = &sw->header_.uuid;
+
+        printf(UUID_FMT " (%s)\n", UUID_ARGS(uuid), sw->name);
+    }
+}
+
+/* "lswitch-set-external-id LSWITCH KEY [VALUE]": writes back a copy of the
+ * switch's external_ids in which KEY is set to VALUE, or removed when VALUE
+ * is omitted. */
+static void
+do_lswitch_set_external_id(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *sw;
+    const char *key = ctx->argv[2];
+    struct smap updated;
+
+    sw = lswitch_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (!sw) {
+        return;
+    }
+
+    /* Edit a private copy, then hand the whole map to the IDL. */
+    smap_init(&updated);
+    smap_clone(&updated, &sw->external_ids);
+    if (ctx->argc != 4) {
+        smap_remove(&updated, key);
+    } else {
+        smap_replace(&updated, key, ctx->argv[3]);
+    }
+    nbrec_logical_switch_set_external_ids(sw, &updated);
+    smap_destroy(&updated);
+}
+
+/* "lswitch-get-external-id LSWITCH [KEY]": with KEY, prints that key's value
+ * if it is set; without KEY, prints every pair as "key=value", one per
+ * line. */
+static void
+do_lswitch_get_external_id(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *sw;
+    struct smap_node *entry;
+    const char *value;
+
+    sw = lswitch_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (!sw) {
+        return;
+    }
+
+    if (ctx->argc != 3) {
+        /* No key given: dump the whole map. */
+        SMAP_FOR_EACH(entry, &sw->external_ids) {
+            printf("%s=%s\n", entry->key, entry->value);
+        }
+        return;
+    }
+
+    /* A single key was requested. */
+    value = smap_get(&sw->external_ids, ctx->argv[2]);
+    if (value) {
+        printf("%s\n", value);
+    }
+}
+\f
+/* Finds the logical port that 'id' refers to.
+ *
+ * 'id' is first tried as a UUID; if it does not parse as one, or no row has
+ * that UUID, the first port whose name equals 'id' is used. Returns the row,
+ * or NULL after logging a warning when nothing matches. */
+static const struct nbrec_logical_port *
+lport_by_name_or_uuid(struct nbctl_context *nb_ctx, const char *id)
+{
+    const struct nbrec_logical_port *match = NULL;
+    struct uuid parsed;
+    bool parsed_as_uuid = uuid_from_string(&parsed, id);
+
+    if (parsed_as_uuid) {
+        match = nbrec_logical_port_get_for_uuid(nb_ctx->idl, &parsed);
+    }
+
+    if (!match) {
+        const struct nbrec_logical_port *candidate;
+
+        NBREC_LOGICAL_PORT_FOR_EACH(candidate, nb_ctx->idl) {
+            if (!strcmp(candidate->name, id)) {
+                match = candidate;
+                break;
+            }
+        }
+    }
+
+    if (!match) {
+        VLOG_WARN("lport not found for %s: '%s'",
+                  parsed_as_uuid ? "UUID" : "name", id);
+    }
+    return match;
+}
+
+/* "lport-add LSWITCH LPORT [PARENT TAG]": adds logical port LPORT to LSWITCH.
+ * PARENT and TAG must be given together; TAG must be an integer in the range
+ * 0 to 4095. Nothing is added to the transaction if validation fails. */
+static void
+do_lport_add(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_switch *sw;
+    struct nbrec_logical_port *new_port;
+    bool has_parent = ctx->argc == 5;
+    int64_t tag;
+
+    sw = lswitch_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (!sw) {
+        return;
+    }
+
+    /* If a parent_name is specified, a tag must be specified as well. */
+    if (!has_parent && ctx->argc != 3) {
+        VLOG_WARN("Invalid arguments to lport-add.");
+        return;
+    }
+
+    /* Validate the tag before touching the transaction. */
+    if (has_parent
+        && (!ovs_scan(ctx->argv[4], "%"SCNd64, &tag)
+            || tag < 0 || tag > 4095)) {
+        VLOG_WARN("Invalid tag '%s'", ctx->argv[4]);
+        return;
+    }
+
+    /* All inputs are valid: build the new row. */
+    new_port = nbrec_logical_port_insert(nb_ctx->txn);
+    nbrec_logical_port_set_name(new_port, ctx->argv[2]);
+    nbrec_logical_port_set_lswitch(new_port, sw);
+    if (has_parent) {
+        nbrec_logical_port_set_parent_name(new_port, ctx->argv[3]);
+        nbrec_logical_port_set_tag(new_port, &tag, 1);
+    }
+}
+
+/* "lport-del LPORT": deletes the named logical port. A failed lookup has
+ * already been reported by lport_by_name_or_uuid(). */
+static void
+do_lport_del(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *victim
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (victim) {
+        nbrec_logical_port_delete(victim);
+    }
+}
+
+/* Returns true if 'lswitch' is the switch being looked for: compared by UUID
+ * when 'lswitch_uuid' is nonnull (in which case 'name' is not used),
+ * otherwise by 'name'. */
+static bool
+is_lswitch(const struct nbrec_logical_switch *lswitch,
+           struct uuid *lswitch_uuid, const char *name)
+{
+    if (!lswitch_uuid) {
+        return strcmp(lswitch->name, name) == 0;
+    }
+    return uuid_equals(lswitch_uuid, &lswitch->header_.uuid);
+}
+
+
+/* "lport-list LSWITCH": prints one "UUID (name)" line for each logical port
+ * whose switch matches LSWITCH. LSWITCH is compared as a UUID when it parses
+ * as one, and as a switch name otherwise. */
+static void
+do_lport_list(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const char *id = ctx->argv[1];
+    const struct nbrec_logical_port *port;
+    struct uuid parsed;
+    /* NULL selects comparison by name in is_lswitch(). */
+    struct uuid *wanted_uuid = uuid_from_string(&parsed, id) ? &parsed : NULL;
+
+    NBREC_LOGICAL_PORT_FOR_EACH(port, nb_ctx->idl) {
+        if (is_lswitch(port->lswitch, wanted_uuid, id)) {
+            printf(UUID_FMT " (%s)\n",
+                   UUID_ARGS(&port->header_.uuid), port->name);
+        }
+    }
+}
+
+/* "lport-get-parent LPORT": prints the port's parent name, or nothing when
+ * no parent is set. */
+static void
+do_lport_get_parent(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (port && port->parent_name) {
+        printf("%s\n", port->parent_name);
+    }
+}
+
+/* "lport-get-tag LPORT": prints the port's tag, or nothing when no tag is
+ * set. */
+static void
+do_lport_get_tag(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (port && port->n_tag > 0) {
+        printf("%"PRId64"\n", port->tag[0]);
+    }
+}
+
+/* "lport-set-external-id LPORT KEY [VALUE]": writes back a copy of the port's
+ * external_ids in which KEY is set to VALUE, or removed when VALUE is
+ * omitted. */
+static void
+do_lport_set_external_id(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port;
+    const char *key = ctx->argv[2];
+    struct smap updated;
+
+    port = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (!port) {
+        return;
+    }
+
+    /* Edit a private copy, then hand the whole map to the IDL. */
+    smap_init(&updated);
+    smap_clone(&updated, &port->external_ids);
+    if (ctx->argc != 4) {
+        smap_remove(&updated, key);
+    } else {
+        smap_replace(&updated, key, ctx->argv[3]);
+    }
+    nbrec_logical_port_set_external_ids(port, &updated);
+    smap_destroy(&updated);
+}
+
+/* "lport-get-external-id LPORT [KEY]": with KEY, prints that key's value if
+ * it is set; without KEY, prints every pair as "key=value", one per line. */
+static void
+do_lport_get_external_id(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port;
+    struct smap_node *entry;
+    const char *value;
+
+    port = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    if (!port) {
+        return;
+    }
+
+    if (ctx->argc != 3) {
+        /* No key given: dump the whole map. */
+        SMAP_FOR_EACH(entry, &port->external_ids) {
+            printf("%s=%s\n", entry->key, entry->value);
+        }
+        return;
+    }
+
+    /* A single key was requested. */
+    value = smap_get(&port->external_ids, ctx->argv[2]);
+    if (value) {
+        printf("%s\n", value);
+    }
+}
+
+/* "lport-set-macs LPORT [MAC]...": replaces the port's MAC list with the
+ * arguments that follow LPORT (possibly none). */
+static void
+do_lport_set_macs(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (port) {
+        /* Skip the command name and LPORT. */
+        const char **macs = (const char **) ctx->argv + 2;
+
+        nbrec_logical_port_set_macs(port, macs, ctx->argc - 2);
+    }
+}
+
+/* "lport-get-macs LPORT": prints the port's MACs, one per line. */
+static void
+do_lport_get_macs(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    size_t idx = 0;
+
+    if (!port) {
+        return;
+    }
+
+    while (idx < port->n_macs) {
+        printf("%s\n", port->macs[idx]);
+        idx++;
+    }
+}
+
+/* "lport-set-port-security LPORT [ADDRS]...": replaces the port's port
+ * security list with the arguments that follow LPORT (possibly none). */
+static void
+do_lport_set_port_security(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+
+    if (port) {
+        /* Skip the command name and LPORT. */
+        const char **addrs = (const char **) ctx->argv + 2;
+
+        nbrec_logical_port_set_port_security(port, addrs, ctx->argc - 2);
+    }
+}
+
+/* "lport-get-port-security LPORT": prints the port's port security entries,
+ * one per line. */
+static void
+do_lport_get_port_security(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    size_t idx = 0;
+
+    if (!port) {
+        return;
+    }
+
+    while (idx < port->n_port_security) {
+        printf("%s\n", port->port_security[idx]);
+        idx++;
+    }
+}
+
+/* "lport-get-up LPORT": prints "up" when the port's 'up' value is present and
+ * true, and "down" otherwise. */
+static void
+do_lport_get_up(struct ovs_cmdl_context *ctx)
+{
+    struct nbctl_context *nb_ctx = ctx->pvt;
+    const struct nbrec_logical_port *port
+        = lport_by_name_or_uuid(nb_ctx, ctx->argv[1]);
+    const char *state = "down";
+
+    if (!port) {
+        return;
+    }
+
+    if (port->up && *port->up) {
+        state = "up";
+    }
+    printf("%s\n", state);
+}
+\f
+/* Parses the command-line options in 'argv' with getopt_long().
+ *
+ * Records the --db argument in the file-level 'db', falling back to
+ * default_db() when the option is absent. The help, options and version
+ * options print their output and exit successfully. Logging and SSL options
+ * are delegated to the VLOG_* and STREAM_SSL_* macros. */
+static void
+parse_options(int argc, char *argv[])
+{
+ enum {
+ VLOG_OPTION_ENUMS,
+ };
+ static const struct option long_options[] = {
+ {"db", required_argument, NULL, 'd'},
+ {"help", no_argument, NULL, 'h'},
+ {"options", no_argument, NULL, 'o'},
+ {"version", no_argument, NULL, 'V'},
+ VLOG_LONG_OPTIONS,
+ STREAM_SSL_LONG_OPTIONS,
+ {NULL, 0, NULL, 0},
+ };
+ /* Short option string built from the table above; freed before returning. */
+ char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
+
+ for (;;) {
+ int c;
+
+ c = getopt_long(argc, argv, short_options, long_options, NULL);
+ if (c == -1) {
+ break;
+ }
+
+ switch (c) {
+ VLOG_OPTION_HANDLERS;
+ STREAM_SSL_OPTION_HANDLERS;
+
+ case 'd':
+ db = optarg;
+ break;
+
+ case 'h':
+ usage();
+ exit(EXIT_SUCCESS);
+
+ case 'o':
+ ovs_cmdl_print_options(long_options);
+ exit(EXIT_SUCCESS);
+
+ case 'V':
+ ovs_print_version(0, 0);
+ exit(EXIT_SUCCESS);
+
+ /* NOTE(review): any value not handled above is ignored and parsing
+ * continues; confirm whether the handler macros already cover
+ * getopt_long()'s error return. */
+ default:
+ break;
+ }
+ }
+
+ if (!db) {
+ db = default_db();
+ }
+
+ free(short_options);
+}
+
+/* Table of every ovn-nbctl command, terminated by an entry with a null name.
+ *
+ * 'min_args' and 'max_args' bound the number of arguments that follow the
+ * command name. Every handler that reads ctx->argv[1] unconditionally needs
+ * 'min_args' of at least 1, so that it is never called without that
+ * argument. */
+static const struct ovs_cmdl_command all_commands[] = {
+    {
+        .name = "show",
+        .usage = "[LSWITCH]",
+        .min_args = 0,
+        .max_args = 1,
+        .handler = do_show,
+    },
+    {
+        .name = "lswitch-add",
+        .usage = "[LSWITCH]",
+        .min_args = 0,
+        .max_args = 1,
+        .handler = do_lswitch_add,
+    },
+    {
+        .name = "lswitch-del",
+        .usage = "LSWITCH",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lswitch_del,
+    },
+    {
+        .name = "lswitch-list",
+        .usage = "",
+        .min_args = 0,
+        .max_args = 0,
+        .handler = do_lswitch_list,
+    },
+    {
+        .name = "lswitch-set-external-id",
+        .usage = "LSWITCH KEY [VALUE]",
+        .min_args = 2,
+        .max_args = 3,
+        .handler = do_lswitch_set_external_id,
+    },
+    {
+        .name = "lswitch-get-external-id",
+        .usage = "LSWITCH [KEY]",
+        .min_args = 1,
+        .max_args = 2,
+        .handler = do_lswitch_get_external_id,
+    },
+    {
+        .name = "lport-add",
+        .usage = "LSWITCH LPORT [PARENT] [TAG]",
+        .min_args = 2,
+        .max_args = 4,
+        .handler = do_lport_add,
+    },
+    {
+        .name = "lport-del",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_del,
+    },
+    {
+        .name = "lport-list",
+        .usage = "LSWITCH",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_list,
+    },
+    {
+        .name = "lport-get-parent",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_get_parent,
+    },
+    {
+        .name = "lport-get-tag",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_get_tag,
+    },
+    {
+        .name = "lport-set-external-id",
+        .usage = "LPORT KEY [VALUE]",
+        .min_args = 2,
+        .max_args = 3,
+        .handler = do_lport_set_external_id,
+    },
+    {
+        .name = "lport-get-external-id",
+        .usage = "LPORT [KEY]",
+        .min_args = 1,
+        .max_args = 2,
+        .handler = do_lport_get_external_id,
+    },
+    {
+        .name = "lport-set-macs",
+        .usage = "LPORT [MAC]...",
+        .min_args = 1,
+        /* Accept however many arguments the system will allow. */
+        .max_args = INT_MAX,
+        .handler = do_lport_set_macs,
+    },
+    {
+        .name = "lport-get-macs",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_get_macs,
+    },
+    {
+        .name = "lport-set-port-security",
+        .usage = "LPORT [ADDRS]...",
+        /* LPORT is mandatory: the handler reads ctx->argv[1] without
+         * checking that it was supplied. */
+        .min_args = 1,
+        /* Accept however many arguments the system will allow. */
+        .max_args = INT_MAX,
+        .handler = do_lport_set_port_security,
+    },
+    {
+        .name = "lport-get-port-security",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_get_port_security,
+    },
+    {
+        .name = "lport-get-up",
+        .usage = "LPORT",
+        .min_args = 1,
+        .max_args = 1,
+        .handler = do_lport_get_up,
+    },
+
+    {
+        /* sentinel */
+        .name = NULL,
+    },
+};
+
+/* Returns the command table 'all_commands', which main() passes to
+ * ovs_cmdl_run_command(). */
+static const struct ovs_cmdl_command *
+get_all_commands(void)
+{
+ return all_commands;
+}
+
+/* Returns the database remote to use when --db is not given: the value of
+ * the OVN_NB_DB environment variable if it is set, otherwise "unix:" followed
+ * by ovs_rundir() and "/db.sock". The result is computed on the first call
+ * and cached for later calls. */
+static const char *
+default_db(void)
+{
+    static char *def;
+
+    if (def) {
+        return def;
+    }
+
+    def = getenv("OVN_NB_DB");
+    if (!def) {
+        def = xasprintf("unix:%s/db.sock", ovs_rundir());
+    }
+    return def;
+}
+
+/* Program entry point.
+ *
+ * Parses options, creates an IDL for the database named by 'db', and then
+ * loops: once the IDL's sequence number differs from the value captured
+ * before the loop, the requested command is run inside a new transaction and
+ * that transaction is committed. A TXN_TRY_AGAIN result re-runs the command
+ * in a fresh transaction; any other result ends the loop.
+ *
+ * The exit status is 1 only when the database connection fails; every other
+ * path, including commit results other than success, leaves it at 0. */
+int
+main(int argc, char *argv[])
+{
+ extern struct vlog_module VLM_reconnect;
+ struct ovs_cmdl_context ctx;
+ struct nbctl_context nb_ctx = { .idl = NULL, };
+ enum ovsdb_idl_txn_status txn_status;
+ unsigned int seqno;
+ int res = 0;
+ char *args;
+
+ fatal_ignore_sigpipe();
+ set_program_name(argv[0]);
+ vlog_set_levels(NULL, VLF_CONSOLE, VLL_WARN);
+ vlog_set_levels(&VLM_reconnect, VLF_ANY_DESTINATION, VLL_WARN);
+ parse_options(argc, argv);
+ nbrec_init();
+
+ /* The full command line, recorded below as the transaction comment. */
+ args = process_escape_args(argv);
+
+ nb_ctx.idl = ovsdb_idl_create(db, &nbrec_idl_class, true, false);
+ /* Hand the handlers only the arguments that follow the parsed options. */
+ ctx.pvt = &nb_ctx;
+ ctx.argc = argc - optind;
+ ctx.argv = argv + optind;
+
+ seqno = ovsdb_idl_get_seqno(nb_ctx.idl);
+ for (;;) {
+ ovsdb_idl_run(nb_ctx.idl);
+
+ if (!ovsdb_idl_is_alive(nb_ctx.idl)) {
+ int retval = ovsdb_idl_get_last_error(nb_ctx.idl);
+ VLOG_ERR("%s: database connection failed (%s)",
+ db, ovs_retval_to_string(retval));
+ res = 1;
+ break;
+ }
+
+ /* The seqno has moved on from its initial value -- presumably because
+ * the database contents have been received (TODO confirm) -- so the
+ * command can be run. */
+ if (seqno != ovsdb_idl_get_seqno(nb_ctx.idl)) {
+ nb_ctx.txn = ovsdb_idl_txn_create(nb_ctx.idl);
+ ovsdb_idl_txn_add_comment(nb_ctx.txn, "ovn-nbctl: %s", args);
+ ovs_cmdl_run_command(&ctx, get_all_commands());
+ txn_status = ovsdb_idl_txn_commit_block(nb_ctx.txn);
+ if (txn_status == TXN_TRY_AGAIN) {
+ /* Drop this transaction and loop to build a fresh one. */
+ ovsdb_idl_txn_destroy(nb_ctx.txn);
+ nb_ctx.txn = NULL;
+ continue;
+ } else {
+ /* Any other status ends the loop without changing 'res'. */
+ break;
+ }
+ }
+
+ /* Nothing has changed yet: sleep until the IDL has work to do. */
+ if (seqno == ovsdb_idl_get_seqno(nb_ctx.idl)) {
+ ovsdb_idl_wait(nb_ctx.idl);
+ poll_block();
+ }
+ }
+
+ if (nb_ctx.txn) {
+ ovsdb_idl_txn_destroy(nb_ctx.txn);
+ }
+ ovsdb_idl_destroy(nb_ctx.idl);
+ free(args);
+
+ exit(res);
+}
--- /dev/null
+{
+ "name": "OVN_Southbound",
+ "tables": {
+ "Chassis": {
+ "columns": {
+ "name": {"type": "string"},
+ "encaps": {"type": {"key": {"type": "uuid",
+ "refTable": "Encap"},
+ "min": 1, "max": "unlimited"}},
+ "gateway_ports": {"type": {"key": "string",
+ "value": {"type": "uuid",
+ "refTable": "Gateway",
+ "refType": "strong"},
+ "min": 0,
+ "max": "unlimited"}}},
+ "isRoot": true,
+ "indexes": [["name"]]},
+ "Encap": {
+ "columns": {
+ "type": {"type": {"key": {
+ "type": "string",
+ "enum": ["set", ["geneve", "stt", "vxlan"]]}}},
+ "options": {"type": {"key": "string",
+ "value": "string",
+ "min": 0,
+ "max": "unlimited"}},
+ "ip": {"type": "string"}}},
+ "Gateway": {
+ "columns": {"vlan_map": {"type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 4095},
+ "value": {"type": "string"},
+ "min": 0,
+ "max": "unlimited"}}}},
+ "Pipeline": {
+ "columns": {
+ "logical_datapath": {"type": "uuid"},
+ "table_id": {"type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 31}}},
+ "priority": {"type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 65535}}},
+ "match": {"type": "string"},
+ "actions": {"type": "string"}},
+ "isRoot": true},
+ "Binding": {
+ "columns": {
+ "logical_datapath": {"type": "uuid"},
+ "logical_port": {"type": "string"},
+ "tunnel_key": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 1,
+ "maxInteger": 65535}}},
+ "parent_port": {"type": {"key": "string", "min": 0, "max": 1}},
+ "tag": {
+ "type": {"key": {"type": "integer",
+ "minInteger": 0,
+ "maxInteger": 4095},
+ "min": 0, "max": 1}},
+ "chassis": {"type": {"key": {"type": "uuid",
+ "refTable": "Chassis",
+ "refType": "weak"},
+ "min": 0, "max": 1}},
+ "mac": {"type": {"key": "string",
+ "min": 0,
+ "max": "unlimited"}}},
+ "indexes": [["logical_port"], ["tunnel_key"]],
+ "isRoot": true}},
+ "version": "1.0.0"}
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<database name="ovn-sb" title="OVN Southbound Database">
+ <p>
+ This database holds logical and physical configuration and state for the
+ Open Virtual Network (OVN) system to support virtual network abstraction.
+ For an introduction to OVN, please see <code>ovn-architecture</code>(7).
+ </p>
+
+ <p>
+ The OVN Southbound database sits at the center of the OVN
+ architecture. It is the one component that speaks both southbound
+ directly to all the hypervisors and gateways, via
+ <code>ovn-controller</code>, and northbound to the Cloud Management
+ System, via <code>ovn-northd</code>.
+ </p>
+
+ <h2>Database Structure</h2>
+
+ <p>
+ The OVN Southbound database contains three classes of data with
+ different properties, as described in the sections below.
+ </p>
+
+ <h3>Physical Network (PN) data</h3>
+
+ <p>
+ PN tables contain information about the chassis nodes in the system. This
+ contains all the information necessary to wire the overlay, such as IP
+ addresses, supported tunnel types, and security keys.
+ </p>
+
+ <p>
+ The amount of PN data is small (O(n) in the number of chassis) and it
+ changes infrequently, so it can be replicated to every chassis.
+ </p>
+
+ <p>
+ The <ref table="Chassis"/> and <ref table="Gateway"/> tables comprise the
+ PN tables.
+ </p>
+
+ <h3>Logical Network (LN) data</h3>
+
+ <p>
+ LN tables contain the topology of logical switches and routers, ACLs,
+ firewall rules, and everything needed to describe how packets traverse a
+ logical network, represented as logical datapath flows (see Logical
+ Datapath Flows, below).
+ </p>
+
+ <p>
+ LN data may be large (O(n) in the number of logical ports, ACL rules,
+ etc.). Thus, to improve scaling, each chassis should receive only data
+ related to logical networks in which that chassis participates. Past
+ experience shows that in the presence of large logical networks, even
+ finer-grained partitioning of data, e.g. designing logical flows so that
+ only the chassis hosting a logical port needs related flows, pays off
+ scale-wise. (This is not necessary initially but it is worth bearing in
+ mind in the design.)
+ </p>
+
+ <p>
+ The LN is a slave of the cloud management system running northbound of OVN.
+ That CMS determines the entire OVN logical configuration and therefore the
+ LN's content at any given time is a deterministic function of the CMS's
+ configuration, although that happens indirectly via the OVN Northbound DB
+ and <code>ovn-northd</code>.
+ </p>
+
+ <p>
+ LN data is likely to change more quickly than PN data. This is especially
+ true in a container environment where VMs are created and destroyed (and
+ therefore added to and deleted from logical switches) quickly.
+ </p>
+
+ <p>
+ The <ref table="Pipeline"/> table is currently the only LN table.
+ </p>
+
+ <h3>Bindings data</h3>
+
+ <p>
+ The Binding tables contain the current placement of logical components
+ (such as VMs and VIFs) onto chassis and the bindings between logical ports
+ and MACs.
+ </p>
+
+ <p>
+ Bindings change frequently, at least every time a VM powers up or down
+ or migrates, and especially quickly in a container environment. The
+ amount of data per VM (or VIF) is small.
+ </p>
+
+ <p>
+ Each chassis is authoritative about the VMs and VIFs that it hosts at any
+ given time and can efficiently flood that state to a central location, so
+ the consistency needs are minimal.
+ </p>
+
+ <p>
+ The <ref table="Binding"/> table is currently the only binding data.
+ </p>
+
+ <table name="Chassis" title="Physical Network Hypervisor and Gateway Information">
+ <p>
+ Each row in this table represents a hypervisor or gateway (a chassis) in
+ the physical network (PN). Each chassis, via
+ <code>ovn-controller</code>, adds and updates its own row, and keeps a
+ copy of the remaining rows to determine how to reach other hypervisors.
+ </p>
+
+ <p>
+ When a chassis shuts down gracefully, it should remove its own row.
+ (This is not critical because resources hosted on the chassis are equally
+ unreachable regardless of whether the row is present.) If a chassis
+ shuts down permanently without removing its row, some kind of manual or
+ automatic cleanup is eventually needed; we can devise a process for that
+ as necessary.
+ </p>
+
+ <column name="name">
+ A chassis name, taken from <ref key="system-id" table="Open_vSwitch"
+ column="external_ids" db="Open_vSwitch"/> in the Open_vSwitch
+ database's <ref table="Open_vSwitch" db="Open_vSwitch"/> table. OVN does
+ not prescribe a particular format for chassis names.
+ </column>
+
+ <group title="Encapsulation Configuration">
+ <p>
+ OVN uses encapsulation to transmit logical dataplane packets
+ between chassis.
+ </p>
+
+ <column name="encaps">
+ Points to supported encapsulation configurations to transmit
+ logical dataplane packets to this chassis. Each entry is a <ref
+ table="Encap"/> record that describes the configuration.
+ </column>
+ </group>
+
+ <group title="Gateway Configuration">
+ <p>
+ A <dfn>gateway</dfn> is a chassis that forwards traffic between a
+ logical network and a physical VLAN. Gateways are typically dedicated
+ nodes that do not host VMs.
+ </p>
+
+ <column name="gateway_ports">
+ Maps from the name of a port attached to the OVN integration bridge
+ (typically a physical port or an Open vSwitch patch port) to a <ref
+ table="Gateway"/> record that describes the details of the gatewaying
+ function.
+ </column>
+ </group>
+ </table>
+
+ <table name="Encap" title="Encapsulation Types">
+ <p>
+ The <ref column="encaps" table="Chassis"/> column in the <ref
+ table="Chassis"/> table refers to rows in this table to identify
+ how OVN may transmit logical dataplane packets to this chassis.
+ Each chassis, via <code>ovn-controller</code>(8), adds and updates
+ its own rows and keeps a copy of the remaining rows to determine
+ how to reach other chassis.
+ </p>
+
+ <column name="type">
+ The encapsulation to use to transmit packets to this chassis.
+ Hypervisors must use either <code>geneve</code> or
+ <code>stt</code>. Gateways may use <code>vxlan</code>,
+ <code>geneve</code>, or <code>stt</code>.
+ </column>
+
+ <column name="options">
+ Options for configuring the encapsulation, e.g. IPsec parameters when
+ IPsec support is introduced. No options are currently defined.
+ </column>
+
+ <column name="ip">
+ The IPv4 address of the encapsulation tunnel endpoint.
+ </column>
+ </table>
+
+ <table name="Gateway" title="Physical Network Gateway Ports">
+ <p>
+ The <ref column="gateway_ports" table="Chassis"/> column in the <ref
+ table="Chassis"/> table refers to rows in this table to connect a chassis
+ port to a gateway function. Each row in this table describes the logical
+ networks to which a gateway port is attached. Each chassis, via
+ <code>ovn-controller</code>(8), adds and updates its own rows, if any
+ (since most chassis are not gateways), and keeps a copy of the remaining
+ rows to determine how to reach other chassis.
+ </p>
+
+ <column name="vlan_map">
+ Maps from a VLAN ID to a logical port name. Thus, each named logical
+ port corresponds to one VLAN on the gateway port.
+ </column>
+ </table>
+
+ <table name="Pipeline" title="Logical Network Pipeline">
+ <p>
+ Each row in this table represents one logical flow. The cloud management
+ system, via its OVN integration, populates this table with logical flows
+ that implement the L2 and L3 topology specified in the CMS configuration.
+ Each hypervisor, via <code>ovn-controller</code>, translates the logical
+ flows into OpenFlow flows specific to its hypervisor and installs them
+ into Open vSwitch.
+ </p>
+
+ <p>
+ Logical flows are expressed in an OVN-specific format, described here. A
+ logical datapath flow is much like an OpenFlow flow, except that the
+ flows are written in terms of logical ports and logical datapaths instead
+ of physical ports and physical datapaths. Translation between logical
+ and physical flows helps to ensure isolation between logical datapaths.
+ (The logical flow abstraction also allows the CMS to do less work, since
+ it does not have to separately compute and push out physical flows to each
+ chassis.)
+ </p>
+
+ <p>
+ The default action when no flow matches is to drop packets.
+ </p>
+
+ <column name="logical_datapath">
+ The logical datapath to which the logical flow belongs. A logical
+ datapath implements a logical pipeline among the ports in the <ref
+ table="Binding"/> table associated with it. (No table represents a
+ logical datapath.) In practice, the pipeline in a given logical datapath
+ implements either a logical switch or a logical router, and
+ <code>ovn-northd</code> reuses the UUIDs for those logical entities from
+ the <code>OVN_Northbound</code> for logical datapaths.
+ </column>
+
+ <column name="table_id">
+ The stage in the logical pipeline, analogous to an OpenFlow table number.
+ </column>
+
+ <column name="priority">
+ The flow's priority. Flows with numerically higher priority take
+ precedence over those with lower. If two logical datapath flows with the
+ same priority both match, then the one actually applied to the packet is
+ undefined.
+ </column>
+
+ <column name="match">
+ <p>
+ A matching expression. OVN provides a superset of OpenFlow matching
+ capabilities, using a syntax similar to Boolean expressions in a
+ programming language.
+ </p>
+
+ <p>
+ The most important components of a match expression are
+ <dfn>comparisons</dfn> between <dfn>symbols</dfn> and
+ <dfn>constants</dfn>, e.g. <code>ip4.dst == 192.168.0.1</code>,
+ <code>ip.proto == 6</code>, <code>arp.op == 1</code>, <code>eth.type ==
+ 0x800</code>. The logical AND operator <code>&amp;&amp;</code> and
+ logical OR operator <code>||</code> can combine comparisons into a
+ larger expression.
+ </p>
+
+ <p>
+ Matching expressions also support parentheses for grouping, the logical
+ NOT prefix operator <code>!</code>, and literals <code>0</code> and
+ <code>1</code> to express ``false'' or ``true,'' respectively. The
+ latter is useful by itself as a catch-all expression that matches every
+ packet.
+ </p>
+
+ <p><em>Symbols</em></p>
+
+ <p>
+ <em>Type</em>. Symbols have <dfn>integer</dfn> or <dfn>string</dfn>
+ type. Integer symbols have a <dfn>width</dfn> in bits.
+ </p>
+
+ <p>
+ <em>Kinds</em>. There are three kinds of symbols:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ <dfn>Fields</dfn>. A field symbol represents a packet header or
+ metadata field. For example, a field
+ named <code>vlan.tci</code> might represent the VLAN TCI field in a
+ packet.
+ </p>
+
+ <p>
+ A field symbol can have integer or string type. Integer fields can
+ be nominal or ordinal (see <em>Level of Measurement</em>,
+ below).
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <dfn>Subfields</dfn>. A subfield represents a subset of bits from
+ a larger field. For example, a field <code>vlan.vid</code> might
+ be defined as an alias for <code>vlan.tci[0..11]</code>. Subfields
+ are provided for syntactic convenience, because it is always
+ possible to instead refer to a subset of bits from a field
+ directly.
+ </p>
+
+ <p>
+ Only ordinal fields (see <em>Level of Measurement</em>,
+ below) may have subfields. Subfields are always ordinal.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <dfn>Predicates</dfn>. A predicate is shorthand for a Boolean
+ expression. Predicates may be used much like 1-bit fields. For
+ example, <code>ip4</code> might expand to <code>eth.type ==
+ 0x800</code>. Predicates are provided for syntactic convenience,
+ because it is always possible to instead specify the underlying
+ expression directly.
+ </p>
+
+ <p>
+ A predicate whose expansion refers to any nominal field or
+ predicate (see <em>Level of Measurement</em>, below) is nominal;
+ other predicates have Boolean level of measurement.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ <em>Level of Measurement</em>. See
+ http://en.wikipedia.org/wiki/Level_of_measurement for the statistical
+ concept on which this classification is based. There are three
+ levels:
+ </p>
+
+ <ul>
+ <li>
+ <p>
+ <dfn>Ordinal</dfn>. In statistics, ordinal values can be ordered
+ on a scale. OVN considers a field (or subfield) to be ordinal if
+ its bits can be examined individually. This is true for the
+ OpenFlow fields that OpenFlow or Open vSwitch makes ``maskable.''
+ </p>
+
+ <p>
+ Any use of an ordinal field may specify a single bit or a range of
+ bits, e.g. <code>vlan.tci[13..15]</code> refers to the PCP field
+ within the VLAN TCI, and <code>eth.dst[40]</code> refers to the
+ multicast bit in the Ethernet destination address.
+ </p>
+
+ <p>
+ OVN supports all the usual arithmetic relations (<code>==</code>,
+ <code>!=</code>, <code>&lt;</code>, <code>&lt;=</code>,
+ <code>&gt;</code>, and <code>&gt;=</code>) on ordinal fields and
+ their subfields, because OVN can implement these in OpenFlow and
+ Open vSwitch as collections of bitwise tests.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <dfn>Nominal</dfn>. In statistics, nominal values cannot be
+ usefully compared except for equality. This is true of OpenFlow
+ port numbers, Ethernet types, and IP protocols: all of
+ these are just identifiers assigned arbitrarily with no deeper
+ meaning. In OpenFlow and Open vSwitch, bits in these fields
+ generally aren't individually addressable.
+ </p>
+
+ <p>
+ OVN only supports arithmetic tests for equality on nominal fields,
+ because OpenFlow and Open vSwitch provide no way for a flow to
+ efficiently implement other comparisons on them. (A test for
+ inequality can be sort of built out of two flows with different
+ priorities, but OVN matching expressions always generate flows with
+ a single priority.)
+ </p>
+
+ <p>
+ String fields are always nominal.
+ </p>
+ </li>
+
+ <li>
+ <p>
+ <dfn>Boolean</dfn>. A nominal field that has only two values, 0
+ and 1, is somewhat exceptional, since it is easy to support both
+ equality and inequality tests on such a field: either one can be
+ implemented as a test for 0 or 1.
+ </p>
+
+ <p>
+ Only predicates (see above) have a Boolean level of measurement.
+ </p>
+
+ <p>
+ This isn't a standard level of measurement.
+ </p>
+ </li>
+ </ul>
+
+ <p>
+ <em>Prerequisites</em>. Any symbol can have prerequisites, which are
+ additional conditions implied by the use of the symbol. For example,
+ the <code>icmp4.type</code> symbol might have prerequisite
+ <code>icmp4</code>, which would cause an expression <code>icmp4.type ==
+ 0</code> to be interpreted as <code>icmp4.type == 0 &amp;&amp;
+ icmp4</code>, which would in turn expand to <code>icmp4.type == 0
+ &amp;&amp; eth.type == 0x800 &amp;&amp; ip4.proto == 1</code> (assuming
+ <code>icmp4</code> is a predicate defined as suggested under
+ <em>Kinds</em> above).
+ </p>
+
+ <p><em>Relational operators</em></p>
+
+ <p>
+ All of the standard relational operators <code>==</code>,
+ <code>!=</code>, <code>&lt;</code>, <code>&lt;=</code>,
+ <code>&gt;</code>, and <code>&gt;=</code> are supported. Nominal
+ fields support only <code>==</code> and <code>!=</code>, and only in a
+ positive sense when outer <code>!</code> are taken into account,
+ e.g. given string field <code>inport</code>, <code>inport ==
+ "eth0"</code> and <code>!(inport != "eth0")</code> are acceptable, but
+ not <code>inport != "eth0"</code>.
+ </p>
+
+ <p>
+ The implementation of <code>==</code> (or <code>!=</code> when it is
+ negated), is more efficient than that of the other relational
+ operators.
+ </p>
+
+ <p><em>Constants</em></p>
+
+ <p>
+ Integer constants may be expressed in decimal, hexadecimal prefixed by
+ <code>0x</code>, or as dotted-quad IPv4 addresses, IPv6 addresses in
+ their standard forms, or Ethernet addresses as colon-separated hex
+ digits. A constant in any of these forms may be followed by a slash
+ and a second constant (the mask) in the same form, to form a masked
+ constant. IPv4 and IPv6 masks may be given as integers, to express
+ CIDR prefixes.
+ </p>
+
+ <p>
+ String constants have the same syntax as quoted strings in JSON (thus,
+ they are Unicode strings). String constants are used for naming
+ logical ports. Thus, the useful values are <ref
+ column="logical_port"/> names from the <ref table="Binding"/> and
+ <ref table="Gateway"/> tables in a logical flow's <ref
+ column="logical_datapath"/>.
+ </p>
+
+ <p>
+ Some operators support sets of constants written inside curly braces
+ <code>{</code> ... <code>}</code>. Commas between elements of a set,
+ and after the last element, are optional. With <code>==</code>,
+ ``<code><var>field</var> == { <var>constant1</var>,
+ <var>constant2</var>,</code> ... <code>}</code>'' is syntactic sugar
+ for ``<code><var>field</var> == <var>constant1</var> ||
+ <var>field</var> == <var>constant2</var> || </code>...<code></code>''.
+ Similarly, ``<code><var>field</var> != { <var>constant1</var>,
+ <var>constant2</var>, </code>...<code> }</code>'' is equivalent to
+ ``<code><var>field</var> != <var>constant1</var> &amp;&amp;
+ <var>field</var> != <var>constant2</var> &amp;&amp;
+ </code>...<code></code>''.
+ </p>
+
+ <p><em>Miscellaneous</em></p>
+
+ <p>
+ Comparisons may name the symbol or the constant first,
+ e.g. <code>tcp.src == 80</code> and <code>80 == tcp.src</code> are both
+ acceptable.
+ </p>
+
+ <p>
+ Tests for a range may be expressed using a syntax like <code>1024 &lt;=
+ tcp.src &lt;= 49151</code>, which is equivalent to <code>1024 &lt;=
+ tcp.src &amp;&amp; tcp.src &lt;= 49151</code>.
+ </p>
+
+ <p>
+ For a one-bit field or predicate, a mention of its name is equivalent
+ to <code><var>symbol</var> == 1</code>, e.g. <code>vlan.present</code>
+ is equivalent to <code>vlan.present == 1</code>. The same is true for
+ one-bit subfields, e.g. <code>vlan.tci[12]</code>. There is no
+ technical limitation to implementing the same for ordinal fields of all
+ widths, but the implementation is expensive enough that the syntax
+ parser requires writing an explicit comparison against zero to make
+ mistakes less likely, e.g. in <code>tcp.src != 0</code> the comparison
+ against 0 is required.
+ </p>
+
+ <p>
+ <em>Operator precedence</em> is as shown below, from highest to lowest.
+ There are two exceptions where parentheses are required even though the
+ table would suggest that they are not: <code>&amp;&amp;</code> and
+ <code>||</code> require parentheses when used together, and
+ <code>!</code> requires parentheses when applied to a relational
+ expression. Thus, in <code>(eth.type == 0x800 || eth.type == 0x86dd)
+ &amp;&amp; ip.proto == 6</code> or <code>!(arp.op == 1)</code>, the
+ parentheses are mandatory.
+ </p>
+
+ <ul>
+ <li><code>()</code></li>
+ <li><code>== != &lt; &lt;= &gt; &gt;=</code></li>
+ <li><code>!</code></li>
+ <li><code>&amp;&amp; ||</code></li>
+ </ul>
+
+ <p>
+ <em>Comments</em> may be introduced by <code>//</code>, which extends
+ to the next new-line. Comments within a line may be bracketed by
+ <code>/*</code> and <code>*/</code>. Multiline comments are not
+ supported.
+ </p>
+
+ <p><em>Symbols</em></p>
+
+ <ul>
+ <li>
+ <code>metadata</code> <code>reg0</code> ... <code>reg7</code>
+ <code>xreg0</code> ... <code>xreg3</code>
+ </li>
+ <li><code>inport</code> <code>outport</code> <code>queue</code></li>
+ <li><code>eth.src</code> <code>eth.dst</code> <code>eth.type</code></li>
+ <li><code>vlan.tci</code> <code>vlan.vid</code> <code>vlan.pcp</code> <code>vlan.present</code></li>
+ <li><code>ip.proto</code> <code>ip.dscp</code> <code>ip.ecn</code> <code>ip.ttl</code> <code>ip.frag</code></li>
+ <li><code>ip4.src</code> <code>ip4.dst</code></li>
+ <li><code>ip6.src</code> <code>ip6.dst</code> <code>ip6.label</code></li>
+ <li><code>arp.op</code> <code>arp.spa</code> <code>arp.tpa</code> <code>arp.sha</code> <code>arp.tha</code></li>
+ <li><code>tcp.src</code> <code>tcp.dst</code> <code>tcp.flags</code></li>
+ <li><code>udp.src</code> <code>udp.dst</code></li>
+ <li><code>sctp.src</code> <code>sctp.dst</code></li>
+ <li><code>icmp4.type</code> <code>icmp4.code</code></li>
+ <li><code>icmp6.type</code> <code>icmp6.code</code></li>
+ <li><code>nd.target</code> <code>nd.sll</code> <code>nd.tll</code></li>
+ </ul>
+
+ </column>
+
+ <column name="actions">
+ <p>
+ Logical datapath actions, to be executed when the logical flow
+ represented by this row is the highest-priority match.
+ </p>
+
+ <p>
+ Actions share lexical syntax with the <ref column="match"/> column. An
+ empty set of actions (or one that contains just white space or
+ comments), or a set of actions that consists of just
+ <code>drop;</code>, causes the matched packets to be dropped.
+ Otherwise, the column should contain a sequence of actions, each
+ terminated by a semicolon.
+ </p>
+
+ <p>
+ The following actions will be initially supported:
+ </p>
+
+ <dl>
+ <dt><code>output;</code></dt>
+ <dd>
+ Outputs the packet to the logical port currently designated by
+ <code>outport</code>. Output to the ingress port is implicitly
+ dropped, that is, <code>output</code> becomes a no-op if
+ <code>outport</code> == <code>inport</code>.
+ </dd>
+
+ <dt><code>next;</code></dt>
+ <dd>
+ Executes the next logical datapath table as a subroutine.
+ </dd>
+
+ <dt><code><var>field</var> = <var>constant</var>;</code></dt>
+ <dd>
+ Sets data or metadata field <var>field</var> to constant value
+ <var>constant</var>, e.g. <code>outport = "vif0";</code> to set the
+ logical output port. Assigning to a field with prerequisites
+ implicitly adds those prerequisites to <ref column="match"/>; thus,
+ for example, a flow that sets <code>tcp.dst</code> applies only to
+ TCP flows, regardless of whether its <ref column="match"/> mentions
+ any TCP field. To set only a subset of bits in a field,
+ <var>field</var> may be a subfield or <var>constant</var> may be
+ masked, e.g. <code>vlan.pcp[2] = 1;</code> and <code>vlan.pcp =
+ 4/4;</code> both set the most significant bit of the VLAN PCP. Not
+ all fields are modifiable (e.g. <code>eth.type</code> and
+ <code>ip.proto</code> are read-only), and not all modifiable fields
+ may be partially modified (e.g. <code>ip.ttl</code> must be assigned as
+ a whole).
+ </dd>
+ </dl>
+
+ <p>
+ The following actions will likely be useful later, but they have not
+ been thought out carefully.
+ </p>
+
+ <dl>
+ <dt><code><var>field1</var> = <var>field2</var>;</code></dt>
+ <dd>
+ Extends the assignment action to allow copying between fields.
+ </dd>
+
+ <dt><code>learn</code></dt>
+
+ <dt><code>conntrack</code></dt>
+
+ <dt><code>dec_ttl { <var>action</var>, </code>...<code> } { <var>action</var>; </code>...<code>};</code></dt>
+ <dd>
+ decrement TTL; execute first set of actions if
+ successful, second set if TTL decrement fails
+ </dd>
+
+ <dt><code>icmp_reply { <var>action</var>, </code>...<code> };</code></dt>
+ <dd>generate ICMP reply from packet, execute <var>action</var>s</dd>
+
+ <dt><code>arp { <var>action</var>, </code>...<code> }</code></dt>
+ <dd>generate ARP from packet, execute <var>action</var>s</dd>
+ </dl>
+ </column>
+ </table>
+
+ <table name="Binding" title="Physical-Logical Bindings">
+ <p>
+ Each row in this table identifies the physical location of a logical
+ port.
+ </p>
+
+ <p>
+ For every <code>Logical_Port</code> record in <code>OVN_Northbound</code>
+ database, <code>ovn-northd</code> creates a record in this table.
+ <code>ovn-northd</code> populates and maintains every column except
+ the <code>chassis</code> column, which it leaves empty in new records.
+ </p>
+
+ <p>
+ <code>ovn-controller</code> populates the <code>chassis</code> column
+ for the records that identify the logical ports that are located on its
+ hypervisor, which <code>ovn-controller</code> in turn finds out by
+ monitoring the local hypervisor's Open_vSwitch database, which
+ identifies logical ports via the conventions described in
+ <code>IntegrationGuide.md</code>.
+ </p>
+
+ <p>
+ When a chassis shuts down gracefully, it should clean up the
+ <code>chassis</code> column that it previously had populated.
+ (This is not critical because resources hosted on the chassis are equally
+ unreachable regardless of whether their rows are present.) To handle the
+ case where a VM is shut down abruptly on one chassis, then brought up
+ again on a different one, <code>ovn-controller</code> must overwrite the
+ <code>chassis</code> column with new information.
+ </p>
+
+ <column name="logical_datapath">
+ The logical datapath to which the logical port belongs. A logical
+ datapath implements a logical pipeline via logical flows in the <ref
+ table="Pipeline"/> table. (No table represents a logical datapath.)
+ </column>
+
+ <column name="logical_port">
+ A logical port, taken from <ref table="Logical_Port" column="name"
+ db="OVN_Northbound"/> in the OVN_Northbound database's
+ <ref table="Logical_Port" db="OVN_Northbound"/> table. OVN does not
+ prescribe a particular format for the logical port ID.
+ </column>
+
+ <column name="tunnel_key">
+ <p>
+ A number that represents the logical port in the key (e.g. VXLAN VNI or
+ STT key) field carried within tunnel protocol packets. (This avoids
+ wasting space for a whole UUID in tunneled packets. It also allows OVN
+ to support encapsulations that cannot fit an entire UUID in their
+ tunnel keys.)
+ </p>
+
+ <p>
+ Tunnel ID 0 is reserved for internal use within OVN.
+ </p>
+ </column>
+
+ <column name="parent_port">
+ For containers created inside a VM, this is taken from
+ <ref table="Logical_Port" column="parent_name" db="OVN_Northbound"/>
+ in the OVN_Northbound database's <ref table="Logical_Port"
+ db="OVN_Northbound"/> table. It is left empty if
+ <ref column="logical_port"/> belongs to a VM or a container created
+ in the hypervisor.
+ </column>
+
+ <column name="tag">
+ When <ref column="logical_port"/> identifies the interface of a container
+ spawned inside a VM, this column identifies the VLAN tag in
+ the network traffic associated with that container's network interface.
+ It is left empty if <ref column="logical_port"/> belongs to a VM or a
+ container created in the hypervisor.
+ </column>
+
+ <column name="chassis">
+ The physical location of the logical port. To successfully identify a
+ chassis, this column must be a <ref table="Chassis"/> record. This is
+ populated by <code>ovn-controller</code>.
+ </column>
+
+ <column name="mac">
+ <p>
+ The Ethernet address or addresses used as a source address on the
+ logical port, each in the form
+ <var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>:<var>xx</var>.
+ The string <code>unknown</code> is also allowed to indicate that the
+ logical port has an unknown set of (additional) source addresses.
+ </p>
+
+ <p>
+ A VM interface would ordinarily have a single Ethernet address. A
+ gateway port might initially only have <code>unknown</code>, and then
+ add MAC addresses to the set as it learns new source addresses.
+ </p>
+ </column>
+ </table>
+</database>
--- /dev/null
+/ovn-ctl.8
--- /dev/null
+# Add the ovn-ctl control script to the scripts_SCRIPTS list.
+scripts_SCRIPTS += \
+ ovn/utilities/ovn-ctl
+
+# ovn-ctl(8) manpage; presumably generated from ovn-ctl.8.xml at build time
+# (the generated file is git-ignored) -- TODO confirm the build rule.
+man_MANS += \
+ ovn/utilities/ovn-ctl.8
+
+# Add the script itself and the manpage's XML source to EXTRA_DIST.
+EXTRA_DIST += \
+ ovn/utilities/ovn-ctl \
+ ovn/utilities/ovn-ctl.8.xml
--- /dev/null
+#!/bin/sh
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Work out the directory this script was invoked from, so that the ovs-lib
+# helper file located in the same directory can be sourced below.
+case $0 in
+ */*) dir0=`echo "$0" | sed 's,/[^/]*$,,'` ;;
+ *) dir0=./ ;;
+esac
+# Abort if the helper library cannot be loaded.
+. "$dir0/ovs-lib" || exit 1
+
+# Append each of the usual binary directories to PATH unless PATH already
+# contains it.  ($sbindir and $bindir are presumably set by ovs-lib --
+# TODO confirm.)
+for dir in "$sbindir" "$bindir" /sbin /bin /usr/sbin /usr/bin; do
+ case :$PATH: in
+ *:$dir:*) ;;
+ *) PATH=$PATH:$dir ;;
+ esac
+done
+
+
+## ----- ##
+## start ##
+## ----- ##
+
+# Create an empty OVSDB database with ovsdb-tool.
+#   $1  path of the database file to create
+#   $2  path of the schema file to create it from
+# Both arguments are stored in the global variables DB_FILE and DB_SCHEMA.
+create_db () {
+    DB_FILE=$1 DB_SCHEMA=$2
+    action "Creating empty database $DB_FILE" \
+        ovsdb-tool create "$DB_FILE" "$DB_SCHEMA"
+}
+
+# Make sure both OVN database files exist and that the running ovsdb-server
+# serves them.
+#
+# Missing database files are created from their schemas.  If ovsdb-server does
+# not report exactly two databases whose names contain "OVN", both files are
+# added with ovsdb-server/add-db and the count is checked again; the script
+# exits with status 1 if the databases still are not both present.
+check_ovn_dbs () {
+    if test ! -e "$DB_NB_FILE"; then
+        create_db "$DB_NB_FILE" "$DB_NB_SCHEMA"
+    fi
+
+    if test ! -e "$DB_SB_FILE"; then
+        create_db "$DB_SB_FILE" "$DB_SB_SCHEMA"
+    fi
+
+    # "grep -c" prints a bare number, whereas some "wc -l" implementations pad
+    # their output with blanks, which would never compare equal to "2".
+    running_ovn_dbs=$(ovs-appctl -t ovsdb-server ovsdb-server/list-dbs | grep -c OVN)
+    if [ "$running_ovn_dbs" != "2" ] ; then
+        # Quote the file names so that paths containing white space or glob
+        # characters are passed through as a single argument.
+        ovs-appctl -t ovsdb-server ovsdb-server/add-db "$DB_NB_FILE"
+        ovs-appctl -t ovsdb-server ovsdb-server/add-db "$DB_SB_FILE"
+        running_ovn_dbs=$(ovs-appctl -t ovsdb-server ovsdb-server/list-dbs | grep -c OVN)
+        if [ "$running_ovn_dbs" != "2" ] ; then
+            echo >&2 "$0: Failed to add OVN dbs to ovsdb-server"
+            exit 1
+        fi
+    fi
+}
+
+start_northd () {
+ # We expect ovn-northd to be co-located with ovsdb-server handling both the
+ # OVN_Northbound and OVN_Southbound dbs.
+ check_ovn_dbs
+
+ set ovn-northd
+ set "$@" -vconsole:emer -vsyslog:err -vfile:info
+ OVS_RUNDIR=${OVN_RUNDIR} start_daemon "$OVN_NORTHD_PRIORITY" "$OVN_NORTHD_WRAPPER" "$@"
+}
+
+# Start the ovn-controller daemon, pointing it at the database socket
+# "unix:$DB_SOCK".
+start_controller () {
+    # Build the daemon's command line in the positional parameters.
+    set ovn-controller "unix:$DB_SOCK" -vconsole:emer -vsyslog:err -vfile:info
+    OVS_RUNDIR=${OVN_RUNDIR} start_daemon "$OVN_CONTROLLER_PRIORITY" "$OVN_CONTROLLER_WRAPPER" "$@"
+}
+
+## ---- ##
+## stop ##
+## ---- ##
+
+# Stop ovn-northd, with OVS_RUNDIR set to the OVN run directory for the call.
+stop_northd () {
+    OVS_RUNDIR=$OVN_RUNDIR stop_daemon ovn-northd
+}
+
+# Stop ovn-controller, with OVS_RUNDIR set to the OVN run directory for the
+# call.
+stop_controller () {
+    OVS_RUNDIR=$OVN_RUNDIR stop_daemon ovn-controller
+}
+
+## ------- ##
+## restart ##
+## ------- ##
+
+# Restart ovn-northd: stop it, then start it again.
+restart_northd () {
+    stop_northd; start_northd
+}
+
+# Restart ovn-controller: stop it, then start it again.
+restart_controller () {
+    stop_controller; start_controller
+}
+
+## ---- ##
+## main ##
+## ---- ##
+
+# Give every configurable setting its default value.  Command-line options
+# processed later may override any of the variables assigned here.
+set_defaults () {
+    # Niceness and optional wrapper program for each daemon.
+    OVN_NORTHD_PRIORITY=-10
+    OVN_NORTHD_WRAPPER=""
+    OVN_CONTROLLER_PRIORITY=-10
+    OVN_CONTROLLER_WRAPPER=""
+
+    # Database socket, database files and their schemas.
+    DB_SOCK="$rundir/db.sock"
+    DB_NB_FILE="$dbdir/ovnnb.db"
+    DB_SB_FILE="$dbdir/ovnsb.db"
+    DB_NB_SCHEMA="$datadir/ovn-nb.ovsschema"
+    DB_SB_SCHEMA="$datadir/ovn-sb.ovsschema"
+
+    # Run directories: a non-empty value already present in the environment
+    # is kept; OVN_RUNDIR falls back to OVS_RUNDIR, which falls back to
+    # $rundir.
+    OVS_RUNDIR="${OVS_RUNDIR:-$rundir}"
+    OVN_RUNDIR="${OVN_RUNDIR:-$OVS_RUNDIR}"
+}
+
+set_option () {
+ var=`echo "$option" | tr abcdefghijklmnopqrstuvwxyz- ABCDEFGHIJKLMNOPQRSTUVWXYZ_`
+ eval set=\${$var+yes}
+ eval old_value=\$$var
+ if test X$set = X || \
+ (test $type = bool && \
+ test X"$old_value" != Xno && test X"$old_value" != Xyes); then
+ echo >&2 "$0: unknown option \"$arg\" (use --help for help)"
+ return
+ fi
+ eval $var=\$value
+}
+
+# Print the help message on stdout.  set_defaults is called first so that the
+# "(default: ...)" values shown are the built-in defaults rather than any
+# values overridden earlier on the command line.
+usage () {
+ set_defaults
+ cat << EOF
+$0: controls Open Virtual Network daemons
+usage: $0 [OPTIONS] COMMAND
+
+This program is intended to be invoked internally by Open Virtual Network
+startup scripts. System administrators should not normally invoke it directly.
+
+Commands:
+ start_northd start ovn-northd
+ start_controller start ovn-controller
+ stop_northd stop ovn-northd
+ stop_controller stop ovn-controller
+ restart_northd restart ovn-northd
+ restart_controller restart ovn-controller
+
+Options:
+ --ovn-northd-priority=NICE set ovn-northd's niceness (default: $OVN_NORTHD_PRIORITY)
+ --ovn-northd-wrapper=WRAPPER run with a wrapper like valgrind for debugging
+ --ovn-controller-priority=NICE set ovn-controller's niceness (default: $OVN_CONTROLLER_PRIORITY)
+ --ovn-controller-wrapper=WRAPPER run with a wrapper like valgrind for debugging
+ -h, --help display this help message
+
+File location options:
+ --db-sock=SOCKET JSON-RPC socket name (default: $DB_SOCK)
+ --db-nb-file=FILE OVN_Northbound db file (default: $DB_NB_FILE)
+ --db-sb-file=FILE OVN_Southbound db file (default: $DB_SB_FILE)
+ --db-nb-schema=FILE OVN_Northbound db schema file (default: $DB_NB_SCHEMA)
+ --db-sb-schema=FILE OVN_Southbound db schema file (default: $DB_SB_SCHEMA)
+
+Default directories with "configure" option and environment variable override:
+ logs: /usr/local/var/log/openvswitch (--with-logdir, OVS_LOGDIR)
+ pidfiles and sockets: /usr/local/var/run/openvswitch (--with-rundir, OVS_RUNDIR)
+ ovn-nb.db: /usr/local/etc/openvswitch (--with-dbdir, OVS_DBDIR)
+ ovn-sb.db: /usr/local/etc/openvswitch (--with-dbdir, OVS_DBDIR)
+ system configuration: /usr/local/etc (--sysconfdir, OVS_SYSCONFDIR)
+ data files: /usr/local/share/openvswitch (--pkgdatadir, OVS_PKGDATADIR)
+ user binaries: /usr/local/bin (--bindir, OVS_BINDIR)
+ system binaries: /usr/local/sbin (--sbindir, OVS_SBINDIR)
+EOF
+}
+
+# Script entry point: load defaults, parse the command line, then run the
+# single requested command.
+set_defaults
+command=
+for arg
+do
+    case $arg in
+        -h | --help)
+            # Help was asked for explicitly: show it and stop successfully
+            # instead of going on to parse or run anything else.
+            usage
+            exit 0
+            ;;
+        --[a-z]*=*)
+            # --name=value: string-valued option.
+            option=`expr X"$arg" : 'X--\([^=]*\)'`
+            value=`expr X"$arg" : 'X[^=]*=\(.*\)'`
+            type=string
+            set_option
+            ;;
+        --no-[a-z]*)
+            # --no-name: boolean option switched off.
+            option=`expr X"$arg" : 'X--no-\(.*\)'`
+            value=no
+            type=bool
+            set_option
+            ;;
+        --[a-z]*)
+            # --name: boolean option switched on.
+            option=`expr X"$arg" : 'X--\(.*\)'`
+            value=yes
+            type=bool
+            set_option
+            ;;
+        -*)
+            echo >&2 "$0: unknown option \"$arg\" (use --help for help)"
+            exit 1
+            ;;
+        *)
+            # First non-option argument is the command; a second is an error.
+            if test X"$command" = X; then
+                command=$arg
+            else
+                echo >&2 "$0: exactly one non-option argument required (use --help for help)"
+                exit 1
+            fi
+            ;;
+    esac
+done
+case $command in
+    start_northd)
+        start_northd
+        ;;
+    start_controller)
+        start_controller
+        ;;
+    stop_northd)
+        stop_northd
+        ;;
+    stop_controller)
+        stop_controller
+        ;;
+    restart_northd)
+        restart_northd
+        ;;
+    restart_controller)
+        restart_controller
+        ;;
+    create_ovn_dbs)
+        create_ovn_dbs
+        ;;
+    help)
+        usage
+        ;;
+    preheat)
+        echo >&2 "$0: preheating ovn to 350 degrees F."
+        exit 1
+        ;;
+    '')
+        echo >&2 "$0: missing command name (use --help for help)"
+        exit 1
+        ;;
+    *)
+        echo >&2 "$0: unknown command \"$command\" (use --help for help)"
+        exit 1
+        ;;
+esac
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovn-ctl" section="8" title="ovn-ctl">
+ <h1>Name</h1>
+ <p>ovn-ctl -- Open Virtual Network northbound daemon lifecycle utility</p>
+
+ <h1>Synopsis</h1>
+ <p><code>ovn-ctl</code> [<var>options</var>] <var>command</var></p>
+
+ <h1>Description</h1>
+ <p>This program is intended to be invoked internally by Open Virtual Network
+ startup scripts. System administrators should not normally invoke it directly.</p>
+
+ <h1>Commands</h1>
+
+ <dl>
+ <dt><code>start_northd</code></dt>
+ <dt><code>start_controller</code></dt>
+ <dt><code>stop_northd</code></dt>
+ <dt><code>stop_controller</code></dt>
+ <dt><code>restart_northd</code></dt>
+ <dt><code>restart_controller</code></dt>
+ </dl>
+
+ <h1>Options</h1>
+ <p><code>--ovn-northd-priority=<var>NICE</var></code></p>
+ <p><code>--ovn-northd-wrapper=<var>WRAPPER</var></code></p>
+ <p><code>--ovn-controller-priority=<var>NICE</var></code></p>
+ <p><code>--ovn-controller-wrapper=<var>WRAPPER</var></code></p>
+ <p><code>-h</code> | <code>--help</code></p>
+
+ <h1>File location options</h1>
+ <p><code>--db-sock=<var>SOCKET</var></code></p>
+ <p><code>--db-nb-file=<var>FILE</var></code></p>
+ <p><code>--db-sb-file=<var>FILE</var></code></p>
+ <p><code>--db-nb-schema=<var>FILE</var></code></p>
+ <p><code>--db-sb-schema=<var>FILE</var></code></p>
+
+ <h1>Example Usage</h1>
+ <h2>Run ovn-controller on a host already running OVS</h2>
+ <p><code># ovn-ctl start_controller</code></p>
+
+ <h2>Run ovn-northd on a host already running OVS</h2>
+ <p><code># ovn-ctl start_northd</code></p>
+
+ <h2>All-in-one OVS+OVN for testing</h2>
+ <p><code># ovs-ctl start --system-id="random"</code></p>
+ <p><code># ovn-ctl start_northd</code></p>
+ <p><code># ovn-ctl start_controller</code></p>
+
+</manpage>
else:
return textToNroff(node.data, font)
elif node.nodeType == node.ELEMENT_NODE:
- if node.tagName in ['code', 'em', 'option']:
+ if node.tagName in ['code', 'em', 'option', 'env']:
s = r'\fB'
for child in node.childNodes:
s += inlineXmlToNroff(child, r'\fB')
rhel/usr_share_openvswitch_scripts_sysconfig.template \
rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template \
rhel/usr_lib_systemd_system_openvswitch.service \
- rhel/usr_lib_systemd_system_openvswitch-nonetwork.service
+ rhel/usr_lib_systemd_system_openvswitch-nonetwork.service \
+ rhel/usr_lib_systemd_system_ovn-controller.service \
+ rhel/usr_lib_systemd_system_ovn-northd.service
update_rhel_spec = \
$(AM_V_GEN)($(ro_shell) && sed -e 's,[@]VERSION[@],$(VERSION),g') \
This provides static library, libopenswitch.a and the openvswitch header
files needed to build an external application.
+%package ovn
+Summary: Open vSwitch - Open Virtual Network support
+License: ASL 2.0
+Requires: openvswitch
+
+%description ovn
+OVN, the Open Virtual Network, is a system to support virtual network
+abstraction. OVN complements the existing capabilities of OVS to add
+native support for virtual network abstractions, such as virtual L2 and L3
+overlays and security groups.
+
%prep
%setup -q
install -p -D -m 0644 \
rhel/usr_share_openvswitch_scripts_systemd_sysconfig.template \
$RPM_BUILD_ROOT/%{_sysconfdir}/sysconfig/openvswitch
-install -p -D -m 0644 \
- rhel/usr_lib_systemd_system_openvswitch.service \
- $RPM_BUILD_ROOT%{_unitdir}/openvswitch.service
-install -p -D -m 0644 \
- rhel/usr_lib_systemd_system_openvswitch-nonetwork.service \
- $RPM_BUILD_ROOT%{_unitdir}/openvswitch-nonetwork.service
-
+for service in openvswitch openvswitch-nonetwork ovn-controller ovn-northd; do
+ install -p -D -m 0644 \
+ rhel/usr_lib_systemd_system_${service}.service \
+ $RPM_BUILD_ROOT%{_unitdir}/${service}.service
+done
install -m 0755 rhel/etc_init.d_openvswitch \
$RPM_BUILD_ROOT%{_datadir}/openvswitch/scripts/openvswitch.init
fi
%endif
+%preun ovn
+%if 0%{?systemd_preun:1}
+ %systemd_preun ovn-controller.service
+ %systemd_preun ovn-northd.service
+%else
+ if [ $1 -eq 0 ] ; then
+ # Package removal, not upgrade
+ /bin/systemctl --no-reload disable ovn-controller.service >/dev/null 2>&1 || :
+ /bin/systemctl stop ovn-controller.service >/dev/null 2>&1 || :
+ /bin/systemctl --no-reload disable ovn-northd.service >/dev/null 2>&1 || :
+ /bin/systemctl stop ovn-northd.service >/dev/null 2>&1 || :
+ fi
+%endif
%post
%if 0%{?systemd_post:1}
fi
%endif
+%post ovn
+%if 0%{?systemd_post:1}
+    %systemd_post ovn-controller.service
+    %systemd_post ovn-northd.service
+%else
+    # Package install, not upgrade
+    if [ $1 -eq 1 ]; then
+        # Discard output via the absolute /dev/null path; a relative
+        # "dev/null" would be created in the scriptlet's working directory.
+        /bin/systemctl daemon-reload >/dev/null || :
+    fi
+%endif
%postun
%if 0%{?systemd_postun_with_restart:1}
fi
%endif
+%postun ovn
+%if 0%{?systemd_postun_with_restart:1}
+ %systemd_postun_with_restart ovn-controller.service
+ %systemd_postun_with_restart ovn-northd.service
+%else
+ /bin/systemctl daemon-reload >/dev/null 2>&1 || :
+ if [ "$1" -ge "1" ] ; then
+ # Package upgrade, not uninstall
+ /bin/systemctl try-restart ovn-controller.service >/dev/null 2>&1 || :
+ /bin/systemctl try-restart ovn-northd.service >/dev/null 2>&1 || :
+ fi
+%endif
%files -n python-openvswitch
%{python_sitelib}/ovs
%exclude %{_mandir}/man8/ovs-vlan-bug-workaround.8.gz
%exclude %{_datadir}/openvswitch/scripts/ovs-save
+%files ovn
+%{_bindir}/ovn-controller
+%{_bindir}/ovn-nbctl
+%{_bindir}/ovn-northd
+%{_datadir}/openvswitch/scripts/ovn-ctl
+%{_mandir}/man8/ovs-testcontroller.8*
+%{_mandir}/man5/ovn-nb.5*
+%{_mandir}/man5/ovn-sb.5*
+%{_mandir}/man7/ovn-architecture.7*
+%{_mandir}/man8/ovn-controller.8*
+%{_mandir}/man8/ovn-ctl.8*
+%{_mandir}/man8/ovn-nbctl.8*
+%config %{_datadir}/openvswitch/ovn-nb.ovsschema
+%config %{_datadir}/openvswitch/ovn-sb.ovsschema
+%{_unitdir}/ovn-controller.service
+%{_unitdir}/ovn-northd.service
+%ghost %attr(755,root,root) %{_rundir}/ovn-controller
+%ghost %attr(755,root,root) %{_rundir}/ovn-northd
+
%changelog
* Wed Jan 12 2011 Ralf Spenneberg <ralf@os-s.net>
- First build on F14
--- /dev/null
+[Unit]
+Description=OVN controller daemon
+After=syslog.target
+Requires=openvswitch.service
+After=openvswitch.service
+
+[Service]
+Type=simple
+RuntimeDirectory=ovn-controller
+RuntimeDirectoryMode=0755
+PIDFile=%t/ovn-controller/ovn-controller.pid
+Environment=OVS_RUNDIR=%t/ovn-controller
+ExecStart=/usr/bin/ovn-controller -vconsole:emer -vsyslog:err -vfile:info \
+ --log-file=/var/log/openvswitch/ovn-controller.log \
+ --no-chdir --pidfile=%t/ovn-controller/ovn-controller.pid \
+ unix:/var/run/openvswitch/db.sock
--- /dev/null
+[Unit]
+Description=OVN northd management daemon
+After=syslog.target
+Requires=openvswitch.service
+After=openvswitch.service
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+RuntimeDirectory=ovn-northd
+RuntimeDirectoryMode=0755
+PIDFile=%t/ovn-northd/ovn-northd.pid
+Environment=OVN_RUNDIR=%t/ovn-northd OVS_RUNDIR=%t/openvswitch OVS_DBDIR=/var/lib/openvswitch
+ExecStart=/usr/share/openvswitch/scripts/ovn-ctl start_northd
+ExecStop=/usr/share/openvswitch/scripts/ovn-ctl stop_northd
tests/interface-reconfigure.at \
tests/vlog.at \
tests/vtep-ctl.at \
- tests/auto-attach.at
+ tests/auto-attach.at \
+ tests/ovn.at
KMOD_TESTSUITE_AT = \
tests/kmod-testsuite.at \
tests/test-multipath.c \
tests/test-netflow.c \
tests/test-odp.c \
+ tests/test-ovn.c \
tests/test-packets.c \
tests/test-random.c \
tests/test-reconnect.c \
tests/test-unix-socket.c
endif
-tests_ovstest_LDADD = lib/libopenvswitch.la
+tests_ovstest_LDADD = lib/libopenvswitch.la ovn/lib/libovn.la
dist_check_SCRIPTS = tests/flowgen.pl
noinst_PROGRAMS += tests/test-strtok_r
--- /dev/null
+AT_BANNER([OVN])
+
+AT_SETUP([ovn -- lexer])
+dnl For lines without =>, input and expected output are identical.
+dnl For lines with =>, input precedes => and expected output follows =>.
+AT_DATA([test-cases.txt], [dnl
+foo bar baz quuxquuxquux _abcd_ a.b.c.d a123_.456
+"abc\u0020def" => "abc def"
+" => error("Input ends inside quoted string.")dnl "
+
+a/*b*/c => a c
+a//b c => a
+a/**/b => a b
+a/*/b => a error("`/*' without matching `*/'.")
+a/*/**/b => a b
+a/b => a error("`/' is only valid as part of `//' or `/*'.") b
+
+0 1 12345 18446744073709551615
+18446744073709551616 => error("Decimal constants must be less than 2**64.")
+9999999999999999999999 => error("Decimal constants must be less than 2**64.")
+01 => error("Decimal constants must not have leading zeros.")
+
+0/0
+0/1
+1/0 => error("Value contains unmasked 1-bits.")
+1/1
+128/384
+1/3
+1/ => error("Integer constant expected.")
+
+1/0x123 => error("Value and mask have incompatible formats.")
+
+0x1234
+0x01234 => 0x1234
+0x0 => 0
+0x000 => 0
+0xfedcba9876543210
+0XFEDCBA9876543210 => 0xfedcba9876543210
+0xfedcba9876543210fedcba9876543210
+0xfedcba9876543210fedcba98765432100 => error("Hexadecimal constant requires more than 128 bits.")
+0x0000fedcba9876543210fedcba9876543210 => 0xfedcba9876543210fedcba9876543210
+0x => error("Hex digits expected following 0x.")
+0X => error("Hex digits expected following 0X.")
+0x0/0x0 => 0/0
+0x0/0x1 => 0/0x1
+0x1/0x0 => error("Value contains unmasked 1-bits.")
+0xffff/0x1ffff
+0x. => error("Invalid syntax in hexadecimal constant.")
+
+192.168.128.1 1.2.3.4 255.255.255.255 0.0.0.0
+256.1.2.3 => error("Invalid numeric constant.")
+192.168.0.0/16
+192.168.0.0/255.255.0.0 => 192.168.0.0/16
+192.168.0.0/255.255.255.0 => 192.168.0.0/24
+192.168.0.0/255.255.0.255
+192.168.0.0/255.0.0.0 => error("Value contains unmasked 1-bits.")
+192.168.0.0/32
+192.168.0.0/255.255.255.255 => 192.168.0.0/32
+
+::
+::1
+ff00::1234 => ff00::1234
+2001:db8:85a3::8a2e:370:7334
+2001:db8:85a3:0:0:8a2e:370:7334 => 2001:db8:85a3::8a2e:370:7334
+2001:0db8:85a3:0000:0000:8a2e:0370:7334 => 2001:db8:85a3::8a2e:370:7334
+::ffff:192.0.2.128
+::ffff:c000:0280 => ::ffff:192.0.2.128
+::1/::1
+::1/ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff => ::1/128
+::1/128
+ff00::/8
+ff00::/ff00:: => ff00::/8
+
+01:23:45:67:ab:cd
+01:23:45:67:AB:CD => 01:23:45:67:ab:cd
+fe:dc:ba:98:76:54
+FE:DC:ba:98:76:54 => fe:dc:ba:98:76:54
+01:00:00:00:00:00/01:00:00:00:00:00
+ff:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff
+fe:ff:ff:ff:ff:ff/ff:ff:ff:ff:ff:ff
+ff:ff:ff:ff:ff:ff/fe:ff:ff:ff:ff:ff => error("Value contains unmasked 1-bits.")
+fe:x => error("Invalid numeric constant.")
+00:01:02:03:04:x => error("Invalid numeric constant.")
+
+(){}[[]]==!=<<=>>=!&&||..,;= => ( ) { } [[ ]] == != < <= > >= ! && || .. , ; =
+& => error("`&' is only valid as part of `&&'.")
+| => error("`|' is only valid as part of `||'.")
+
+^ => error("Invalid character `^' in input.")
+])
+AT_CAPTURE_FILE([input.txt])
+sed 's/ =>.*//' test-cases.txt > input.txt
+sed 's/.* => //' test-cases.txt > expout
+AT_CHECK([ovstest test-ovn lex < input.txt], [0], [expout])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression parser])
+dnl For lines without =>, input and expected output are identical.
+dnl For lines with =>, input precedes => and expected output follows =>.
+AT_DATA([test-cases.txt], [[
+eth.type == 0x800
+eth.type==0x800 => eth.type == 0x800
+eth.type[0..15] == 0x800 => eth.type == 0x800
+
+vlan.present
+vlan.present == 1 => vlan.present
+!(vlan.present == 0) => vlan.present
+!(vlan.present != 1) => vlan.present
+!vlan.present
+vlan.present == 0 => !vlan.present
+vlan.present != 1 => !vlan.present
+!(vlan.present == 1) => !vlan.present
+!(vlan.present != 0) => !vlan.present
+
+eth.dst[0]
+eth.dst[0] == 1 => eth.dst[0]
+eth.dst[0] != 0 => eth.dst[0]
+!(eth.dst[0] == 0) => eth.dst[0]
+!(eth.dst[0] != 1) => eth.dst[0]
+
+!eth.dst[0]
+eth.dst[0] == 0 => !eth.dst[0]
+eth.dst[0] != 1 => !eth.dst[0]
+!(eth.dst[0] == 1) => !eth.dst[0]
+!(eth.dst[0] != 0) => !eth.dst[0]
+
+vlan.tci[12..15] == 0x3
+vlan.tci == 0x3000/0xf000 => vlan.tci[12..15] == 0x3
+vlan.tci[12..15] != 0x3
+vlan.tci != 0x3000/0xf000 => vlan.tci[12..15] != 0x3
+
+!vlan.pcp => vlan.pcp == 0
+!(vlan.pcp) => vlan.pcp == 0
+vlan.pcp == 0x4
+vlan.pcp != 0x4
+vlan.pcp > 0x4
+vlan.pcp >= 0x4
+vlan.pcp < 0x4
+vlan.pcp <= 0x4
+!(vlan.pcp != 0x4) => vlan.pcp == 0x4
+!(vlan.pcp == 0x4) => vlan.pcp != 0x4
+!(vlan.pcp <= 0x4) => vlan.pcp > 0x4
+!(vlan.pcp < 0x4) => vlan.pcp >= 0x4
+!(vlan.pcp >= 0x4) => vlan.pcp < 0x4
+!(vlan.pcp > 0x4) => vlan.pcp <= 0x4
+0x4 == vlan.pcp => vlan.pcp == 0x4
+0x4 != vlan.pcp => vlan.pcp != 0x4
+0x4 < vlan.pcp => vlan.pcp > 0x4
+0x4 <= vlan.pcp => vlan.pcp >= 0x4
+0x4 > vlan.pcp => vlan.pcp < 0x4
+0x4 >= vlan.pcp => vlan.pcp <= 0x4
+!(0x4 != vlan.pcp) => vlan.pcp == 0x4
+!(0x4 == vlan.pcp) => vlan.pcp != 0x4
+!(0x4 >= vlan.pcp) => vlan.pcp > 0x4
+!(0x4 > vlan.pcp) => vlan.pcp >= 0x4
+!(0x4 <= vlan.pcp) => vlan.pcp < 0x4
+!(0x4 < vlan.pcp) => vlan.pcp <= 0x4
+
+1 < vlan.pcp < 4 => vlan.pcp > 0x1 && vlan.pcp < 0x4
+1 <= vlan.pcp <= 4 => vlan.pcp >= 0x1 && vlan.pcp <= 0x4
+1 < vlan.pcp <= 4 => vlan.pcp > 0x1 && vlan.pcp <= 0x4
+1 <= vlan.pcp < 4 => vlan.pcp >= 0x1 && vlan.pcp < 0x4
+1 <= vlan.pcp <= 4 => vlan.pcp >= 0x1 && vlan.pcp <= 0x4
+4 > vlan.pcp > 1 => vlan.pcp < 0x4 && vlan.pcp > 0x1
+4 >= vlan.pcp > 1 => vlan.pcp <= 0x4 && vlan.pcp > 0x1
+4 > vlan.pcp >= 1 => vlan.pcp < 0x4 && vlan.pcp >= 0x1
+4 >= vlan.pcp >= 1 => vlan.pcp <= 0x4 && vlan.pcp >= 0x1
+!(1 < vlan.pcp < 4) => vlan.pcp <= 0x1 || vlan.pcp >= 0x4
+!(1 <= vlan.pcp <= 4) => vlan.pcp < 0x1 || vlan.pcp > 0x4
+!(1 < vlan.pcp <= 4) => vlan.pcp <= 0x1 || vlan.pcp > 0x4
+!(1 <= vlan.pcp < 4) => vlan.pcp < 0x1 || vlan.pcp >= 0x4
+!(1 <= vlan.pcp <= 4) => vlan.pcp < 0x1 || vlan.pcp > 0x4
+!(4 > vlan.pcp > 1) => vlan.pcp >= 0x4 || vlan.pcp <= 0x1
+!(4 >= vlan.pcp > 1) => vlan.pcp > 0x4 || vlan.pcp <= 0x1
+!(4 > vlan.pcp >= 1) => vlan.pcp >= 0x4 || vlan.pcp < 0x1
+!(4 >= vlan.pcp >= 1) => vlan.pcp > 0x4 || vlan.pcp < 0x1
+
+vlan.pcp == {1, 2, 3, 4} => vlan.pcp == 0x1 || vlan.pcp == 0x2 || vlan.pcp == 0x3 || vlan.pcp == 0x4
+vlan.pcp == 1 || ((vlan.pcp == 2 || vlan.pcp == 3) || vlan.pcp == 4) => vlan.pcp == 0x1 || vlan.pcp == 0x2 || vlan.pcp == 0x3 || vlan.pcp == 0x4
+
+vlan.pcp != {1, 2, 3, 4} => vlan.pcp != 0x1 && vlan.pcp != 0x2 && vlan.pcp != 0x3 && vlan.pcp != 0x4
+vlan.pcp == 1 && ((vlan.pcp == 2 && vlan.pcp == 3) && vlan.pcp == 4) => vlan.pcp == 0x1 && vlan.pcp == 0x2 && vlan.pcp == 0x3 && vlan.pcp == 0x4
+
+vlan.pcp == 1 && !((vlan.pcp == 2 && vlan.pcp == 3) && vlan.pcp == 4) => vlan.pcp == 0x1 && (vlan.pcp != 0x2 || vlan.pcp != 0x3 || vlan.pcp != 0x4)
+vlan.pcp == 1 && (!(vlan.pcp == 2 && vlan.pcp == 3) && vlan.pcp == 4) => vlan.pcp == 0x1 && (vlan.pcp != 0x2 || vlan.pcp != 0x3) && vlan.pcp == 0x4
+vlan.pcp == 1 && !(!(vlan.pcp == 2 && vlan.pcp == 3) && vlan.pcp == 4) => vlan.pcp == 0x1 && ((vlan.pcp == 0x2 && vlan.pcp == 0x3) || vlan.pcp != 0x4)
+
+ip4.src == {10.0.0.0/8, 192.168.0.0/16, 172.16.20.0/24, 8.8.8.8} => ip4.src[24..31] == 0xa || ip4.src[16..31] == 0xc0a8 || ip4.src[8..31] == 0xac1014 || ip4.src == 0x8080808
+ip6.src == ::1 => ip6.src == 0x1
+
+ip4.src == 1.2.3.4 => ip4.src == 0x1020304
+ip4.src == ::1.2.3.4/::ffff:ffff => ip4.src == 0x1020304
+ip6.src == ::1 => ip6.src == 0x1
+
+1
+0
+!1 => 0
+!0 => 1
+
+inport == "eth0"
+!(inport != "eth0") => inport == "eth0"
+
+ip4.src == "eth0" => Integer field ip4.src is not compatible with string constant.
+inport == 1 => String field inport is not compatible with integer constant.
+
+ip4.src > {1, 2, 3} => Only == and != operators may be used with value sets.
+eth.type > 0x800 => Only == and != operators may be used with nominal field eth.type.
+vlan.present > 0 => Only == and != operators may be used with Boolean field vlan.present.
+
+inport != "eth0" => Nominal field inport may only be tested for equality (taking enclosing `!' operators into account).
+!(inport == "eth0") => Nominal field inport may only be tested for equality (taking enclosing `!' operators into account).
+eth.type != 0x800 => Nominal field eth.type may only be tested for equality (taking enclosing `!' operators into account).
+!(eth.type == 0x800) => Nominal field eth.type may only be tested for equality (taking enclosing `!' operators into account).
+
+123 == 123 => Syntax error at `123' expecting field name.
+
+123 == xyzzy => Syntax error at `xyzzy' expecting field name.
+xyzzy == 1 => Syntax error at `xyzzy' expecting field name.
+
+inport[1] == 1 => Cannot select subfield of string field inport.
+
+eth.type[] == 1 => Syntax error at `@:>@' expecting small integer.
+eth.type[::1] == 1 => Syntax error at `::1' expecting small integer.
+eth.type[18446744073709551615] == 1 => Syntax error at `18446744073709551615' expecting small integer.
+
+eth.type[5!] => Syntax error at `!' expecting `@:>@'.
+
+eth.type[5..1] => Invalid bit range 5 to 1.
+
+eth.type[12..16] => Cannot select bits 12 to 16 of 16-bit field eth.type.
+
+eth.type[10] == 1 => Cannot select subfield of nominal field eth.type.
+
+eth.type => Explicit `!= 0' is required for inequality test of multibit field against 0.
+
+!(!(vlan.pcp)) => Explicit `!= 0' is required for inequality test of multibit field against 0.
+
+123 => Syntax error at end of input expecting relational operator.
+
+123 x => Syntax error at `x' expecting relational operator.
+
+{1, "eth0"} => Syntax error at `"eth0"' expecting integer.
+
+eth.type == xyzzy => Syntax error at `xyzzy' expecting constant.
+
+(1 x) => Syntax error at `x' expecting `)'.
+
+!0x800 != eth.type => Missing parentheses around operand of !.
+
+eth.type == 0x800 || eth.type == 0x86dd && ip.proto == 17 => && and || must be parenthesized when used together.
+
+eth.dst == {} => Syntax error at `}' expecting constant.
+
+eth.src > 00:00:00:00:11:11/00:00:00:00:ff:ff => Only == and != operators may be used with masked constants. Consider using subfields instead (e.g. eth.src[0..15] > 0x1111 in place of eth.src > 00:00:00:00:11:11/00:00:00:00:ff:ff).
+
+ip4.src == ::1 => 128-bit constant is not compatible with 32-bit field ip4.src.
+
+1 == eth.type == 2 => Range expressions must have the form `x < field < y' or `x > field > y', with each `<' optionally replaced by `<=' or `>' by `>=').
+]])
+sed 's/ =>.*//' test-cases.txt > input.txt
+sed 's/.* => //' test-cases.txt > expout
+AT_CHECK([ovstest test-ovn parse-expr < input.txt], [0], [expout])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression annotation])
+dnl Input precedes =>, expected output follows =>.
+AT_DATA([test-cases.txt], [[
+ip4.src == 1.2.3.4 => ip4.src == 0x1020304 && eth.type == 0x800
+ip4.src != 1.2.3.4 => ip4.src != 0x1020304 && eth.type == 0x800
+ip.proto == 123 => ip.proto == 0x7b && (eth.type == 0x800 || eth.type == 0x86dd)
+ip.proto == {123, 234} => (ip.proto == 0x7b && (eth.type == 0x800 || eth.type == 0x86dd)) || (ip.proto == 0xea && (eth.type == 0x800 || eth.type == 0x86dd))
+ip4.src == 1.2.3.4 && ip4.dst == 5.6.7.8 => ip4.src == 0x1020304 && eth.type == 0x800 && ip4.dst == 0x5060708 && eth.type == 0x800
+
+ip => eth.type == 0x800 || eth.type == 0x86dd
+ip == 1 => eth.type == 0x800 || eth.type == 0x86dd
+ip[0] == 1 => eth.type == 0x800 || eth.type == 0x86dd
+ip > 0 => Only == and != operators may be used with nominal field ip.
+!ip => Nominal predicate ip may only be tested positively, e.g. `ip' or `ip == 1' but not `!ip' or `ip == 0'.
+ip == 0 => Nominal predicate ip may only be tested positively, e.g. `ip' or `ip == 1' but not `!ip' or `ip == 0'.
+
+vlan.present => vlan.tci[12]
+!vlan.present => !vlan.tci[12]
+
+!vlan.pcp => vlan.tci[13..15] == 0 && vlan.tci[12]
+vlan.pcp == 1 && vlan.vid == 2 => vlan.tci[13..15] == 0x1 && vlan.tci[12] && vlan.tci[0..11] == 0x2 && vlan.tci[12]
+!reg0 && !reg1 && !reg2 && !reg3 => xreg0[32..63] == 0 && xreg0[0..31] == 0 && xreg1[32..63] == 0 && xreg1[0..31] == 0
+
+ip.first_frag => ip.frag[0] && (eth.type == 0x800 || eth.type == 0x86dd) && (!ip.frag[1] || (eth.type != 0x800 && eth.type != 0x86dd))
+!ip.first_frag => !ip.frag[0] || (eth.type != 0x800 && eth.type != 0x86dd) || (ip.frag[1] && (eth.type == 0x800 || eth.type == 0x86dd))
+ip.later_frag => ip.frag[1] && (eth.type == 0x800 || eth.type == 0x86dd)
+
+bad_prereq != 0 => Error parsing expression `xyzzy' encountered as prerequisite or predicate of initial expression: Syntax error at `xyzzy' expecting field name.
+self_recurse != 0 => Error parsing expression `self_recurse != 0' encountered as prerequisite or predicate of initial expression: Recursive expansion of symbol `self_recurse'.
+mutual_recurse_1 != 0 => Error parsing expression `mutual_recurse_2 != 0' encountered as prerequisite or predicate of initial expression: Error parsing expression `mutual_recurse_1 != 0' encountered as prerequisite or predicate of initial expression: Recursive expansion of symbol `mutual_recurse_1'.
+mutual_recurse_2 != 0 => Error parsing expression `mutual_recurse_1 != 0' encountered as prerequisite or predicate of initial expression: Error parsing expression `mutual_recurse_2 != 0' encountered as prerequisite or predicate of initial expression: Recursive expansion of symbol `mutual_recurse_2'.
+]])
+sed 's/ =>.*//' test-cases.txt > input.txt
+sed 's/.* => //' test-cases.txt > expout
+AT_CHECK([ovstest test-ovn annotate-expr < input.txt], [0], [expout])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression conversion (1)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=convert 1], [0],
+ [Tested converting all 1-terminal expressions with 2 vars each of 3 bits in terms of operators == != < <= > >=.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression conversion (2)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=convert 2], [0],
+ [Tested converting 562 expressions of 2 terminals with 2 vars each of 3 bits in terms of operators == != < <= > >=.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression conversion (3)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=convert --bits=2 3], [0],
+ [Tested converting 57618 expressions of 3 terminals with 2 vars each of 2 bits in terms of operators == != < <= > >=.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression simplification])
+AT_CHECK([ovstest test-ovn exhaustive --operation=simplify --vars=2 3], [0],
+ [Tested simplifying 477138 expressions of 3 terminals with 2 vars each of 3 bits in terms of operators == != < <= > >=.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- expression normalization (1)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=normalize --vars=3 --bits=1 4], [0],
+ [Tested normalizing 1207162 expressions of 4 terminals with 3 vars each of 1 bits in terms of operators == != < <= > >=.
+])
+AT_CLEANUP
+
+dnl Second normalization run: 5 terminals, restricted to the == operator.
+dnl Numbered (2) so its title is distinct from the preceding group.
+AT_SETUP([ovn -- expression normalization (2)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=normalize --vars=3 --bits=1 --relops='==' 5], [0],
+  [Tested normalizing 368550 expressions of 5 terminals with 3 vars each of 1 bits in terms of operators ==.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- converting expressions to flows (1)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=flow --vars=2 --bits=2 --relops='==' 4], [0],
+ [Tested converting to flows 128282 expressions of 4 terminals with 2 vars each of 2 bits in terms of operators ==.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- converting expressions to flows (2)])
+AT_CHECK([ovstest test-ovn exhaustive --operation=flow --vars=3 --bits=3 --relops='==' 3], [0],
+ [Tested converting to flows 38394 expressions of 3 terminals with 3 vars each of 3 bits in terms of operators ==.
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- converting expressions to flows -- string fields])
+expr_to_flow () {
+ echo "$1" | ovstest test-ovn expr-to-flows | sort
+}
+AT_CHECK([expr_to_flow 'inport == "eth0"'], [0], [reg6=0x5
+])
+AT_CHECK([expr_to_flow 'inport == "eth1"'], [0], [reg6=0x6
+])
+AT_CHECK([expr_to_flow 'inport == "eth2"'], [0], [(no flows)
+])
+AT_CHECK([expr_to_flow 'inport == "eth0" && ip'], [0], [dnl
+ip,reg6=0x5
+ipv6,reg6=0x5
+])
+AT_CHECK([expr_to_flow 'inport == "eth1" && ip'], [0], [dnl
+ip,reg6=0x6
+ipv6,reg6=0x6
+])
+AT_CHECK([expr_to_flow 'inport == "eth2" && ip'], [0], [(no flows)
+])
+AT_CHECK([expr_to_flow 'inport == {"eth0", "eth1", "eth2", "LOCAL"}'], [0],
+[reg6=0x5
+reg6=0x6
+reg6=0xfffe
+])
+AT_CHECK([expr_to_flow 'inport == {"eth0", "eth1", "eth2"} && ip'], [0], [dnl
+ip,reg6=0x5
+ip,reg6=0x6
+ipv6,reg6=0x5
+ipv6,reg6=0x6
+])
+AT_CLEANUP
+
+AT_SETUP([ovn -- action parsing])
+dnl Text before => is input, text after => is expected output.
+AT_DATA([test-cases.txt], [[
+# Positive tests.
+drop; => actions=drop, prereqs=1
+next; => actions=resubmit(,11), prereqs=1
+output; => actions=resubmit(,64), prereqs=1
+outport="eth0"; next; outport="LOCAL"; next; => actions=set_field:0x5->reg7,resubmit(,11),set_field:0xfffe->reg7,resubmit(,11), prereqs=1
+tcp.dst=80; => actions=set_field:80->tcp_dst, prereqs=ip.proto == 0x6 && (eth.type == 0x800 || eth.type == 0x86dd)
+eth.dst[40] = 1; => actions=set_field:01:00:00:00:00:00/01:00:00:00:00:00->eth_dst, prereqs=1
+vlan.pcp = 2; => actions=set_field:0x4000/0xe000->vlan_tci, prereqs=vlan.tci[12]
+vlan.tci[13..15] = 2; => actions=set_field:0x4000/0xe000->vlan_tci, prereqs=1
+
+## Negative tests.
+
+; => Syntax error at `;'.
+xyzzy; => Syntax error at `xyzzy' expecting action.
+next; 123; => Syntax error at `123'.
+next; xyzzy; => Syntax error at `xyzzy' expecting action.
+
+# "drop;" must be on its own:
+drop; next; => Syntax error at `next' expecting end of input.
+next; drop; => Syntax error at `drop' expecting action.
+
+# Missing ";":
+next => Syntax error at end of input expecting ';'.
+
+inport[1] = 1; => Cannot select subfield of string field inport.
+ip.proto[1] = 1; => Cannot select subfield of nominal field ip.proto.
+eth.dst[40] == 1; => Syntax error at `==' expecting `='.
+ip = 1; => Can't assign to predicate symbol ip.
+ip.proto = 6; => Field ip.proto is not modifiable.
+inport = {"a", "b"}; => Assignments require a single value.
+inport = {}; => Syntax error at `}' expecting constant.
+bad_prereq = 123; => Error parsing expression `xyzzy' encountered as prerequisite or predicate of initial expression: Syntax error at `xyzzy' expecting field name.
+self_recurse = 123; => Error parsing expression `self_recurse != 0' encountered as prerequisite or predicate of initial expression: Error parsing expression `self_recurse != 0' encountered as prerequisite or predicate of initial expression: Recursive expansion of symbol `self_recurse'.
+vlan.present = 0; => Can't assign to predicate symbol vlan.present.
+]])
+sed 's/ =>.*//' test-cases.txt > input.txt
+sed 's/.* => //' test-cases.txt > expout
+AT_CHECK([ovstest test-ovn parse-actions < input.txt], [0], [expout])
+AT_CLEANUP
--- /dev/null
+/*
+ * Copyright (c) 2015 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include "command-line.h"
+#include <errno.h>
+#include <getopt.h>
+#include <sys/wait.h>
+#include "dynamic-string.h"
+#include "fatal-signal.h"
+#include "match.h"
+#include "ofp-actions.h"
+#include "ofpbuf.h"
+#include "ovn/lib/actions.h"
+#include "ovn/lib/expr.h"
+#include "ovn/lib/lex.h"
+#include "ovs-thread.h"
+#include "ovstest.h"
+#include "shash.h"
+#include "simap.h"
+#include "util.h"
+#include "openvswitch/vlog.h"
+
+/* --relops: Bitmap of the relational operators to test, in exhaustive test. */
+static unsigned int test_relops;
+
+/* --vars: Number of variables to test, in exhaustive test. */
+static int test_vars = 2;
+
+/* --bits: Number of bits per variable, in exhaustive test. */
+static int test_bits = 3;
+
+/* --operation: The operation to test, in exhaustive test. */
+static enum { OP_CONVERT, OP_SIMPLIFY, OP_NORMALIZE, OP_FLOW } operation
+ = OP_FLOW;
+
+/* --parallel: Number of parallel processes to use in test. */
+static int test_parallel = 1;
+
+/* -m, --more: Message verbosity */
+static int verbosity;
+
+/* Compares tokens 'a' and 'b' and writes a one-line diagnostic to stderr for
+ * the first difference found, checking in order: type, string, and (for
+ * integer and masked-integer tokens only) value, mask, and format.  Prints
+ * nothing when no difference is found.
+ *
+ * A format difference is tolerated when 'a' is hexadecimal, 'b' is decimal,
+ * and a->value.integer is 0. */
+static void
+compare_token(const struct lex_token *a, const struct lex_token *b)
+{
+    bool same_string;
+    bool is_integer;
+    bool zero_hex_to_decimal;
+
+    if (a->type != b->type) {
+        fprintf(stderr, "type differs: %d -> %d\n", a->type, b->type);
+        return;
+    }
+
+    /* Strings match if both are absent or both are present and equal. */
+    same_string = (a->s && b->s
+                   ? !strcmp(a->s, b->s)
+                   : !a->s && !b->s);
+    if (!same_string) {
+        fprintf(stderr, "string differs: %s -> %s\n",
+                a->s ? a->s : "(null)",
+                b->s ? b->s : "(null)");
+        return;
+    }
+
+    /* The remaining checks apply only to numeric tokens. */
+    is_integer = (a->type == LEX_T_INTEGER
+                  || a->type == LEX_T_MASKED_INTEGER);
+    if (!is_integer) {
+        return;
+    }
+
+    if (memcmp(&a->value, &b->value, sizeof a->value)) {
+        fprintf(stderr, "value differs\n");
+        return;
+    }
+
+    if (a->type == LEX_T_MASKED_INTEGER
+        && memcmp(&a->mask, &b->mask, sizeof a->mask)) {
+        fprintf(stderr, "mask differs\n");
+        return;
+    }
+
+    if (a->format == b->format) {
+        return;
+    }
+
+    zero_hex_to_decimal = (a->format == LEX_F_HEXADECIMAL
+                           && b->format == LEX_F_DECIMAL
+                           && a->value.integer == 0);
+    if (!zero_hex_to_decimal) {
+        fprintf(stderr, "format differs: %d -> %d\n",
+                a->format, b->format);
+    }
+}
+
+/* Lexes 'formatted', takes its first token, and hands it together with
+ * 'token' to compare_token(), which reports any difference on stderr. */
+static void
+check_token_round_trip(const struct lex_token *token, const char *formatted)
+{
+    struct lexer relexer;
+
+    lexer_init(&relexer, formatted);
+    lexer_get(&relexer);
+    compare_token(token, &relexer.token);
+    lexer_destroy(&relexer);
+}
+
+/* Reads stdin one line at a time.  Each line is lexed, every token is
+ * formatted with lex_token_format(), and the formatted tokens are written
+ * to stdout as one line, separated by single spaces.
+ *
+ * Every token other than LEX_T_ERROR is additionally re-lexed from its
+ * formatted text to check that formatting can be parsed back losslessly. */
+static void
+test_lex(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    struct ds line;
+    struct ds formatted;
+
+    ds_init(&line);
+    ds_init(&formatted);
+    while (!ds_get_line(&line, stdin)) {
+        struct lexer lexer;
+
+        ds_clear(&formatted);
+        lexer_init(&lexer, ds_cstr(&line));
+        for (;;) {
+            size_t start;
+
+            if (lexer_get(&lexer) == LEX_T_END) {
+                break;
+            }
+
+            /* Remember where this token's text begins in 'formatted'. */
+            start = formatted.length;
+            lex_token_format(&lexer.token, &formatted);
+
+            if (lexer.token.type != LEX_T_ERROR) {
+                check_token_round_trip(&lexer.token,
+                                       ds_cstr(&formatted) + start);
+            }
+            ds_put_char(&formatted, ' ');
+        }
+        lexer_destroy(&lexer);
+
+        /* Drop the separator appended after the last token. */
+        ds_chomp(&formatted, ' ');
+        puts(ds_cstr(&formatted));
+    }
+    ds_destroy(&line);
+    ds_destroy(&formatted);
+}
+
+/* Initializes 'symtab' and fills it with the symbols used by the expression
+ * and action tests in this file: the "inport" and "outport" string fields,
+ * the xreg/reg registers, Ethernet, VLAN, IP, ICMP, ARP, ND, TCP, UDP and
+ * SCTP fields and predicates, and a few deliberately broken symbols for
+ * negative testing.
+ *
+ * The callers in this file release the table with expr_symtab_destroy()
+ * followed by shash_destroy(). */
+static void
+create_symtab(struct shash *symtab)
+{
+ shash_init(symtab);
+
+ /* Reserve a pair of registers for the logical inport and outport. A full
+ * 32-bit register each is bigger than we need, but the expression code
+ * doesn't yet support string fields that occupy less than a full OXM. */
+ expr_symtab_add_string(symtab, "inport", MFF_REG6, NULL);
+ expr_symtab_add_string(symtab, "outport", MFF_REG7, NULL);
+
+ /* 64-bit registers, plus 32-bit "regN" names defined as their halves. */
+ expr_symtab_add_field(symtab, "xreg0", MFF_XREG0, NULL, false);
+ expr_symtab_add_field(symtab, "xreg1", MFF_XREG1, NULL, false);
+ expr_symtab_add_field(symtab, "xreg2", MFF_XREG2, NULL, false);
+
+ expr_symtab_add_subfield(symtab, "reg0", NULL, "xreg0[32..63]");
+ expr_symtab_add_subfield(symtab, "reg1", NULL, "xreg0[0..31]");
+ expr_symtab_add_subfield(symtab, "reg2", NULL, "xreg1[32..63]");
+ expr_symtab_add_subfield(symtab, "reg3", NULL, "xreg1[0..31]");
+ expr_symtab_add_subfield(symtab, "reg4", NULL, "xreg2[32..63]");
+ expr_symtab_add_subfield(symtab, "reg5", NULL, "xreg2[0..31]");
+
+ /* Ethernet. */
+ expr_symtab_add_field(symtab, "eth.src", MFF_ETH_SRC, NULL, false);
+ expr_symtab_add_field(symtab, "eth.dst", MFF_ETH_DST, NULL, false);
+ expr_symtab_add_field(symtab, "eth.type", MFF_ETH_TYPE, NULL, true);
+
+ /* VLAN: the subfields of vlan.tci name "vlan.present" as prerequisite. */
+ expr_symtab_add_field(symtab, "vlan.tci", MFF_VLAN_TCI, NULL, false);
+ expr_symtab_add_predicate(symtab, "vlan.present", "vlan.tci[12]");
+ expr_symtab_add_subfield(symtab, "vlan.pcp", "vlan.present",
+ "vlan.tci[13..15]");
+ expr_symtab_add_subfield(symtab, "vlan.vid", "vlan.present",
+ "vlan.tci[0..11]");
+
+ /* IP fields common to IPv4 and IPv6. */
+ expr_symtab_add_predicate(symtab, "ip4", "eth.type == 0x800");
+ expr_symtab_add_predicate(symtab, "ip6", "eth.type == 0x86dd");
+ expr_symtab_add_predicate(symtab, "ip", "ip4 || ip6");
+ expr_symtab_add_field(symtab, "ip.proto", MFF_IP_PROTO, "ip", true);
+ expr_symtab_add_field(symtab, "ip.dscp", MFF_IP_DSCP, "ip", false);
+ expr_symtab_add_field(symtab, "ip.ecn", MFF_IP_ECN, "ip", false);
+ expr_symtab_add_field(symtab, "ip.ttl", MFF_IP_TTL, "ip", false);
+
+ /* IPv4 and ICMPv4. */
+ expr_symtab_add_field(symtab, "ip4.src", MFF_IPV4_SRC, "ip4", false);
+ expr_symtab_add_field(symtab, "ip4.dst", MFF_IPV4_DST, "ip4", false);
+
+ expr_symtab_add_predicate(symtab, "icmp4", "ip4 && ip.proto == 1");
+ expr_symtab_add_field(symtab, "icmp4.type", MFF_ICMPV4_TYPE, "icmp4",
+ false);
+ expr_symtab_add_field(symtab, "icmp4.code", MFF_ICMPV4_CODE, "icmp4",
+ false);
+
+ /* IPv6 and ICMPv6. */
+ expr_symtab_add_field(symtab, "ip6.src", MFF_IPV6_SRC, "ip6", false);
+ expr_symtab_add_field(symtab, "ip6.dst", MFF_IPV6_DST, "ip6", false);
+ expr_symtab_add_field(symtab, "ip6.label", MFF_IPV6_LABEL, "ip6", false);
+
+ expr_symtab_add_predicate(symtab, "icmp6", "ip6 && ip.proto == 58");
+ expr_symtab_add_field(symtab, "icmp6.type", MFF_ICMPV6_TYPE, "icmp6",
+ true);
+ expr_symtab_add_field(symtab, "icmp6.code", MFF_ICMPV6_CODE, "icmp6",
+ true);
+
+ expr_symtab_add_predicate(symtab, "icmp", "icmp4 || icmp6");
+
+ /* IP fragmentation. */
+ expr_symtab_add_field(symtab, "ip.frag", MFF_IP_FRAG, "ip", false);
+ expr_symtab_add_predicate(symtab, "ip.is_frag", "ip.frag[0]");
+ expr_symtab_add_predicate(symtab, "ip.later_frag", "ip.frag[1]");
+ expr_symtab_add_predicate(symtab, "ip.first_frag", "ip.is_frag && !ip.later_frag");
+
+ /* ARP. */
+ expr_symtab_add_predicate(symtab, "arp", "eth.type == 0x806");
+ expr_symtab_add_field(symtab, "arp.op", MFF_ARP_OP, "arp", false);
+ expr_symtab_add_field(symtab, "arp.spa", MFF_ARP_SPA, "arp", false);
+ expr_symtab_add_field(symtab, "arp.sha", MFF_ARP_SHA, "arp", false);
+ expr_symtab_add_field(symtab, "arp.tpa", MFF_ARP_TPA, "arp", false);
+ expr_symtab_add_field(symtab, "arp.tha", MFF_ARP_THA, "arp", false);
+
+ /* Neighbor discovery: ICMPv6 types 135 and 136 with code 0. */
+ expr_symtab_add_predicate(symtab, "nd", "icmp6.type == {135, 136} && icmp6.code == 0");
+ expr_symtab_add_field(symtab, "nd.target", MFF_ND_TARGET, "nd", false);
+ expr_symtab_add_field(symtab, "nd.sll", MFF_ND_SLL,
+ "nd && icmp6.type == 135", false);
+ expr_symtab_add_field(symtab, "nd.tll", MFF_ND_TLL,
+ "nd && icmp6.type == 136", false);
+
+ /* Transport protocols. */
+ expr_symtab_add_predicate(symtab, "tcp", "ip.proto == 6");
+ expr_symtab_add_field(symtab, "tcp.src", MFF_TCP_SRC, "tcp", false);
+ expr_symtab_add_field(symtab, "tcp.dst", MFF_TCP_DST, "tcp", false);
+ expr_symtab_add_field(symtab, "tcp.flags", MFF_TCP_FLAGS, "tcp", false);
+
+ expr_symtab_add_predicate(symtab, "udp", "ip.proto == 17");
+ expr_symtab_add_field(symtab, "udp.src", MFF_UDP_SRC, "udp", false);
+ expr_symtab_add_field(symtab, "udp.dst", MFF_UDP_DST, "udp", false);
+
+ expr_symtab_add_predicate(symtab, "sctp", "ip.proto == 132");
+ expr_symtab_add_field(symtab, "sctp.src", MFF_SCTP_SRC, "sctp", false);
+ expr_symtab_add_field(symtab, "sctp.dst", MFF_SCTP_DST, "sctp", false);
+
+ /* For negative testing. */
+ expr_symtab_add_field(symtab, "bad_prereq", MFF_XREG0, "xyzzy", false);
+ expr_symtab_add_field(symtab, "self_recurse", MFF_XREG0,
+ "self_recurse != 0", false);
+ expr_symtab_add_field(symtab, "mutual_recurse_1", MFF_XREG0,
+ "mutual_recurse_2 != 0", false);
+ expr_symtab_add_field(symtab, "mutual_recurse_2", MFF_XREG0,
+ "mutual_recurse_1 != 0", false);
+}
+
+/* Shared body of the expression commands.  Reads one expression per test
+ * line from stdin and runs it through the first 'steps' stages:
+ *
+ *     always      parse
+ *     steps > 0   annotate
+ *     steps > 1   simplify
+ *     steps > 2   normalize (and assert that the result is normalized)
+ *     steps > 3   convert to matches and print those
+ *
+ * With 'steps' of 3 or less the resulting expression is printed.  If parsing
+ * or annotation fails, the error message is printed instead. */
+static void
+test_parse_expr__(int steps)
+{
+    struct shash symtab;
+    struct simap ports;
+    struct ds line;
+
+    create_symtab(&symtab);
+
+    simap_init(&ports);
+    simap_put(&ports, "eth0", 5);
+    simap_put(&ports, "eth1", 6);
+    simap_put(&ports, "LOCAL", ofp_to_u16(OFPP_LOCAL));
+
+    ds_init(&line);
+    while (!ds_get_test_line(&line, stdin)) {
+        char *error;
+        struct expr *expr = expr_parse_string(ds_cstr(&line), &symtab,
+                                              &error);
+
+        if (!error && steps > 0) {
+            expr = expr_annotate(expr, &symtab, &error);
+        }
+
+        if (error) {
+            puts(error);
+            free(error);
+        } else {
+            if (steps > 1) {
+                expr = expr_simplify(expr);
+            }
+            if (steps > 2) {
+                expr = expr_normalize(expr);
+                ovs_assert(expr_is_normalized(expr));
+            }
+
+            if (steps > 3) {
+                struct hmap matches;
+
+                expr_to_matches(expr, &ports, &matches);
+                expr_matches_print(&matches, stdout);
+                expr_matches_destroy(&matches);
+            } else {
+                struct ds text = DS_EMPTY_INITIALIZER;
+
+                expr_format(expr, &text);
+                puts(ds_cstr(&text));
+                ds_destroy(&text);
+            }
+        }
+        expr_destroy(expr);
+    }
+    ds_destroy(&line);
+
+    simap_destroy(&ports);
+    expr_symtab_destroy(&symtab);
+    shash_destroy(&symtab);
+}
+
+/* "parse-expr" command: parse only (no further processing steps). */
+static void
+test_parse_expr(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    const int steps = 0;
+
+    test_parse_expr__(steps);
+}
+
+/* "annotate-expr" command: parse, then annotate. */
+static void
+test_annotate_expr(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    const int steps = 1;
+
+    test_parse_expr__(steps);
+}
+
+/* "simplify-expr" command: parse, annotate, then simplify. */
+static void
+test_simplify_expr(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    const int steps = 2;
+
+    test_parse_expr__(steps);
+}
+
+/* "normalize-expr" command: parse, annotate, simplify, then normalize. */
+static void
+test_normalize_expr(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    const int steps = 3;
+
+    test_parse_expr__(steps);
+}
+
+/* "expr-to-flows" command: runs every step and prints the resulting matches
+ * rather than the expression. */
+static void
+test_expr_to_flows(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    const int steps = 4;
+
+    test_parse_expr__(steps);
+}
+\f
+/* Evaluate an expression. */
+
+static bool evaluate_expr(const struct expr *, unsigned int subst, int n_bits);
+
+/* Evaluates the operands of AND/OR expression 'expr' in order, with variable
+ * values taken from 'subst' as described for evaluate_expr().  Stops at the
+ * first operand whose value equals 'short_circuit' and returns
+ * 'short_circuit'; if no operand does, returns '!short_circuit'. */
+static bool
+evaluate_andor_expr(const struct expr *expr, unsigned int subst, int n_bits,
+                    bool short_circuit)
+{
+    const struct expr *operand;
+    bool result = !short_circuit;
+
+    LIST_FOR_EACH (operand, node, &expr->andor) {
+        if (evaluate_expr(operand, subst, n_bits) == short_circuit) {
+            result = short_circuit;
+            break;
+        }
+    }
+    return result;
+}
+
+/* Evaluates comparison 'expr'.  The variable is chosen by the first letter of
+ * the comparison symbol's name ('a' is variable 0, 'b' is variable 1, ...);
+ * its 'n_bits'-bit value is extracted from 'subst', masked with the
+ * comparison's mask and compared against the comparison's value using its
+ * relational operator. */
+static bool
+evaluate_cmp_expr(const struct expr *expr, unsigned int subst, int n_bits)
+{
+    unsigned var_mask = (1u << n_bits) - 1;
+    int var_idx = expr->cmp.symbol->name[0] - 'a';
+    unsigned int rhs = ntohll(expr->cmp.value.integer);
+    unsigned int mask = ntohll(expr->cmp.mask.integer);
+    unsigned int lhs = (subst >> (var_idx * n_bits)) & var_mask;
+
+    /* Mask and value must fit in the variable, and the value may only have
+     * bits that the mask covers. */
+    ovs_assert(!(mask & ~var_mask));
+    ovs_assert(!(rhs & ~var_mask));
+    ovs_assert(!(rhs & ~mask));
+
+    lhs &= mask;
+    switch (expr->cmp.relop) {
+    case EXPR_R_EQ: return lhs == rhs;
+    case EXPR_R_NE: return lhs != rhs;
+    case EXPR_R_LT: return lhs < rhs;
+    case EXPR_R_LE: return lhs <= rhs;
+    case EXPR_R_GT: return lhs > rhs;
+    case EXPR_R_GE: return lhs >= rhs;
+    default: OVS_NOT_REACHED();
+    }
+}
+
+/* Returns the Boolean value of 'expr'.
+ *
+ * The variables in 'expr' must be named "a", "b", "c", and so on, each
+ * 'n_bits' bits wide.  Their values are packed into 'subst': "a" occupies the
+ * least-significant 'n_bits' bits, "b" the next 'n_bits' bits, and so forth. */
+static bool
+evaluate_expr(const struct expr *expr, unsigned int subst, int n_bits)
+{
+    if (expr->type == EXPR_T_CMP) {
+        return evaluate_cmp_expr(expr, subst, n_bits);
+    } else if (expr->type == EXPR_T_AND) {
+        /* A false operand decides an AND. */
+        return evaluate_andor_expr(expr, subst, n_bits, false);
+    } else if (expr->type == EXPR_T_OR) {
+        /* A true operand decides an OR. */
+        return evaluate_andor_expr(expr, subst, n_bits, true);
+    } else if (expr->type == EXPR_T_BOOLEAN) {
+        return expr->boolean;
+    } else {
+        OVS_NOT_REACHED();
+    }
+}
+
+/* "evaluate-expr A B C" command: reads expressions over the 3-bit variables
+ * "a", "b" and "c" from stdin, one per test line, and prints on stdout the
+ * value (0 or 1) of each with a = A, b = B and c = C, or the error message
+ * for an expression that cannot be parsed or annotated.
+ *
+ * ctx->argv[1], [2] and [3] supply A, B and C. */
+static void
+test_evaluate_expr(struct ovs_cmdl_context *ctx)
+{
+    if (ctx->argc < 4) {
+        ovs_fatal(0, "evaluate-expr requires three arguments: A B C");
+    }
+
+    int a = atoi(ctx->argv[1]);
+    int b = atoi(ctx->argv[2]);
+    int c = atoi(ctx->argv[3]);
+
+    /* Pack the values 3 bits apiece, "a" in the low bits, as evaluate_expr()
+     * expects.  All of the operators must be bitwise ORs: a logical OR would
+     * collapse the whole value to 0 or 1. */
+    unsigned int subst = a | (b << 3) | (c << 6);
+    struct shash symtab;
+    struct ds input;
+
+    /* Each variable is the low 3 bits of its own register. */
+    shash_init(&symtab);
+    expr_symtab_add_field(&symtab, "xreg0", MFF_XREG0, NULL, false);
+    expr_symtab_add_field(&symtab, "xreg1", MFF_XREG1, NULL, false);
+    expr_symtab_add_field(&symtab, "xreg2", MFF_XREG2, NULL, false);
+    expr_symtab_add_subfield(&symtab, "a", NULL, "xreg0[0..2]");
+    expr_symtab_add_subfield(&symtab, "b", NULL, "xreg1[0..2]");
+    expr_symtab_add_subfield(&symtab, "c", NULL, "xreg2[0..2]");
+
+    /* NOTE(review): evaluate_cmp_expr() picks the variable from the first
+     * letter of the comparison's symbol name.  Confirm that comparisons on
+     * the subfields "a", "b" and "c" still carry those names once parsed and
+     * annotated. */
+    ds_init(&input);
+    while (!ds_get_test_line(&input, stdin)) {
+        struct expr *expr;
+        char *error;
+
+        expr = expr_parse_string(ds_cstr(&input), &symtab, &error);
+        if (!error) {
+            expr = expr_annotate(expr, &symtab, &error);
+        }
+        if (!error) {
+            printf("%d\n", evaluate_expr(expr, subst, 3));
+        } else {
+            puts(error);
+            free(error);
+        }
+        expr_destroy(expr);
+    }
+    ds_destroy(&input);
+
+    expr_symtab_destroy(&symtab);
+    shash_destroy(&symtab);
+}
+\f
+/* Compositions.
+ *
+ * The "compositions" of a positive integer N are all of the ways that one can
+ * add up positive integers to sum to N. For example, the compositions of 3
+ * are 3, 2+1, 1+2, and 1+1+1.
+ *
+ * We use compositions to find all the ways to break up N terms of a Boolean
+ * expression into subexpressions. Suppose we want to generate all expressions
+ * with 3 terms. The compositions of 3 (ignoring 3 itself) provide the
+ * possibilities (x && x) || x, x || (x && x), and x || x || x. (Of course one
+ * can exchange && for || in each case.) One must recursively compose the
+ * sub-expressions whose values are 3 or greater; that is what the "tree shape"
+ * concept later covers.
+ *
+ * To iterate through all compositions of, e.g., 5:
+ *
+ * unsigned int state;
+ * int s[5];
+ * int n;
+ *
+ * for (n = first_composition(ARRAY_SIZE(s), &state, s); n > 0;
+ * n = next_composition(&state, s, n)) {
+ * // Do something with composition 's' with 'n' elements.
+ * }
+ *
+ * Algorithm from D. E. Knuth, _The Art of Computer Programming, Vol. 4A:
+ * Combinatorial Algorithms, Part 1_, section 7.2.1.1, answer to exercise
+ * 12(a).
+ */
+
+/* Starts an iteration through the compositions of 'n'.
+ *
+ * Stores the first composition, which is always just 'n' itself, in 's' and
+ * returns its number of elements, which is therefore always 1.
+ *
+ * Also resets '*state', the iterator's internal bookkeeping.  The caller must
+ * keep '*state' and 's' intact between calls to next_composition().
+ *
+ * 's' must have room for at least 'n' elements. */
+static int
+first_composition(int n, unsigned int *state, int s[])
+{
+    const int n_elems = 1;
+
+    s[0] = n;
+    *state = 0;
+    return n_elems;
+}
+
+/* Advances 's', with 'sn' elements, to the next composition and returns the
+ * number of elements in this new composition, or 0 if no compositions are
+ * left.  'state' is the same internal state passed to first_composition().
+ *
+ * '*state' counts the calls made so far; its low bit selects which of the two
+ * alternating kinds of step is taken.  The function never touches memory
+ * before s[0]: every end-of-iteration check is made before the access that
+ * would otherwise land on s[-1]. */
+static int
+next_composition(unsigned int *state, int s[], int sn)
+{
+    int j = sn - 1;
+
+    if (j < 0) {
+        /* No elements at all, so there is nothing to advance. */
+        return 0;
+    }
+
+    if (++*state & 1) {
+        if (s[j] > 1) {
+            /* Split a 1 off the last element. */
+            s[j]--;
+            s[j + 1] = 1;
+            j++;
+        } else {
+            if (!j) {
+                /* A lone element that cannot be split (n <= 1): it is the
+                 * only composition, and there is no s[j - 1] to merge
+                 * into. */
+                return 0;
+            }
+            /* Merge the trailing 1 into its left neighbor. */
+            j--;
+            s[j]++;
+        }
+    } else {
+        if (!j) {
+            /* A single element has no left neighbor to work on. */
+            return 0;
+        }
+        if (s[j - 1] > 1) {
+            /* Split a 1 off the next-to-last element. */
+            s[j - 1]--;
+            s[j + 1] = s[j];
+            s[j] = 1;
+            j++;
+        } else {
+            j--;
+            s[j] = s[j + 1];
+            if (!j) {
+                /* Iteration is complete.  Check before incrementing
+                 * s[j - 1], which here would be s[-1]. */
+                return 0;
+            }
+            s[j - 1]++;
+        }
+    }
+    return j + 1;
+}
+
+/* "composition N" command: prints every composition of N on stdout, one per
+ * line with the elements separated by spaces.  N is ctx->argv[1] and must be
+ * positive and no larger than the scratch array can hold. */
+static void
+test_composition(struct ovs_cmdl_context *ctx)
+{
+    int n = atoi(ctx->argv[1]);
+    unsigned int state;
+    int s[50];
+
+    /* A composition of 'n' has up to 'n' elements, so 'n' must fit in 's'. */
+    if (n < 1 || n > (int) ARRAY_SIZE(s)) {
+        ovs_fatal(0, "N must be between 1 and %d", (int) ARRAY_SIZE(s));
+    }
+
+    for (int sn = first_composition(n, &state, s); sn;
+         sn = next_composition(&state, s, sn)) {
+        for (int i = 0; i < sn; i++) {
+            printf("%d%c", s[i], i == sn - 1 ? '\n' : ' ');
+        }
+    }
+}
+\f
+/* Tree shapes.
+ *
+ * This code generates all possible Boolean expressions with a specified number
+ * of terms N (equivalent to the number of external nodes in a tree).
+ *
+ * See test_tree_shape() for a simple example. */
+
+/* An array of these structures describes the shape of a tree.
+ *
+ * A single element of struct tree_shape describes a single node in the tree.
+ * The node has 'sn' direct children. From left to right, for i in 0...sn-1,
+ * s[i] is 1 if the child is a leaf node, otherwise the child is a subtree and
+ * s[i] is the number of leaf nodes within that subtree. In the latter case,
+ * the subtree is described by another struct tree_shape within the enclosing
+ * array. The tree_shapes are ordered in the array in in-order.
+ */
+struct tree_shape {
+ unsigned int state; /* Iterator state for next_composition(). */
+ int s[50]; /* Per child: 1 for a leaf, else leaves in its subtree. */
+ int sn; /* Number of direct children, i.e. elements used in 's'. */
+};
+
+/* Recursive helper for init_tree_shape() and next_tree_shape().
+ *
+ * A subtree of 'n' <= 2 leaves needs no "struct tree_shape" of its own, so
+ * nothing is written and 0 is returned.  Otherwise ts[0] is set to the second
+ * composition of 'n', the elements that follow it are initialized for each
+ * of its parts in turn, and the total number of elements used is returned. */
+static int
+init_tree_shape__(struct tree_shape ts[], int n)
+{
+    int used;
+    int first;
+
+    if (n <= 2) {
+        return 0;
+    }
+
+    /* The first composition is 'n' itself, which is of no use as a shape, so
+     * step straight past it. */
+    first = first_composition(n, &ts->state, ts->s);
+    ts->sn = next_composition(&ts->state, ts->s, first);
+
+    used = 1;
+    for (int i = 0; i < ts->sn; i++) {
+        used += init_tree_shape__(ts + used, ts->s[i]);
+    }
+    return used;
+}
+
+/* Sets up 'ts[]' as the first of all the possible shapes of trees with 'n'
+ * leaves and returns how many "struct tree_shape"s that first shape uses.
+ *
+ * Trees of one or two leaves have a single shape, a root whose children are
+ * all leaves, which is written out directly. */
+static int
+init_tree_shape(struct tree_shape ts[], int n)
+{
+    if (n == 1 || n == 2) {
+        ts->sn = n;
+        for (int i = 0; i < n; i++) {
+            ts->s[i] = 1;
+        }
+        return 1;
+    }
+    return init_tree_shape__(ts, n);
+}
+
+/* Moves 'ts', currently 'n_tses' elements long, on to the next possible tree
+ * shape with the number of leaves that was passed to init_tree_shape().
+ * Returns the number of "struct tree_shape"s in that next shape, or 0 once
+ * every shape has been visited. */
+static int
+next_tree_shape(struct tree_shape ts[], int n_tses)
+{
+    /* The two-leaf tree has only the one shape. */
+    if (n_tses == 1 && ts->sn == 2 && ts->s[0] == 1 && ts->s[1] == 1) {
+        return 0;
+    }
+
+    /* Working backward from the last element, find one that can still be
+     * advanced; elements that are exhausted are dropped along the way. */
+    for (; n_tses > 0; n_tses--) {
+        struct tree_shape *last = &ts[n_tses - 1];
+
+        if (last->sn > 1) {
+            last->sn = next_composition(&last->state, last->s, last->sn);
+        } else {
+            last->sn = 0;
+        }
+        if (!last->sn) {
+            continue;
+        }
+
+        /* Rebuild the shapes of the subtrees below the advanced element. */
+        for (int i = 0; i < last->sn; i++) {
+            n_tses += init_tree_shape__(&ts[n_tses], last->s[i]);
+        }
+        break;
+    }
+    return n_tses;
+}
+
+/* Prints the 'n_tses' elements of 'ts' on stdout, separated by ", ".  Each
+ * element is printed as its child counts run together, with any count above
+ * 9 wrapped in parentheses to keep it unambiguous. */
+static void
+print_tree_shape(const struct tree_shape ts[], int n_tses)
+{
+    for (int i = 0; i < n_tses; i++) {
+        const struct tree_shape *node = &ts[i];
+
+        if (i > 0) {
+            fputs(", ", stdout);
+        }
+        for (int j = 0; j < node->sn; j++) {
+            int count = node->s[j];
+
+            if (count <= 9) {
+                printf("%d", count);
+            } else {
+                printf("(%d)", count);
+            }
+        }
+    }
+}
+
+/* "tree-shape N" command: prints every tree shape with N leaves on stdout,
+ * one per line.  N is ctx->argv[1] and must not exceed what the scratch array
+ * can hold. */
+static void
+test_tree_shape(struct ovs_cmdl_context *ctx)
+{
+    int n = atoi(ctx->argv[1]);
+    struct tree_shape ts[50];
+    int n_tses;
+
+    /* 'ts' and each element's 's' array hold 50 entries apiece; a larger N
+     * would overrun them. */
+    if (n > (int) ARRAY_SIZE(ts)) {
+        ovs_fatal(0, "N must be at most %d", (int) ARRAY_SIZE(ts));
+    }
+
+    for (n_tses = init_tree_shape(ts, n); n_tses;
+         n_tses = next_tree_shape(ts, n_tses)) {
+        print_tree_shape(ts, n_tses);
+        putchar('\n');
+    }
+}
+\f
+/* Iteration through all possible terminal expressions (e.g. EXPR_T_CMP and
+ * EXPR_T_BOOLEAN expressions).
+ *
+ * Given a tree shape, this allows the code to try all possible ways to plug in
+ * terms.
+ *
+ * Example use:
+ *
+ * struct expr terminal;
+ * const struct expr_symbol *vars = ...;
+ * int n_vars = ...;
+ * int n_bits = ...;
+ *
+ * init_terminal(&terminal, vars[0]);
+ * do {
+ * // Something with 'terminal'.
+ * } while (next_terminal(&terminal, vars, n_vars, n_bits));
+ */
+
+/* Makes 'expr' the first terminal expression in iteration order: a comparison
+ * of 'var' against value 0 under mask 1, using the lowest-numbered operator
+ * enabled in 'test_relops'.  'var' should be the first of the variables being
+ * tested. */
+static void
+init_terminal(struct expr *expr, const struct expr_symbol *var)
+{
+    /* Zero value and mask in full before filling in their integer parts. */
+    memset(&expr->cmp.value, 0, sizeof expr->cmp.value);
+    memset(&expr->cmp.mask, 0, sizeof expr->cmp.mask);
+    expr->cmp.value.integer = htonll(0);
+    expr->cmp.mask.integer = htonll(1);
+
+    expr->cmp.relop = rightmost_1bit_idx(test_relops);
+    expr->cmp.symbol = var;
+    expr->type = EXPR_T_CMP;
+}
+
+/* Clears the rightmost run of consecutive 1-bits in 'x' and returns the
+ * result, e.g. 01011100 => 01000000.  See H. S. Warren, Jr., _Hacker's
+ * Delight_, 2nd ed., section 2-1. */
+static unsigned int
+turn_off_rightmost_1s(unsigned int x)
+{
+    /* Adding the lowest set bit carries all the way through the rightmost run
+     * of 1s; ANDing with 'x' then discards the carry-out bit. */
+    unsigned int lowest_1bit = x & -x;
+    unsigned int carried = x + lowest_1bit;
+
+    return carried & x;
+}
+
+/* Returns the variable that follows 'symbol' in the 'n_vars' elements of
+ * 'vars', or NULL if 'symbol' is the last one.  'symbol' must be one of the
+ * elements of 'vars'. */
+static const struct expr_symbol *
+next_var(const struct expr_symbol *symbol,
+         const struct expr_symbol *vars[], int n_vars)
+{
+    int i = 0;
+
+    while (i < n_vars && vars[i] != symbol) {
+        i++;
+    }
+    if (i >= n_vars) {
+        OVS_NOT_REACHED();
+    }
+    return i + 1 < n_vars ? vars[i + 1] : NULL;
+}
+
+/* Returns the lowest-numbered operator enabled in 'test_relops' that is
+ * numbered above 'relop', wrapping around to the lowest enabled operator when
+ * there is none. */
+static enum expr_relop
+next_relop(enum expr_relop relop)
+{
+    unsigned int up_to_relop = (1u << (relop + 1)) - 1;
+    unsigned int candidates = test_relops & ~up_to_relop;
+
+    if (!candidates) {
+        candidates = test_relops;
+    }
+    return rightmost_1bit_idx(candidates);
+}
+
+/* Advances 'expr' to the next possible terminal expression within the 'n_vars'
+ * variables of 'n_bits' bits each in 'vars[]'.
+ *
+ * Comparisons step through every acceptable (mask, value) pair first, then
+ * through the operators enabled in 'test_relops', then through the variables.
+ * After the last comparison come the Boolean constants false and then true.
+ *
+ * Returns true if 'expr' was advanced, false if 'expr' was already the last
+ * terminal (Boolean true), in which case 'expr' is left unchanged. */
+static bool
+next_terminal(struct expr *expr, const struct expr_symbol *vars[], int n_vars,
+ int n_bits)
+{
+ if (expr->type == EXPR_T_BOOLEAN) {
+ if (expr->boolean) {
+ return false;
+ } else {
+ expr->boolean = true;
+ return true;
+ }
+ }
+
+ unsigned int next;
+
+ /* Treat mask and value as one counter: mask in the high 'n_bits' bits,
+ * value in the low 'n_bits' bits. */
+ next = (ntohll(expr->cmp.value.integer)
+ + (ntohll(expr->cmp.mask.integer) << n_bits));
+ for (;;) {
+ next++;
+ unsigned m = next >> n_bits;
+ unsigned v = next & ((1u << n_bits) - 1);
+ if (next >= (1u << (2 * n_bits))) {
+ /* Counter overflowed: go to the next operator.  If that wrapped
+ * around to an operator no higher than the old one, go to the next
+ * variable as well, or to Boolean false after the last variable. */
+ enum expr_relop old_relop = expr->cmp.relop;
+ expr->cmp.relop = next_relop(old_relop);
+ if (expr->cmp.relop <= old_relop) {
+ expr->cmp.symbol = next_var(expr->cmp.symbol,vars, n_vars);
+ if (!expr->cmp.symbol) {
+ expr->type = EXPR_T_BOOLEAN;
+ expr->boolean = false;
+ return true;
+ }
+ }
+ /* Restart the counter; the increment at the top of the loop makes 1
+ * the first candidate tried. */
+ next = 0;
+ } else if (m == 0) {
+ /* Skip: empty mask is pathological. */
+ } else if (v & ~m) {
+ /* Skip: 1-bits in value correspond to 0-bits in mask. */
+ } else if (turn_off_rightmost_1s(m)
+ && (expr->cmp.relop != EXPR_R_EQ &&
+ expr->cmp.relop != EXPR_R_NE)) {
+ /* Skip: can't have discontiguous mask for > >= < <=. */
+ } else {
+ expr->cmp.value.integer = htonll(v);
+ expr->cmp.mask.integer = htonll(m);
+ return true;
+ }
+ }
+}
+\f
+/* Creates a placeholder terminal (Boolean true), stores it in the slot that
+ * '*terminalp' points to, advances '*terminalp' to the following slot, and
+ * returns the new terminal. */
+static struct expr *
+make_terminal(struct expr ***terminalp)
+{
+    struct expr *leaf = expr_create_boolean(true);
+    struct expr **slot = *terminalp;
+
+    *slot = leaf;
+    *terminalp = slot + 1;
+    return leaf;
+}
+
+/* Builds a subtree of 'n' leaves, where 'n' must be 1 or 2.  One leaf is just
+ * a terminal; two leaves are a 'type' (AND or OR) node over two terminals.
+ * Each terminal created is recorded through 'terminalp' by
+ * make_terminal(). */
+static struct expr *
+build_simple_tree(enum expr_type type, int n, struct expr ***terminalp)
+{
+    if (n == 1) {
+        return make_terminal(terminalp);
+    }
+    if (n != 2) {
+        OVS_NOT_REACHED();
+    }
+
+    struct expr *parent = expr_create_andor(type);
+    int remaining = 2;
+
+    while (remaining-- > 0) {
+        struct expr *leaf = make_terminal(terminalp);
+
+        list_push_back(&parent->andor, &leaf->node);
+    }
+    return parent;
+}
+
+/* Builds the expression tree for the tree shape element at '*tsp', using
+ * 'type' (AND or OR) for its root, and advances '*tsp' past every element
+ * consumed.  Children take the opposite type from their parent.  A child of
+ * more than two leaves is described by a later tree shape element; smaller
+ * children are built directly.  Each terminal created is recorded through
+ * 'terminalp'. */
+static struct expr *
+build_tree_shape(enum expr_type type, const struct tree_shape **tsp,
+                 struct expr ***terminalp)
+{
+    const struct tree_shape *node = (*tsp)++;
+    enum expr_type child_type = type == EXPR_T_AND ? EXPR_T_OR : EXPR_T_AND;
+    struct expr *parent = expr_create_andor(type);
+
+    for (int i = 0; i < node->sn; i++) {
+        int n_leaves = node->s[i];
+        struct expr *child;
+
+        if (n_leaves > 2) {
+            child = build_tree_shape(child_type, tsp, terminalp);
+        } else {
+            child = build_simple_tree(child_type, n_leaves, terminalp);
+        }
+        list_push_back(&parent->andor, &child->node);
+    }
+    return parent;
+}
+
+/* A classifier rule as inserted by test_tree_shape_exhaustively(); allocated
+ * there and released by free_rule(). */
+struct test_rule {
+ struct cls_rule cr;
+};
+
+/* Destroys the classifier rule embedded in 'test_rule' and then frees
+ * 'test_rule' itself. */
+static void
+free_rule(struct test_rule *test_rule)
+{
+    struct cls_rule *cr = &test_rule->cr;
+
+    cls_rule_destroy(cr);
+    free(test_rule);
+}
+
+/* Tests the global 'operation' on 'expr' for every combination of terminals
+ * that the iteration below visits.  'terminals[]' holds pointers to the
+ * 'n_terminals' terminal nodes inside 'expr'; they are rewritten in place.
+ * 'vars[]' holds the 'n_vars' variables of 'n_bits' bits each that terminals
+ * may refer to, and 'symtab' is used to parse expressions back for
+ * OP_CONVERT.
+ *
+ * For each combination, 'expr' is transformed according to 'operation', and
+ * the original and transformed expressions are evaluated for every possible
+ * assignment of the variables.  For OP_FLOW the transformed expression is
+ * also converted to matches, which are looked up in a classifier for every
+ * assignment.  Any disagreement is reported on stderr and ends the process
+ * with EXIT_FAILURE.
+ *
+ * Returns the number of combinations tested. */
+static int
+test_tree_shape_exhaustively(struct expr *expr, struct shash *symtab,
+ struct expr *terminals[], int n_terminals,
+ const struct expr_symbol *vars[], int n_vars,
+ int n_bits)
+{
+ int n_tested = 0;
+
+ const unsigned int var_mask = (1u << n_bits) - 1;
+ for (int i = 0; i < n_terminals; i++) {
+ init_terminal(terminals[i], vars[0]);
+ }
+
+ struct ds s = DS_EMPTY_INITIALIZER;
+ struct flow f;
+ memset(&f, 0, sizeof f);
+ for (;;) {
+ /* Advance the terminals like an odometer, last terminal fastest.  The
+ * loop ends the whole test when the carry reaches index 0, so
+ * terminals[0] itself is never advanced.  Because terminals are
+ * advanced before the first test, the all-initial combination is not
+ * tested either. */
+ for (int i = n_terminals - 1; ; i--) {
+ if (!i) {
+ ds_destroy(&s);
+ return n_tested;
+ }
+ if (next_terminal(terminals[i], vars, n_vars, n_bits)) {
+ break;
+ }
+ init_terminal(terminals[i], vars[0]);
+ }
+ ovs_assert(expr_honors_invariants(expr));
+
+ n_tested++;
+
+ /* Produce 'modified' from 'expr' according to 'operation'. */
+ struct expr *modified;
+ if (operation == OP_CONVERT) {
+ /* Format to a string and parse it back. */
+ ds_clear(&s);
+ expr_format(expr, &s);
+
+ char *error;
+ modified = expr_parse_string(ds_cstr(&s), symtab, &error);
+ if (error) {
+ fprintf(stderr, "%s fails to parse (%s)\n",
+ ds_cstr(&s), error);
+ exit(EXIT_FAILURE);
+ }
+ } else if (operation >= OP_SIMPLIFY) {
+ modified = expr_simplify(expr_clone(expr));
+ ovs_assert(expr_honors_invariants(modified));
+
+ if (operation >= OP_NORMALIZE) {
+ modified = expr_normalize(modified);
+ ovs_assert(expr_is_normalized(modified));
+ }
+ }
+
+ /* For OP_FLOW, also load the matches for 'modified' into a
+ * classifier. */
+ struct hmap matches;
+ struct classifier cls;
+ if (operation >= OP_FLOW) {
+ struct expr_match *m;
+ struct test_rule *test_rule;
+
+ expr_to_matches(modified, NULL, &matches);
+
+ classifier_init(&cls, NULL);
+ HMAP_FOR_EACH (m, hmap_node, &matches) {
+ test_rule = xmalloc(sizeof *test_rule);
+ cls_rule_init(&test_rule->cr, &m->match, 0, CLS_MIN_VERSION);
+ classifier_insert(&cls, &test_rule->cr, m->conjunctions, m->n);
+ }
+ }
+ /* Try every assignment of values to the variables. */
+ for (int subst = 0; subst < 1 << (n_bits * n_vars); subst++) {
+ bool expected = evaluate_expr(expr, subst, n_bits);
+ bool actual = evaluate_expr(modified, subst, n_bits);
+ if (actual != expected) {
+ struct ds expr_s, modified_s;
+
+ ds_init(&expr_s);
+ expr_format(expr, &expr_s);
+
+ ds_init(&modified_s);
+ expr_format(modified, &modified_s);
+
+ fprintf(stderr,
+ "%s evaluates to %d, but %s evaluates to %d, for",
+ ds_cstr(&expr_s), expected,
+ ds_cstr(&modified_s), actual);
+ for (int i = 0; i < n_vars; i++) {
+ if (i > 0) {
+ fputs(",", stderr);
+ }
+ fprintf(stderr, " %c = 0x%x", 'a' + i,
+ (subst >> (n_bits * i)) & var_mask);
+ }
+ putc('\n', stderr);
+ exit(EXIT_FAILURE);
+ }
+
+ if (operation >= OP_FLOW) {
+ /* Variable i lives in register i of the flow; the classifier
+ * must find a rule exactly when the expression is true. */
+ for (int i = 0; i < n_vars; i++) {
+ f.regs[i] = (subst >> (i * n_bits)) & var_mask;
+ }
+ bool found = classifier_lookup(&cls, CLS_MIN_VERSION,
+ &f, NULL) != NULL;
+ if (expected != found) {
+ struct ds expr_s, modified_s;
+
+ ds_init(&expr_s);
+ expr_format(expr, &expr_s);
+
+ ds_init(&modified_s);
+ expr_format(modified, &modified_s);
+
+ fprintf(stderr,
+ "%s and %s evaluate to %d, for",
+ ds_cstr(&expr_s), ds_cstr(&modified_s), expected);
+ for (int i = 0; i < n_vars; i++) {
+ if (i > 0) {
+ fputs(",", stderr);
+ }
+ fprintf(stderr, " %c = 0x%x", 'a' + i,
+ (subst >> (n_bits * i)) & var_mask);
+ }
+ fputs(".\n", stderr);
+
+ fprintf(stderr, "Converted to classifier:\n");
+ expr_matches_print(&matches, stderr);
+ fprintf(stderr,
+ "However, %s flow was found in the classifier.\n",
+ found ? "a" : "no");
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ /* Tear down the classifier and matches built for this combination. */
+ if (operation >= OP_FLOW) {
+ struct test_rule *test_rule;
+
+ CLS_FOR_EACH (test_rule, cr, &cls) {
+ classifier_remove(&cls, &test_rule->cr);
+ ovsrcu_postpone(free_rule, test_rule);
+ }
+ classifier_destroy(&cls);
+ ovsrcu_quiesce();
+
+ expr_matches_destroy(&matches);
+ }
+ expr_destroy(modified);
+ }
+}
+
+#ifndef _WIN32
+/* Waits for any one child process to terminate and removes its PID from the
+ * '*n' PIDs in 'pids', decrementing '*n'.
+ *
+ * A child that exited with a nonzero status makes this process exit with the
+ * same status.  A child killed by a signal makes this process raise that
+ * signal on itself and then exit with status 1.  Failure of waitpid(), or a
+ * PID that is not in 'pids', is fatal. */
+static void
+wait_pid(pid_t *pids, int *n)
+{
+    int status;
+    pid_t pid = waitpid(WAIT_ANY, &status, 0);
+
+    if (pid < 0) {
+        ovs_fatal(errno, "waitpid failed");
+    } else if (WIFEXITED(status)) {
+        int exit_code = WEXITSTATUS(status);
+
+        if (exit_code) {
+            exit(exit_code);
+        }
+    } else if (WIFSIGNALED(status)) {
+        raise(WTERMSIG(status));
+        exit(1);
+    } else {
+        OVS_NOT_REACHED();
+    }
+
+    int i = 0;
+
+    while (i < *n && pids[i] != pid) {
+        i++;
+    }
+    if (i >= *n) {
+        ovs_fatal(0, "waitpid returned unknown child");
+    }
+
+    /* Fill the hole with the last entry; order does not matter. */
+    pids[i] = pids[--*n];
+}
+#endif
+
+/* "exhaustive N" command: for every tree shape with N terminals (N is
+ * ctx->argv[1]), once with an AND root and once with an OR root, builds the
+ * expression tree and runs test_tree_shape_exhaustively() on it, using
+ * 'test_vars' variables of 'test_bits' bits each.
+ *
+ * Except on Windows, when 'test_parallel' is greater than 1 each tree shape
+ * is tested in a forked child, with children reaped through wait_pid().
+ *
+ * Finishes by printing a one-line summary of what was tested on stdout. */
+static void
+test_exhaustive(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+ int n_terminals = atoi(ctx->argv[1]);
+ struct tree_shape ts[50];
+ int n_tses;
+
+ struct shash symtab;
+ const struct expr_symbol *vars[4];
+
+ ovs_assert(test_vars <= ARRAY_SIZE(vars));
+
+ /* Variables are named "a", "b", ... and occupy consecutive registers
+ * starting at MFF_REG0. */
+ shash_init(&symtab);
+ for (int i = 0; i < test_vars; i++) {
+ char name[2] = { 'a' + i, '\0' };
+
+ vars[i] = expr_symtab_add_field(&symtab, name, MFF_REG0 + i, NULL,
+ false);
+ }
+
+#ifndef _WIN32
+ pid_t *children = xmalloc(test_parallel * sizeof *children);
+ int n_children = 0;
+#endif
+
+ int n_tested = 0;
+ for (int i = 0; i < 2; i++) {
+ /* First pass uses AND at the root, second pass OR. */
+ enum expr_type base_type = i ? EXPR_T_OR : EXPR_T_AND;
+
+ for (n_tses = init_tree_shape(ts, n_terminals); n_tses;
+ n_tses = next_tree_shape(ts, n_tses)) {
+ const struct tree_shape *tsp = ts;
+ struct expr *terminals[50];
+ struct expr **terminalp = terminals;
+ struct expr *expr = build_tree_shape(base_type, &tsp, &terminalp);
+ ovs_assert(terminalp == &terminals[n_terminals]);
+
+ if (verbosity > 0) {
+ print_tree_shape(ts, n_tses);
+ printf(": ");
+ struct ds s = DS_EMPTY_INITIALIZER;
+ expr_format(expr, &s);
+ puts(ds_cstr(&s));
+ ds_destroy(&s);
+ }
+
+#ifndef _WIN32
+ if (test_parallel > 1) {
+ pid_t pid = xfork();
+ if (!pid) {
+ /* Child: test this one shape, then exit.  Its count of
+ * tested expressions is not passed back to the parent. */
+ test_tree_shape_exhaustively(expr, &symtab,
+ terminals, n_terminals,
+ vars, test_vars, test_bits);
+ expr_destroy(expr);
+ exit(0);
+ } else {
+ /* Parent: reap a child first if the array is full. */
+ if (n_children >= test_parallel) {
+ wait_pid(children, &n_children);
+ }
+ children[n_children++] = pid;
+ }
+ } else
+#endif
+ {
+ n_tested += test_tree_shape_exhaustively(
+ expr, &symtab, terminals, n_terminals,
+ vars, test_vars, test_bits);
+ }
+ expr_destroy(expr);
+ }
+ }
+#ifndef _WIN32
+ /* Wait for the children that are still running. */
+ while (n_children > 0) {
+ wait_pid(children, &n_children);
+ }
+ free(children);
+#endif
+
+ printf("Tested ");
+ switch (operation) {
+ case OP_CONVERT:
+ printf("converting");
+ break;
+ case OP_SIMPLIFY:
+ printf("simplifying");
+ break;
+ case OP_NORMALIZE:
+ printf("normalizing");
+ break;
+ case OP_FLOW:
+ printf("converting to flows");
+ break;
+ }
+ /* 'n_tested' is only accumulated on the non-forking path, so it stays 0
+ * when the work was done in children. */
+ if (n_tested) {
+ printf(" %d expressions of %d terminals", n_tested, n_terminals);
+ } else {
+ printf(" all %d-terminal expressions", n_terminals);
+ }
+ printf(" with %d vars each of %d bits in terms of operators",
+ test_vars, test_bits);
+ for (unsigned int relops = test_relops; relops;
+ relops = zero_rightmost_1bit(relops)) {
+ enum expr_relop r = rightmost_1bit_idx(relops);
+ printf(" %s", expr_relop_to_string(r));
+ }
+ printf(".\n");
+
+ expr_symtab_destroy(&symtab);
+ shash_destroy(&symtab);
+}
+\f
+/* Actions. */
+
+/* "parse-actions" command: reads one action string per test line from stdin
+ * and parses it with actions_parse_string().  On success prints
+ * "actions=<actions>, prereqs=<expr>" on stdout, with "1" standing in for
+ * absent prerequisites; on failure prints the error message. */
+static void
+test_parse_actions(struct ovs_cmdl_context *ctx OVS_UNUSED)
+{
+    struct shash symtab;
+    struct simap ports;
+    struct ds line;
+
+    create_symtab(&symtab);
+
+    simap_init(&ports);
+    simap_put(&ports, "eth0", 5);
+    simap_put(&ports, "eth1", 6);
+    simap_put(&ports, "LOCAL", ofp_to_u16(OFPP_LOCAL));
+
+    ds_init(&line);
+    while (!ds_get_test_line(&line, stdin)) {
+        struct ofpbuf ofpacts;
+        struct expr *prereqs;
+        char *error;
+
+        ofpbuf_init(&ofpacts, 0);
+        error = actions_parse_string(ds_cstr(&line), &symtab, &ports, 11,
+                                     &ofpacts, &prereqs);
+        if (error) {
+            puts(error);
+            free(error);
+        } else {
+            struct ds text = DS_EMPTY_INITIALIZER;
+
+            ds_put_cstr(&text, "actions=");
+            ofpacts_format(ofpacts.data, ofpacts.size, &text);
+            ds_put_cstr(&text, ", prereqs=");
+            if (!prereqs) {
+                ds_put_char(&text, '1');
+            } else {
+                expr_format(prereqs, &text);
+            }
+            puts(ds_cstr(&text));
+            ds_destroy(&text);
+        }
+
+        expr_destroy(prereqs);
+        ofpbuf_uninit(&ofpacts);
+    }
+    ds_destroy(&line);
+
+    simap_destroy(&ports);
+    expr_symtab_destroy(&symtab);
+    shash_destroy(&symtab);
+}
+\f
+/* Parses 's' as a list of one or more relational operators, each optionally
+ * followed by a comma, and returns a bitmap with bit (1u << relop) set for
+ * each operator found.  Exits with a fatal error at the first token that is
+ * not a relational operator. */
+static unsigned int
+parse_relops(const char *s)
+{
+    unsigned int bitmap = 0;
+    struct lexer lexer;
+
+    lexer_init(&lexer, s);
+    lexer_get(&lexer);
+    for (;;) {
+        enum expr_relop relop;
+
+        if (!expr_relop_from_token(lexer.token.type, &relop)) {
+            ovs_fatal(0, "%s: relational operator expected at `%.*s'",
+                      s, (int) (lexer.input - lexer.start), lexer.start);
+        }
+        bitmap |= 1u << relop;
+        lexer_get(&lexer);
+
+        /* A comma between operators is optional. */
+        lexer_match(&lexer, LEX_T_COMMA);
+        if (lexer.token.type == LEX_T_END) {
+            break;
+        }
+    }
+    lexer_destroy(&lexer);
+
+    return bitmap;
+}
+
+/* Prints the help text for the test-ovn commands and options on stdout and
+ * exits successfully.  Does not return. */
+static void
+usage(void)
+{
+    printf("\
+%s: OVN test utility\n\
+usage: test-ovn %s [OPTIONS] COMMAND [ARG...]\n\
+\n\
+lex\n\
+ Lexically analyzes OVN input from stdin and print them back on stdout.\n\
+\n\
+parse-expr\n\
+annotate-expr\n\
+simplify-expr\n\
+normalize-expr\n\
+expr-to-flows\n\
+ Parses OVN expressions from stdin and print them back on stdout after\n\
+ differing degrees of analysis. Available fields are based on packet\n\
+ headers.\n\
+\n\
+evaluate-expr A B C\n\
+ Parses OVN expressions from stdin, evaluate them with assigned values,\n\
+ and print the results on stdout. Available fields are 'a', 'b', and 'c'\n\
+ of 3 bits each. A, B, and C should be in the range 0 to 7.\n\
+\n\
+composition N\n\
+ Prints all the compositions of N on stdout.\n\
+\n\
+tree-shape N\n\
+ Prints all the tree shapes with N terminals on stdout.\n\
+\n\
+exhaustive N\n\
+ Tests that all possible Boolean expressions with N terminals are properly\n\
+ simplified, normalized, and converted to flows. Available options:\n\
+ --relops=OPERATORS Test only the specified Boolean operators.\n\
+ OPERATORS may include == != < <= > >=, space or\n\
+ comma separated. Default is all operators.\n\
+ --vars=N Number of variables to test, in range 1...4, default 2.\n\
+ --bits=N Number of bits per variable, in range 1...3, default 3.\n\
+ --operation=OPERATION Operation to test, one of: convert, simplify,\n\
+ normalize, flow. Default: flow. 'normalize' includes 'simplify',\n\
+ 'flow' includes 'simplify' and 'normalize'.\n\
+ --parallel=N Number of processes to use in parallel, default 1.\n\
+",
+           program_name, program_name);
+    exit(EXIT_SUCCESS);
+}
+
+/* Entry point of the "test-ovn" test program.  Parses the long options that
+ * tune the "exhaustive" command, and "--more" and "--help", then dispatches
+ * the remaining arguments to the command named by the first of them. */
+static void
+test_ovn_main(int argc, char *argv[])
+{
+    set_program_name(argv[0]);
+
+    /* Default to testing all of the relational operators. */
+    test_relops = parse_relops("== != < <= > >=");
+    for (;;) {
+        enum {
+            OPT_RELOPS = UCHAR_MAX + 1,
+            OPT_VARS,
+            OPT_BITS,
+            OPT_OPERATION,
+            OPT_PARALLEL
+        };
+
+        static const struct option options[] = {
+            {"relops", required_argument, NULL, OPT_RELOPS},
+            {"vars", required_argument, NULL, OPT_VARS},
+            {"bits", required_argument, NULL, OPT_BITS},
+            {"operation", required_argument, NULL, OPT_OPERATION},
+            {"parallel", required_argument, NULL, OPT_PARALLEL},
+            {"more", no_argument, NULL, 'm'},
+            {"help", no_argument, NULL, 'h'},
+            {NULL, 0, NULL, 0},
+        };
+        int option_index = 0;
+        int c = getopt_long (argc, argv, "", options, &option_index);
+
+        if (c == -1) {
+            break;
+        }
+        switch (c) {
+        case OPT_RELOPS:
+            test_relops = parse_relops(optarg);
+            break;
+
+        case OPT_VARS:
+            test_vars = atoi(optarg);
+            if (test_vars < 1 || test_vars > 4) {
+                ovs_fatal(0, "number of variables must be between 1 and 4");
+            }
+            break;
+
+        case OPT_BITS:
+            test_bits = atoi(optarg);
+            if (test_bits < 1 || test_bits > 3) {
+                ovs_fatal(0, "number of bits must be between 1 and 3");
+            }
+            break;
+
+        case OPT_OPERATION:
+            if (!strcmp(optarg, "convert")) {
+                operation = OP_CONVERT;
+            } else if (!strcmp(optarg, "simplify")) {
+                operation = OP_SIMPLIFY;
+            } else if (!strcmp(optarg, "normalize")) {
+                operation = OP_NORMALIZE;
+            } else if (!strcmp(optarg, "flow")) {
+                operation = OP_FLOW;
+            } else {
+                ovs_fatal(0, "%s: unknown operation", optarg);
+            }
+            break;
+
+        case OPT_PARALLEL:
+            test_parallel = atoi(optarg);
+            break;
+
+        case 'm':
+            verbosity++;
+            break;
+
+        case 'h':
+            /* usage() exits, so control never reaches the next case. */
+            usage();
+
+        case '?':
+            exit(1);
+
+        default:
+            abort();
+        }
+    }
+
+    static const struct ovs_cmdl_command commands[] = {
+        /* Lexer. */
+        {"lex", NULL, 0, 0, test_lex},
+
+        /* Expressions. */
+        {"parse-expr", NULL, 0, 0, test_parse_expr},
+        {"annotate-expr", NULL, 0, 0, test_annotate_expr},
+        {"simplify-expr", NULL, 0, 0, test_simplify_expr},
+        {"normalize-expr", NULL, 0, 0, test_normalize_expr},
+        {"expr-to-flows", NULL, 0, 0, test_expr_to_flows},
+        /* Takes exactly three arguments, A, B and C: the handler reads
+         * argv[1] through argv[3]. */
+        {"evaluate-expr", NULL, 3, 3, test_evaluate_expr},
+        {"composition", NULL, 1, 1, test_composition},
+        {"tree-shape", NULL, 1, 1, test_tree_shape},
+        {"exhaustive", NULL, 1, 1, test_exhaustive},
+
+        /* Actions. */
+        {"parse-actions", NULL, 0, 0, test_parse_actions},
+
+        {NULL, NULL, 0, 0, NULL},
+    };
+    struct ovs_cmdl_context ctx;
+    ctx.argc = argc - optind;
+    ctx.argv = argv + optind;
+    ovs_cmdl_run_command(&ctx, commands);
+}
+
+OVSTEST_REGISTER("test-ovn", test_ovn_main);
m4_include([tests/vlog.at])
m4_include([tests/vtep-ctl.at])
m4_include([tests/auto-attach.at])
+m4_include([tests/ovn.at])
gdb_ovsdb=false
gdb_vswitchd_ex=false
gdb_ovsdb_ex=false
+gdb_ovn_northd=false
+gdb_ovn_northd_ex=false
+gdb_ovn_controller=false
+gdb_ovn_controller_ex=false
builddir=
srcdir=
schema=
installed=false
built=false
+ovn=false
+ovnsb_schema=
+ovnnb_schema=
for option; do
# This option-parsing mechanism borrowed from a Autoconf-generated
-i, --installed use installed Open vSwitch
-g, --gdb-vswitchd run ovs-vswitchd under gdb
-d, --gdb-ovsdb run ovsdb-server under gdb
+ --gdb-ovn-northd run ovn-northd under gdb
+ --gdb-ovn-controller run ovn-controller under gdb
+ -R, --gdb-run automatically start running the daemon in gdb
+ for any daemon set to run under gdb
-S, --schema=FILE use FILE as vswitch.ovsschema
+ -o, --ovn enable OVN
Other options:
-h, --help Print this usage message.
gdb_vswitchd=true
gdb_vswitchd_ex=true
;;
- -d|--gdb-o*)
+ -d|--gdb-ovsdb)
gdb_ovsdb=true
gdb_ovsdb_ex=false
;;
gdb_ovsdb=true
gdb_ovsdb_ex=true
;;
+ --gdb-ovn-northd)
+ gdb_ovn_northd=true
+ ;;
+ --gdb-ovn-controller)
+ gdb_ovn_controller=true
+ ;;
+ -o|--ovn)
+ ovn=true
+ ;;
+ -R|--gdb-run)
+ gdb_vswitchd_ex=true
+ gdb_ovsdb_ex=true
+ gdb_ovn_northd_ex=true
+ gdb_ovn_controller_ex=true
+ ;;
-*)
echo "unrecognized option $option (use --help for help)" >&2
exit 1
echo >&2 'source directory not found, please use --srcdir'
exit 1
fi
+ if $ovn; then
+ ovnsb_schema=$srcdir/ovn/ovn-sb.ovsschema
+ if test ! -e "$ovnsb_schema"; then
+ echo >&2 'source directory not found, please use --srcdir'
+ exit 1
+ fi
+ ovnnb_schema=$srcdir/ovn/ovn-nb.ovsschema
+ if test ! -e "$ovnnb_schema"; then
+ echo >&2 'source directory not found, please use --srcdir'
+ exit 1
+ fi
+ fi
# Put built tools early in $PATH.
if test ! -e $builddir/vswitchd/ovs-vswitchd; then
exit 1
fi
PATH=$builddir/ovsdb:$builddir/vswitchd:$builddir/utilities:$PATH
+ if $ovn; then
+ PATH=$builddir/ovn:$builddir/ovn/controller:$builddir/ovn/northd:$PATH
+ fi
export PATH
else
case $schema in
echo "can't find vswitch.ovsschema, please specify --schema" >&2
exit 1
fi
+ if $ovn; then
+ echo "running with ovn is only supported from the build dir." >&2
+ exit 1
+ fi
fi
# Create sandbox.
# Create database and start ovsdb-server.
touch "$sandbox"/.conf.db.~lock~
run ovsdb-tool create conf.db "$schema"
+ovsdb_server_args=
+if $ovn; then
+ touch "$sandbox"/.ovnsb.db.~lock~
+ touch "$sandbox"/.ovnnb.db.~lock~
+ run ovsdb-tool create ovnsb.db "$ovnsb_schema"
+ run ovsdb-tool create ovnnb.db "$ovnnb_schema"
+ ovsdb_server_args="ovnsb.db ovnnb.db conf.db"
+fi
rungdb $gdb_ovsdb $gdb_ovsdb_ex ovsdb-server --detach --no-chdir --pidfile -vconsole:off --log-file \
- --remote=punix:"$sandbox"/db.sock
+ --remote=punix:"$sandbox"/db.sock $ovsdb_server_args
#Add a small delay to allow ovsdb-server to launch.
sleep 0.1
rungdb $gdb_vswitchd $gdb_vswitchd_ex ovs-vswitchd --detach --no-chdir --pidfile -vconsole:off --log-file \
--enable-dummy=override -vvconn -vnetdev_dummy
+if $ovn; then
+ ovs-vsctl set open . external-ids:system-id=56b18105-5706-46ef-80c4-ff20979ab068
+ ovs-vsctl set open . external-ids:ovn-remote=unix:"$sandbox"/db.sock
+ ovs-vsctl set open . external-ids:ovn-encap-type=geneve
+ ovs-vsctl set open . external-ids:ovn-encap-ip=127.0.0.1
+ ovs-vsctl add-br br-int \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+ rungdb $gdb_ovn_northd $gdb_ovn_northd_ex ovn-northd --detach --no-chdir --pidfile -vconsole:off --log-file
+ rungdb $gdb_ovn_controller $gdb_ovn_controller_ex ovn-controller --detach --no-chdir --pidfile -vconsole:off --log-file
+fi
+
cat <<EOF
utilities/ovs-appctl-bashcomp.bash \
utilities/ovs-vsctl-bashcomp.bash
+EXTRA_DIST += utilities/ovs-sim.in utilities/ovs-sim.1.xml
+man_MANS += utilities/ovs-sim.1
+noinst_SCRIPTS += utilities/ovs-sim
+
utilities/ovs-lib: $(top_builddir)/config.status
docs += utilities/ovs-command-bashcomp.INSTALL.md
--- /dev/null
+<?xml version="1.0" encoding="utf-8"?>
+<manpage program="ovs-sim" section="1" title="ovs-sim">
+ <h1>Name</h1>
+ <p>ovs-sim -- Open vSwitch simulator environment</p>
+
+ <h1>Synopsis</h1>
+ <p><code>ovs-sim</code> [<var>option</var>]... [<var>script</var>]...</p>
+
+ <h1>Description</h1>
+ <p>
+ <code>ovs-sim</code> provides a convenient environment for running one or
+ more Open vSwitch instances and related software in a sandboxed
+ simulation environment.
+ </p>
+
+ <p>
+ To use <code>ovs-sim</code>, first build Open vSwitch, then invoke it
+ directly from the build directory, e.g.:
+ </p>
+
+ <pre>
+git clone https://github.com/openvswitch/ovs.git
+cd ovs
+./configure
+make
+utilities/ovs-sim
+ </pre>
+
+ <p>
+ When invoked in the most ordinary way as shown above,
+ <code>ovs-sim</code> does the following:
+ </p>
+
+ <ol>
+ <li>
+ Creates a directory <code>sandbox</code> as a subdirectory of the
+ current directory (first destroying such a directory if it already
+ exists) and <code>cd</code>s into that directory.
+ </li>
+
+ <li>
+ Installs all of the Open vSwitch manpages into a <code>man</code>
+ subdirectory of <code>sandbox</code> and adjusts the <env>MANPATH</env>
+ environment variable so that <code>man</code> and other manpage viewers
+ can find them.
+ </li>
+
+ <li>
+ <p>
+ Creates a simulated Open vSwitch named <code>main</code> and sets it
+ up as the default target for OVS commands, as if the following
+ <code>ovs-sim</code> commands had been run:
+ </p>
+
+ <pre>
+ sim_add main
+ as main
+ </pre>
+
+ <p>
+ See <code>Commands</code>, below, for an explanation.
+ </p>
+ </li>
+
+ <li>
+ Runs any scripts specified on the command line (see
+ <code>Options</code> below). The scripts can use arbitrary Bash
+ syntax, plus the additional commands described under
+ <code>Commands</code>, below.
+ </li>
+
+ <li>
+ If no scripts were specified, or if <option>-i</option> or
+ <option>--interactive</option> was specified, invokes an interactive
+ Bash subshell. The user can use arbitrary Bash commands, plus the
+ additional commands described under <code>Commands</code>, below.
+ </li>
+ </ol>
+
+ <p>
+ <code>ovs-sim</code> and the sandbox environment that it creates do not
+ require superuser or other special privileges. Generally, it should not
+ be run with such privileges.
+ </p>
+
+ <h1>Options</h1>
+
+ <p>
+ <code>ovs-sim</code> accepts the following options and arguments:
+ </p>
+
+ <dl>
+ <dt><var>script</var></dt>
+ <dd>
+ Runs <var>script</var>, which should be a Bash script, within a
+ subshell after initializing. If multiple <var>script</var> arguments
+ are given, then they are run in the order given. If any
+ <var>script</var> exits with a nonzero exit code, then
+ <code>ovs-sim</code> exits immediately with the same exit code.
+ </dd>
+
+ <dt><option>-i</option></dt>
+ <dt><option>--interactive</option></dt>
+ <dd>
+ By default, if any <var>script</var> is specified, <code>ovs-sim</code>
+ exits as soon as the scripts finish executing. With this option, or if
+ no scripts are specified, <code>ovs-sim</code> instead starts an
+ interactive Bash session.
+ </dd>
+ </dl>
+
+ <h1>Commands</h1>
+
+ <p>
+ Scripts and interactive usage may use the following commands implemented
+ by <code>ovs-sim</code>. They are implemented as Bash shell functions
+ exported to subshells.
+ </p>
+
+ <h2>Basic Commands</h2>
+
+ <p>
+ These are the basic commands for working with sandboxed Open vSwitch
+ instances.
+ </p>
+
+ <dl>
+ <dt><code>sim_add</code> <var>sandbox</var></dt>
+ <dd>
+ <p>
+ Starts a new simulated Open vSwitch instance named
+ <var>sandbox</var>. Files related to the instance, such as logs,
+ databases, sockets, and pidfiles, are created in a subdirectory also
+ named <var>sandbox</var>. Afterward, the <code>as</code> command
+ (see below) can be used to run Open vSwitch utilities in the context
+ of the new sandbox.
+ </p>
+
+ <p>
+ The new sandbox starts out without any bridges. Use
+ <code>ovs-vsctl</code> in the context of the new sandbox to create a
+ bridge, e.g.:
+ </p>
+
+ <pre>
+sim_add hv0 # Create sandbox hv0.
+as hv0 # Set hv0 as default sandbox.
+ovs-vsctl add-br br0 # Add bridge br0 inside hv0.
+ </pre>
+
+ <p>
+ The Open vSwitch instances that <code>sim_add</code> creates enable
+ ``dummy'' devices. This means that bridges and interfaces can be
+ created with type <code>dummy</code> to indicate that they should be
+ totally simulated, without any reference to system entities. In
+ fact, <code>ovs-sim</code> also configures Open vSwitch so that the
+ default <code>system</code> type of bridges and interfaces is
+ replaced by <code>dummy</code> devices. Other types of devices,
+ however, retain their usual functions, which means that, e.g.,
+ <code>vxlan</code> tunnels still act as tunnels (see
+ <code>README-native-tunneling.md</code>).
+ </p>
+ </dd>
+
+ <dt><code>as</code> <var>sandbox</var></dt>
+ <dd>
+ <p>
+ Sets <var>sandbox</var> as the default simulation target for Open
+ vSwitch commands (e.g. <code>ovs-vsctl</code>,
+ <code>ovs-ofctl</code>, <code>ovs-appctl</code>).
+ </p>
+
+ <p>
+ This command updates the beginning of the shell prompt to indicate
+ the new default target.
+ </p>
+ </dd>
+
+ <dt><code>as</code> <var>sandbox</var> <var>command</var> <var>arg</var>...</dt>
+ <dd>
+ Runs the given <var>command</var> with <var>sandbox</var> as the
+ simulation target, e.g. <code>as hv0 ovs-vsctl add-br br0</code> runs
+ <code>ovs-vsctl add-br br0</code> within sandbox <code>hv0</code>.
+ The default target is unchanged.
+ </dd>
+ </dl>
+
+ <h2>Interconnection Network Commands</h2>
+
+ <p>
+ When multiple sandboxed Open vSwitch instances exist, one will inevitably
+ want to connect them together. These commands allow for that.
+ Conceptually, an interconnection network is a switch that
+ <code>ovs-sim</code> makes it easy to plug into other switches in other
+ sandboxed Open vSwitch instances. Interconnection networks are
+ implemented as bridges in the <code>main</code> switch that
+ <code>ovs-sim</code> creates by default, so to use interconnection
+ networks please avoid working with <code>main</code> directly.
+ </p>
+
+ <dl>
+ <dt><code>net_add</code> <var>network</var></dt>
+ <dd>
+ Creates a new interconnection network named <var>network</var>.
+ </dd>
+
+ <dt><code>net_attach</code> <var>network</var> <var>bridge</var></dt>
+ <dd>
+ Adds a new port to <var>bridge</var> in the default sandbox (as set
+ with <code>as</code>) and plugs it into the <var>network</var>
+ interconnection network. <var>network</var> must already have been
+ created by a previous invocation of <code>net_add</code>. The default
+ sandbox must not be <code>main</code>.
+ </dd>
+ </dl>
+
+ <h2>OVN Commands</h2>
+
+ <p>
+ These commands interact with OVN, the Open Virtual Network.
+ </p>
+
+ <dl>
+ <dt><code>ovn_start</code></dt>
+ <dd>
+ Creates and initializes the central OVN databases (both
+ <code>ovn-sb</code>(5) and <code>ovn-nb</code>(5)) and starts an instance
+ of <code>ovsdb-server</code> for each one. Also starts an instance of
+ <code>ovn-northd</code>.
+ </dd>
+
+ <dt><code>ovn_attach</code> <var>network</var> <var>bridge</var> <var>ip</var> [<var>masklen</var>]</dt>
+ <dd>
+ First, this command attaches <var>bridge</var> to interconnection
+ network <var>network</var>, just like <code>net_attach</code>
+ <var>network</var> <var>bridge</var>. Second, it configures
+ (simulated) IP address <var>ip</var> (with network mask length
+ <var>masklen</var>, which defaults to 24) on <var>bridge</var>.
+ Finally, it configures the Open vSwitch database to work with OVN and
+ starts <code>ovn-controller</code>.
+ </dd>
+ </dl>
+
+ <h1>Examples</h1>
+
+ <p>
+ The following creates a pair of Open vSwitch instances
+ <code>hv0</code> and <code>hv1</code>, adds a port named
+ <code>vif0</code> or <code>vif1</code>, respectively, to each
+ one, and then connects the two through an interconnection
+ network <code>n1</code>:
+ </p>
+
+ <pre>
+net_add n1
+for i in 0 1; do
+ sim_add hv$i
+ as hv$i ovs-vsctl add-br br0 -- add-port br0 vif$i
+ as hv$i net_attach n1 br0
+done
+ </pre>
+
+ <p>
+ Here's an extended version that also starts OVN:
+ </p>
+
+ <pre>
+ovn_start
+ovn-nbctl lswitch-add lsw0
+
+net_add n1
+for i in 0 1; do
+ sim_add hv$i
+ as hv$i
+ ovs-vsctl add-br br-phys
+ ovn_attach n1 br-phys 192.168.0.`expr $i + 1`
+ ovs-vsctl add-port br-int vif$i -- set Interface vif$i external-ids:iface-id=lp$i
+ ovn-nbctl lport-add lsw0 lp$i
+ ovn-nbctl lport-set-macs lp$i f0:00:00:00:00:0$i
+done
+ </pre>
+
+ <p>
+ Here's a primitive OVN ``scale test'' (adjust the scale by
+ changing <var>n</var> in the first line):
+ </p>
+
+ <pre>
+n=200; export n
+ovn_start
+net_add n1
+ovn-nbctl lswitch-add br0
+for i in `seq $n`; do
+ (sim_add hv$i
+ as hv$i
+ ovs-vsctl add-br br-phys
+ y=$(expr $i / 256)
+ x=$(expr $i % 256)
+ ovn_attach n1 br-phys 192.168.$y.$x
+ ovs-vsctl add-port br-int vif$i -- set Interface vif$i external-ids:iface-id=lp$i) &
+ case $i in
+ *50|*00) echo $i; wait ;;
+ esac
+done
+wait
+for i in `seq $n`; do
+ yy=$(printf %02x $(expr $i / 256))
+ xx=$(printf %02x $(expr $i % 256))
+ ovn-nbctl lport-add br0 lp$i
+ ovn-nbctl lport-set-macs lp$i f0:00:00:00:$yy:$xx
+done
+ </pre>
+
+ <p>
+ When the scale test has finished initializing, you can watch the
+ logical ports come up with a command like this:
+ </p>
+
+ <pre>
+watch 'for i in `seq $n`; do if test `ovn-nbctl lport-get-up lp$i` != up; then echo $i; fi; done'
+ </pre>
+
+</manpage>
--- /dev/null
+#! /usr/bin/env bash
+#
+# Copyright (c) 2013, 2015 Nicira, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+sim_builddir='@abs_builddir@'; export sim_builddir
+sim_srcdir='@abs_top_srcdir@'; export sim_srcdir
+interactive=false
+scripts=
+
+# Parse the command line.  Options set flags; every other argument is taken
+# as a script path and appended to the space-separated $scripts list.
+for option; do
+ case $option in
+ -h|--help)
+ cat <<EOF
+$0, for starting sandboxed dummy Open vSwitch environments
+usage: $0 [OPTION...] [SCRIPT...]
+
+Options:
+ -i, --interactive Prompt for interactive input (default if no SCRIPTs)
+ -h, --help Print this usage message.
+EOF
+ exit 0
+ ;;
+
+ # Any long option beginning with "--i" is accepted as --interactive.
+ -i|--i*)
+ interactive=:
+ ;;
+
+ -*)
+ echo "unrecognized option $option (use --help for help)" >&2
+ exit 1
+ ;;
+ *)
+ # Make relative script paths absolute: the script changes into the
+ # sandbox directory before the scripts are sourced.
+ case $option in
+ /*) ;;
+ *) option=`pwd`/$option ;;
+ esac
+ scripts="$scripts $option"
+ ;;
+ esac
+ shift
+done
+
+if test -z "$scripts"; then
+ interactive=:
+fi
+
+# Check that we've got proper builddir and srcdir.
+if test ! -e "$sim_builddir"/vswitchd/ovs-vswitchd; then
+ echo "$sim_builddir/vswitchd/ovs-vswitchd does not exist (need to run \"make\"?)" >&2
+ exit 1
+fi
+if test ! -e "$sim_srcdir"/WHY-OVS.md; then
+ echo "$sim_srcdir/WHY-OVS.md does not exist" >&2
+ exit 1
+fi
+
+# Put built tools early in $PATH.
+PATH=$sim_builddir/ovsdb:$sim_builddir/vswitchd:$sim_builddir/utilities:$PATH
+PATH=$sim_builddir/ovn:$sim_srcdir/ovn:$sim_builddir/ovn/controller:$sim_builddir/ovn/northd:$PATH
+export PATH
+
+rm -rf sandbox
+mkdir sandbox
+cd sandbox
+sim_base=`pwd`; export sim_base
+
+# Installs a cleanup handler for shell exit (0) and for signals 1, 2, 3, 13,
+# 14 and 15.  When it fires, the handler turns off errexit, kills every
+# process whose PID is listed in a */*.pid file under $sim_base, removes
+# itself, and then re-sends the same signal to the shell.
+#
+# The handler text is double-quoted, so $sim_base, $signal and $$ are expanded
+# when the trap is installed; only the backquoted "cat */*.pid" is deferred
+# until the trap runs.
+trap_signals() {
+ for signal in 0 1 2 3 13 14 15; do
+ trap "
+ set +e
+ cd '$sim_base' && (kill \`cat */*.pid\`) >/dev/null 2>&1
+ trap - $signal
+ kill -$signal $$" $signal
+ done
+}
+export -f trap_signals
+trap_signals
+
+# Usage: sim_setvars SANDBOX
+#
+# Makes SANDBOX the target of subsequent Open vSwitch commands: records its
+# name in $sandbox, points the OVS run, log, database and sysconf directories
+# at $sim_base/SANDBOX, and prefixes the shell prompt with the sandbox name.
+sim_setvars() {
+    sandbox=$1
+    export OVS_RUNDIR="$sim_base/$1"
+    export OVS_LOGDIR="$sim_base/$1"
+    export OVS_DBDIR="$sim_base/$1"
+    export OVS_SYSCONFDIR="$sim_base/$1"
+    PS1="|$1: $sim_PS1"
+}
+export -f sim_setvars
+
+as() {
+ case $# in
+ 0)
+ echo >&2 "$FUNCNAME: missing arguments (use --help for help)"
+ return 1
+ ;;
+ 1)
+ if test "$1" != --help; then
+ sim_setvars $1
+ else
+ cat <<EOF
+$FUNCNAME: set the default sandbox for Open vSwitch commands
+usage: $FUNCNAME SANDBOX [COMMAND ARG...]
+where SANDBOX is the name of the desired sandbox.
+
+With COMMAND arguments, this command sets the default target for that
+single command, which it runs directly. Otherwise, it sets the default
+target for all following commands.
+EOF
+ fi
+ ;;
+ *)
+ (sim_setvars $1; shift; $@)
+ ;;
+ esac
+}
+export -f as
+
+# Usage: sim_add SANDBOX
+#
+# Creates directory $sim_base/SANDBOX, creates conf.db in it from
+# vswitch.ovsschema, starts ovsdb-server listening on db.sock in that
+# directory, initializes the database, and starts ovs-vswitchd.
+# "sim_add --help" prints a usage message.  Returns 1 if the argument count
+# is wrong, the name is not a single word, the sandbox already exists, or
+# the directory cannot be created.
+sim_add() {
+ if test "$1" == --help; then
+ cat <<EOF
+$FUNCNAME: create a new sandboxed Open vSwitch instance
+usage: $FUNCNAME SANDBOX
+
+where SANDBOX is the name of the new sandbox, which will be created in
+a directory named $sim_base/SANDBOX.
+Afterward, use "as SANDBOX" to execute OVS commands in the sandbox's
+context.
+EOF
+ return 0
+ fi
+ if test $# != 1; then
+ echo >&2 "$FUNCNAME: missing argument (use --help for help)"
+ return 1
+ fi
+
+ # Word-split $1 into the positional parameters (the X placeholder is
+ # shifted away again); anything other than exactly one word is rejected.
+ set X $1; shift
+ if test $# != 1; then
+ echo >&2 "$FUNCNAME: sandbox name must be a single word"
+ return 1
+ fi
+
+ if test -e "$sim_base/$1"; then
+ echo >&2 "$1 already exists"
+ return 1
+ fi
+
+ # Create sandbox.
+ mkdir "$sim_base"/$1 || return 1
+
+ # Options shared by both daemons started below.  Not declared local, so
+ # the variable remains set in the caller's shell.
+ daemon_opts="--detach --no-chdir --pidfile -vconsole:off --log-file"
+
+ # Create database and start ovsdb-server.
+ touch $sim_base/$1/.conf.db.~lock~
+ as $1 ovsdb-tool create $sim_base/$1/conf.db "$sim_srcdir/vswitchd/vswitch.ovsschema"
+ as $1 ovsdb-server $daemon_opts --remote=punix:"$sim_base"/$1/db.sock
+
+ # Initialize database.
+ as $1 ovs-vsctl --no-wait -- init
+
+ # Start ovs-vswitchd.
+ as $1 ovs-vswitchd $daemon_opts --enable-dummy=system -vvconn -vnetdev_dummy
+}
+export -f sim_add
+
+net_add() {
+ if test "$1" == --help; then
+ cat <<EOF
+$FUNCNAME: create a new interconnection network
+usage: $FUNCNAME NETWORK
+
+where NETWORK is the name of the new network. Interconnection networks
+are used with net_attach and ovn_attach.
+EOF
+ return 0
+ fi
+ if test $# != 1; then
+ echo >&2 "$FUNCNAME: missing argument (use --help for help)"
+ return 1
+ fi
+
+ as main ovs-vsctl add-br "$1"
+}
+export -f net_add
+
+net_attach() {
+ if test "$1" == --help; then
+ cat <<EOF
+$FUNCNAME: attach the default sandbox to an interconnection network
+usage: $FUNCNAME NETWORK BRIDGE
+
+Adds a port to BRIDGE within the default sandbox that connects BRIDGE
+to the interconnection network NETWORK. (Use "as" to set the default
+sandbox.)
+EOF
+ return 0
+ fi
+ if test $# != 2; then
+ echo >&2 "$FUNCNAME: wrong number of arguments (use --help for help)"
+ return 1
+ fi
+ if test $sandbox = main; then
+ echo >&2 "$FUNCNAME: can only attach interconnection networks to sandboxes other than main"
+ return 1
+ fi
+
+ local net=$1 bridge=$2
+
+ port=${sandbox}_$bridge
+ as main ovs-vsctl \
+ -- add-port $net "$port" \
+ -- set Interface "$port" options:pstream="punix:$sim_base/main/$port.sock" options:rxq_pcap="$sim_base/main/$port-rx.pcap" options:tx_pcap="$sim_base/main/$port-tx.pcap" options:header=extended
+
+ ovs-vsctl \
+ -- set Interface $bridge options:tx_pcap="$sim_base/$sandbox/$bridge-tx.pcap" options:rxq_pcap="$sim_base/$sandbox/$bridge-rx.pcap" \
+ -- add-port $bridge ${bridge}_$net \
+ -- set Interface ${bridge}_$net options:stream="unix:$sim_base/main/$port.sock" options:rxq_pcap="$sim_base/$sandbox/${bridge}_$net-rx.pcap" options:tx_pcap="$sim_base/$sandbox/${bridge}_$net-tx.pcap" options:header=extended
+}
+export -f net_attach
+
+# Usage: ovn_start
+#
+# Creates the OVN southbound and northbound databases in $sim_base/ovn-sb and
+# $sim_base/ovn-nb, starts one ovsdb-server for each, exports OVN_NB_DB
+# pointing at the northbound socket, and starts ovn-northd in
+# $sim_base/northd.  "ovn_start --help" prints a usage message.  Returns 1 if
+# arguments are given or if OVN has already been started.
+ovn_start() {
+    if test "$1" == --help; then
+        cat <<EOF
+$FUNCNAME: start OVN central databases and daemons
+usage: $FUNCNAME
+
+This creates and initializes the central OVN databases (northbound and
+southbound), starts their ovsdb-server daemons, and starts the ovn-northd
+daemon.
+EOF
+        return 0
+    fi
+    if test $# != 0; then
+        echo >&2 "$FUNCNAME: no arguments accepted (use --help for help)"
+        return 1
+    fi
+
+    # Look under $sim_base rather than the current directory, which the user
+    # or a script may have changed.  Use "return", like the other commands,
+    # so that a repeated call does not terminate the calling shell.
+    if test -d "$sim_base"/ovn-sb || test -d "$sim_base"/ovn-nb; then
+        echo >&2 "OVN already started"
+        return 1
+    fi
+
+    daemon_opts="--detach --no-chdir --pidfile -vconsole:off --log-file"
+    for db in ovn-sb ovn-nb; do
+        mkdir "$sim_base"/$db
+        touch "$sim_base"/$db/.$db.db.~lock~
+        as $db ovsdb-tool create "$sim_base"/$db/$db.db "$sim_srcdir"/ovn/$db.ovsschema
+        as $db ovsdb-server $daemon_opts --remote=punix:"$sim_base"/$db/$db.sock "$sim_base"/$db/$db.db
+    done
+
+    OVN_NB_DB=unix:$sim_base/ovn-nb/ovn-nb.sock; export OVN_NB_DB
+
+    mkdir "$sim_base"/northd
+    as northd ovn-northd $daemon_opts \
+        --ovnnb-db=unix:"$sim_base"/ovn-nb/ovn-nb.sock \
+        --ovnsb-db=unix:"$sim_base"/ovn-sb/ovn-sb.sock
+}
+export -f ovn_start
+
+ovn_attach() {
+ if test "$1" == --help; then
+ cat <<EOF
+$FUNCNAME: attach default sandbox to an interconnection network for OVN
+usage: $FUNCNAME NETWORK BRIDGE IP [MASKLEN]
+
+This starts by doing everything that net_attach does. Then it configures the
+specified IP and MASKLEN (e.g. 192.168.0.1 and 24) on BRIDGE and starts
+and configures ovn-controller.
+
+MASKLEN defaults to 24 if it is not specified.
+EOF
+ return 0
+ fi
+ if test $# != 3 && test $# != 4; then
+ echo >&2 "$FUNCNAME: wrong number of arguments (use --help for help)"
+ return 1
+ fi
+
+ local net=$1 bridge=$2 ip=$3 masklen=${4-24}
+ net_attach $net $bridge || return $?
+
+ ovs-appctl netdev-dummy/ip4addr $bridge $ip/$masklen >/dev/null
+ ovs-appctl ovs/route/add $ip/$masklen $bridge > /dev/null
+ ovs-vsctl \
+ -- set Open_vSwitch . external-ids:system-id=$sandbox \
+ -- set Open_vSwitch . external-ids:ovn-remote=unix:$sim_base/ovn-sb/ovn-sb.sock \
+ -- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+ -- set Open_vSwitch . external-ids:ovn-encap-ip=$ip\
+ -- add-br br-int \
+ -- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+ ovn-controller --detach --no-chdir --pidfile -vconsole:off --log-file
+}
+export -f ovn_attach
+
+# Easy access to OVS manpages.
+mkdir $sim_base/man
+mandir=`cd $sim_base/man && pwd`
+(cd "$sim_builddir" && ${MAKE-make} install-man mandir=$mandir >/dev/null)
+MANPATH=$mandir:; export MANPATH
+
+export scripts
+export interactive
+rc='
+ if [ -f /etc/bashrc ]; then
+ . /etc/bashrc
+ fi
+ if [ -f ~/.bashrc ]; then
+ . ~/.bashrc
+ fi
+
+ trap_signals
+ sim_PS1=$PS1
+ sim_add main
+ as main
+
+ for script in $scripts; do
+ . $script || exit $?
+ done
+
+ $interactive || exit 0
+
+ cat <<EOF
+ ______________________________________________________________________
+|
+| You are running in a nested shell environment meant for Open vSwitch
+| and OVN testing in simulation. The OVS manpages are available via
+| "man". Please see ovs-sim(1) for more information.
+|
+| Exit the shell to kill the running daemons and leave the simulation
+| environment.
+EOF
+'
+
+status=0; bash --rcfile <(echo "$rc") || status=$?
+
+if $interactive; then
+ cat <<EOF
+|______________________________________________________________________
+
+EOF
+fi
+
+exit $status
-/* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+/* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
break;
case OPT_ENABLE_DUMMY:
- dummy_enable(optarg && !strcmp(optarg, "override"));
+ dummy_enable(optarg);
break;
case OPT_DISABLE_SYSTEM: