/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
+ * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
#include "netdev.h"
#include "netdev-linux.h"
#include "netdev-vport.h"
+#include "netlink-conntrack.h"
#include "netlink-notifier.h"
#include "netlink-socket.h"
#include "netlink.h"
#include "timeval.h"
#include "unaligned.h"
#include "util.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(dpif_netlink);
#ifdef _WIN32
struct dpif_flow_stats *);
static void dpif_netlink_flow_to_dpif_flow(struct dpif *, struct dpif_flow *,
const struct dpif_netlink_flow *);
-static bool dpif_netlink_check_ufid(struct dpif *dpif);
/* One of the dpif channels between the kernel and userspace. */
struct dpif_channel {
/* Change notification. */
struct nl_sock *port_notifier; /* vport multicast group subscriber. */
bool refresh_channels;
-
- /* If the datapath supports indexing flows using unique identifiers, then
- * we can reduce the size of netlink messages by omitting fields like the
- * flow key during flow operations. */
- bool ufid_supported;
};
static void report_loss(struct dpif_netlink *, struct dpif_channel *,
dp->dp_ifindex, dp->dp_ifindex);
dpif->dp_ifindex = dp->dp_ifindex;
- dpif->ufid_supported = dpif_netlink_check_ufid(&dpif->dpif);
*dpifp = &dpif->dpif;
return 0;
vport_request.cmd = OVS_VPORT_CMD_SET;
vport_request.dp_ifindex = dpif->dp_ifindex;
vport_request.port_no = u32_to_odp(i);
+ vport_request.n_upcall_pids = 1;
vport_request.upcall_pids = &upcall_pids;
dpif_netlink_vport_transact(&vport_request, NULL, NULL);
case OVS_VPORT_TYPE_GRE:
return "gre";
- case OVS_VPORT_TYPE_GRE64:
- return "gre64";
-
case OVS_VPORT_TYPE_VXLAN:
return "vxlan";
case OVS_VPORT_TYPE_LISP:
return "lisp";
+ case OVS_VPORT_TYPE_STT:
+ return "stt";
+
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
break;
return OVS_VPORT_TYPE_NETDEV;
} else if (!strcmp(type, "internal")) {
return OVS_VPORT_TYPE_INTERNAL;
+ } else if (strstr(type, "stt")) {
+ return OVS_VPORT_TYPE_STT;
} else if (!strcmp(type, "geneve")) {
return OVS_VPORT_TYPE_GENEVE;
- } else if (strstr(type, "gre64")) {
- return OVS_VPORT_TYPE_GRE64;
} else if (strstr(type, "gre")) {
return OVS_VPORT_TYPE_GRE;
} else if (!strcmp(type, "vxlan")) {
}
tnl_cfg = netdev_get_tunnel_config(netdev);
- if (tnl_cfg && tnl_cfg->dst_port != 0) {
+ if (tnl_cfg && (tnl_cfg->dst_port != 0 || tnl_cfg->exts)) {
ofpbuf_use_stack(&options, options_stub, sizeof options_stub);
- nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
- ntohs(tnl_cfg->dst_port));
- request.options = ofpbuf_data(&options);
- request.options_len = ofpbuf_size(&options);
+ if (tnl_cfg->dst_port) {
+ nl_msg_put_u16(&options, OVS_TUNNEL_ATTR_DST_PORT,
+ ntohs(tnl_cfg->dst_port));
+ }
+        if (tnl_cfg->exts) {
+            size_t ext_ofs;
+            int i;
+
+            /* Emit one flag attribute per bit set in 'exts', all nested
+             * inside a single OVS_TUNNEL_ATTR_EXTENSION attribute.  The bit
+             * index doubles as the flag's attribute type. */
+            ext_ofs = nl_msg_start_nested(&options, OVS_TUNNEL_ATTR_EXTENSION);
+            for (i = 0; i < 32; i++) {
+                /* Use an unsigned one: '1 << 31' shifts into the sign bit
+                 * of a 32-bit signed int, which is undefined behavior. */
+                if (tnl_cfg->exts & (1u << i)) {
+                    nl_msg_put_flag(&options, i);
+                }
+            }
+            nl_msg_end_nested(&options, ext_ofs);
+        }
+ request.options = options.data;
+ request.options_len = options.size;
}
request.port_no = *port_nop;
const struct dpif_flow_del *del,
struct dpif_netlink_flow *request)
{
- return dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
- del->ufid, dpif->ufid_supported,
- request);
-}
-
-static int
-dpif_netlink_flow_del(struct dpif_netlink *dpif,
- const struct nlattr *key, size_t key_len,
- const ovs_u128 *ufid, bool terse)
-{
- struct dpif_netlink_flow request;
-
- dpif_netlink_init_flow_del__(dpif, key, key_len, ufid, terse, &request);
-
- /* Ignore stats */
- return dpif_netlink_flow_transact(&request, NULL, NULL);
+ dpif_netlink_init_flow_del__(dpif, del->key, del->key_len,
+ del->ufid, del->terse, request);
}
struct dpif_netlink_flow_dump {
}
static struct dpif_flow_dump *
-dpif_netlink_flow_dump_create(const struct dpif *dpif_)
+dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
{
const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
struct dpif_netlink_flow_dump *dump;
dpif_netlink_flow_init(&request);
request.cmd = OVS_FLOW_CMD_GET;
request.dp_ifindex = dpif->dp_ifindex;
+ request.ufid_present = false;
+ request.ufid_terse = terse;
buf = ofpbuf_new(1024);
dpif_netlink_flow_to_ofpbuf(&request, buf);
nl_dump_start(&dump->nl_dump, NETLINK_GENERIC, buf);
ofpbuf_delete(buf);
atomic_init(&dump->status, 0);
+ dump->up.terse = terse;
return &dump->up;
}
dpif_flow->actions = datapath_flow->actions;
dpif_flow->actions_len = datapath_flow->actions_len;
dpif_flow->ufid_present = datapath_flow->ufid_present;
+ dpif_flow->pmd_id = PMD_ID_NULL;
if (datapath_flow->ufid_present) {
dpif_flow->ufid = datapath_flow->ufid;
} else {
n_flows = 0;
while (!n_flows
- || (n_flows < max_flows && ofpbuf_size(&thread->nl_flows))) {
+ || (n_flows < max_flows && thread->nl_flows.size)) {
struct dpif_netlink_flow datapath_flow;
struct ofpbuf nl_flow;
int error;
break;
}
- if (datapath_flow.actions) {
- /* Common case: the flow includes actions. */
+ if (dump->up.terse || datapath_flow.actions) {
+ /* Common case: we don't want actions, or the flow includes
+ * actions. */
dpif_netlink_flow_to_dpif_flow(&dpif->dpif, &flows[n_flows++],
&datapath_flow);
} else {
size_t key_ofs;
ofpbuf_prealloc_tailroom(buf, (64
- + ofpbuf_size(d_exec->packet)
+ + dp_packet_size(d_exec->packet)
+ ODP_KEY_METADATA_SIZE
+ d_exec->actions_len));
k_exec->dp_ifindex = dp_ifindex;
nl_msg_put_unspec(buf, OVS_PACKET_ATTR_PACKET,
- ofpbuf_data(d_exec->packet),
- ofpbuf_size(d_exec->packet));
+ dp_packet_data(d_exec->packet),
+ dp_packet_size(d_exec->packet));
key_ofs = nl_msg_start_nested(buf, OVS_PACKET_ATTR_KEY);
- odp_key_from_pkt_metadata(buf, &d_exec->md);
+ odp_key_from_pkt_metadata(buf, &d_exec->packet->md);
nl_msg_end_nested(buf, key_ofs);
nl_msg_put_unspec(buf, OVS_PACKET_ATTR_ACTIONS,
d_exec->actions, d_exec->actions_len);
if (d_exec->probe) {
- nl_msg_put_flag(buf, OVS_FLOW_ATTR_PROBE);
+ nl_msg_put_flag(buf, OVS_PACKET_ATTR_PROBE);
+ }
+ if (d_exec->mtu) {
+ nl_msg_put_u16(buf, OVS_PACKET_ATTR_MRU, d_exec->mtu);
}
}
-#define MAX_OPS 50
-
-static void
+/* Executes, against 'dpif', up to the first 'n_ops' operations in 'ops'.
+ * Returns the number actually executed (at least 1, if 'n_ops' is
+ * positive). */
+static size_t
dpif_netlink_operate__(struct dpif_netlink *dpif,
struct dpif_op **ops, size_t n_ops)
{
+ enum { MAX_OPS = 50 };
+
struct op_auxdata {
struct nl_transaction txn;
struct nl_transaction *txnsp[MAX_OPS];
size_t i;
- ovs_assert(n_ops <= MAX_OPS);
+ n_ops = MIN(n_ops, MAX_OPS);
for (i = 0; i < n_ops; i++) {
struct op_auxdata *aux = &auxes[i];
struct dpif_op *op = ops[i];
struct dpif_flow_put *put;
struct dpif_flow_del *del;
- struct dpif_execute *execute;
struct dpif_flow_get *get;
struct dpif_netlink_flow flow;
break;
case DPIF_OP_EXECUTE:
- execute = &op->u.execute;
- dpif_netlink_encode_execute(dpif->dp_ifindex, execute,
- &aux->request);
+ /* Can't execute a packet that won't fit in a Netlink attribute. */
+ if (OVS_UNLIKELY(nl_attr_oversized(
+ dp_packet_size(op->u.execute.packet)))) {
+ /* Report an error immediately if this is the first operation.
+ * Otherwise the easiest thing to do is to postpone to the next
+ * call (when this will be the first operation). */
+ if (i == 0) {
+ VLOG_ERR_RL(&error_rl,
+ "dropping oversized %"PRIu32"-byte packet",
+ dp_packet_size(op->u.execute.packet));
+ op->error = ENOBUFS;
+ return 1;
+ }
+ n_ops = i;
+ } else {
+ dpif_netlink_encode_execute(dpif->dp_ifindex, &op->u.execute,
+ &aux->request);
+ }
break;
case DPIF_OP_FLOW_GET:
ofpbuf_uninit(&aux->request);
ofpbuf_uninit(&aux->reply);
}
+
+ return n_ops;
}
static void
struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
while (n_ops > 0) {
- size_t chunk = MIN(n_ops, MAX_OPS);
- dpif_netlink_operate__(dpif, ops, chunk);
+ size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
ops += chunk;
n_ops -= chunk;
}
}
#endif
-/* Checks support for unique flow identifiers. */
-static bool
-dpif_netlink_check_ufid(struct dpif *dpif_)
-{
- struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
- struct flow flow;
- struct odputil_keybuf keybuf;
- struct ofpbuf key, *replybuf;
- struct dpif_netlink_flow reply;
- ovs_u128 ufid;
- int error;
- bool enable_ufid = false;
-
- memset(&flow, 0, sizeof flow);
- flow.dl_type = htons(0x1234);
-
- ofpbuf_use_stack(&key, &keybuf, sizeof keybuf);
- odp_flow_key_from_flow(&key, &flow, NULL, 0, true);
- dpif_flow_hash(dpif_, ofpbuf_data(&key), ofpbuf_size(&key), &ufid);
- error = dpif_flow_put(dpif_, DPIF_FP_CREATE | DPIF_FP_PROBE,
- ofpbuf_data(&key), ofpbuf_size(&key), NULL, 0, NULL,
- 0, &ufid, NULL);
-
- if (error && error != EEXIST) {
- VLOG_WARN("%s: UFID feature probe failed (%s).",
- dpif_name(dpif_), ovs_strerror(error));
- goto done;
- }
-
- error = dpif_netlink_flow_get__(dpif, NULL, 0, &ufid, true, &reply,
- &replybuf);
- if (!error && reply.ufid_present && ovs_u128_equal(&ufid, &reply.ufid)) {
- enable_ufid = true;
- }
- ofpbuf_delete(replybuf);
-
- error = dpif_netlink_flow_del(dpif, ofpbuf_data(&key), ofpbuf_size(&key),
- &ufid, false);
- if (error) {
- VLOG_WARN("%s: failed to delete UFID feature probe flow",
- dpif_name(dpif_));
- }
-
-done:
- if (enable_ufid) {
- VLOG_INFO("%s: Datapath supports userspace flow ids",
- dpif_name(dpif_));
- } else {
- VLOG_INFO("%s: Datapath does not support userspace flow ids",
- dpif_name(dpif_));
- }
-
- return enable_ufid;
-}
-
/* Synchronizes 'channels' in 'dpif->handlers' with the set of vports
* currently in 'dpif' in the kernel, by adding a new set of channels for
* any kernel vport that lacks one and deleting any channels that have no
/* OVS_PACKET_CMD_ACTION only. */
[OVS_PACKET_ATTR_USERDATA] = { .type = NL_A_UNSPEC, .optional = true },
[OVS_PACKET_ATTR_EGRESS_TUN_KEY] = { .type = NL_A_NESTED, .optional = true },
+ [OVS_PACKET_ATTR_ACTIONS] = { .type = NL_A_NESTED, .optional = true },
+ [OVS_PACKET_ATTR_MRU] = { .type = NL_A_U16, .optional = true }
};
struct ovs_header *ovs_header;
struct ofpbuf b;
int type;
- ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf));
+ ofpbuf_use_const(&b, buf->data, buf->size);
nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
genl = ofpbuf_try_pull(&b, sizeof *genl);
dpif_flow_hash(&dpif->dpif, upcall->key, upcall->key_len, &upcall->ufid);
upcall->userdata = a[OVS_PACKET_ATTR_USERDATA];
upcall->out_tun_key = a[OVS_PACKET_ATTR_EGRESS_TUN_KEY];
+ upcall->actions = a[OVS_PACKET_ATTR_ACTIONS];
+ upcall->mru = a[OVS_PACKET_ATTR_MRU];
/* Allow overwriting the netlink attribute header without reallocating. */
- ofpbuf_use_stub(&upcall->packet,
+ dp_packet_use_stub(&upcall->packet,
CONST_CAST(struct nlattr *,
nl_attr_get(a[OVS_PACKET_ATTR_PACKET])) - 1,
nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]) +
sizeof(struct nlattr));
- ofpbuf_set_data(&upcall->packet,
- (char *)ofpbuf_data(&upcall->packet) + sizeof(struct nlattr));
- ofpbuf_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
+ dp_packet_set_data(&upcall->packet,
+ (char *)dp_packet_data(&upcall->packet) + sizeof(struct nlattr));
+ dp_packet_set_size(&upcall->packet, nl_attr_get_size(a[OVS_PACKET_ATTR_PACKET]));
*dp_ifindex = ovs_header->dp_ifindex;
return version_str;
}
+#ifdef __linux__
+/* Conntrack dump state for this dpif provider.  Callers are handed a pointer
+ * to the embedded 'up' member; the dump_next/dump_done callbacks recover the
+ * enclosing structure from it with INIT_CONTAINER(). */
+struct dpif_netlink_ct_dump_state {
+ struct ct_dpif_dump_state up; /* Generic handle returned via '*dump_'. */
+ struct nl_ct_dump_state *nl_ct_dump; /* Set by nl_ct_dump_start(). */
+};
+
+/* Starts a conntrack dump.  Allocates zeroed dump state and passes 'zone'
+ * through to nl_ct_dump_start().  On success stores the generic handle in
+ * '*dump_' and returns 0.  If nl_ct_dump_start() returns an error, frees the
+ * state, leaves '*dump_' untouched, and returns that error. */
+static int
+dpif_netlink_ct_dump_start(struct dpif *dpif OVS_UNUSED,
+                           struct ct_dpif_dump_state **dump_,
+                           const uint16_t *zone)
+{
+    struct dpif_netlink_ct_dump_state *state = xzalloc(sizeof *state);
+    int error = nl_ct_dump_start(&state->nl_ct_dump, zone);
+
+    if (!error) {
+        /* Callers only ever see the embedded generic handle. */
+        *dump_ = &state->up;
+        return 0;
+    }
+
+    free(state);
+    return error;
+}
+
+/* Advances the dump identified by 'dump_': recovers the provider state that
+ * embeds it and forwards to nl_ct_dump_next(), which fills in 'entry'.
+ * Returns nl_ct_dump_next()'s result unchanged. */
+static int
+dpif_netlink_ct_dump_next(struct dpif *dpif OVS_UNUSED,
+                          struct ct_dpif_dump_state *dump_,
+                          struct ct_dpif_entry *entry)
+{
+    struct dpif_netlink_ct_dump_state *state;
+
+    /* 'dump_' points at the 'up' member of our own state structure. */
+    INIT_CONTAINER(state, dump_, up);
+    return nl_ct_dump_next(state->nl_ct_dump, entry);
+}
+
+/* Finishes the dump identified by 'dump_': calls nl_ct_dump_done() on the
+ * wrapped nl_ct state, then frees the provider state whether or not that
+ * call succeeded.  Returns nl_ct_dump_done()'s result. */
+static int
+dpif_netlink_ct_dump_done(struct dpif *dpif OVS_UNUSED,
+                          struct ct_dpif_dump_state *dump_)
+{
+    struct dpif_netlink_ct_dump_state *state;
+    int retval;
+
+    INIT_CONTAINER(state, dump_, up);
+    retval = nl_ct_dump_done(state->nl_ct_dump);
+    free(state);
+
+    return retval;
+}
+
+/* Dispatches a conntrack flush: nl_ct_flush_zone(*zone) when 'zone' is
+ * nonnull, otherwise the zone-less nl_ct_flush().  Returns the result of
+ * whichever call was made. */
+static int
+dpif_netlink_ct_flush(struct dpif *dpif OVS_UNUSED, const uint16_t *zone)
+{
+    return zone ? nl_ct_flush_zone(*zone) : nl_ct_flush();
+}
+#endif
+
const struct dpif_class dpif_netlink_class = {
"system",
+ NULL, /* init */
dpif_netlink_enumerate,
NULL,
dpif_netlink_open,
dpif_netlink_recv,
dpif_netlink_recv_wait,
dpif_netlink_recv_purge,
+ NULL, /* register_dp_purge_cb */
NULL, /* register_upcall_cb */
NULL, /* enable_upcall */
NULL, /* disable_upcall */
dpif_netlink_get_datapath_version, /* get_datapath_version */
+#ifdef __linux__
+ dpif_netlink_ct_dump_start,
+ dpif_netlink_ct_dump_next,
+ dpif_netlink_ct_dump_done,
+ dpif_netlink_ct_flush,
+#else
+ NULL, /* ct_dump_start */
+ NULL, /* ct_dump_next */
+ NULL, /* ct_dump_done */
+ NULL, /* ct_flush */
+#endif
};
static int
dpif_netlink_vport_init(vport);
- ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf));
+ ofpbuf_use_const(&b, buf->data, buf->size);
nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
genl = ofpbuf_try_pull(&b, sizeof *genl);
ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
dpif_netlink_dp_init(dp);
- ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf));
+ ofpbuf_use_const(&b, buf->data, buf->size);
nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
genl = ofpbuf_try_pull(&b, sizeof *genl);
ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
dpif_netlink_flow_init(flow);
- ofpbuf_use_const(&b, ofpbuf_data(buf), ofpbuf_size(buf));
+ ofpbuf_use_const(&b, buf->data, buf->size);
nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
genl = ofpbuf_try_pull(&b, sizeof *genl);
ovs_header = ofpbuf_try_pull(&b, sizeof *ovs_header);
OVS_UFID_F_OMIT_KEY | OVS_UFID_F_OMIT_MASK
| OVS_UFID_F_OMIT_ACTIONS);
}
- if (flow->key_len) {
- nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY, flow->key, flow->key_len);
- }
-
- if (flow->mask_len) {
- nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK, flow->mask, flow->mask_len);
- }
+ if (!flow->ufid_terse || !flow->ufid_present) {
+ if (flow->key_len) {
+ nl_msg_put_unspec(buf, OVS_FLOW_ATTR_KEY,
+ flow->key, flow->key_len);
+ }
- if (flow->actions || flow->actions_len) {
- nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
- flow->actions, flow->actions_len);
+ if (flow->mask_len) {
+ nl_msg_put_unspec(buf, OVS_FLOW_ATTR_MASK,
+ flow->mask, flow->mask_len);
+ }
+ if (flow->actions || flow->actions_len) {
+ nl_msg_put_unspec(buf, OVS_FLOW_ATTR_ACTIONS,
+ flow->actions, flow->actions_len);
+ }
}
/* We never need to send these to the kernel. */