The addition of Geneve options to packet metadata significantly
expanded its size. It was reported that this can decrease performance
for DPDK ports by up to 25%, because the whole structure must be
initialized every time a packet is received.
It is not really necessary to zero out the entire structure because
miniflow_extract() only copies the tunnel metadata when particular
fields (ip_dst, and opt_map for the tunnel options) indicate that it
is valid. Therefore, as long as we zero out these fields when the
metadata is initialized and ensure that the rest of the structure is
correctly set in the presence of a tunnel, we can avoid touching the
remaining tunnel fields on packet reception.
Reported-by: Ciara Loftus <ciara.loftus@intel.com>
Tested-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>
b->source = source;
b->l2_pad_size = 0;
b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
b->source = source;
b->l2_pad_size = 0;
b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX;
- b->md = PKT_METADATA_INITIALIZER(0);
+ pkt_metadata_init(&b->md, 0);
/* A port in a netdev-based datapath. */
struct dp_netdev_port {
/* A port in a netdev-based datapath. */
struct dp_netdev_port {
- struct pkt_metadata md;
struct netdev *netdev;
struct cmap_node node; /* Node in dp_netdev's 'ports'. */
struct netdev_saved_flags *sf;
struct netdev *netdev;
struct cmap_node node; /* Node in dp_netdev's 'ports'. */
struct netdev_saved_flags *sf;
}
}
port = xzalloc(sizeof *port);
}
}
port = xzalloc(sizeof *port);
- port->md = PKT_METADATA_INITIALIZER(port_no);
+ port->port_no = port_no;
port->netdev = netdev;
port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
port->type = xstrdup(type);
port->netdev = netdev;
port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev));
port->type = xstrdup(type);
struct dp_netdev_port *port;
CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
struct dp_netdev_port *port;
CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) {
- if (port->md.in_port.odp_port == port_no) {
+ if (port->port_no == port_no) {
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
OVS_REQUIRES(dp->port_mutex)
{
do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port)
OVS_REQUIRES(dp->port_mutex)
{
- cmap_remove(&dp->ports, &port->node,
- hash_odp_port(port->md.in_port.odp_port));
+ cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no));
seq_change(dp->port_seq);
if (netdev_is_pmd(port->netdev)) {
int numa_id = netdev_get_numa_id(port->netdev);
seq_change(dp->port_seq);
if (netdev_is_pmd(port->netdev)) {
int numa_id = netdev_get_numa_id(port->netdev);
{
dpif_port->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->type = xstrdup(port->type);
{
dpif_port->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->type = xstrdup(port->type);
- dpif_port->port_no = port->md.in_port.odp_port;
+ dpif_port->port_no = port->port_no;
state->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->name = state->name;
dpif_port->type = port->type;
state->name = xstrdup(netdev_get_name(port->netdev));
dpif_port->name = state->name;
dpif_port->type = port->type;
- dpif_port->port_no = port->md.in_port.odp_port;
+ dpif_port->port_no = port->port_no;
/* XXX: initialize md in netdev implementation. */
for (i = 0; i < cnt; i++) {
/* XXX: initialize md in netdev implementation. */
for (i = 0; i < cnt; i++) {
- packets[i]->md = port->md;
+ pkt_metadata_init(&packets[i]->md, port->port_no);
}
cycles_count_start(pmd);
dp_netdev_input(pmd, packets, cnt);
}
cycles_count_start(pmd);
dp_netdev_input(pmd, packets, cnt);
- cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port));
+ cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no));
ovsrcu_postpone(free, old_port);
/* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
new_port = xmemdup(old_port, sizeof *old_port);
ovsrcu_postpone(free, old_port);
/* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */
new_port = xmemdup(old_port, sizeof *old_port);
- new_port->md.in_port.odp_port = port_no;
+ new_port->port_no = port_no;
cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));
seq_change(dp->port_seq);
cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no));
seq_change(dp->port_seq);
ovs_mutex_lock(&dp->port_mutex);
if (get_port_by_name(dp, argv[2], &port)) {
unixctl_command_reply_error(conn, "unknown port");
ovs_mutex_lock(&dp->port_mutex);
if (get_port_by_name(dp, argv[2], &port)) {
unixctl_command_reply_error(conn, "unknown port");
- } else if (port->md.in_port.odp_port == ODPP_LOCAL) {
+ } else if (port->port_no == ODPP_LOCAL) {
unixctl_command_reply_error(conn, "can't delete local port");
} else {
do_del_port(dp, port);
unixctl_command_reply_error(conn, "can't delete local port");
} else {
do_del_port(dp, port);
+static void
+pkt_metadata_init_tnl(struct pkt_metadata *md)
+{
+ memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata));
+
+ /* The memset() above clears only the part of 'md' that precedes
+ * 'tunnel.metadata'; everything from 'tunnel.metadata' onward is left
+ * as-is, apart from 'opt_map', which is reset below.
+ *
+ * If 'opt_map' is zero then none of the rest of the tunnel metadata
+ * will be read, so we can skip clearing it. */
+ md->tunnel.metadata.opt_map = 0;
+}
+
static int
netdev_gre_pop_header(struct dp_packet *packet)
{
static int
netdev_gre_pop_header(struct dp_packet *packet)
{
int hlen = sizeof(struct eth_header) +
sizeof(struct ip_header) + 4;
int hlen = sizeof(struct eth_header) +
sizeof(struct ip_header) + 4;
- memset(md, 0, sizeof *md);
+ pkt_metadata_init_tnl(md);
if (hlen > dp_packet_size(packet)) {
return EINVAL;
}
if (hlen > dp_packet_size(packet)) {
return EINVAL;
}
struct flow_tnl *tnl = &md->tunnel;
struct vxlanhdr *vxh;
struct flow_tnl *tnl = &md->tunnel;
struct vxlanhdr *vxh;
- memset(md, 0, sizeof *md);
+ pkt_metadata_init_tnl(md);
if (VXLAN_HLEN > dp_packet_size(packet)) {
return EINVAL;
}
if (VXLAN_HLEN > dp_packet_size(packet)) {
return EINVAL;
}
unsigned int hlen;
int err;
unsigned int hlen;
int err;
- memset(md, 0, sizeof *md);
+ pkt_metadata_init_tnl(md);
if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
(unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
if (GENEVE_BASE_HLEN > dp_packet_size(packet)) {
VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n",
(unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet));
for (i = 0; i < cnt; i++) {
netdev->netdev_class->push_header(buffers[i], data);
for (i = 0; i < cnt; i++) {
netdev->netdev_class->push_header(buffers[i], data);
- buffers[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port));
+ pkt_metadata_init(&buffers[i]->md, u32_to_odp(data->out_port));
enum odp_key_fitness
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
{
enum odp_key_fitness
odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)
{
+ memset(tun, 0, sizeof *tun);
return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun);
}
return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun);
}
1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL |
1u << OVS_KEY_ATTR_IN_PORT;
1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL |
1u << OVS_KEY_ATTR_IN_PORT;
- *md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+ pkt_metadata_init(md, ODPP_NONE);
NL_ATTR_FOR_EACH (nla, left, key, key_len) {
uint16_t type = nl_attr_type(nla);
NL_ATTR_FOR_EACH (nla, left, key, key_len) {
uint16_t type = nl_attr_type(nla);
/* Tunnel information used in flow key and metadata. */
struct flow_tnl {
ovs_be64 tun_id;
/* Tunnel information used in flow key and metadata. */
struct flow_tnl {
ovs_be64 tun_id;
uint16_t flags;
uint8_t ip_tos;
uint8_t ip_ttl;
uint16_t flags;
uint8_t ip_tos;
uint8_t ip_ttl;
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
};
struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
};
-#define PKT_METADATA_INITIALIZER(PORT) \
- (struct pkt_metadata){ .in_port.odp_port = PORT }
+static inline void
+pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
+{
+ /* Initializes 'md' with 'port' as its input port: every field that
+ * precedes 'tunnel' is zeroed by the memset() below, and then
+ * 'in_port.odp_port' is set to 'port'. The assignment has to come
+ * after the memset(), which would otherwise wipe it out.
+ *
+ * It can be expensive to zero out all of the tunnel metadata. However,
+ * we can just zero out ip_dst and the rest of the data will never be
+ * looked at. */
+ memset(md, 0, offsetof(struct pkt_metadata, tunnel));
+ md->tunnel.ip_dst = 0;
+
+ md->in_port.odp_port = port;
+}
bool dpid_from_string(const char *s, uint64_t *dpidp);
bool dpid_from_string(const char *s, uint64_t *dpidp);
* (not necessarily even contiguous), and finding it requires referring to
* 'tab'. */
struct tun_metadata {
* (not necessarily even contiguous), and finding it requires referring to
* 'tab'. */
struct tun_metadata {
- uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
uint64_t opt_map; /* 1-bit for each present TLV. */
uint64_t opt_map; /* 1-bit for each present TLV. */
+ uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */
struct tun_table *tab; /* Types & lengths for 'opts' and 'opt_map'. */
uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */
};
struct tun_table *tab; /* Types & lengths for 'opts' and 'opt_map'. */
uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */
};
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
if (upcall->out_tun_key) {
memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
if (upcall->out_tun_key) {
- memset(&output_tunnel_key, 0, sizeof output_tunnel_key);
odp_tun_key_from_attr(upcall->out_tun_key,
&output_tunnel_key);
}
odp_tun_key_from_attr(upcall->out_tun_key,
&output_tunnel_key);
}
- packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+ pkt_metadata_init(&packet->md, ODPP_NONE);
flow_extract(packet, &flow);
if (flow.dl_type == htons(ETH_TYPE_IP)
&& flow.nw_proto == IPPROTO_TCP
flow_extract(packet, &flow);
if (flow.dl_type == htons(ETH_TYPE_IP)
&& flow.nw_proto == IPPROTO_TCP
ovs_fatal(error, "%s: read failed", ctx->argv[1]);
}
ovs_fatal(error, "%s: read failed", ctx->argv[1]);
}
- packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE);
+ pkt_metadata_init(&packet->md, ODPP_NONE);
flow_extract(packet, &flow);
flow_print(stdout, &flow);
putchar('\n');
flow_extract(packet, &flow);
flow_print(stdout, &flow);
putchar('\n');