From 35303d715b1f0db46e6a27146815061a60385dc6 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 30 Jun 2015 19:19:40 -0700 Subject: [PATCH] tunnels: Don't initialize unnecessary packet metadata. The addition of Geneve options to packet metadata significantly expanded its size. It was reported that this can decrease performance for DPDK ports by up to 25% since we need to initialize the whole structure on each packet receive. It is not really necessary to zero out the entire structure because miniflow_extract() only copies the tunnel metadata when particular fields indicate that it is valid. Therefore, as long as we zero out these fields when the metadata is initialized and ensure that the rest of the structure is correctly set in the presence of a tunnel, we can avoid touching the tunnel fields on packet reception. Reported-by: Ciara Loftus Tested-by: Ciara Loftus Signed-off-by: Jesse Gross Acked-by: Ben Pfaff --- lib/dp-packet.c | 2 +- lib/dpif-netdev.c | 21 ++++++++++----------- lib/netdev-vport.c | 16 +++++++++++++--- lib/netdev.c | 2 +- lib/odp-util.c | 3 ++- lib/packets.h | 15 ++++++++++++--- lib/tun-metadata.h | 2 +- ofproto/ofproto-dpif-upcall.c | 1 - utilities/ovs-ofctl.c | 4 ++-- 9 files changed, 42 insertions(+), 24 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 31fe9d330..098f81656 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -29,7 +29,7 @@ dp_packet_init__(struct dp_packet *b, size_t allocated, enum dp_packet_source so b->source = source; b->l2_pad_size = 0; b->l2_5_ofs = b->l3_ofs = b->l4_ofs = UINT16_MAX; - b->md = PKT_METADATA_INITIALIZER(0); + pkt_metadata_init(&b->md, 0); } static void diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index dd6bb1f55..d1465462f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -235,7 +235,7 @@ enum pmd_cycles_counter_type { /* A port in a netdev-based datapath. */ struct dp_netdev_port { - struct pkt_metadata md; + odp_port_t port_no; struct netdev *netdev; struct cmap_node node; /* Node in dp_netdev's 'ports'. */ struct netdev_saved_flags *sf; @@ -1085,7 +1085,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, const char *type, } } port = xzalloc(sizeof *port); - port->md = PKT_METADATA_INITIALIZER(port_no); + port->port_no = port_no; port->netdev = netdev; port->rxq = xmalloc(sizeof *port->rxq * netdev_n_rxq(netdev)); port->type = xstrdup(type); @@ -1190,7 +1190,7 @@ dp_netdev_lookup_port(const struct dp_netdev *dp, odp_port_t port_no) struct dp_netdev_port *port; CMAP_FOR_EACH_WITH_HASH (port, node, hash_port_no(port_no), &dp->ports) { - if (port->md.in_port.odp_port == port_no) { + if (port->port_no == port_no) { return port; } } @@ -1300,8 +1300,7 @@ static void do_del_port(struct dp_netdev *dp, struct dp_netdev_port *port) OVS_REQUIRES(dp->port_mutex) { - cmap_remove(&dp->ports, &port->node, - hash_odp_port(port->md.in_port.odp_port)); + cmap_remove(&dp->ports, &port->node, hash_odp_port(port->port_no)); seq_change(dp->port_seq); if (netdev_is_pmd(port->netdev)) { int numa_id = netdev_get_numa_id(port->netdev); @@ -1323,7 +1322,7 @@ answer_port_query(const struct dp_netdev_port *port, { dpif_port->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->type = xstrdup(port->type); - dpif_port->port_no = port->md.in_port.odp_port; + dpif_port->port_no = port->port_no; } static int @@ -1450,7 +1449,7 @@ dpif_netdev_port_dump_next(const struct dpif *dpif, void *state_, state->name = xstrdup(netdev_get_name(port->netdev)); dpif_port->name = state->name; dpif_port->type = port->type; - dpif_port->port_no = port->md.in_port.odp_port; + dpif_port->port_no = port->port_no; retval = 0; } else { @@ -2540,7 +2539,7 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd, /* XXX: initialize md in netdev implementation. */ for (i = 0; i < cnt; i++) { - packets[i]->md = port->md; + pkt_metadata_init(&packets[i]->md, port->port_no); } cycles_count_start(pmd); dp_netdev_input(pmd, packets, cnt); @@ -3652,12 +3651,12 @@ dpif_dummy_change_port_number(struct unixctl_conn *conn, int argc OVS_UNUSED, } /* Remove old port. */ - cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->md.in_port.odp_port)); + cmap_remove(&dp->ports, &old_port->node, hash_port_no(old_port->port_no)); ovsrcu_postpone(free, old_port); /* Insert new port (cmap semantics mean we cannot re-insert 'old_port'). */ new_port = xmemdup(old_port, sizeof *old_port); - new_port->md.in_port.odp_port = port_no; + new_port->port_no = port_no; cmap_insert(&dp->ports, &new_port->node, hash_port_no(port_no)); seq_change(dp->port_seq); @@ -3688,7 +3687,7 @@ dpif_dummy_delete_port(struct unixctl_conn *conn, int argc OVS_UNUSED, ovs_mutex_lock(&dp->port_mutex); if (get_port_by_name(dp, argv[2], &port)) { unixctl_command_reply_error(conn, "unknown port"); - } else if (port->md.in_port.odp_port == ODPP_LOCAL) { + } else if (port->port_no == ODPP_LOCAL) { unixctl_command_reply_error(conn, "can't delete local port"); } else { do_del_port(dp, port); diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 259d0ed4d..a3394dd63 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -1051,6 +1051,16 @@ parse_gre_header(struct dp_packet *packet, return hlen; } +static void +pkt_metadata_init_tnl(struct pkt_metadata *md) +{ + memset(md, 0, offsetof(struct pkt_metadata, tunnel.metadata)); + + /* If 'opt_map' is zero then none of the rest of the tunnel metadata + * will be read, so we can skip clearing it. */ + md->tunnel.metadata.opt_map = 0; +} + static int netdev_gre_pop_header(struct dp_packet *packet) { @@ -1059,7 +1069,7 @@ netdev_gre_pop_header(struct dp_packet *packet) int hlen = sizeof(struct eth_header) + sizeof(struct ip_header) + 4; - memset(md, 0, sizeof *md); + pkt_metadata_init_tnl(md); if (hlen > dp_packet_size(packet)) { return EINVAL; } @@ -1143,7 +1153,7 @@ netdev_vxlan_pop_header(struct dp_packet *packet) struct flow_tnl *tnl = &md->tunnel; struct vxlanhdr *vxh; - memset(md, 0, sizeof *md); + pkt_metadata_init_tnl(md); if (VXLAN_HLEN > dp_packet_size(packet)) { return EINVAL; } @@ -1201,7 +1211,7 @@ netdev_geneve_pop_header(struct dp_packet *packet) unsigned int hlen; int err; - memset(md, 0, sizeof *md); + pkt_metadata_init_tnl(md); if (GENEVE_BASE_HLEN > dp_packet_size(packet)) { VLOG_WARN_RL(&err_rl, "geneve packet too small: min header=%u packet size=%u\n", (unsigned int)GENEVE_BASE_HLEN, dp_packet_size(packet)); diff --git a/lib/netdev.c b/lib/netdev.c index 42e5d8aed..4819ae9e3 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -790,7 +790,7 @@ netdev_push_header(const struct netdev *netdev, for (i = 0; i < cnt; i++) { netdev->netdev_class->push_header(buffers[i], data); - buffers[i]->md = PKT_METADATA_INITIALIZER(u32_to_odp(data->out_port)); + pkt_metadata_init(&buffers[i]->md, u32_to_odp(data->out_port)); } return 0; diff --git a/lib/odp-util.c b/lib/odp-util.c index c70ee0f24..bce8f6b06 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1463,6 +1463,7 @@ odp_tun_key_from_attr__(const struct nlattr *attr, enum odp_key_fitness odp_tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun) { + memset(tun, 0, sizeof *tun); return odp_tun_key_from_attr__(attr, NULL, 0, NULL, tun); } @@ -3666,7 +3667,7 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, 1u << OVS_KEY_ATTR_SKB_MARK | 1u << OVS_KEY_ATTR_TUNNEL | 1u << OVS_KEY_ATTR_IN_PORT; - *md = PKT_METADATA_INITIALIZER(ODPP_NONE); + pkt_metadata_init(md, ODPP_NONE); NL_ATTR_FOR_EACH (nla, left, key, key_len) { uint16_t type = nl_attr_type(nla); diff --git a/lib/packets.h b/lib/packets.h index c401d4e69..311589cbd 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -35,8 +35,8 @@ struct ds; /* Tunnel information used in flow key and metadata. */ struct flow_tnl { ovs_be64 tun_id; - ovs_be32 ip_src; ovs_be32 ip_dst; + ovs_be32 ip_src; uint16_t flags; uint8_t ip_tos; uint8_t ip_ttl; @@ -69,8 +69,17 @@ struct pkt_metadata { struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */ }; -#define PKT_METADATA_INITIALIZER(PORT) \ - (struct pkt_metadata){ .in_port.odp_port = PORT } +static inline void +pkt_metadata_init(struct pkt_metadata *md, odp_port_t port) +{ + /* It can be expensive to zero out all of the tunnel metadata. However, + * we can just zero out ip_dst and the rest of the data will never be + * looked at. */ + memset(md, 0, offsetof(struct pkt_metadata, tunnel)); + md->tunnel.ip_dst = 0; + + md->in_port.odp_port = port; +} bool dpid_from_string(const char *s, uint64_t *dpidp); diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h index a1fbc0ab6..56bdf2a52 100644 --- a/lib/tun-metadata.h +++ b/lib/tun-metadata.h @@ -43,8 +43,8 @@ struct geneve_opt; * (not necessarily even contiguous), and finding it requires referring to * 'tab'. */ struct tun_metadata { - uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */ uint64_t opt_map; /* 1-bit for each present TLV. */ + uint8_t opts[TUN_METADATA_TOT_OPT_SIZE]; /* Values from tunnel TLVs. */ struct tun_table *tab; /* Types & lengths for 'opts' and 'opt_map'. */ uint8_t pad[sizeof(uint64_t) - sizeof(struct tun_table *)]; /* Make 8 bytes */ }; diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 85f53e865..84a761acf 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1134,7 +1134,6 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix); if (upcall->out_tun_key) { - memset(&output_tunnel_key, 0, sizeof output_tunnel_key); odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key); } diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 5af1f13a4..6ef70702b 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -2018,7 +2018,7 @@ ofctl_ofp_parse_pcap(struct ovs_cmdl_context *ctx) if (error) { break; } - packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE); + pkt_metadata_init(&packet->md, ODPP_NONE); flow_extract(packet, &flow); if (flow.dl_type == htons(ETH_TYPE_IP) && flow.nw_proto == IPPROTO_TCP @@ -3374,7 +3374,7 @@ ofctl_parse_pcap(struct ovs_cmdl_context *ctx) ovs_fatal(error, "%s: read failed", ctx->argv[1]); } - packet->md = PKT_METADATA_INITIALIZER(ODPP_NONE); + pkt_metadata_init(&packet->md, ODPP_NONE); flow_extract(packet, &flow); flow_print(stdout, &flow); putchar('\n'); -- 2.20.1