X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=ofproto%2Fofproto-dpif-sflow.c;h=f11699cd9401ce671c4569e02e9872ea6411423b;hb=6b1c573408f8771e428707278f86c5b3b076982a;hp=b387b945a1040d72299caaab48b6fb6b5a3eb621;hpb=344e21d47ecfe342fc5c3c8f32020d7a43089301;p=cascardo%2Fovs.git

diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index b387b945a..f11699cd9 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
  * Copyright (c) 2009 InMon Corp.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,6 +18,7 @@
 #include <config.h>
 #include "ofproto-dpif-sflow.h"
 #include <inttypes.h>
+#include <sys/resource.h>
 #include <sys/socket.h>
 #include <net/if.h>
 #include <stdlib.h>
@@ -32,23 +33,44 @@
 #include "ofproto.h"
 #include "packets.h"
 #include "poll-loop.h"
+#include "ovs-router.h"
 #include "route-table.h"
 #include "sflow_api.h"
 #include "socket-util.h"
 #include "timeval.h"
-#include "vlog.h"
+#include "openvswitch/vlog.h"
 #include "lib/odp-util.h"
+#include "lib/unaligned.h"
 #include "ofproto-provider.h"
+#include "lacp.h"
 
 VLOG_DEFINE_THIS_MODULE(sflow);
 
 static struct ovs_mutex mutex;
 
+/* This global variable is used to determine which sFlow
+   sub-agent should send the datapath counters. */
+#define SFLOW_GC_SUBID_UNCLAIMED (uint32_t)-1
+static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
+
+/*
+ * The tunnel types that this module recognizes.
+ */
+enum dpif_sflow_tunnel_type {
+    DPIF_SFLOW_TUNNEL_UNKNOWN = 0,
+    DPIF_SFLOW_TUNNEL_VXLAN,
+    DPIF_SFLOW_TUNNEL_GRE,
+    DPIF_SFLOW_TUNNEL_LISP,
+    DPIF_SFLOW_TUNNEL_IPSEC_GRE,
+    DPIF_SFLOW_TUNNEL_GENEVE
+};
+
 struct dpif_sflow_port {
     struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
     SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
     struct ofport *ofport;      /* To retrieve port stats. */
     odp_port_t odp_port;
+    enum dpif_sflow_tunnel_type tunnel_type;
 };
 
 struct dpif_sflow {
@@ -59,7 +81,7 @@ struct dpif_sflow {
     size_t n_flood, n_all;
     struct hmap ports;          /* Contains "struct dpif_sflow_port"s. */
     uint32_t probability;
-    atomic_int ref_cnt;
+    struct ovs_refcount ref_cnt;
 };
 
 static void dpif_sflow_del_port__(struct dpif_sflow *,
@@ -114,7 +136,7 @@ static void *
 sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
                      size_t bytes)
 {
-    return calloc(1, bytes);
+    return xzalloc(bytes);
 }
 
 /* sFlow library callback to free memory. */
@@ -159,18 +181,137 @@ dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
     return NULL;
 }
 
+/* Call to get the datapath stats.  Modeled after the dpctl utility.
+ *
+ * It might be more efficient for this module to be given a handle it can use
+ * to get these stats directly, but this is only going to be called
+ * once every 20-30 seconds.  Returns the number of datapaths found
+ * (normally expect 1). */
+static int
+sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
+                   struct dpif_dp_stats *dp_totals)
+{
+    struct sset types;
+    const char *type;
+    int count = 0;
+
+    memset(dp_totals, 0, sizeof *dp_totals);
+    sset_init(&types);
+    dp_enumerate_types(&types);
+    SSET_FOR_EACH (type, &types) {
+        struct sset names;
+        const char *name;
+        sset_init(&names);
+        if (dp_enumerate_names(type, &names) == 0) {
+            SSET_FOR_EACH (name, &names) {
+                struct dpif *dpif;
+                if (dpif_open(name, type, &dpif) == 0) {
+                    struct dpif_dp_stats dp_stats;
+                    if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
+                        count++;
+                        dp_totals->n_hit += dp_stats.n_hit;
+                        dp_totals->n_missed += dp_stats.n_missed;
+                        dp_totals->n_lost += dp_stats.n_lost;
+                        dp_totals->n_flows += dp_stats.n_flows;
+                        dp_totals->n_mask_hit += dp_stats.n_mask_hit;
+                        dp_totals->n_masks += dp_stats.n_masks;
+                    }
+                    dpif_close(dpif);
+                }
+            }
+            sset_destroy(&names);
+        }
+    }
+    sset_destroy(&types);
+    return count;
+}
+
+/* If there are multiple bridges defined then we need some
+   minimal arbitration to decide which one should send the
+   global counters.  This function allows each sub-agent to
+   ask whether it should take on that role. */
+static bool
+sflow_global_counters_subid_test(uint32_t subid)
+    OVS_REQUIRES(mutex)
+{
+    if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
+        /* The role is up for grabs. */
+        sflow_global_counters_subid = subid;
+    }
+    return (sflow_global_counters_subid == subid);
+}
+
+static void
+sflow_global_counters_subid_clear(uint32_t subid)
+    OVS_REQUIRES(mutex)
+{
+    if (sflow_global_counters_subid == subid) {
+        /* The sub-agent that was sending global counters
+           is going away, so reset to allow another
+           to take over. */
+        sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
+    }
+}
+
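The claim/release logic above amounts to a tiny first-claimant-wins protocol: the first sub-agent to ask gets the role and keeps it until it explicitly releases it. A standalone sketch of those semantics (plain C; the claim/release names are illustrative and not part of OVS):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define UNCLAIMED (uint32_t)-1
    static uint32_t owner = UNCLAIMED;

    static bool claim(uint32_t subid) {
        if (owner == UNCLAIMED) {
            owner = subid;          /* First caller takes the role. */
        }
        return owner == subid;
    }

    static void release(uint32_t subid) {
        if (owner == subid) {
            owner = UNCLAIMED;      /* Allow another sub-agent to take over. */
        }
    }

    int main(void) {
        assert(claim(1));           /* Sub-agent 1 claims the role...    */
        assert(!claim(2));          /* ...so sub-agent 2 is turned away. */
        release(1);                 /* Sub-agent 1 goes away.            */
        assert(claim(2));           /* Now sub-agent 2 can take over.    */
        return 0;
    }
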
+static void
+sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
+                                SFL_COUNTERS_SAMPLE_TYPE *cs)
+    OVS_REQUIRES(mutex)
+{
+    struct dpif_sflow *ds = ds_;
+    SFLCounters_sample_element dp_elem, res_elem;
+    struct dpif_dp_stats dp_totals;
+    struct rusage usage;
+
+    if (!sflow_global_counters_subid_test(poller->agent->subId)) {
+        /* Another sub-agent is currently responsible for this. */
+        return;
+    }
+
+    /* datapath stats */
+    if (sflow_get_dp_stats(ds, &dp_totals)) {
+        dp_elem.tag = SFLCOUNTERS_OVSDP;
+        dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
+        dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
+        dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
+        dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
+        dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
+        dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
+        SFLADD_ELEMENT(cs, &dp_elem);
+    }
+
+    /* resource usage */
+    getrusage(RUSAGE_SELF, &usage);
+    res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
+    res_elem.counterBlock.appResources.user_time
+        = timeval_to_msec(&usage.ru_utime);
+    res_elem.counterBlock.appResources.system_time
+        = timeval_to_msec(&usage.ru_stime);
+    res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
+    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
+    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
+    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
+    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
+    SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
+
+    SFLADD_ELEMENT(cs, &res_elem);
+    sfl_poller_writeCountersSample(poller, cs);
+}
+
 static void
 sflow_agent_get_counters(void *ds_, SFLPoller *poller,
                          SFL_COUNTERS_SAMPLE_TYPE *cs)
     OVS_REQUIRES(mutex)
 {
     struct dpif_sflow *ds = ds_;
-    SFLCounters_sample_element elem;
+    SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
     enum netdev_features current;
     struct dpif_sflow_port *dsp;
     SFLIf_counters *counters;
     struct netdev_stats stats;
     enum netdev_flags flags;
+    struct lacp_slave_stats lacp_stats;
+    const char *ifName;
 
     dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
     if (!dsp) {
@@ -222,6 +363,57 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller,
     counters->ifPromiscuousMode = 0;
 
     SFLADD_ELEMENT(cs, &elem);
+
+    /* Include LACP counters and identifiers if this port is part of a LAG. */
+    if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
+        memset(&lacp_elem, 0, sizeof lacp_elem);
+        lacp_elem.tag = SFLCOUNTERS_LACP;
+        lacp_elem.counterBlock.lacp.actorSystemID =
+            lacp_stats.dot3adAggPortActorSystemID;
+        lacp_elem.counterBlock.lacp.partnerSystemID =
+            lacp_stats.dot3adAggPortPartnerOperSystemID;
+        lacp_elem.counterBlock.lacp.attachedAggID =
+            lacp_stats.dot3adAggPortAttachedAggID;
+        lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
+            lacp_stats.dot3adAggPortActorAdminState;
+        lacp_elem.counterBlock.lacp.portState.v.actorOper =
+            lacp_stats.dot3adAggPortActorOperState;
+        lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
+            lacp_stats.dot3adAggPortPartnerAdminState;
+        lacp_elem.counterBlock.lacp.portState.v.partnerOper =
+            lacp_stats.dot3adAggPortPartnerOperState;
+        lacp_elem.counterBlock.lacp.LACPDUsRx =
+            lacp_stats.dot3adAggPortStatsLACPDUsRx;
+        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
+        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
+        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
+        lacp_elem.counterBlock.lacp.illegalRx =
+            lacp_stats.dot3adAggPortStatsIllegalRx;
+        lacp_elem.counterBlock.lacp.LACPDUsTx =
+            lacp_stats.dot3adAggPortStatsLACPDUsTx;
+        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
+        SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
+        SFLADD_ELEMENT(cs, &lacp_elem);
+    }
+
+    /* Include port name. */
+    if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
+        memset(&name_elem, 0, sizeof name_elem);
+        name_elem.tag = SFLCOUNTERS_PORTNAME;
+        name_elem.counterBlock.portName.portName.str = (char *)ifName;
+        name_elem.counterBlock.portName.portName.len = strlen(ifName);
+        SFLADD_ELEMENT(cs, &name_elem);
+    }
+
+    /* Include OpenFlow DPID and OpenFlow port number. */
+    memset(&of_elem, 0, sizeof of_elem);
+    of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
+    of_elem.counterBlock.ofPort.datapath_id =
+        ofproto_get_datapath_id(dsp->ofport->ofproto);
+    of_elem.counterBlock.ofPort.port_no =
+        (OVS_FORCE uint32_t)dsp->ofport->ofp_port;
+    SFLADD_ELEMENT(cs, &of_elem);
+
     sfl_poller_writeCountersSample(poller, cs);
 }
 
@@ -253,13 +445,20 @@ sflow_choose_agent_address(const char *agent_device,
     }
 
     SSET_FOR_EACH (target, targets) {
-        struct sockaddr_in sin;
+        union {
+            struct sockaddr_storage ss;
+            struct sockaddr_in sin;
+        } sa;
         char name[IFNAMSIZ];
 
-        if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sin)
-            && route_table_get_name(sin.sin_addr.s_addr, name)
-            && !netdev_get_in4_by_name(name, &in4)) {
-            goto success;
+        if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss)
+            && sa.ss.ss_family == AF_INET) {
+            ovs_be32 gw;
+
+            if (ovs_router_lookup4(sa.sin.sin_addr.s_addr, name, &gw)
+                && !netdev_get_in4_by_name(name, &in4)) {
+                goto success;
+            }
         }
     }
 
@@ -279,7 +478,9 @@ static void
 dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
 {
     if (ds->sflow_agent) {
+        sflow_global_counters_subid_clear(ds->sflow_agent->subId);
         sfl_agent_release(ds->sflow_agent);
+        free(ds->sflow_agent);
         ds->sflow_agent = NULL;
     }
     collectors_destroy(ds->collectors);
@@ -317,7 +518,7 @@ dpif_sflow_create(void)
     struct dpif_sflow *ds;
 
     if (ovsthread_once_start(&once)) {
-        ovs_mutex_init(&mutex, PTHREAD_MUTEX_RECURSIVE);
+        ovs_mutex_init_recursive(&mutex);
         ovsthread_once_done(&once);
     }
 
@@ -325,8 +526,7 @@ dpif_sflow_create(void)
     ds->next_tick = time_now() + 1;
     hmap_init(&ds->ports);
     ds->probability = 0;
-    route_table_register();
-    atomic_init(&ds->ref_cnt, 1);
+    ovs_refcount_init(&ds->ref_cnt);
 
     return ds;
 }
@@ -336,9 +536,7 @@ dpif_sflow_ref(const struct dpif_sflow *ds_)
 {
     struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
     if (ds) {
-        int orig;
-        atomic_add(&ds->ref_cnt, 1, &orig);
-        ovs_assert(orig > 0);
+        ovs_refcount_ref(&ds->ref_cnt);
     }
     return ds;
 }
@@ -359,18 +557,9 @@ dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
 void
 dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
 {
-    int orig;
-
-    if (!ds) {
-        return;
-    }
-
-    atomic_sub(&ds->ref_cnt, 1, &orig);
-    ovs_assert(orig > 0);
-    if (orig == 1) {
+    if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
         struct dpif_sflow_port *dsp, *next;
 
-        route_table_unregister();
         dpif_sflow_clear(ds);
         HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) {
             dpif_sflow_del_port__(ds, dsp);
@@ -391,20 +580,70 @@ dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
     sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
 }
 
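One convention in the ovs_refcount API used above is easy to miss: ovs_refcount_unref() returns the count as it was before the decrement, so the caller that gets back 1 has just dropped the last reference and owns the teardown, which is why dpif_sflow_unref() above compares against 1. A minimal single-threaded model of that idiom (the refcount_* names are illustrative; the real, atomic implementation lives in OVS's lib/ovs-refcount.h):

    #include <assert.h>

    /* Standalone model: unref() returns the count *before* the decrement,
     * so the caller that sees 1 is the last owner and frees the object.
     * (The real implementation is atomic; this model is single-threaded.) */
    struct refcount { unsigned int count; };

    static void refcount_init(struct refcount *r) { r->count = 1; }
    static void refcount_ref(struct refcount *r) { assert(r->count > 0); r->count++; }
    static unsigned int refcount_unref(struct refcount *r) {
        assert(r->count > 0);
        return r->count--;           /* Post-decrement: returns the old count. */
    }

    int main(void) {
        struct refcount rc;
        refcount_init(&rc);               /* count == 1 */
        refcount_ref(&rc);                /* count == 2 */
        assert(refcount_unref(&rc) == 2); /* not the last owner */
        assert(refcount_unref(&rc) == 1); /* last owner: tear down here */
        return 0;
    }
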
"geneve") == 0) { + return DPIF_SFLOW_TUNNEL_GENEVE; + } + } + return DPIF_SFLOW_TUNNEL_UNKNOWN; +} + +static uint8_t +dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type) +{ + /* Default to 0 (IPPROTO_IP), meaning "unknown". */ + uint8_t ipproto = 0; + switch(tunnel_type) { + + case DPIF_SFLOW_TUNNEL_GRE: + ipproto = IPPROTO_GRE; + break; + + case DPIF_SFLOW_TUNNEL_IPSEC_GRE: + ipproto = IPPROTO_ESP; + break; + + case DPIF_SFLOW_TUNNEL_VXLAN: + case DPIF_SFLOW_TUNNEL_LISP: + case DPIF_SFLOW_TUNNEL_GENEVE: + ipproto = IPPROTO_UDP; + + case DPIF_SFLOW_TUNNEL_UNKNOWN: + break; + } + return ipproto; +} + void dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport, odp_port_t odp_port) OVS_EXCLUDED(mutex) { struct dpif_sflow_port *dsp; int ifindex; + enum dpif_sflow_tunnel_type tunnel_type; ovs_mutex_lock(&mutex); dpif_sflow_del_port(ds, odp_port); + tunnel_type = dpif_sflow_tunnel_type(ofport); ifindex = netdev_get_ifindex(ofport->netdev); - if (ifindex <= 0) { - /* Not an ifindex port, so do not add a cross-reference to it here */ + if (ifindex <= 0 + && tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) { + /* Not an ifindex port, and not a tunnel port either + * so do not add a cross-reference to it here. + */ goto out; } @@ -412,12 +651,18 @@ dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport, dsp = xmalloc(sizeof *dsp); dsp->ofport = ofport; dsp->odp_port = odp_port; - SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0); + dsp->tunnel_type = tunnel_type; hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port)); - /* Add poller. */ - if (ds->sflow_agent) { - dpif_sflow_add_poller(ds, dsp); + if (ifindex > 0) { + /* Add poller for ports that have ifindex. */ + SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0); + if (ds->sflow_agent) { + dpif_sflow_add_poller(ds, dsp); + } + } else { + /* Record "ifindex unknown" for the others */ + SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0); } out: @@ -428,9 +673,10 @@ static void dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp) OVS_REQUIRES(mutex) { - if (ds->sflow_agent) { - sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi); - sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi); + if (ds->sflow_agent + && SFL_DS_INDEX(dsp->dsi)) { + sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi); + sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi); } hmap_remove(&ds->ports, &dsp->hmap_node); free(dsp); @@ -463,6 +709,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds, SFLDataSource_instance dsi; uint32_t dsIndex; SFLSampler *sampler; + SFLPoller *poller; ovs_mutex_lock(&mutex); if (sset_is_empty(&options->targets) || !options->sampling_rate) { @@ -509,6 +756,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds, /* Create agent. */ VLOG_INFO("creating sFlow agent %d", options->sub_id); if (ds->sflow_agent) { + sflow_global_counters_subid_clear(ds->sflow_agent->subId); sfl_agent_release(ds->sflow_agent); } ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent); @@ -542,9 +790,18 @@ dpif_sflow_set_options(struct dpif_sflow *ds, sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len); sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX); + /* Add a counter poller for the bridge so we can use it to send + global counters such as datapath cache hit/miss stats. 
 void
 dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
                     odp_port_t odp_port) OVS_EXCLUDED(mutex)
 {
     struct dpif_sflow_port *dsp;
     int ifindex;
+    enum dpif_sflow_tunnel_type tunnel_type;
 
     ovs_mutex_lock(&mutex);
     dpif_sflow_del_port(ds, odp_port);
 
+    tunnel_type = dpif_sflow_tunnel_type(ofport);
     ifindex = netdev_get_ifindex(ofport->netdev);
-    if (ifindex <= 0) {
-        /* Not an ifindex port, so do not add a cross-reference to it here */
+    if (ifindex <= 0
+        && tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
+        /* Not an ifindex port, and not a tunnel port either,
+         * so do not add a cross-reference to it here.
+         */
        goto out;
     }
 
@@ -412,12 +651,18 @@ dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
     dsp = xmalloc(sizeof *dsp);
     dsp->ofport = ofport;
     dsp->odp_port = odp_port;
-    SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
+    dsp->tunnel_type = tunnel_type;
     hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
 
-    /* Add poller. */
-    if (ds->sflow_agent) {
-        dpif_sflow_add_poller(ds, dsp);
+    if (ifindex > 0) {
+        /* Add poller for ports that have an ifindex. */
+        SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
+        if (ds->sflow_agent) {
+            dpif_sflow_add_poller(ds, dsp);
+        }
+    } else {
+        /* Record "ifindex unknown" for the others. */
+        SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
     }
 
 out:
@@ -428,9 +673,10 @@ static void
 dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
     OVS_REQUIRES(mutex)
 {
-    if (ds->sflow_agent) {
-        sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
-        sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
+    if (ds->sflow_agent
+        && SFL_DS_INDEX(dsp->dsi)) {
+        sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
+        sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
     }
     hmap_remove(&ds->ports, &dsp->hmap_node);
     free(dsp);
@@ -463,6 +709,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
     SFLDataSource_instance dsi;
     uint32_t dsIndex;
     SFLSampler *sampler;
+    SFLPoller *poller;
 
     ovs_mutex_lock(&mutex);
     if (sset_is_empty(&options->targets) || !options->sampling_rate) {
@@ -509,6 +756,7 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
     /* Create agent. */
     VLOG_INFO("creating sFlow agent %d", options->sub_id);
     if (ds->sflow_agent) {
+        sflow_global_counters_subid_clear(ds->sflow_agent->subId);
         sfl_agent_release(ds->sflow_agent);
     }
     ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
@@ -542,9 +790,18 @@ dpif_sflow_set_options(struct dpif_sflow *ds,
     sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
     sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
 
+    /* Add a counter poller for the bridge so we can use it to send
+       global counters such as datapath cache hit/miss stats. */
+    poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
+                                 sflow_agent_get_global_counters);
+    sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
+    sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
+
     /* Add pollers for the currently known ifindex-ports */
     HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
-        dpif_sflow_add_poller(ds, dsp);
+        if (SFL_DS_INDEX(dsp->dsi)) {
+            dpif_sflow_add_poller(ds, dsp);
+        }
     }
 
 
@@ -566,18 +823,419 @@ dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
     return ret;
 }
 
+static void
+dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
+                     const struct flow_tnl *tunnel,
+                     SFLSampled_ipv4 *ipv4)
+{
+    ipv4->protocol = tunnel_ipproto;
+    ipv4->tos = tunnel->ip_tos;
+    ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
+    ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
+    ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
+    ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
+}
+
+static void
+dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
+                         ovs_be32 lse)
+{
+    if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
+        sflow_actions->mpls_err = true;
+        return;
+    }
+
+    /* Record the new lse in host-byte-order.  The BOS flag will be
+     * fixed up later, when we hand the stack to the sFlow library. */
+    sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
+}
+
+static void
+dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
+{
+    if (sflow_actions->mpls_stack_depth == 0) {
+        sflow_actions->mpls_err = true;
+        return;
+    }
+    sflow_actions->mpls_stack_depth--;
+}
+
+static void
+dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
+                    const struct ovs_key_mpls *mpls_key, int n)
+{
+    int ii;
+    if (n > FLOW_MAX_MPLS_LABELS) {
+        sflow_actions->mpls_err = true;
+        return;
+    }
+
+    for (ii = 0; ii < n; ii++) {
+        /* Reverse stack order, and use host-byte-order for each lse. */
+        sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
+    }
+    sflow_actions->mpls_stack_depth = n;
+}
+
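The three helpers above keep the pending MPLS stack in an array ordered bottom-of-stack first, in host byte order, so the most recently pushed (outermost) label always sits at the highest occupied index; packet-header order is restored only at encode time. A standalone trace of that bookkeeping (illustrative names; bare 32-bit values stand in for LSEs, and the bounds checks of the real code are omitted):

    #include <assert.h>
    #include <stdint.h>

    #define MAX_LABELS 3

    /* Standalone model: the array holds the stack bottom-first, so a newly
     * pushed (outermost) label lands at the highest occupied index. */
    static uint32_t stack[MAX_LABELS];
    static int depth;

    static void push(uint32_t lse) { stack[depth++] = lse; }
    static void pop(void) { depth--; }

    int main(void) {
        push(0xAAA);            /* stack (top..bottom): AAA */
        push(0xBBB);            /* stack: BBB AAA           */
        assert(depth == 2);
        assert(stack[0] == 0xAAA && stack[1] == 0xBBB); /* bottom-first */
        pop();                  /* stack: AAA               */
        assert(depth == 1);
        return 0;
    }
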
+static void
+sflow_read_tnl_push_action(const struct nlattr *attr,
+                           struct dpif_sflow_actions *sflow_actions)
+{
+    /* Modeled on lib/odp-util.c: format_odp_tnl_push_header(). */
+    const struct ovs_action_push_tnl *data = nl_attr_get(attr);
+    const struct eth_header *eth = (const struct eth_header *) data->header;
+    const struct ip_header *ip
+        = ALIGNED_CAST(const struct ip_header *, eth + 1);
+
+    sflow_actions->out_port = u32_to_odp(data->out_port);
+
+    /* Ethernet. */
+    /* TODO: sFlow does not currently define a MAC-in-MAC
+     * encapsulation structure.  We could use an extension
+     * structure to report this.
+     */
+
+    /* IPv4 */
+    /* Cannot assume alignment, so use the 16-bit-aligned accessors. */
+    sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
+    sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
+    sflow_actions->tunnel.ip_tos = ip->ip_tos;
+    sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
+    /* The tnl_push action can supply the ip_protocol too. */
+    sflow_actions->tunnel_ipproto = ip->ip_proto;
+
+    /* Layer 4 */
+    if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
+        || data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
+        const struct udp_header *udp = (const struct udp_header *) (ip + 1);
+        sflow_actions->tunnel.tp_src = udp->udp_src;
+        sflow_actions->tunnel.tp_dst = udp->udp_dst;
+
+        if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
+            const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
+            uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
+            sflow_actions->tunnel.tun_id = htonll(tun_id);
+        } else {
+            const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
+            uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
+            sflow_actions->tunnel.tun_id = htonll(tun_id);
+        }
+    } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
+        const void *l4 = ip + 1;
+        const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
+        ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
+        if (greh->flags & htons(GRE_CSUM)) {
+            options++;
+        }
+        if (greh->flags & htons(GRE_KEY)) {
+            uint64_t tun_id = ntohl(get_16aligned_be32(options));
+            sflow_actions->tunnel.tun_id = htonll(tun_id);
+        }
+    }
+}
+
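The VNI arithmetic above deserves a worked example. In both the VXLAN and Geneve headers the 24-bit VNI occupies the upper three bytes of a 32-bit big-endian word, so recovering it means converting to host order and shifting out the low byte. A standalone check (illustrative, using only <arpa/inet.h>):

    #include <assert.h>
    #include <arpa/inet.h>
    #include <stdint.h>

    int main(void) {
        /* On the wire the VXLAN VNI field for VNI 0x123456 holds the bytes
         * 12 34 56 00: the 24-bit VNI sits in the upper three bytes of a
         * 32-bit big-endian word. */
        uint32_t vni_field = htonl(0x123456 << 8); /* as read from the header */
        uint64_t tun_id = ntohl(vni_field) >> 8;   /* same recipe as above */
        assert(tun_id == 0x123456);
        return 0;
    }
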
+static void
+sflow_read_set_action(const struct nlattr *attr,
+                      struct dpif_sflow_actions *sflow_actions)
+{
+    enum ovs_key_attr type = nl_attr_type(attr);
+    switch (type) {
+    case OVS_KEY_ATTR_ENCAP:
+        if (++sflow_actions->encap_depth > 1) {
+            /* Do not handle multi-encap for now. */
+            sflow_actions->tunnel_err = true;
+        } else {
+            dpif_sflow_read_actions(NULL,
+                                    nl_attr_get(attr), nl_attr_get_size(attr),
+                                    sflow_actions);
+        }
+        break;
+    case OVS_KEY_ATTR_PRIORITY:
+    case OVS_KEY_ATTR_SKB_MARK:
+    case OVS_KEY_ATTR_DP_HASH:
+    case OVS_KEY_ATTR_RECIRC_ID:
+        break;
+
+    case OVS_KEY_ATTR_TUNNEL: {
+        if (++sflow_actions->encap_depth > 1) {
+            /* Do not handle multi-encap for now. */
+            sflow_actions->tunnel_err = true;
+        } else {
+            if (odp_tun_key_from_attr(attr, false, &sflow_actions->tunnel)
+                == ODP_FIT_ERROR) {
+                /* Tunnel parsing error. */
+                sflow_actions->tunnel_err = true;
+            }
+        }
+        break;
+    }
+
+    case OVS_KEY_ATTR_IN_PORT:
+    case OVS_KEY_ATTR_ETHERNET:
+    case OVS_KEY_ATTR_VLAN:
+        break;
+
+    case OVS_KEY_ATTR_MPLS: {
+        const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
+        size_t size = nl_attr_get_size(attr);
+        dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
+        break;
+    }
+
+    case OVS_KEY_ATTR_ETHERTYPE:
+    case OVS_KEY_ATTR_IPV4:
+        if (sflow_actions->encap_depth == 1) {
+            const struct ovs_key_ipv4 *key = nl_attr_get(attr);
+            if (key->ipv4_src) {
+                sflow_actions->tunnel.ip_src = key->ipv4_src;
+            }
+            if (key->ipv4_dst) {
+                sflow_actions->tunnel.ip_dst = key->ipv4_dst;
+            }
+            if (key->ipv4_proto) {
+                sflow_actions->tunnel_ipproto = key->ipv4_proto;
+            }
+            if (key->ipv4_tos) {
+                sflow_actions->tunnel.ip_tos = key->ipv4_tos;
+            }
+            if (key->ipv4_ttl) {
+                sflow_actions->tunnel.ip_ttl = key->ipv4_ttl;
+            }
+        }
+        break;
+
+    case OVS_KEY_ATTR_IPV6:
+        /* TODO: parse IPv6 encap. */
+        break;
+
+        /* These have the same structure and format. */
+    case OVS_KEY_ATTR_TCP:
+    case OVS_KEY_ATTR_UDP:
+    case OVS_KEY_ATTR_SCTP:
+        if (sflow_actions->encap_depth == 1) {
+            const struct ovs_key_tcp *key = nl_attr_get(attr);
+            if (key->tcp_src) {
+                sflow_actions->tunnel.tp_src = key->tcp_src;
+            }
+            if (key->tcp_dst) {
+                sflow_actions->tunnel.tp_dst = key->tcp_dst;
+            }
+        }
+        break;
+
+    case OVS_KEY_ATTR_TCP_FLAGS:
+    case OVS_KEY_ATTR_ICMP:
+    case OVS_KEY_ATTR_ICMPV6:
+    case OVS_KEY_ATTR_ARP:
+    case OVS_KEY_ATTR_ND:
+    case OVS_KEY_ATTR_CT_STATE:
+    case OVS_KEY_ATTR_CT_ZONE:
+    case OVS_KEY_ATTR_CT_MARK:
+    case OVS_KEY_ATTR_CT_LABELS:
+    case OVS_KEY_ATTR_UNSPEC:
+    case __OVS_KEY_ATTR_MAX:
+    default:
+        break;
+    }
+}
+
+static void
+dpif_sflow_capture_input_mpls(const struct flow *flow,
+                              struct dpif_sflow_actions *sflow_actions)
+{
+    if (eth_type_mpls(flow->dl_type)) {
+        int depth = 0;
+        int ii;
+        ovs_be32 lse;
+        /* Calculate depth by detecting BOS. */
+        for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
+            lse = flow->mpls_lse[ii];
+            depth++;
+            if (lse & htonl(MPLS_BOS_MASK)) {
+                break;
+            }
+        }
+        /* Capture the stack, reversing stack order and using
+         * host-byte-order for each lse.  The BOS flag is ignored
+         * for now; it is set later, when the output stack is
+         * encoded.
+         */
+        for (ii = 0; ii < depth; ii++) {
+            lse = flow->mpls_lse[ii];
+            sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
+        }
+        sflow_actions->mpls_stack_depth = depth;
+    }
+}
+
+void
+dpif_sflow_read_actions(const struct flow *flow,
+                        const struct nlattr *actions, size_t actions_len,
+                        struct dpif_sflow_actions *sflow_actions)
+{
+    const struct nlattr *a;
+    unsigned int left;
+
+    if (actions_len == 0) {
+        /* Packet dropped. */
+        return;
+    }
+
+    if (flow != NULL) {
+        /* Make sure the MPLS output stack
+         * is seeded with the input stack.
+         */
+        dpif_sflow_capture_input_mpls(flow, sflow_actions);
+
+        /* XXX: When 802.1AD(QinQ) is supported, we can do
+         * the same with VLAN stacks here.
+         */
+    }
+
+    NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
+        enum ovs_action_attr type = nl_attr_type(a);
+        switch (type) {
+        case OVS_ACTION_ATTR_OUTPUT:
+            /* Capture the output port in case we need it
+             * to get the output tunnel type.
+             */
+            sflow_actions->out_port = u32_to_odp(nl_attr_get_u32(a));
+            break;
+
+        case OVS_ACTION_ATTR_TUNNEL_POP:
+            /* XXX: Do not handle this for now.  It's not clear
+             * whether we should start with encap_depth == 1 when we
+             * see an input tunnel, or assume that the input tunnel
+             * was always "popped" if it was presented to us decoded
+             * in flow->tunnel.
+             *
+             * If we do handle this it might look like the following,
+             * as we clear the captured tunnel info and decrement
+             * the encap_depth:
+             *
+             * memset(&sflow_actions->tunnel, 0, sizeof(struct flow_tnl));
+             * sflow_actions->tunnel_ipproto = 0;
+             * --sflow_actions->encap_depth;
+             *
+             * But for now just disable the tunnel annotation:
+             */
+            sflow_actions->tunnel_err = true;
+            break;
+
+        case OVS_ACTION_ATTR_TUNNEL_PUSH:
+            /* XXX: This action appears to come with its own
+             * OUTPUT action, so should it be regarded as having
+             * an implicit "pop" following it too?  Put another
+             * way, would two tnl_push() actions in succession
+             * result in a packet with two layers of encap?
+             */
+            if (++sflow_actions->encap_depth > 1) {
+                /* Do not handle multi-encap for now. */
+                sflow_actions->tunnel_err = true;
+            } else {
+                sflow_read_tnl_push_action(a, sflow_actions);
+            }
+            break;
+
+        case OVS_ACTION_ATTR_USERSPACE:
+        case OVS_ACTION_ATTR_RECIRC:
+        case OVS_ACTION_ATTR_HASH:
+        case OVS_ACTION_ATTR_CT:
+            break;
+
+        case OVS_ACTION_ATTR_SET_MASKED:
+            /* TODO: apply mask.  XXX: Are we likely to see this? */
+            break;
+
+        case OVS_ACTION_ATTR_SET:
+            sflow_read_set_action(nl_attr_get(a), sflow_actions);
+            break;
+
+        case OVS_ACTION_ATTR_PUSH_VLAN:
+        case OVS_ACTION_ATTR_POP_VLAN:
+            /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
+             * construct a VLAN stack.  The sFlow user-action cookie already
+             * captures the egress VLAN ID, so there is nothing more to do
+             * here.
+             */
+            break;
+
+        case OVS_ACTION_ATTR_PUSH_MPLS: {
+            const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
+            if (mpls) {
+                dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
+            }
+            break;
+        }
+        case OVS_ACTION_ATTR_POP_MPLS: {
+            dpif_sflow_pop_mpls_lse(sflow_actions);
+            break;
+        }
+        case OVS_ACTION_ATTR_SAMPLE:
+        case OVS_ACTION_ATTR_UNSPEC:
+        case __OVS_ACTION_ATTR_MAX:
+        default:
+            break;
+        }
+    }
+}
+
+static void
+dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
+                             uint32_t *mpls_lse_buf,
+                             const struct dpif_sflow_actions *sflow_actions)
+{
+    /* Put the MPLS stack back into "packet header" order,
+     * and make sure the BOS flag is set correctly on the last
+     * one.  Each lse is still in host-byte-order.
+     */
+    int ii;
+    uint32_t lse;
+    stack->depth = sflow_actions->mpls_stack_depth;
+    stack->stack = mpls_lse_buf;
+    for (ii = 0; ii < stack->depth; ii++) {
+        lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
+        stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
+    }
+    stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
+}
+
+/* Extract the output port count from the user action cookie.
+ * See http://sflow.org/sflow_version_5.txt "Input/output port information".
+ */
+static uint32_t
+dpif_sflow_cookie_num_outputs(const union user_action_cookie *cookie)
+{
+    uint32_t format = cookie->sflow.output & 0xC0000000;
+    uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
+    if (format == 0) {
+        return port_n ? 1 : 0;
+    } else if (format == 0x80000000) {
+        return port_n;
+    }
+    return 0;
+}
+
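The two encodings accepted above follow the sFlow v5 convention for the "output" field: the top two bits select the format (00 = a single output port id, 10 = a count of output ports) and the low 30 bits carry the value. A standalone trace (a plain copy of the decode logic above; the num_outputs name is illustrative):

    #include <assert.h>
    #include <stdint.h>

    /* Standalone copy of the decode logic above, for illustration only. */
    static uint32_t num_outputs(uint32_t output) {
        uint32_t format = output & 0xC0000000;  /* top 2 bits: format   */
        uint32_t value  = output & 0x3FFFFFFF;  /* low 30 bits: payload */
        if (format == 0) {
            return value ? 1 : 0;               /* format 0: a port id  */
        } else if (format == 0x80000000) {
            return value;                       /* format 2: a count    */
        }
        return 0;
    }

    int main(void) {
        assert(num_outputs(0) == 0);              /* no output port     */
        assert(num_outputs(17) == 1);             /* single port, id 17 */
        assert(num_outputs(0x80000000 | 3) == 3); /* multiple: 3 ports  */
        return 0;
    }
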
 void
-dpif_sflow_received(struct dpif_sflow *ds, struct ofpbuf *packet,
-                    const struct flow *flow, odp_port_t odp_in_port,
-                    const union user_action_cookie *cookie)
+dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
+                    const struct flow *flow, odp_port_t odp_in_port,
+                    const union user_action_cookie *cookie,
+                    const struct dpif_sflow_actions *sflow_actions)
     OVS_EXCLUDED(mutex)
 {
     SFL_FLOW_SAMPLE_TYPE fs;
     SFLFlow_sample_element hdrElem;
     SFLSampled_header *header;
     SFLFlow_sample_element switchElem;
+    uint8_t tnlInProto, tnlOutProto;
+    SFLFlow_sample_element tnlInElem, tnlOutElem;
+    SFLFlow_sample_element vniInElem, vniOutElem;
+    SFLFlow_sample_element mplsElem;
+    uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
     SFLSampler *sampler;
     struct dpif_sflow_port *in_dsp;
+    struct dpif_sflow_port *out_dsp;
     ovs_be16 vlan_tci;
 
     ovs_mutex_lock(&mutex);
@@ -608,12 +1266,12 @@ dpif_sflow_received(struct dpif_sflow *ds, struct ofpbuf *packet,
     header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
     /* The frame_length should include the Ethernet FCS (4 bytes),
      * but it has already been stripped, so we need to add 4 here. */
-    header->frame_length = packet->size + 4;
+    header->frame_length = dp_packet_size(packet) + 4;
     /* Ethernet FCS stripped off. */
     header->stripped = 4;
-    header->header_length = MIN(packet->size,
+    header->header_length = MIN(dp_packet_size(packet),
                                 sampler->sFlowFsMaximumHeaderSize);
-    header->header_bytes = packet->data;
+    header->header_bytes = dp_packet_data(packet);
 
     /* Add extended switch element. */
     memset(&switchElem, 0, sizeof(switchElem));
@@ -628,6 +1286,67 @@ dpif_sflow_received(struct dpif_sflow *ds, struct ofpbuf *packet,
 
     fs.output = cookie->sflow.output;
 
+    /* Input tunnel. */
+    if (flow->tunnel.ip_dst) {
+        memset(&tnlInElem, 0, sizeof(tnlInElem));
+        tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
+        tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
+        dpif_sflow_tunnel_v4(tnlInProto,
+                             &flow->tunnel,
+                             &tnlInElem.flowType.ipv4);
+        SFLADD_ELEMENT(&fs, &tnlInElem);
+        if (flow->tunnel.tun_id) {
+            memset(&vniInElem, 0, sizeof(vniInElem));
+            vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
+            vniInElem.flowType.tunnel_vni.vni
+                = ntohll(flow->tunnel.tun_id);
+            SFLADD_ELEMENT(&fs, &vniInElem);
+        }
+    }
+
+    /* Output tunnel. */
+    if (sflow_actions
+        && sflow_actions->encap_depth == 1
+        && !sflow_actions->tunnel_err
+        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
+        tnlOutProto = sflow_actions->tunnel_ipproto;
+        if (tnlOutProto == 0) {
+            /* Try to infer the IP protocol from the output port. */
+            if (sflow_actions->out_port != ODPP_NONE) {
+                out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
+                if (out_dsp) {
+                    tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
+                }
+            }
+        }
+        memset(&tnlOutElem, 0, sizeof(tnlOutElem));
+        tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
+        dpif_sflow_tunnel_v4(tnlOutProto,
+                             &sflow_actions->tunnel,
+                             &tnlOutElem.flowType.ipv4);
+        SFLADD_ELEMENT(&fs, &tnlOutElem);
+        if (sflow_actions->tunnel.tun_id) {
+            memset(&vniOutElem, 0, sizeof(vniOutElem));
+            vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
+            vniOutElem.flowType.tunnel_vni.vni
+                = ntohll(sflow_actions->tunnel.tun_id);
+            SFLADD_ELEMENT(&fs, &vniOutElem);
+        }
+    }
+
+    /* MPLS output label stack. */
+    if (sflow_actions
+        && sflow_actions->mpls_stack_depth > 0
+        && !sflow_actions->mpls_err
+        && dpif_sflow_cookie_num_outputs(cookie) == 1) {
+        memset(&mplsElem, 0, sizeof(mplsElem));
+        mplsElem.tag = SFLFLOW_EX_MPLS;
+        dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
+                                     mpls_lse_buf,
+                                     sflow_actions);
+        SFLADD_ELEMENT(&fs, &mplsElem);
+    }
+
     /* Submit the flow sample to be encoded into the next datagram. */
     SFLADD_ELEMENT(&fs, &hdrElem);
     SFLADD_ELEMENT(&fs, &switchElem);