2 * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
3 * Copyright (c) 2009 InMon Corp.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
19 #include "ofproto-dpif-sflow.h"
21 #include <sys/resource.h>
22 #include <sys/socket.h>
25 #include "collectors.h"
35 #include "poll-loop.h"
36 #include "ovs-router.h"
37 #include "route-table.h"
38 #include "sflow_api.h"
39 #include "socket-util.h"
41 #include "openvswitch/vlog.h"
42 #include "lib/odp-util.h"
43 #include "lib/unaligned.h"
44 #include "ofproto-provider.h"
47 VLOG_DEFINE_THIS_MODULE(sflow);
49 static struct ovs_mutex mutex;
51 /* This global var is used to determine which sFlow
52 sub-agent should send the datapath counters. */
53 #define SFLOW_GC_SUBID_UNCLAIMED (uint32_t)-1
54 static uint32_t sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
/*
 * Tunnel encapsulations that this module can recognize on a port.
 * DPIF_SFLOW_TUNNEL_UNKNOWN (0) is used for every other port type.
 */
enum dpif_sflow_tunnel_type {
    DPIF_SFLOW_TUNNEL_UNKNOWN = 0,  /* Not a recognized tunnel port. */
    DPIF_SFLOW_TUNNEL_VXLAN,        /* netdev type "vxlan". */
    DPIF_SFLOW_TUNNEL_GRE,          /* netdev type "gre". */
    DPIF_SFLOW_TUNNEL_LISP,         /* netdev type "lisp". */
    DPIF_SFLOW_TUNNEL_IPSEC_GRE,    /* netdev type "ipsec_gre". */
    DPIF_SFLOW_TUNNEL_GENEVE        /* netdev type "geneve". */
};
68 struct dpif_sflow_port {
69 struct hmap_node hmap_node; /* In struct dpif_sflow's "ports" hmap. */
70 SFLDataSource_instance dsi; /* sFlow library's notion of port number. */
71 struct ofport *ofport; /* To retrive port stats. */
73 enum dpif_sflow_tunnel_type tunnel_type;
77 struct collectors *collectors;
78 SFLAgent *sflow_agent;
79 struct ofproto_sflow_options *options;
81 size_t n_flood, n_all;
82 struct hmap ports; /* Contains "struct dpif_sflow_port"s. */
84 struct ovs_refcount ref_cnt;
87 static void dpif_sflow_del_port__(struct dpif_sflow *,
88 struct dpif_sflow_port *);
90 #define RECEIVER_INDEX 1
92 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
/* Returns true if 'a' and 'b' are both NULL or are both non-NULL strings
 * with identical contents, false otherwise. */
static bool
nullable_string_is_equal(const char *a, const char *b)
{
    if (!a || !b) {
        /* At least one is NULL: equal only if both are. */
        return !a && !b;
    }
    return strcmp(a, b) == 0;
}
101 ofproto_sflow_options_equal(const struct ofproto_sflow_options *a,
102 const struct ofproto_sflow_options *b)
104 return (sset_equals(&a->targets, &b->targets)
105 && a->sampling_rate == b->sampling_rate
106 && a->polling_interval == b->polling_interval
107 && a->header_len == b->header_len
108 && a->sub_id == b->sub_id
109 && nullable_string_is_equal(a->agent_device, b->agent_device)
110 && nullable_string_is_equal(a->control_ip, b->control_ip));
113 static struct ofproto_sflow_options *
114 ofproto_sflow_options_clone(const struct ofproto_sflow_options *old)
116 struct ofproto_sflow_options *new = xmemdup(old, sizeof *old);
117 sset_clone(&new->targets, &old->targets);
118 new->agent_device = old->agent_device ? xstrdup(old->agent_device) : NULL;
119 new->control_ip = old->control_ip ? xstrdup(old->control_ip) : NULL;
124 ofproto_sflow_options_destroy(struct ofproto_sflow_options *options)
127 sset_destroy(&options->targets);
128 free(options->agent_device);
129 free(options->control_ip);
134 /* sFlow library callback to allocate memory. */
136 sflow_agent_alloc_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
139 return calloc(1, bytes);
142 /* sFlow library callback to free memory. */
144 sflow_agent_free_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
151 /* sFlow library callback to report error. */
153 sflow_agent_error_cb(void *magic OVS_UNUSED, SFLAgent *agent OVS_UNUSED,
156 VLOG_WARN("sFlow agent error: %s", msg);
159 /* sFlow library callback to send datagram. */
161 sflow_agent_send_packet_cb(void *ds_, SFLAgent *agent OVS_UNUSED,
162 SFLReceiver *receiver OVS_UNUSED, u_char *pkt,
165 struct dpif_sflow *ds = ds_;
166 collectors_send(ds->collectors, pkt, pktLen);
169 static struct dpif_sflow_port *
170 dpif_sflow_find_port(const struct dpif_sflow *ds, odp_port_t odp_port)
173 struct dpif_sflow_port *dsp;
175 HMAP_FOR_EACH_IN_BUCKET (dsp, hmap_node, hash_odp_port(odp_port),
177 if (dsp->odp_port == odp_port) {
184 /* Call to get the datapath stats. Modeled after the dpctl utility.
186 * It might be more efficient for this module to be given a handle it can use
187 * to get these stats more efficiently, but this is only going to be called
188 * once every 20-30 seconds. Return number of datapaths found (normally expect
191 sflow_get_dp_stats(struct dpif_sflow *ds OVS_UNUSED,
192 struct dpif_dp_stats *dp_totals)
198 memset(dp_totals, 0, sizeof *dp_totals);
200 dp_enumerate_types(&types);
201 SSET_FOR_EACH (type, &types) {
205 if (dp_enumerate_names(type, &names) == 0) {
206 SSET_FOR_EACH (name, &names) {
208 if (dpif_open(name, type, &dpif) == 0) {
209 struct dpif_dp_stats dp_stats;
210 if (dpif_get_dp_stats(dpif, &dp_stats) == 0) {
212 dp_totals->n_hit += dp_stats.n_hit;
213 dp_totals->n_missed += dp_stats.n_missed;
214 dp_totals->n_lost += dp_stats.n_lost;
215 dp_totals->n_flows += dp_stats.n_flows;
216 dp_totals->n_mask_hit += dp_stats.n_mask_hit;
217 dp_totals->n_masks += dp_stats.n_masks;
222 sset_destroy(&names);
225 sset_destroy(&types);
229 /* If there are multiple bridges defined then we need some
230 minimal artibration to decide which one should send the
231 global counters. This function allows each sub-agent to
232 ask if he should do it or not. */
234 sflow_global_counters_subid_test(uint32_t subid)
237 if (sflow_global_counters_subid == SFLOW_GC_SUBID_UNCLAIMED) {
238 /* The role is up for grabs. */
239 sflow_global_counters_subid = subid;
241 return (sflow_global_counters_subid == subid);
245 sflow_global_counters_subid_clear(uint32_t subid)
248 if (sflow_global_counters_subid == subid) {
249 /* The sub-agent that was sending global counters
250 is going away, so reset to allow another
252 sflow_global_counters_subid = SFLOW_GC_SUBID_UNCLAIMED;
257 sflow_agent_get_global_counters(void *ds_, SFLPoller *poller,
258 SFL_COUNTERS_SAMPLE_TYPE *cs)
261 struct dpif_sflow *ds = ds_;
262 SFLCounters_sample_element dp_elem, res_elem;
263 struct dpif_dp_stats dp_totals;
266 if (!sflow_global_counters_subid_test(poller->agent->subId)) {
267 /* Another sub-agent is currently responsible for this. */
272 if (sflow_get_dp_stats(ds, &dp_totals)) {
273 dp_elem.tag = SFLCOUNTERS_OVSDP;
274 dp_elem.counterBlock.ovsdp.n_hit = dp_totals.n_hit;
275 dp_elem.counterBlock.ovsdp.n_missed = dp_totals.n_missed;
276 dp_elem.counterBlock.ovsdp.n_lost = dp_totals.n_lost;
277 dp_elem.counterBlock.ovsdp.n_mask_hit = dp_totals.n_mask_hit;
278 dp_elem.counterBlock.ovsdp.n_flows = dp_totals.n_flows;
279 dp_elem.counterBlock.ovsdp.n_masks = dp_totals.n_masks;
280 SFLADD_ELEMENT(cs, &dp_elem);
284 getrusage(RUSAGE_SELF, &usage);
285 res_elem.tag = SFLCOUNTERS_APP_RESOURCES;
286 res_elem.counterBlock.appResources.user_time
287 = timeval_to_msec(&usage.ru_utime);
288 res_elem.counterBlock.appResources.system_time
289 = timeval_to_msec(&usage.ru_stime);
290 res_elem.counterBlock.appResources.mem_used = (usage.ru_maxrss * 1024);
291 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.mem_max);
292 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_open);
293 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.fd_max);
294 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_open);
295 SFL_UNDEF_GAUGE(res_elem.counterBlock.appResources.conn_max);
297 SFLADD_ELEMENT(cs, &res_elem);
298 sfl_poller_writeCountersSample(poller, cs);
302 sflow_agent_get_counters(void *ds_, SFLPoller *poller,
303 SFL_COUNTERS_SAMPLE_TYPE *cs)
306 struct dpif_sflow *ds = ds_;
307 SFLCounters_sample_element elem, lacp_elem, of_elem, name_elem;
308 enum netdev_features current;
309 struct dpif_sflow_port *dsp;
310 SFLIf_counters *counters;
311 struct netdev_stats stats;
312 enum netdev_flags flags;
313 struct lacp_slave_stats lacp_stats;
316 dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
321 elem.tag = SFLCOUNTERS_GENERIC;
322 counters = &elem.counterBlock.generic;
323 counters->ifIndex = SFL_DS_INDEX(poller->dsi);
324 counters->ifType = 6;
325 if (!netdev_get_features(dsp->ofport->netdev, ¤t, NULL, NULL, NULL)) {
326 /* The values of ifDirection come from MAU MIB (RFC 2668): 0 = unknown,
327 1 = full-duplex, 2 = half-duplex, 3 = in, 4=out */
328 counters->ifSpeed = netdev_features_to_bps(current, 0);
329 counters->ifDirection = (netdev_features_is_full_duplex(current)
332 counters->ifSpeed = 100000000;
333 counters->ifDirection = 0;
335 if (!netdev_get_flags(dsp->ofport->netdev, &flags) && flags & NETDEV_UP) {
336 counters->ifStatus = 1; /* ifAdminStatus up. */
337 if (netdev_get_carrier(dsp->ofport->netdev)) {
338 counters->ifStatus |= 2; /* ifOperStatus us. */
341 counters->ifStatus = 0; /* Down. */
345 1. Is the multicast counter filled in?
346 2. Does the multicast counter include broadcasts?
347 3. Does the rx_packets counter include multicasts/broadcasts?
349 ofproto_port_get_stats(dsp->ofport, &stats);
350 counters->ifInOctets = stats.rx_bytes;
351 counters->ifInUcastPkts = stats.rx_packets;
352 counters->ifInMulticastPkts = stats.multicast;
353 counters->ifInBroadcastPkts = -1;
354 counters->ifInDiscards = stats.rx_dropped;
355 counters->ifInErrors = stats.rx_errors;
356 counters->ifInUnknownProtos = -1;
357 counters->ifOutOctets = stats.tx_bytes;
358 counters->ifOutUcastPkts = stats.tx_packets;
359 counters->ifOutMulticastPkts = -1;
360 counters->ifOutBroadcastPkts = -1;
361 counters->ifOutDiscards = stats.tx_dropped;
362 counters->ifOutErrors = stats.tx_errors;
363 counters->ifPromiscuousMode = 0;
365 SFLADD_ELEMENT(cs, &elem);
367 /* Include LACP counters and identifiers if this port is part of a LAG. */
368 if (ofproto_port_get_lacp_stats(dsp->ofport, &lacp_stats) == 0) {
369 memset(&lacp_elem, 0, sizeof lacp_elem);
370 lacp_elem.tag = SFLCOUNTERS_LACP;
371 memcpy(&lacp_elem.counterBlock.lacp.actorSystemID,
372 lacp_stats.dot3adAggPortActorSystemID,
374 memcpy(&lacp_elem.counterBlock.lacp.partnerSystemID,
375 lacp_stats.dot3adAggPortPartnerOperSystemID,
377 lacp_elem.counterBlock.lacp.attachedAggID =
378 lacp_stats.dot3adAggPortAttachedAggID;
379 lacp_elem.counterBlock.lacp.portState.v.actorAdmin =
380 lacp_stats.dot3adAggPortActorAdminState;
381 lacp_elem.counterBlock.lacp.portState.v.actorOper =
382 lacp_stats.dot3adAggPortActorOperState;
383 lacp_elem.counterBlock.lacp.portState.v.partnerAdmin =
384 lacp_stats.dot3adAggPortPartnerAdminState;
385 lacp_elem.counterBlock.lacp.portState.v.partnerOper =
386 lacp_stats.dot3adAggPortPartnerOperState;
387 lacp_elem.counterBlock.lacp.LACPDUsRx =
388 lacp_stats.dot3adAggPortStatsLACPDUsRx;
389 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsRx);
390 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsRx);
391 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.unknownRx);
392 lacp_elem.counterBlock.lacp.illegalRx =
393 lacp_stats.dot3adAggPortStatsIllegalRx;
394 lacp_elem.counterBlock.lacp.LACPDUsTx =
395 lacp_stats.dot3adAggPortStatsLACPDUsTx;
396 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerPDUsTx);
397 SFL_UNDEF_COUNTER(lacp_elem.counterBlock.lacp.markerResponsePDUsTx);
398 SFLADD_ELEMENT(cs, &lacp_elem);
401 /* Include Port name. */
402 if ((ifName = netdev_get_name(dsp->ofport->netdev)) != NULL) {
403 memset(&name_elem, 0, sizeof name_elem);
404 name_elem.tag = SFLCOUNTERS_PORTNAME;
405 name_elem.counterBlock.portName.portName.str = (char *)ifName;
406 name_elem.counterBlock.portName.portName.len = strlen(ifName);
407 SFLADD_ELEMENT(cs, &name_elem);
410 /* Include OpenFlow DPID and openflow port number. */
411 memset(&of_elem, 0, sizeof of_elem);
412 of_elem.tag = SFLCOUNTERS_OPENFLOWPORT;
413 of_elem.counterBlock.ofPort.datapath_id =
414 ofproto_get_datapath_id(dsp->ofport->ofproto);
415 of_elem.counterBlock.ofPort.port_no =
416 (OVS_FORCE uint32_t)dsp->ofport->ofp_port;
417 SFLADD_ELEMENT(cs, &of_elem);
419 sfl_poller_writeCountersSample(poller, cs);
422 /* Obtains an address to use for the local sFlow agent and stores it into
423 * '*agent_addr'. Returns true if successful, false on failure.
425 * The sFlow agent address should be a local IP address that is persistent and
426 * reachable over the network, if possible. The IP address associated with
427 * 'agent_device' is used if it has one, and otherwise 'control_ip', the IP
428 * address used to talk to the controller. If the agent device is not
429 * specified then it is figured out by taking a look at the routing table based
432 sflow_choose_agent_address(const char *agent_device,
433 const struct sset *targets,
434 const char *control_ip,
435 SFLAddress *agent_addr)
440 memset(agent_addr, 0, sizeof *agent_addr);
441 agent_addr->type = SFLADDRESSTYPE_IP_V4;
444 if (!netdev_get_in4_by_name(agent_device, &in4)) {
449 SSET_FOR_EACH (target, targets) {
451 struct sockaddr_storage ss;
452 struct sockaddr_in sin;
456 if (inet_parse_active(target, SFL_DEFAULT_COLLECTOR_PORT, &sa.ss)
457 && sa.ss.ss_family == AF_INET) {
460 if (ovs_router_lookup(sa.sin.sin_addr.s_addr, name, &gw)
461 && !netdev_get_in4_by_name(name, &in4)) {
467 if (control_ip && !lookup_ip(control_ip, &in4)) {
471 VLOG_ERR("could not determine IP address for sFlow agent");
475 agent_addr->address.ip_v4.addr = (OVS_FORCE uint32_t) in4.s_addr;
480 dpif_sflow_clear__(struct dpif_sflow *ds) OVS_REQUIRES(mutex)
482 if (ds->sflow_agent) {
483 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
484 sfl_agent_release(ds->sflow_agent);
485 free(ds->sflow_agent);
486 ds->sflow_agent = NULL;
488 collectors_destroy(ds->collectors);
489 ds->collectors = NULL;
490 ofproto_sflow_options_destroy(ds->options);
493 /* Turn off sampling to save CPU cycles. */
498 dpif_sflow_clear(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
500 ovs_mutex_lock(&mutex);
501 dpif_sflow_clear__(ds);
502 ovs_mutex_unlock(&mutex);
506 dpif_sflow_is_enabled(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
510 ovs_mutex_lock(&mutex);
511 enabled = ds->collectors != NULL;
512 ovs_mutex_unlock(&mutex);
517 dpif_sflow_create(void)
519 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
520 struct dpif_sflow *ds;
522 if (ovsthread_once_start(&once)) {
523 ovs_mutex_init_recursive(&mutex);
524 ovsthread_once_done(&once);
527 ds = xcalloc(1, sizeof *ds);
528 ds->next_tick = time_now() + 1;
529 hmap_init(&ds->ports);
531 ovs_refcount_init(&ds->ref_cnt);
537 dpif_sflow_ref(const struct dpif_sflow *ds_)
539 struct dpif_sflow *ds = CONST_CAST(struct dpif_sflow *, ds_);
541 ovs_refcount_ref(&ds->ref_cnt);
546 /* 32-bit fraction of packets to sample with. A value of 0 samples no packets,
547 * a value of %UINT32_MAX samples all packets and intermediate values sample
548 * intermediate fractions of packets. */
550 dpif_sflow_get_probability(const struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
552 uint32_t probability;
553 ovs_mutex_lock(&mutex);
554 probability = ds->probability;
555 ovs_mutex_unlock(&mutex);
560 dpif_sflow_unref(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
562 if (ds && ovs_refcount_unref_relaxed(&ds->ref_cnt) == 1) {
563 struct dpif_sflow_port *dsp, *next;
565 dpif_sflow_clear(ds);
566 HMAP_FOR_EACH_SAFE (dsp, next, hmap_node, &ds->ports) {
567 dpif_sflow_del_port__(ds, dsp);
569 hmap_destroy(&ds->ports);
575 dpif_sflow_add_poller(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
578 SFLPoller *poller = sfl_agent_addPoller(ds->sflow_agent, &dsp->dsi, ds,
579 sflow_agent_get_counters);
580 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
581 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
582 sfl_poller_set_bridgePort(poller, odp_to_u32(dsp->odp_port));
585 static enum dpif_sflow_tunnel_type
586 dpif_sflow_tunnel_type(struct ofport *ofport) {
587 const char *type = netdev_get_type(ofport->netdev);
589 if (strcmp(type, "gre") == 0) {
590 return DPIF_SFLOW_TUNNEL_GRE;
591 } else if (strcmp(type, "ipsec_gre") == 0) {
592 return DPIF_SFLOW_TUNNEL_IPSEC_GRE;
593 } else if (strcmp(type, "vxlan") == 0) {
594 return DPIF_SFLOW_TUNNEL_VXLAN;
595 } else if (strcmp(type, "lisp") == 0) {
596 return DPIF_SFLOW_TUNNEL_LISP;
597 } else if (strcmp(type, "geneve") == 0) {
598 return DPIF_SFLOW_TUNNEL_GENEVE;
601 return DPIF_SFLOW_TUNNEL_UNKNOWN;
605 dpif_sflow_tunnel_proto(enum dpif_sflow_tunnel_type tunnel_type)
607 /* Default to 0 (IPPROTO_IP), meaning "unknown". */
609 switch(tunnel_type) {
611 case DPIF_SFLOW_TUNNEL_GRE:
612 ipproto = IPPROTO_GRE;
615 case DPIF_SFLOW_TUNNEL_IPSEC_GRE:
616 ipproto = IPPROTO_ESP;
619 case DPIF_SFLOW_TUNNEL_VXLAN:
620 case DPIF_SFLOW_TUNNEL_LISP:
621 case DPIF_SFLOW_TUNNEL_GENEVE:
622 ipproto = IPPROTO_UDP;
624 case DPIF_SFLOW_TUNNEL_UNKNOWN:
631 dpif_sflow_add_port(struct dpif_sflow *ds, struct ofport *ofport,
632 odp_port_t odp_port) OVS_EXCLUDED(mutex)
634 struct dpif_sflow_port *dsp;
636 enum dpif_sflow_tunnel_type tunnel_type;
638 ovs_mutex_lock(&mutex);
639 dpif_sflow_del_port(ds, odp_port);
641 tunnel_type = dpif_sflow_tunnel_type(ofport);
642 ifindex = netdev_get_ifindex(ofport->netdev);
645 && tunnel_type == DPIF_SFLOW_TUNNEL_UNKNOWN) {
646 /* Not an ifindex port, and not a tunnel port either
647 * so do not add a cross-reference to it here.
652 /* Add to table of ports. */
653 dsp = xmalloc(sizeof *dsp);
654 dsp->ofport = ofport;
655 dsp->odp_port = odp_port;
656 dsp->tunnel_type = tunnel_type;
657 hmap_insert(&ds->ports, &dsp->hmap_node, hash_odp_port(odp_port));
660 /* Add poller for ports that have ifindex. */
661 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, ifindex, 0);
662 if (ds->sflow_agent) {
663 dpif_sflow_add_poller(ds, dsp);
666 /* Record "ifindex unknown" for the others */
667 SFL_DS_SET(dsp->dsi, SFL_DSCLASS_IFINDEX, 0, 0);
671 ovs_mutex_unlock(&mutex);
675 dpif_sflow_del_port__(struct dpif_sflow *ds, struct dpif_sflow_port *dsp)
679 && SFL_DS_INDEX(dsp->dsi)) {
680 sfl_agent_removePoller(ds->sflow_agent, &dsp->dsi);
681 sfl_agent_removeSampler(ds->sflow_agent, &dsp->dsi);
683 hmap_remove(&ds->ports, &dsp->hmap_node);
688 dpif_sflow_del_port(struct dpif_sflow *ds, odp_port_t odp_port)
691 struct dpif_sflow_port *dsp;
693 ovs_mutex_lock(&mutex);
694 dsp = dpif_sflow_find_port(ds, odp_port);
696 dpif_sflow_del_port__(ds, dsp);
698 ovs_mutex_unlock(&mutex);
702 dpif_sflow_set_options(struct dpif_sflow *ds,
703 const struct ofproto_sflow_options *options)
706 struct dpif_sflow_port *dsp;
707 bool options_changed;
708 SFLReceiver *receiver;
711 SFLDataSource_instance dsi;
716 ovs_mutex_lock(&mutex);
717 if (sset_is_empty(&options->targets) || !options->sampling_rate) {
718 /* No point in doing any work if there are no targets or nothing to
720 dpif_sflow_clear__(ds);
724 options_changed = (!ds->options
725 || !ofproto_sflow_options_equal(options, ds->options));
727 /* Configure collectors if options have changed or if we're shortchanged in
728 * collectors (which indicates that opening one or more of the configured
729 * collectors failed, so that we should retry). */
731 || collectors_count(ds->collectors) < sset_count(&options->targets)) {
732 collectors_destroy(ds->collectors);
733 collectors_create(&options->targets, SFL_DEFAULT_COLLECTOR_PORT,
735 if (ds->collectors == NULL) {
736 VLOG_WARN_RL(&rl, "no collectors could be initialized, "
738 dpif_sflow_clear__(ds);
743 /* Choose agent IP address and agent device (if not yet setup) */
744 if (!sflow_choose_agent_address(options->agent_device,
746 options->control_ip, &agentIP)) {
747 dpif_sflow_clear__(ds);
751 /* Avoid reconfiguring if options didn't change. */
752 if (!options_changed) {
755 ofproto_sflow_options_destroy(ds->options);
756 ds->options = ofproto_sflow_options_clone(options);
759 VLOG_INFO("creating sFlow agent %d", options->sub_id);
760 if (ds->sflow_agent) {
761 sflow_global_counters_subid_clear(ds->sflow_agent->subId);
762 sfl_agent_release(ds->sflow_agent);
764 ds->sflow_agent = xcalloc(1, sizeof *ds->sflow_agent);
766 sfl_agent_init(ds->sflow_agent,
769 now, /* Boot time. */
770 now, /* Current time. */
771 ds, /* Pointer supplied to callbacks. */
772 sflow_agent_alloc_cb,
774 sflow_agent_error_cb,
775 sflow_agent_send_packet_cb);
777 receiver = sfl_agent_addReceiver(ds->sflow_agent);
778 sfl_receiver_set_sFlowRcvrOwner(receiver, "Open vSwitch sFlow");
779 sfl_receiver_set_sFlowRcvrTimeout(receiver, 0xffffffff);
781 /* Set the sampling_rate down in the datapath. */
782 ds->probability = MAX(1, UINT32_MAX / ds->options->sampling_rate);
784 /* Add a single sampler for the bridge. This appears as a PHYSICAL_ENTITY
785 because it is associated with the hypervisor, and interacts with the server
786 hardware directly. The sub_id is used to distinguish this sampler from
787 others on other bridges within the same agent. */
788 dsIndex = 1000 + options->sub_id;
789 SFL_DS_SET(dsi, SFL_DSCLASS_PHYSICAL_ENTITY, dsIndex, 0);
790 sampler = sfl_agent_addSampler(ds->sflow_agent, &dsi);
791 sfl_sampler_set_sFlowFsPacketSamplingRate(sampler, ds->options->sampling_rate);
792 sfl_sampler_set_sFlowFsMaximumHeaderSize(sampler, ds->options->header_len);
793 sfl_sampler_set_sFlowFsReceiver(sampler, RECEIVER_INDEX);
795 /* Add a counter poller for the bridge so we can use it to send
796 global counters such as datapath cache hit/miss stats. */
797 poller = sfl_agent_addPoller(ds->sflow_agent, &dsi, ds,
798 sflow_agent_get_global_counters);
799 sfl_poller_set_sFlowCpInterval(poller, ds->options->polling_interval);
800 sfl_poller_set_sFlowCpReceiver(poller, RECEIVER_INDEX);
802 /* Add pollers for the currently known ifindex-ports */
803 HMAP_FOR_EACH (dsp, hmap_node, &ds->ports) {
804 if (SFL_DS_INDEX(dsp->dsi)) {
805 dpif_sflow_add_poller(ds, dsp);
811 ovs_mutex_unlock(&mutex);
815 dpif_sflow_odp_port_to_ifindex(const struct dpif_sflow *ds,
816 odp_port_t odp_port) OVS_EXCLUDED(mutex)
818 struct dpif_sflow_port *dsp;
821 ovs_mutex_lock(&mutex);
822 dsp = dpif_sflow_find_port(ds, odp_port);
823 ret = dsp ? SFL_DS_INDEX(dsp->dsi) : 0;
824 ovs_mutex_unlock(&mutex);
829 dpif_sflow_tunnel_v4(uint8_t tunnel_ipproto,
830 const struct flow_tnl *tunnel,
831 SFLSampled_ipv4 *ipv4)
834 ipv4->protocol = tunnel_ipproto;
835 ipv4->tos = tunnel->ip_tos;
836 ipv4->src_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_src;
837 ipv4->dst_ip.addr = (OVS_FORCE uint32_t) tunnel->ip_dst;
838 ipv4->src_port = (OVS_FORCE uint16_t) tunnel->tp_src;
839 ipv4->dst_port = (OVS_FORCE uint16_t) tunnel->tp_dst;
843 dpif_sflow_push_mpls_lse(struct dpif_sflow_actions *sflow_actions,
846 if (sflow_actions->mpls_stack_depth >= FLOW_MAX_MPLS_LABELS) {
847 sflow_actions->mpls_err = true;
851 /* Record the new lse in host-byte-order. */
852 /* BOS flag will be fixed later when we send stack to sFlow library. */
853 sflow_actions->mpls_lse[sflow_actions->mpls_stack_depth++] = ntohl(lse);
857 dpif_sflow_pop_mpls_lse(struct dpif_sflow_actions *sflow_actions)
859 if (sflow_actions->mpls_stack_depth == 0) {
860 sflow_actions->mpls_err = true;
863 sflow_actions->mpls_stack_depth--;
867 dpif_sflow_set_mpls(struct dpif_sflow_actions *sflow_actions,
868 const struct ovs_key_mpls *mpls_key, int n)
871 if (n > FLOW_MAX_MPLS_LABELS) {
872 sflow_actions->mpls_err = true;
876 for (ii = 0; ii < n; ii++) {
877 /* Reverse stack order, and use host-byte-order for each lse. */
878 sflow_actions->mpls_lse[n - ii - 1] = ntohl(mpls_key[ii].mpls_lse);
880 sflow_actions->mpls_stack_depth = n;
884 sflow_read_tnl_push_action(const struct nlattr *attr,
885 struct dpif_sflow_actions *sflow_actions)
887 /* Modeled on lib/odp-util.c: format_odp_tnl_push_header */
888 const struct ovs_action_push_tnl *data = nl_attr_get(attr);
889 const struct eth_header *eth = (const struct eth_header *) data->header;
890 const struct ip_header *ip
891 = ALIGNED_CAST(const struct ip_header *, eth + 1);
893 sflow_actions->out_port = u32_to_odp(data->out_port);
896 /* TODO: SFlow does not currently define a MAC-in-MAC
897 * encapsulation structure. We could use an extension
898 * structure to report this.
902 /* Cannot assume alignment so just use memcpy. */
903 sflow_actions->tunnel.ip_src = get_16aligned_be32(&ip->ip_src);
904 sflow_actions->tunnel.ip_dst = get_16aligned_be32(&ip->ip_dst);
905 sflow_actions->tunnel.ip_tos = ip->ip_tos;
906 sflow_actions->tunnel.ip_ttl = ip->ip_ttl;
907 /* The tnl_push action can supply the ip_protocol too. */
908 sflow_actions->tunnel_ipproto = ip->ip_proto;
911 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN
912 || data->tnl_type == OVS_VPORT_TYPE_GENEVE) {
913 const struct udp_header *udp = (const struct udp_header *) (ip + 1);
914 sflow_actions->tunnel.tp_src = udp->udp_src;
915 sflow_actions->tunnel.tp_dst = udp->udp_dst;
917 if (data->tnl_type == OVS_VPORT_TYPE_VXLAN) {
918 const struct vxlanhdr *vxh = (const struct vxlanhdr *) (udp + 1);
919 uint64_t tun_id = ntohl(get_16aligned_be32(&vxh->vx_vni)) >> 8;
920 sflow_actions->tunnel.tun_id = htonll(tun_id);
922 const struct genevehdr *gnh = (const struct genevehdr *) (udp + 1);
923 uint64_t tun_id = ntohl(get_16aligned_be32(&gnh->vni)) >> 8;
924 sflow_actions->tunnel.tun_id = htonll(tun_id);
926 } else if (data->tnl_type == OVS_VPORT_TYPE_GRE) {
927 const void *l4 = ip + 1;
928 const struct gre_base_hdr *greh = (const struct gre_base_hdr *) l4;
929 ovs_16aligned_be32 *options = (ovs_16aligned_be32 *)(greh + 1);
930 if (greh->flags & htons(GRE_CSUM)) {
933 if (greh->flags & htons(GRE_KEY)) {
934 uint64_t tun_id = ntohl(get_16aligned_be32(options));
935 sflow_actions->tunnel.tun_id = htonll(tun_id);
941 sflow_read_set_action(const struct nlattr *attr,
942 struct dpif_sflow_actions *sflow_actions)
944 enum ovs_key_attr type = nl_attr_type(attr);
946 case OVS_KEY_ATTR_ENCAP:
947 if (++sflow_actions->encap_depth > 1) {
948 /* Do not handle multi-encap for now. */
949 sflow_actions->tunnel_err = true;
951 dpif_sflow_read_actions(NULL,
952 nl_attr_get(attr), nl_attr_get_size(attr),
956 case OVS_KEY_ATTR_PRIORITY:
957 case OVS_KEY_ATTR_SKB_MARK:
958 case OVS_KEY_ATTR_DP_HASH:
959 case OVS_KEY_ATTR_RECIRC_ID:
962 case OVS_KEY_ATTR_TUNNEL: {
963 if (++sflow_actions->encap_depth > 1) {
964 /* Do not handle multi-encap for now. */
965 sflow_actions->tunnel_err = true;
967 if (odp_tun_key_from_attr(attr, false, &sflow_actions->tunnel)
969 /* Tunnel parsing error. */
970 sflow_actions->tunnel_err = true;
976 case OVS_KEY_ATTR_IN_PORT:
977 case OVS_KEY_ATTR_ETHERNET:
978 case OVS_KEY_ATTR_VLAN:
981 case OVS_KEY_ATTR_MPLS: {
982 const struct ovs_key_mpls *mpls_key = nl_attr_get(attr);
983 size_t size = nl_attr_get_size(attr);
984 dpif_sflow_set_mpls(sflow_actions, mpls_key, size / sizeof *mpls_key);
988 case OVS_KEY_ATTR_ETHERTYPE:
989 case OVS_KEY_ATTR_IPV4:
990 if (sflow_actions->encap_depth == 1) {
991 const struct ovs_key_ipv4 *key = nl_attr_get(attr);
993 sflow_actions->tunnel.ip_src = key->ipv4_src;
996 sflow_actions->tunnel.ip_dst = key->ipv4_dst;
998 if (key->ipv4_proto) {
999 sflow_actions->tunnel_ipproto = key->ipv4_proto;
1001 if (key->ipv4_tos) {
1002 sflow_actions->tunnel.ip_tos = key->ipv4_tos;
1004 if (key->ipv4_ttl) {
1005 sflow_actions->tunnel.ip_tos = key->ipv4_ttl;
1010 case OVS_KEY_ATTR_IPV6:
1011 /* TODO: parse IPv6 encap. */
1014 /* These have the same structure and format. */
1015 case OVS_KEY_ATTR_TCP:
1016 case OVS_KEY_ATTR_UDP:
1017 case OVS_KEY_ATTR_SCTP:
1018 if (sflow_actions->encap_depth == 1) {
1019 const struct ovs_key_tcp *key = nl_attr_get(attr);
1021 sflow_actions->tunnel.tp_src = key->tcp_src;
1024 sflow_actions->tunnel.tp_dst = key->tcp_dst;
1029 case OVS_KEY_ATTR_TCP_FLAGS:
1030 case OVS_KEY_ATTR_ICMP:
1031 case OVS_KEY_ATTR_ICMPV6:
1032 case OVS_KEY_ATTR_ARP:
1033 case OVS_KEY_ATTR_ND:
1034 case OVS_KEY_ATTR_UNSPEC:
1035 case __OVS_KEY_ATTR_MAX:
1042 dpif_sflow_capture_input_mpls(const struct flow *flow,
1043 struct dpif_sflow_actions *sflow_actions)
1045 if (eth_type_mpls(flow->dl_type)) {
1049 /* Calculate depth by detecting BOS. */
1050 for (ii = 0; ii < FLOW_MAX_MPLS_LABELS; ii++) {
1051 lse = flow->mpls_lse[ii];
1053 if (lse & htonl(MPLS_BOS_MASK)) {
1057 /* Capture stack, reversing stack order, and
1058 * using host-byte-order for each lse. BOS flag
1059 * is ignored for now. It is set later when
1060 * the output stack is encoded.
1062 for (ii = 0; ii < depth; ii++) {
1063 lse = flow->mpls_lse[ii];
1064 sflow_actions->mpls_lse[depth - ii - 1] = ntohl(lse);
1066 sflow_actions->mpls_stack_depth = depth;
1071 dpif_sflow_read_actions(const struct flow *flow,
1072 const struct nlattr *actions, size_t actions_len,
1073 struct dpif_sflow_actions *sflow_actions)
1075 const struct nlattr *a;
1078 if (actions_len == 0) {
1079 /* Packet dropped.*/
1084 /* Make sure the MPLS output stack
1085 * is seeded with the input stack.
1087 dpif_sflow_capture_input_mpls(flow, sflow_actions);
1089 /* XXX when 802.1AD(QinQ) is supported then
1090 * we can do the same with VLAN stacks here
1094 NL_ATTR_FOR_EACH (a, left, actions, actions_len) {
1095 enum ovs_action_attr type = nl_attr_type(a);
1097 case OVS_ACTION_ATTR_OUTPUT:
1098 /* Capture the output port in case we need it
1099 * to get the output tunnel type.
1101 sflow_actions->out_port = u32_to_odp(nl_attr_get_u32(a));
1104 case OVS_ACTION_ATTR_TUNNEL_POP:
1105 /* XXX: Do not handle this for now. It's not clear
1106 * if we should start with encap_depth == 1 when we
1107 * see an input tunnel, or if we should assume
1108 * that the input tunnel was always "popped" if it
1109 * was presented to us decoded in flow->tunnel?
1111 * If we do handle this it might look like this,
1112 * as we clear the captured tunnel info and decrement
1115 * memset(&sflow_actions->tunnel, 0, sizeof struct flow_tnl);
1116 * sflow_actions->tunnel_ipproto = 0;
1117 * --sflow_actions->encap_depth;
1119 * but for now just disable the tunnel annotation:
1121 sflow_actions->tunnel_err = true;
1124 case OVS_ACTION_ATTR_TUNNEL_PUSH:
1125 /* XXX: This actions appears to come with it's own
1126 * OUTPUT action, so should it be regarded as having
1127 * an implicit "pop" following it too? Put another
1128 * way, would two tnl_push() actions in succession
1129 * result in a packet with two layers of encap?
1131 if (++sflow_actions->encap_depth > 1) {
1132 /* Do not handle multi-encap for now. */
1133 sflow_actions->tunnel_err = true;
1135 sflow_read_tnl_push_action(a, sflow_actions);
1139 case OVS_ACTION_ATTR_USERSPACE:
1140 case OVS_ACTION_ATTR_RECIRC:
1141 case OVS_ACTION_ATTR_HASH:
1144 case OVS_ACTION_ATTR_SET_MASKED:
1145 /* TODO: apply mask. XXX: Are we likely to see this? */
1148 case OVS_ACTION_ATTR_SET:
1149 sflow_read_set_action(nl_attr_get(a), sflow_actions);
1152 case OVS_ACTION_ATTR_PUSH_VLAN:
1153 case OVS_ACTION_ATTR_POP_VLAN:
1154 /* TODO: 802.1AD(QinQ) is not supported by OVS (yet), so do not
1155 * construct a VLAN-stack. The sFlow user-action cookie already
1156 * captures the egress VLAN ID so there is nothing more to do here.
1160 case OVS_ACTION_ATTR_PUSH_MPLS: {
1161 const struct ovs_action_push_mpls *mpls = nl_attr_get(a);
1163 dpif_sflow_push_mpls_lse(sflow_actions, mpls->mpls_lse);
1167 case OVS_ACTION_ATTR_POP_MPLS: {
1168 dpif_sflow_pop_mpls_lse(sflow_actions);
1171 case OVS_ACTION_ATTR_SAMPLE:
1172 case OVS_ACTION_ATTR_UNSPEC:
1173 case __OVS_ACTION_ATTR_MAX:
1181 dpif_sflow_encode_mpls_stack(SFLLabelStack *stack,
1182 uint32_t *mpls_lse_buf,
1183 const struct dpif_sflow_actions *sflow_actions)
1185 /* Put the MPLS stack back into "packet header" order,
1186 * and make sure the BOS flag is set correctly on the last
1187 * one. Each lse is still in host-byte-order.
1191 stack->depth = sflow_actions->mpls_stack_depth;
1192 stack->stack = mpls_lse_buf;
1193 for (ii = 0; ii < stack->depth; ii++) {
1194 lse = sflow_actions->mpls_lse[stack->depth - ii - 1];
1195 stack->stack[ii] = (lse & ~MPLS_BOS_MASK);
1197 stack->stack[stack->depth - 1] |= MPLS_BOS_MASK;
1200 /* Extract the output port count from the user action cookie.
1201 * See http://sflow.org/sflow_version_5.txt "Input/Output port information"
1204 dpif_sflow_cookie_num_outputs(const union user_action_cookie *cookie)
1206 uint32_t format = cookie->sflow.output & 0xC0000000;
1207 uint32_t port_n = cookie->sflow.output & 0x3FFFFFFF;
1209 return port_n ? 1 : 0;
1211 else if (format == 0x80000000) {
1218 dpif_sflow_received(struct dpif_sflow *ds, const struct dp_packet *packet,
1219 const struct flow *flow, odp_port_t odp_in_port,
1220 const union user_action_cookie *cookie,
1221 const struct dpif_sflow_actions *sflow_actions)
1224 SFL_FLOW_SAMPLE_TYPE fs;
1225 SFLFlow_sample_element hdrElem;
1226 SFLSampled_header *header;
1227 SFLFlow_sample_element switchElem;
1228 uint8_t tnlInProto, tnlOutProto;
1229 SFLFlow_sample_element tnlInElem, tnlOutElem;
1230 SFLFlow_sample_element vniInElem, vniOutElem;
1231 SFLFlow_sample_element mplsElem;
1232 uint32_t mpls_lse_buf[FLOW_MAX_MPLS_LABELS];
1233 SFLSampler *sampler;
1234 struct dpif_sflow_port *in_dsp;
1235 struct dpif_sflow_port *out_dsp;
1238 ovs_mutex_lock(&mutex);
1239 sampler = ds->sflow_agent->samplers;
1244 /* Build a flow sample. */
1245 memset(&fs, 0, sizeof fs);
1247 /* Look up the input ifIndex if this port has one. Otherwise just
1248 * leave it as 0 (meaning 'unknown') and continue. */
1249 in_dsp = dpif_sflow_find_port(ds, odp_in_port);
1251 fs.input = SFL_DS_INDEX(in_dsp->dsi);
1254 /* Make the assumption that the random number generator in the datapath converges
1255 * to the configured mean, and just increment the samplePool by the configured
1256 * sampling rate every time. */
1257 sampler->samplePool += sfl_sampler_get_sFlowFsPacketSamplingRate(sampler);
1259 /* Sampled header. */
1260 memset(&hdrElem, 0, sizeof hdrElem);
1261 hdrElem.tag = SFLFLOW_HEADER;
1262 header = &hdrElem.flowType.header;
1263 header->header_protocol = SFLHEADER_ETHERNET_ISO8023;
1264 /* The frame_length should include the Ethernet FCS (4 bytes),
1265 * but it has already been stripped, so we need to add 4 here. */
1266 header->frame_length = dp_packet_size(packet) + 4;
1267 /* Ethernet FCS stripped off. */
1268 header->stripped = 4;
1269 header->header_length = MIN(dp_packet_size(packet),
1270 sampler->sFlowFsMaximumHeaderSize);
1271 header->header_bytes = dp_packet_data(packet);
1273 /* Add extended switch element. */
1274 memset(&switchElem, 0, sizeof(switchElem));
1275 switchElem.tag = SFLFLOW_EX_SWITCH;
1276 switchElem.flowType.sw.src_vlan = vlan_tci_to_vid(flow->vlan_tci);
1277 switchElem.flowType.sw.src_priority = vlan_tci_to_pcp(flow->vlan_tci);
1279 /* Retrieve data from user_action_cookie. */
1280 vlan_tci = cookie->sflow.vlan_tci;
1281 switchElem.flowType.sw.dst_vlan = vlan_tci_to_vid(vlan_tci);
1282 switchElem.flowType.sw.dst_priority = vlan_tci_to_pcp(vlan_tci);
1284 fs.output = cookie->sflow.output;
1287 if (flow->tunnel.ip_dst) {
1288 memset(&tnlInElem, 0, sizeof(tnlInElem));
1289 tnlInElem.tag = SFLFLOW_EX_IPV4_TUNNEL_INGRESS;
1290 tnlInProto = dpif_sflow_tunnel_proto(in_dsp->tunnel_type);
1291 dpif_sflow_tunnel_v4(tnlInProto,
1293 &tnlInElem.flowType.ipv4);
1294 SFLADD_ELEMENT(&fs, &tnlInElem);
1295 if (flow->tunnel.tun_id) {
1296 memset(&vniInElem, 0, sizeof(vniInElem));
1297 vniInElem.tag = SFLFLOW_EX_VNI_INGRESS;
1298 vniInElem.flowType.tunnel_vni.vni
1299 = ntohll(flow->tunnel.tun_id);
1300 SFLADD_ELEMENT(&fs, &vniInElem);
1304 /* Output tunnel. */
1306 && sflow_actions->encap_depth == 1
1307 && !sflow_actions->tunnel_err
1308 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1309 tnlOutProto = sflow_actions->tunnel_ipproto;
1310 if (tnlOutProto == 0) {
1311 /* Try to infer the ip-protocol from the output port. */
1312 if (sflow_actions->out_port != ODPP_NONE) {
1313 out_dsp = dpif_sflow_find_port(ds, sflow_actions->out_port);
1315 tnlOutProto = dpif_sflow_tunnel_proto(out_dsp->tunnel_type);
1319 memset(&tnlOutElem, 0, sizeof(tnlOutElem));
1320 tnlOutElem.tag = SFLFLOW_EX_IPV4_TUNNEL_EGRESS;
1321 dpif_sflow_tunnel_v4(tnlOutProto,
1322 &sflow_actions->tunnel,
1323 &tnlOutElem.flowType.ipv4);
1324 SFLADD_ELEMENT(&fs, &tnlOutElem);
1325 if (sflow_actions->tunnel.tun_id) {
1326 memset(&vniOutElem, 0, sizeof(vniOutElem));
1327 vniOutElem.tag = SFLFLOW_EX_VNI_EGRESS;
1328 vniOutElem.flowType.tunnel_vni.vni
1329 = ntohll(sflow_actions->tunnel.tun_id);
1330 SFLADD_ELEMENT(&fs, &vniOutElem);
1334 /* MPLS output label stack. */
1336 && sflow_actions->mpls_stack_depth > 0
1337 && !sflow_actions->mpls_err
1338 && dpif_sflow_cookie_num_outputs(cookie) == 1) {
1339 memset(&mplsElem, 0, sizeof(mplsElem));
1340 mplsElem.tag = SFLFLOW_EX_MPLS;
1341 dpif_sflow_encode_mpls_stack(&mplsElem.flowType.mpls.out_stack,
1344 SFLADD_ELEMENT(&fs, &mplsElem);
1347 /* Submit the flow sample to be encoded into the next datagram. */
1348 SFLADD_ELEMENT(&fs, &hdrElem);
1349 SFLADD_ELEMENT(&fs, &switchElem);
1350 sfl_sampler_writeFlowSample(sampler, &fs);
1353 ovs_mutex_unlock(&mutex);
1357 dpif_sflow_run(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
1359 ovs_mutex_lock(&mutex);
1360 if (ds->collectors != NULL) {
1361 time_t now = time_now();
1363 if (now >= ds->next_tick) {
1364 sfl_agent_tick(ds->sflow_agent, time_wall());
1365 ds->next_tick = now + 1;
1368 ovs_mutex_unlock(&mutex);
1372 dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
1374 ovs_mutex_lock(&mutex);
1375 if (ds->collectors != NULL) {
1376 poll_timer_wait_until(ds->next_tick * 1000LL);
1378 ovs_mutex_unlock(&mutex);