util: Expose function nullable_string_is_equal.
[cascardo/ovs.git] / ofproto / ofproto-dpif-ipfix.c
1 /*
2  * Copyright (c) 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16
17 #include <config.h>
18 #include "ofproto-dpif-ipfix.h"
19 #include <sys/time.h>
20 #include "byte-order.h"
21 #include "collectors.h"
22 #include "flow.h"
23 #include "hash.h"
24 #include "openvswitch/hmap.h"
25 #include "netdev.h"
26 #include "openvswitch/list.h"
27 #include "openvswitch/ofpbuf.h"
28 #include "ofproto.h"
29 #include "ofproto-dpif.h"
30 #include "dp-packet.h"
31 #include "packets.h"
32 #include "poll-loop.h"
33 #include "sset.h"
34 #include "util.h"
35 #include "timeval.h"
36 #include "openvswitch/vlog.h"
37
38 VLOG_DEFINE_THIS_MODULE(ipfix);
39
40 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
41 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
42
/* Default IPFIX collector port.  Cf. IETF RFC 5101 Section 10.3.4. */
#define IPFIX_DEFAULT_COLLECTOR_PORT 4739

/* BFD control and echo destination ports.  Cf. IETF RFC 5881 Section 8. */
#define BFD_CONTROL_DEST_PORT 3784
#define BFD_ECHO_DEST_PORT    3785
49
/* Classes of sampled packets.  Every enumerator carries an explicit value so
 * that the numbering does not depend on declaration order. */
enum ipfix_sampled_packet_type {
    IPFIX_SAMPLED_PKT_UNKNOWN    = 0,
    IPFIX_SAMPLED_PKT_IPV4_OK    = 1,
    IPFIX_SAMPLED_PKT_IPV6_OK    = 2,
    IPFIX_SAMPLED_PKT_IPV4_ERROR = 3,
    IPFIX_SAMPLED_PKT_IPV6_ERROR = 4,
    IPFIX_SAMPLED_PKT_OTHERS     = 5
};
58
/* The standard layer2SegmentId (ID 351) element is included in vDS to send
 * the VxLAN tunnel's VNI.  It is 64 bits long: the most significant byte
 * indicates the type of tunnel (0x01 = VxLAN, 0x02 = GRE) and the three
 * least significant bytes hold the layer 2 overlay network segment
 * identifier, i.e. a 24-bit VxLAN VNI or a 24-bit GRE TNI.  This is not
 * compatible with STT as implemented in OVS, because STT tunnel IDs are
 * 64 bits wide.
 *
 * Two enterprise information elements are therefore defined, which are
 * similar to layer2SegmentId but support 64-bit IDs:
 *     tunnelType (ID 891) and tunnelKey (ID 892).
 *
 * enum dpif_ipfix_tunnel_type declares the types supported in the tunnelType
 * element.  Value 0x00 stands for an unknown tunnel and value 0x06 is not
 * assigned to any tunnel type, so NUM_DPIF_IPFIX_TUNNEL (0x08) counts those
 * two slots as well.
 */
enum dpif_ipfix_tunnel_type {
    DPIF_IPFIX_TUNNEL_UNKNOWN   = 0,
    DPIF_IPFIX_TUNNEL_VXLAN     = 1,
    DPIF_IPFIX_TUNNEL_GRE       = 2,
    DPIF_IPFIX_TUNNEL_LISP      = 3,
    DPIF_IPFIX_TUNNEL_STT       = 4,
    DPIF_IPFIX_TUNNEL_IPSEC_GRE = 5,
    /* 6 is intentionally left unassigned. */
    DPIF_IPFIX_TUNNEL_GENEVE    = 7,
    NUM_DPIF_IPFIX_TUNNEL       /* One past the highest assigned value. */
};
85
86 typedef struct ofputil_ipfix_stats ofproto_ipfix_stats;
87
88 struct dpif_ipfix_port {
89     struct hmap_node hmap_node; /* In struct dpif_ipfix's "tunnel_ports" hmap. */
90     struct ofport *ofport;      /* To retrieve port stats. */
91     odp_port_t odp_port;
92     enum dpif_ipfix_tunnel_type tunnel_type;
93     uint8_t tunnel_key_length;
94 };
95
96 struct dpif_ipfix_exporter {
97     struct collectors *collectors;
98     uint32_t seq_number;
99     time_t last_template_set_time;
100     struct hmap cache_flow_key_map;  /* ipfix_flow_cache_entry. */
101     struct ovs_list cache_flow_start_timestamp_list;  /* ipfix_flow_cache_entry. */
102     uint32_t cache_active_timeout;  /* In seconds. */
103     uint32_t cache_max_flows;
104     char *virtual_obs_id;
105     uint8_t virtual_obs_len;
106
107     ofproto_ipfix_stats stats;
108 };
109
110 struct dpif_ipfix_bridge_exporter {
111     struct dpif_ipfix_exporter exporter;
112     struct ofproto_ipfix_bridge_exporter_options *options;
113     uint32_t probability;
114 };
115
116 struct dpif_ipfix_flow_exporter {
117     struct dpif_ipfix_exporter exporter;
118     struct ofproto_ipfix_flow_exporter_options *options;
119 };
120
121 struct dpif_ipfix_flow_exporter_map_node {
122     struct hmap_node node;
123     struct dpif_ipfix_flow_exporter exporter;
124 };
125
126 struct dpif_ipfix {
127     struct dpif_ipfix_bridge_exporter bridge_exporter;
128     struct hmap flow_exporter_map;  /* dpif_ipfix_flow_exporter_map_node. */
129     struct hmap tunnel_ports;       /* Contains "struct dpif_ipfix_port"s.
130                                      * It makes tunnel port lookups faster in
131                                      * sampling upcalls. */
132     struct ovs_refcount ref_cnt;
133 };
134
/* Value of the "version" field of struct ipfix_header. */
#define IPFIX_VERSION 0x000a

/* Interval, in seconds, at which IPFIX Template Records are re-sent.  This
 * is required when using UDP; the standard default is 10 minutes.
 * Cf. IETF RFC 5101 Section 10.3.6. */
#define IPFIX_TEMPLATE_INTERVAL 600
141
142 /* Cf. IETF RFC 5101 Section 3.1. */
143 OVS_PACKED(
144 struct ipfix_header {
145     ovs_be16 version;  /* IPFIX_VERSION. */
146     ovs_be16 length;  /* Length in bytes including this header. */
147     ovs_be32 export_time;  /* Seconds since the epoch. */
148     ovs_be32 seq_number;  /* Message sequence number. */
149     ovs_be32 obs_domain_id;  /* Observation Domain ID. */
150 });
151 BUILD_ASSERT_DECL(sizeof(struct ipfix_header) == 16);
152
153 #define IPFIX_SET_ID_TEMPLATE 2
154 #define IPFIX_SET_ID_OPTION_TEMPLATE 3
155
156 /* Cf. IETF RFC 5101 Section 3.3.2. */
157 OVS_PACKED(
158 struct ipfix_set_header {
159     ovs_be16 set_id;  /* IPFIX_SET_ID_* or valid template ID for Data Sets. */
160     ovs_be16 length;  /* Length of the set in bytes including header. */
161 });
162 BUILD_ASSERT_DECL(sizeof(struct ipfix_set_header) == 4);
163
/* Alternatives for templates at each layer.  A template is defined by
 * a combination of one value for each layer.  Each NUM_* enumerator is the
 * number of alternatives at its layer. */
enum ipfix_proto_l2 {
    IPFIX_PROTO_L2_ETH  = 0,  /* No VLAN. */
    IPFIX_PROTO_L2_VLAN = 1,
    NUM_IPFIX_PROTO_L2  = 2
};

enum ipfix_proto_l3 {
    IPFIX_PROTO_L3_UNKNOWN = 0,
    IPFIX_PROTO_L3_IPV4    = 1,
    IPFIX_PROTO_L3_IPV6    = 2,
    NUM_IPFIX_PROTO_L3     = 3
};

enum ipfix_proto_l4 {
    IPFIX_PROTO_L4_UNKNOWN      = 0,
    IPFIX_PROTO_L4_TCP_UDP_SCTP = 1,
    IPFIX_PROTO_L4_ICMP         = 2,
    NUM_IPFIX_PROTO_L4          = 3
};

enum ipfix_proto_tunnel {
    IPFIX_PROTO_NOT_TUNNELED = 0,
    IPFIX_PROTO_TUNNELED     = 1,  /* Support gre, lisp and vxlan. */
    NUM_IPFIX_PROTO_TUNNEL   = 2
};
188
189 /* Any Template ID > 255 is usable for Template Records. */
190 #define IPFIX_TEMPLATE_ID_MIN 256
191
192 /* Cf. IETF RFC 5101 Section 3.4.1. */
193 OVS_PACKED(
194 struct ipfix_template_record_header {
195     ovs_be16 template_id;
196     ovs_be16 field_count;
197 });
198 BUILD_ASSERT_DECL(sizeof(struct ipfix_template_record_header) == 4);
199
200 enum ipfix_entity_id {
201 /* standard IPFIX elements */
202 #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME)  IPFIX_ENTITY_ID_##ENUM = ID,
203 #include "ofproto/ipfix-entities.def"
204 /* non-standard IPFIX elements */
205 #define IPFIX_SET_ENTERPRISE(v) (((v) | 0x8000))
206 #define IPFIX_ENTERPRISE_ENTITY(ENUM, ID, SIZE, NAME, ENTERPRISE) \
207     IPFIX_ENTITY_ID_##ENUM = IPFIX_SET_ENTERPRISE(ID),
208 #include "ofproto/ipfix-enterprise-entities.def"
209 };
210
211 enum ipfix_entity_size {
212 /* standard IPFIX elements */
213 #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME)  IPFIX_ENTITY_SIZE_##ENUM = SIZE,
214 #include "ofproto/ipfix-entities.def"
215 /* non-standard IPFIX elements */
216 #define IPFIX_ENTERPRISE_ENTITY(ENUM, ID, SIZE, NAME, ENTERPRISE) \
217     IPFIX_ENTITY_SIZE_##ENUM = SIZE,
218 #include "ofproto/ipfix-enterprise-entities.def"
219 };
220
221 enum ipfix_entity_enterprise {
222 /* standard IPFIX elements */
223 #define IPFIX_ENTITY(ENUM, ID, SIZE, NAME)  IPFIX_ENTITY_ENTERPRISE_##ENUM = 0,
224 #include "ofproto/ipfix-entities.def"
225 /* non-standard IPFIX elements */
226 #define IPFIX_ENTERPRISE_ENTITY(ENUM, ID, SIZE, NAME, ENTERPRISE) \
227     IPFIX_ENTITY_ENTERPRISE_##ENUM = ENTERPRISE,
228 #include "ofproto/ipfix-enterprise-entities.def"
229 };
230
231 OVS_PACKED(
232 struct ipfix_template_field_specifier {
233     ovs_be16 element_id;  /* IPFIX_ENTITY_ID_*. */
234     ovs_be16 field_length;  /* Length of the field's value, in bytes.
235                              * For Variable-Length element, it should be 65535.
236                              */
237     ovs_be32 enterprise;  /* Enterprise number */
238 });
239 BUILD_ASSERT_DECL(sizeof(struct ipfix_template_field_specifier) == 8);
240
/* Values of the FLOW_DIRECTION element.
 * Cf. IETF RFC 5102 Section 5.11.6. */
enum ipfix_flow_direction {
    INGRESS_FLOW = 0,
    EGRESS_FLOW  = 1
};
246
247 /* Part of data record flow key for common metadata and Ethernet entities. */
248 OVS_PACKED(
249 struct ipfix_data_record_flow_key_common {
250     ovs_be32 observation_point_id;  /* OBSERVATION_POINT_ID */
251     uint8_t flow_direction;  /* FLOW_DIRECTION */
252     struct eth_addr source_mac_address; /* SOURCE_MAC_ADDRESS */
253     struct eth_addr destination_mac_address; /* DESTINATION_MAC_ADDRESS */
254     ovs_be16 ethernet_type;  /* ETHERNET_TYPE */
255     uint8_t ethernet_header_length;  /* ETHERNET_HEADER_LENGTH */
256 });
257 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_common) == 20);
258
259 /* Part of data record flow key for VLAN entities. */
260 OVS_PACKED(
261 struct ipfix_data_record_flow_key_vlan {
262     ovs_be16 vlan_id;  /* VLAN_ID */
263     ovs_be16 dot1q_vlan_id;  /* DOT1Q_VLAN_ID */
264     uint8_t dot1q_priority;  /* DOT1Q_PRIORITY */
265 });
266 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_vlan) == 5);
267
268 /* Part of data record flow key for IP entities. */
269 /* XXX: Replace IP_TTL with MINIMUM_TTL and MAXIMUM_TTL? */
270 OVS_PACKED(
271 struct ipfix_data_record_flow_key_ip {
272     uint8_t ip_version;  /* IP_VERSION */
273     uint8_t ip_ttl;  /* IP_TTL */
274     uint8_t protocol_identifier;  /* PROTOCOL_IDENTIFIER */
275     uint8_t ip_diff_serv_code_point;  /* IP_DIFF_SERV_CODE_POINT */
276     uint8_t ip_precedence;  /* IP_PRECEDENCE */
277     uint8_t ip_class_of_service;  /* IP_CLASS_OF_SERVICE */
278 });
279 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ip) == 6);
280
281 /* Part of data record flow key for IPv4 entities. */
282 OVS_PACKED(
283 struct ipfix_data_record_flow_key_ipv4 {
284     ovs_be32 source_ipv4_address;  /* SOURCE_IPV4_ADDRESS */
285     ovs_be32 destination_ipv4_address;  /* DESTINATION_IPV4_ADDRESS */
286 });
287 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv4) == 8);
288
289 /* Part of data record flow key for IPv6 entities. */
290 OVS_PACKED(
291 struct ipfix_data_record_flow_key_ipv6 {
292     uint8_t source_ipv6_address[16];  /* SOURCE_IPV6_ADDRESS */
293     uint8_t destination_ipv6_address[16];  /* DESTINATION_IPV6_ADDRESS */
294     ovs_be32 flow_label_ipv6;  /* FLOW_LABEL_IPV6 */
295 });
296 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_ipv6) == 36);
297
298 /* Part of data record flow key for TCP/UDP/SCTP entities. */
299 OVS_PACKED(
300 struct ipfix_data_record_flow_key_transport {
301     ovs_be16 source_transport_port;  /* SOURCE_TRANSPORT_PORT */
302     ovs_be16 destination_transport_port;  /* DESTINATION_TRANSPORT_PORT */
303 });
304 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_transport) == 4);
305
306 /* Part of data record flow key for ICMP entities. */
307 OVS_PACKED(
308 struct ipfix_data_record_flow_key_icmp {
309     uint8_t icmp_type;  /* ICMP_TYPE_IPV4 / ICMP_TYPE_IPV6 */
310     uint8_t icmp_code;  /* ICMP_CODE_IPV4 / ICMP_CODE_IPV6 */
311 });
312 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_icmp) == 2);
313
314 /* For the tunnel type that is on the top of IPSec, the protocol identifier
315  * of the upper tunnel type is used.
316  */
317 static uint8_t tunnel_protocol[NUM_DPIF_IPFIX_TUNNEL] = {
318     0,              /* reserved */
319     IPPROTO_UDP,    /* DPIF_IPFIX_TUNNEL_VXLAN */
320     IPPROTO_GRE,    /* DPIF_IPFIX_TUNNEL_GRE */
321     IPPROTO_UDP,    /* DPIF_IPFIX_TUNNEL_LISP*/
322     IPPROTO_TCP,    /* DPIF_IPFIX_TUNNEL_STT*/
323     IPPROTO_GRE,    /* DPIF_IPFIX_TUNNEL_IPSEC_GRE */
324     0          ,    /* reserved */
325     IPPROTO_UDP,    /* DPIF_IPFIX_TUNNEL_GENEVE*/
326 };
327
328 OVS_PACKED(
329 struct ipfix_data_record_flow_key_tunnel {
330     ovs_be32 tunnel_source_ipv4_address;  /* TUNNEL_SOURCE_IPV4_ADDRESS */
331     ovs_be32 tunnel_destination_ipv4_address;  /* TUNNEL_DESTINATION_IPV4_ADDRESS */
332     uint8_t tunnel_protocol_identifier;  /* TUNNEL_PROTOCOL_IDENTIFIER */
333     ovs_be16 tunnel_source_transport_port;  /* TUNNEL_SOURCE_TRANSPORT_PORT */
334     ovs_be16 tunnel_destination_transport_port;  /* TUNNEL_DESTINATION_TRANSPORT_PORT */
335     uint8_t tunnel_type;  /* TUNNEL_TYPE */
336     uint8_t tunnel_key_length;  /* length of TUNNEL_KEY */
337     uint8_t tunnel_key[];  /* data of  TUNNEL_KEY */
338 });
339 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_flow_key_tunnel) == 15);
340
/* Values of the FLOW_END_REASON element.
 * Cf. IETF RFC 5102 Section 5.11.3. */
enum ipfix_flow_end_reason {
    IDLE_TIMEOUT         = 1,
    ACTIVE_TIMEOUT       = 2,
    END_OF_FLOW_DETECTED = 3,
    FORCED_END           = 4,
    LACK_OF_RESOURCES    = 5
};
349
350 /* Part of data record for common aggregated elements. */
351 OVS_PACKED(
352 struct ipfix_data_record_aggregated_common {
353     ovs_be32 flow_start_delta_microseconds; /* FLOW_START_DELTA_MICROSECONDS */
354     ovs_be32 flow_end_delta_microseconds; /* FLOW_END_DELTA_MICROSECONDS */
355     ovs_be64 packet_delta_count;  /* PACKET_DELTA_COUNT */
356     ovs_be64 layer2_octet_delta_count;  /* LAYER2_OCTET_DELTA_COUNT */
357     uint8_t flow_end_reason;  /* FLOW_END_REASON */
358 });
359 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_common) == 25);
360
361 /* Part of data record for IP aggregated elements. */
362 OVS_PACKED(
363 struct ipfix_data_record_aggregated_ip {
364     ovs_be64 octet_delta_count;  /* OCTET_DELTA_COUNT */
365     ovs_be64 octet_delta_sum_of_squares;  /* OCTET_DELTA_SUM_OF_SQUARES */
366     ovs_be64 minimum_ip_total_length;  /* MINIMUM_IP_TOTAL_LENGTH */
367     ovs_be64 maximum_ip_total_length;  /* MAXIMUM_IP_TOTAL_LENGTH */
368 });
369 BUILD_ASSERT_DECL(sizeof(struct ipfix_data_record_aggregated_ip) == 32);
370
/*
 * According to RFC 7011, the length of a variable-length element is in the
 * range 0~65535.  In most cases it is less than 255 octets:
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  | Length (< 255)|          Information Element                  |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                      ... continuing as needed                 |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * When it is greater than or equal to 255 octets:
 *  0                   1                   2                   3
 *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |      255      |      Length (0 to 65535)      |       IE      |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *  |                      ... continuing as needed                 |
 *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * For now, only the first form is implemented, so a virtual_obs_id must be
 * shorter than 255 octets.
 */

/* Longest virtual observation ID accepted, in bytes. */
#define IPFIX_VIRTUAL_OBS_MAX_LEN 254
396
/*
 * Longest tunnel key, in bytes.  Tunnel keys are supported for:
 * VxLAN: 24-bit VNI,
 * GRE: 32-bit key,
 * LISP: 24-bit instance ID,
 * STT: 64-bit key.
 */
#define MAX_TUNNEL_KEY_LEN 8
405
/* Upper bound on the flow key part of a data record: every layer's part,
 * taking the larger alternative where a layer has several, plus the tunnel
 * part with the longest tunnel key. */
#define MAX_FLOW_KEY_LEN                                            \
    (sizeof(struct ipfix_data_record_flow_key_common)               \
     + sizeof(struct ipfix_data_record_flow_key_vlan)               \
     + sizeof(struct ipfix_data_record_flow_key_ip)                 \
     + MAX(sizeof(struct ipfix_data_record_flow_key_ipv4),          \
           sizeof(struct ipfix_data_record_flow_key_ipv6))          \
     + MAX(sizeof(struct ipfix_data_record_flow_key_icmp),          \
           sizeof(struct ipfix_data_record_flow_key_transport))     \
     + sizeof(struct ipfix_data_record_flow_key_tunnel)             \
     + MAX_TUNNEL_KEY_LEN)

/* Upper bound on a whole data record: the flow key plus both aggregated
 * parts. */
#define MAX_DATA_RECORD_LEN                                         \
    (MAX_FLOW_KEY_LEN                                               \
     + sizeof(struct ipfix_data_record_aggregated_common)           \
     + sizeof(struct ipfix_data_record_aggregated_ip))

/* Max length of a data set.  To simplify the implementation, each
 * data record is sent in a separate data set, so each data set
 * contains at most one data record. */
#define MAX_DATA_SET_LEN \
    (sizeof(struct ipfix_set_header) + MAX_DATA_RECORD_LEN)

/* Max length of an IPFIX message.  Arbitrarily set to accommodate low
 * MTU. */
#define MAX_MESSAGE_LEN 1024
432
433 /* Cache structures. */
434
435 /* Flow key. */
436 struct ipfix_flow_key {
437     uint32_t obs_domain_id;
438     uint16_t template_id;
439     size_t flow_key_msg_part_size;
440     uint64_t flow_key_msg_part[DIV_ROUND_UP(MAX_FLOW_KEY_LEN, 8)];
441 };
442
443 /* Flow cache entry. */
444 struct ipfix_flow_cache_entry {
445     struct hmap_node flow_key_map_node;
446     struct ovs_list cache_flow_start_timestamp_list_node;
447     struct ipfix_flow_key flow_key;
448     /* Common aggregated elements. */
449     uint64_t flow_start_timestamp_usec;
450     uint64_t flow_end_timestamp_usec;
451     uint64_t packet_delta_count;
452     uint64_t layer2_octet_delta_count;
453     uint64_t octet_delta_count;
454     uint64_t octet_delta_sum_of_squares;  /* 0 if not IP. */
455     uint16_t minimum_ip_total_length;  /* 0 if not IP. */
456     uint16_t maximum_ip_total_length;  /* 0 if not IP. */
457 };
458
/* Forward declarations of helpers that are used before they are defined. */
static void dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter,
                                    bool, const uint64_t, const uint32_t);
static void get_export_time_now(uint64_t *, uint32_t *);
static void dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *exporter,
                                        bool);
465
466 static bool
467 ofproto_ipfix_bridge_exporter_options_equal(
468     const struct ofproto_ipfix_bridge_exporter_options *a,
469     const struct ofproto_ipfix_bridge_exporter_options *b)
470 {
471     return (a->obs_domain_id == b->obs_domain_id
472             && a->obs_point_id == b->obs_point_id
473             && a->sampling_rate == b->sampling_rate
474             && a->cache_active_timeout == b->cache_active_timeout
475             && a->cache_max_flows == b->cache_max_flows
476             && a->enable_tunnel_sampling == b->enable_tunnel_sampling
477             && a->enable_input_sampling == b->enable_input_sampling
478             && a->enable_output_sampling == b->enable_output_sampling
479             && sset_equals(&a->targets, &b->targets)
480             && nullable_string_is_equal(a->virtual_obs_id, b->virtual_obs_id));
481 }
482
483 static struct ofproto_ipfix_bridge_exporter_options *
484 ofproto_ipfix_bridge_exporter_options_clone(
485     const struct ofproto_ipfix_bridge_exporter_options *old)
486 {
487     struct ofproto_ipfix_bridge_exporter_options *new =
488         xmemdup(old, sizeof *old);
489     sset_clone(&new->targets, &old->targets);
490     new->virtual_obs_id = nullable_xstrdup(old->virtual_obs_id);
491     return new;
492 }
493
494 static void
495 ofproto_ipfix_bridge_exporter_options_destroy(
496     struct ofproto_ipfix_bridge_exporter_options *options)
497 {
498     if (options) {
499         sset_destroy(&options->targets);
500         free(options->virtual_obs_id);
501         free(options);
502     }
503 }
504
505 static bool
506 ofproto_ipfix_flow_exporter_options_equal(
507     const struct ofproto_ipfix_flow_exporter_options *a,
508     const struct ofproto_ipfix_flow_exporter_options *b)
509 {
510     return (a->collector_set_id == b->collector_set_id
511             && a->cache_active_timeout == b->cache_active_timeout
512             && a->cache_max_flows == b->cache_max_flows
513             && a->enable_tunnel_sampling == b->enable_tunnel_sampling
514             && sset_equals(&a->targets, &b->targets)
515             && nullable_string_is_equal(a->virtual_obs_id, b->virtual_obs_id));
516 }
517
518 static struct ofproto_ipfix_flow_exporter_options *
519 ofproto_ipfix_flow_exporter_options_clone(
520     const struct ofproto_ipfix_flow_exporter_options *old)
521 {
522     struct ofproto_ipfix_flow_exporter_options *new =
523         xmemdup(old, sizeof *old);
524     sset_clone(&new->targets, &old->targets);
525     new->virtual_obs_id = nullable_xstrdup(old->virtual_obs_id);
526     return new;
527 }
528
529 static void
530 ofproto_ipfix_flow_exporter_options_destroy(
531     struct ofproto_ipfix_flow_exporter_options *options)
532 {
533     if (options) {
534         sset_destroy(&options->targets);
535         free(options->virtual_obs_id);
536         free(options);
537     }
538 }
539
540 static void
541 dpif_ipfix_exporter_init(struct dpif_ipfix_exporter *exporter)
542 {
543     exporter->collectors = NULL;
544     exporter->seq_number = 1;
545     exporter->last_template_set_time = 0;
546     hmap_init(&exporter->cache_flow_key_map);
547     ovs_list_init(&exporter->cache_flow_start_timestamp_list);
548     exporter->cache_active_timeout = 0;
549     exporter->cache_max_flows = 0;
550     exporter->virtual_obs_id = NULL;
551     exporter->virtual_obs_len = 0;
552 }
553
554 static void
555 dpif_ipfix_exporter_clear(struct dpif_ipfix_exporter *exporter)
556 {
557     /* Flush the cache with flow end reason "forced end." */
558     dpif_ipfix_cache_expire_now(exporter, true);
559
560     collectors_destroy(exporter->collectors);
561     exporter->collectors = NULL;
562     exporter->seq_number = 1;
563     exporter->last_template_set_time = 0;
564     exporter->cache_active_timeout = 0;
565     exporter->cache_max_flows = 0;
566     free(exporter->virtual_obs_id);
567     exporter->virtual_obs_id = NULL;
568     exporter->virtual_obs_len = 0;
569 }
570
571 static void
572 dpif_ipfix_exporter_destroy(struct dpif_ipfix_exporter *exporter)
573 {
574     dpif_ipfix_exporter_clear(exporter);
575     hmap_destroy(&exporter->cache_flow_key_map);
576 }
577
578 static bool
579 dpif_ipfix_exporter_set_options(struct dpif_ipfix_exporter *exporter,
580                                 const struct sset *targets,
581                                 const uint32_t cache_active_timeout,
582                                 const uint32_t cache_max_flows,
583                                 const char *virtual_obs_id)
584 {
585     size_t virtual_obs_len;
586     collectors_destroy(exporter->collectors);
587     collectors_create(targets, IPFIX_DEFAULT_COLLECTOR_PORT,
588                       &exporter->collectors);
589     if (exporter->collectors == NULL) {
590         VLOG_WARN_RL(&rl, "no collectors could be initialized, "
591                      "IPFIX exporter disabled");
592         dpif_ipfix_exporter_clear(exporter);
593         return false;
594     }
595     exporter->cache_active_timeout = cache_active_timeout;
596     exporter->cache_max_flows = cache_max_flows;
597     virtual_obs_len = virtual_obs_id ? strlen(virtual_obs_id) : 0;
598     if (virtual_obs_len > IPFIX_VIRTUAL_OBS_MAX_LEN) {
599         VLOG_WARN_RL(&rl, "Virtual obsevation ID too long (%d bytes), "
600                      "should not be longer than %d bytes.",
601                      exporter->virtual_obs_len, IPFIX_VIRTUAL_OBS_MAX_LEN);
602         dpif_ipfix_exporter_clear(exporter);
603         return false;
604     }
605     exporter->virtual_obs_len = virtual_obs_len;
606     exporter->virtual_obs_id = nullable_xstrdup(virtual_obs_id);
607     return true;
608 }
609
610 static struct dpif_ipfix_port *
611 dpif_ipfix_find_port(const struct dpif_ipfix *di,
612                      odp_port_t odp_port) OVS_REQUIRES(mutex)
613 {
614     struct dpif_ipfix_port *dip;
615
616     HMAP_FOR_EACH_IN_BUCKET (dip, hmap_node, hash_odp_port(odp_port),
617                              &di->tunnel_ports) {
618         if (dip->odp_port == odp_port) {
619             return dip;
620         }
621     }
622     return NULL;
623 }
624
625 static void
626 dpif_ipfix_del_port(struct dpif_ipfix *di,
627                       struct dpif_ipfix_port *dip)
628     OVS_REQUIRES(mutex)
629 {
630     hmap_remove(&di->tunnel_ports, &dip->hmap_node);
631     free(dip);
632 }
633
634 void
635 dpif_ipfix_add_tunnel_port(struct dpif_ipfix *di, struct ofport *ofport,
636                            odp_port_t odp_port) OVS_EXCLUDED(mutex)
637 {
638     struct dpif_ipfix_port *dip;
639     const char *type;
640
641     ovs_mutex_lock(&mutex);
642     dip = dpif_ipfix_find_port(di, odp_port);
643     if (dip) {
644         dpif_ipfix_del_port(di, dip);
645     }
646
647     type = netdev_get_type(ofport->netdev);
648     if (type == NULL) {
649         goto out;
650     }
651
652     /* Add to table of tunnel ports. */
653     dip = xmalloc(sizeof *dip);
654     dip->ofport = ofport;
655     dip->odp_port = odp_port;
656     if (strcmp(type, "gre") == 0) {
657         /* 32-bit key gre */
658         dip->tunnel_type = DPIF_IPFIX_TUNNEL_GRE;
659         dip->tunnel_key_length = 4;
660     } else if (strcmp(type, "ipsec_gre") == 0) {
661         /* 32-bit key ipsec_gre */
662         dip->tunnel_type = DPIF_IPFIX_TUNNEL_IPSEC_GRE;
663         dip->tunnel_key_length = 4;
664     } else if (strcmp(type, "vxlan") == 0) {
665         dip->tunnel_type = DPIF_IPFIX_TUNNEL_VXLAN;
666         dip->tunnel_key_length = 3;
667     } else if (strcmp(type, "lisp") == 0) {
668         dip->tunnel_type = DPIF_IPFIX_TUNNEL_LISP;
669         dip->tunnel_key_length = 3;
670     } else if (strcmp(type, "geneve") == 0) {
671         dip->tunnel_type = DPIF_IPFIX_TUNNEL_GENEVE;
672         dip->tunnel_key_length = 3;
673     } else if (strcmp(type, "stt") == 0) {
674         dip->tunnel_type = DPIF_IPFIX_TUNNEL_STT;
675         dip->tunnel_key_length = 8;
676     } else {
677         free(dip);
678         goto out;
679     }
680     hmap_insert(&di->tunnel_ports, &dip->hmap_node, hash_odp_port(odp_port));
681
682 out:
683     ovs_mutex_unlock(&mutex);
684 }
685
686 void
687 dpif_ipfix_del_tunnel_port(struct dpif_ipfix *di, odp_port_t odp_port)
688     OVS_EXCLUDED(mutex)
689 {
690     struct dpif_ipfix_port *dip;
691     ovs_mutex_lock(&mutex);
692     dip = dpif_ipfix_find_port(di, odp_port);
693     if (dip) {
694         dpif_ipfix_del_port(di, dip);
695     }
696     ovs_mutex_unlock(&mutex);
697 }
698
699 bool
700 dpif_ipfix_get_tunnel_port(const struct dpif_ipfix *di, odp_port_t odp_port)
701     OVS_EXCLUDED(mutex)
702 {
703     struct dpif_ipfix_port *dip;
704     ovs_mutex_lock(&mutex);
705     dip = dpif_ipfix_find_port(di, odp_port);
706     ovs_mutex_unlock(&mutex);
707     return dip != NULL;
708 }
709
710 static void
711 dpif_ipfix_bridge_exporter_init(struct dpif_ipfix_bridge_exporter *exporter)
712 {
713     dpif_ipfix_exporter_init(&exporter->exporter);
714     exporter->options = NULL;
715     exporter->probability = 0;
716 }
717
718 static void
719 dpif_ipfix_bridge_exporter_clear(struct dpif_ipfix_bridge_exporter *exporter)
720 {
721     dpif_ipfix_exporter_clear(&exporter->exporter);
722     ofproto_ipfix_bridge_exporter_options_destroy(exporter->options);
723     exporter->options = NULL;
724     exporter->probability = 0;
725 }
726
727 static void
728 dpif_ipfix_bridge_exporter_destroy(struct dpif_ipfix_bridge_exporter *exporter)
729 {
730     dpif_ipfix_bridge_exporter_clear(exporter);
731     dpif_ipfix_exporter_destroy(&exporter->exporter);
732 }
733
734 static void
735 dpif_ipfix_bridge_exporter_set_options(
736     struct dpif_ipfix_bridge_exporter *exporter,
737     const struct ofproto_ipfix_bridge_exporter_options *options)
738 {
739     bool options_changed;
740
741     if (!options || sset_is_empty(&options->targets)) {
742         /* No point in doing any work if there are no targets. */
743         dpif_ipfix_bridge_exporter_clear(exporter);
744         return;
745     }
746
747     options_changed = (
748         !exporter->options
749         || !ofproto_ipfix_bridge_exporter_options_equal(
750             options, exporter->options));
751
752     /* Configure collectors if options have changed or if we're
753      * shortchanged in collectors (which indicates that opening one or
754      * more of the configured collectors failed, so that we should
755      * retry). */
756     if (options_changed
757         || collectors_count(exporter->exporter.collectors)
758             < sset_count(&options->targets)) {
759         if (!dpif_ipfix_exporter_set_options(
760                 &exporter->exporter, &options->targets,
761                 options->cache_active_timeout, options->cache_max_flows,
762                 options->virtual_obs_id)) {
763             return;
764         }
765     }
766
767     /* Avoid reconfiguring if options didn't change. */
768     if (!options_changed) {
769         return;
770     }
771
772     ofproto_ipfix_bridge_exporter_options_destroy(exporter->options);
773     exporter->options = ofproto_ipfix_bridge_exporter_options_clone(options);
774     exporter->probability =
775         MAX(1, UINT32_MAX / exporter->options->sampling_rate);
776
777     /* Run over the cache as some entries might have expired after
778      * changing the timeouts. */
779     dpif_ipfix_cache_expire_now(&exporter->exporter, false);
780 }
781
782 static struct dpif_ipfix_flow_exporter_map_node*
783 dpif_ipfix_find_flow_exporter_map_node(
784     const struct dpif_ipfix *di, const uint32_t collector_set_id)
785     OVS_REQUIRES(mutex)
786 {
787     struct dpif_ipfix_flow_exporter_map_node *exporter_node;
788
789     HMAP_FOR_EACH_WITH_HASH (exporter_node, node,
790                              hash_int(collector_set_id, 0),
791                              &di->flow_exporter_map) {
792         if (exporter_node->exporter.options->collector_set_id
793             == collector_set_id) {
794             return exporter_node;
795         }
796     }
797
798     return NULL;
799 }
800
801 static void
802 dpif_ipfix_flow_exporter_init(struct dpif_ipfix_flow_exporter *exporter)
803 {
804     dpif_ipfix_exporter_init(&exporter->exporter);
805     exporter->options = NULL;
806 }
807
808 static void
809 dpif_ipfix_flow_exporter_clear(struct dpif_ipfix_flow_exporter *exporter)
810 {
811     dpif_ipfix_exporter_clear(&exporter->exporter);
812     ofproto_ipfix_flow_exporter_options_destroy(exporter->options);
813     exporter->options = NULL;
814 }
815
816 static void
817 dpif_ipfix_flow_exporter_destroy(struct dpif_ipfix_flow_exporter *exporter)
818 {
819     dpif_ipfix_flow_exporter_clear(exporter);
820     dpif_ipfix_exporter_destroy(&exporter->exporter);
821 }
822
823 static bool
824 dpif_ipfix_flow_exporter_set_options(
825     struct dpif_ipfix_flow_exporter *exporter,
826     const struct ofproto_ipfix_flow_exporter_options *options)
827 {
828     bool options_changed;
829
830     if (sset_is_empty(&options->targets)) {
831         /* No point in doing any work if there are no targets. */
832         dpif_ipfix_flow_exporter_clear(exporter);
833         return true;
834     }
835
836     options_changed = (
837         !exporter->options
838         || !ofproto_ipfix_flow_exporter_options_equal(
839             options, exporter->options));
840
841     /* Configure collectors if options have changed or if we're
842      * shortchanged in collectors (which indicates that opening one or
843      * more of the configured collectors failed, so that we should
844      * retry). */
845     if (options_changed
846         || collectors_count(exporter->exporter.collectors)
847             < sset_count(&options->targets)) {
848         if (!dpif_ipfix_exporter_set_options(
849                 &exporter->exporter, &options->targets,
850                 options->cache_active_timeout, options->cache_max_flows,
851                 options->virtual_obs_id)) {
852             return false;
853         }
854     }
855
856     /* Avoid reconfiguring if options didn't change. */
857     if (!options_changed) {
858         return true;
859     }
860
861     ofproto_ipfix_flow_exporter_options_destroy(exporter->options);
862     exporter->options = ofproto_ipfix_flow_exporter_options_clone(options);
863
864     /* Run over the cache as some entries might have expired after
865      * changing the timeouts. */
866     dpif_ipfix_cache_expire_now(&exporter->exporter, false);
867
868     return true;
869 }
870
871 void
872 dpif_ipfix_set_options(
873     struct dpif_ipfix *di,
874     const struct ofproto_ipfix_bridge_exporter_options *bridge_exporter_options,
875     const struct ofproto_ipfix_flow_exporter_options *flow_exporters_options,
876     size_t n_flow_exporters_options) OVS_EXCLUDED(mutex)
877 {
878     int i;
879     struct ofproto_ipfix_flow_exporter_options *options;
880     struct dpif_ipfix_flow_exporter_map_node *node, *next;
881     size_t n_broken_flow_exporters_options = 0;
882
883     ovs_mutex_lock(&mutex);
884     dpif_ipfix_bridge_exporter_set_options(&di->bridge_exporter,
885                                            bridge_exporter_options);
886
887     /* Add new flow exporters and update current flow exporters. */
888     options = (struct ofproto_ipfix_flow_exporter_options *)
889         flow_exporters_options;
890     for (i = 0; i < n_flow_exporters_options; i++) {
891         node = dpif_ipfix_find_flow_exporter_map_node(
892             di, options->collector_set_id);
893         if (!node) {
894             node = xzalloc(sizeof *node);
895             dpif_ipfix_flow_exporter_init(&node->exporter);
896             hmap_insert(&di->flow_exporter_map, &node->node,
897                         hash_int(options->collector_set_id, 0));
898         }
899         if (!dpif_ipfix_flow_exporter_set_options(&node->exporter, options)) {
900             n_broken_flow_exporters_options++;
901         }
902         options++;
903     }
904
905     ovs_assert(hmap_count(&di->flow_exporter_map) >=
906                (n_flow_exporters_options - n_broken_flow_exporters_options));
907
908     /* Remove dropped flow exporters, if any needs to be removed. */
909     if (hmap_count(&di->flow_exporter_map) > n_flow_exporters_options) {
910         HMAP_FOR_EACH_SAFE (node, next, node, &di->flow_exporter_map) {
911             /* This is slow but doesn't take any extra memory, and
912              * this table is not supposed to contain many rows anyway. */
913             options = (struct ofproto_ipfix_flow_exporter_options *)
914                 flow_exporters_options;
915             for (i = 0; i < n_flow_exporters_options; i++) {
916               if (node->exporter.options->collector_set_id
917                   == options->collector_set_id) {
918                   break;
919               }
920               options++;
921             }
922             if (i == n_flow_exporters_options) {  // Not found.
923                 hmap_remove(&di->flow_exporter_map, &node->node);
924                 dpif_ipfix_flow_exporter_destroy(&node->exporter);
925                 free(node);
926             }
927         }
928     }
929
930     ovs_assert(hmap_count(&di->flow_exporter_map) ==
931                (n_flow_exporters_options - n_broken_flow_exporters_options));
932     ovs_mutex_unlock(&mutex);
933 }
934
935 struct dpif_ipfix *
936 dpif_ipfix_create(void)
937 {
938     struct dpif_ipfix *di;
939     di = xzalloc(sizeof *di);
940     dpif_ipfix_bridge_exporter_init(&di->bridge_exporter);
941     hmap_init(&di->flow_exporter_map);
942     hmap_init(&di->tunnel_ports);
943     ovs_refcount_init(&di->ref_cnt);
944     return di;
945 }
946
947 struct dpif_ipfix *
948 dpif_ipfix_ref(const struct dpif_ipfix *di_)
949 {
950     struct dpif_ipfix *di = CONST_CAST(struct dpif_ipfix *, di_);
951     if (di) {
952         ovs_refcount_ref(&di->ref_cnt);
953     }
954     return di;
955 }
956
957 uint32_t
958 dpif_ipfix_get_bridge_exporter_probability(const struct dpif_ipfix *di)
959     OVS_EXCLUDED(mutex)
960 {
961     uint32_t ret;
962     ovs_mutex_lock(&mutex);
963     ret = di->bridge_exporter.probability;
964     ovs_mutex_unlock(&mutex);
965     return ret;
966 }
967
968 bool
969 dpif_ipfix_get_bridge_exporter_input_sampling(const struct dpif_ipfix *di)
970     OVS_EXCLUDED(mutex)
971 {
972     bool ret = false;
973     ovs_mutex_lock(&mutex);
974     if (di->bridge_exporter.options) {
975         ret = di->bridge_exporter.options->enable_input_sampling;
976     }
977     ovs_mutex_unlock(&mutex);
978     return ret;
979 }
980
981 bool
982 dpif_ipfix_get_bridge_exporter_output_sampling(const struct dpif_ipfix *di)
983     OVS_EXCLUDED(mutex)
984 {
985     bool ret = false;
986     ovs_mutex_lock(&mutex);
987     if (di->bridge_exporter.options) {
988         ret = di->bridge_exporter.options->enable_output_sampling;
989     }
990     ovs_mutex_unlock(&mutex);
991     return ret;
992 }
993
994 bool
995 dpif_ipfix_get_bridge_exporter_tunnel_sampling(const struct dpif_ipfix *di)
996     OVS_EXCLUDED(mutex)
997 {
998     bool ret = false;
999     ovs_mutex_lock(&mutex);
1000     if (di->bridge_exporter.options) {
1001         ret = di->bridge_exporter.options->enable_tunnel_sampling;
1002     }
1003     ovs_mutex_unlock(&mutex);
1004     return ret;
1005 }
1006
1007 bool
1008 dpif_ipfix_get_flow_exporter_tunnel_sampling(const struct dpif_ipfix *di,
1009                                              const uint32_t collector_set_id)
1010     OVS_EXCLUDED(mutex)
1011 {
1012     ovs_mutex_lock(&mutex);
1013     struct dpif_ipfix_flow_exporter_map_node *node
1014         = dpif_ipfix_find_flow_exporter_map_node(di, collector_set_id);
1015     bool ret = (node
1016                 && node->exporter.options
1017                 && node->exporter.options->enable_tunnel_sampling);
1018     ovs_mutex_unlock(&mutex);
1019
1020     return ret;
1021 }
1022
1023 static void
1024 dpif_ipfix_clear(struct dpif_ipfix *di) OVS_REQUIRES(mutex)
1025 {
1026     struct dpif_ipfix_flow_exporter_map_node *exp_node;
1027     struct dpif_ipfix_port *dip, *next;
1028
1029     dpif_ipfix_bridge_exporter_clear(&di->bridge_exporter);
1030
1031     HMAP_FOR_EACH_POP (exp_node, node, &di->flow_exporter_map) {
1032         dpif_ipfix_flow_exporter_destroy(&exp_node->exporter);
1033         free(exp_node);
1034     }
1035
1036     HMAP_FOR_EACH_SAFE (dip, next, hmap_node, &di->tunnel_ports) {
1037         dpif_ipfix_del_port(di, dip);
1038     }
1039 }
1040
1041 void
1042 dpif_ipfix_unref(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
1043 {
1044     if (di && ovs_refcount_unref_relaxed(&di->ref_cnt) == 1) {
1045         ovs_mutex_lock(&mutex);
1046         dpif_ipfix_clear(di);
1047         dpif_ipfix_bridge_exporter_destroy(&di->bridge_exporter);
1048         hmap_destroy(&di->flow_exporter_map);
1049         hmap_destroy(&di->tunnel_ports);
1050         free(di);
1051         ovs_mutex_unlock(&mutex);
1052     }
1053 }
1054
1055 static void
1056 ipfix_init_header(uint32_t export_time_sec, uint32_t seq_number,
1057                   uint32_t obs_domain_id, struct dp_packet *msg)
1058 {
1059     struct ipfix_header *hdr;
1060
1061     hdr = dp_packet_put_zeros(msg, sizeof *hdr);
1062     hdr->version = htons(IPFIX_VERSION);
1063     hdr->length = htons(sizeof *hdr);  /* Updated in ipfix_send_msg. */
1064     hdr->export_time = htonl(export_time_sec);
1065     hdr->seq_number = htonl(seq_number);
1066     hdr->obs_domain_id = htonl(obs_domain_id);
1067 }
1068
1069 static size_t
1070 ipfix_send_msg(const struct collectors *collectors, struct dp_packet *msg)
1071 {
1072     struct ipfix_header *hdr;
1073     size_t tx_errors;
1074
1075     /* Adjust the length in the header. */
1076     hdr = dp_packet_data(msg);
1077     hdr->length = htons(dp_packet_size(msg));
1078
1079     tx_errors = collectors_send(collectors,
1080                                 dp_packet_data(msg), dp_packet_size(msg));
1081     dp_packet_set_size(msg, 0);
1082
1083     return tx_errors;
1084 }
1085
1086 static uint16_t
1087 ipfix_get_template_id(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3,
1088                       enum ipfix_proto_l4 l4, enum ipfix_proto_tunnel tunnel)
1089 {
1090     uint16_t template_id;
1091     template_id = l2;
1092     template_id = template_id * NUM_IPFIX_PROTO_L3 + l3;
1093     template_id = template_id * NUM_IPFIX_PROTO_L4 + l4;
1094     template_id = template_id * NUM_IPFIX_PROTO_TUNNEL + tunnel;
1095     return IPFIX_TEMPLATE_ID_MIN + template_id;
1096 }
1097
1098 static void
1099 ipfix_define_template_entity(enum ipfix_entity_id id,
1100                              enum ipfix_entity_size size,
1101                              enum ipfix_entity_enterprise enterprise,
1102                              struct dp_packet *msg)
1103 {
1104     struct ipfix_template_field_specifier *field;
1105     size_t field_size;
1106
1107     if (enterprise) {
1108         field_size = sizeof *field;
1109     } else {
1110         /* No enterprise number */
1111         field_size = sizeof *field - sizeof(ovs_be32);
1112     }
1113     field = dp_packet_put_zeros(msg, field_size);
1114     field->element_id = htons(id);
1115     if (size) {
1116         field->field_length = htons(size);
1117     } else {
1118         /* RFC 5101, Section 7. Variable-Length Information Element */
1119         field->field_length = OVS_BE16_MAX;
1120     }
1121     if (enterprise) {
1122         field->enterprise = htonl(enterprise);
1123     }
1124
1125 }
1126
1127 static uint16_t
1128 ipfix_define_template_fields(enum ipfix_proto_l2 l2, enum ipfix_proto_l3 l3,
1129                              enum ipfix_proto_l4 l4, enum ipfix_proto_tunnel tunnel,
1130                              bool virtual_obs_id_set,
1131                              struct dp_packet *msg)
1132 {
1133     uint16_t count = 0;
1134
1135 #define DEF(ID) \
1136     { \
1137         ipfix_define_template_entity(IPFIX_ENTITY_ID_##ID, \
1138                                      IPFIX_ENTITY_SIZE_##ID, \
1139                                      IPFIX_ENTITY_ENTERPRISE_##ID, msg); \
1140         count++; \
1141     }
1142
1143     /* 1. Flow key. */
1144
1145     DEF(OBSERVATION_POINT_ID);
1146     DEF(FLOW_DIRECTION);
1147
1148     /* Common Ethernet entities. */
1149     DEF(SOURCE_MAC_ADDRESS);
1150     DEF(DESTINATION_MAC_ADDRESS);
1151     DEF(ETHERNET_TYPE);
1152     DEF(ETHERNET_HEADER_LENGTH);
1153
1154     if (l2 == IPFIX_PROTO_L2_VLAN) {
1155         DEF(VLAN_ID);
1156         DEF(DOT1Q_VLAN_ID);
1157         DEF(DOT1Q_PRIORITY);
1158     }
1159
1160     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1161         DEF(IP_VERSION);
1162         DEF(IP_TTL);
1163         DEF(PROTOCOL_IDENTIFIER);
1164         DEF(IP_DIFF_SERV_CODE_POINT);
1165         DEF(IP_PRECEDENCE);
1166         DEF(IP_CLASS_OF_SERVICE);
1167
1168         if (l3 == IPFIX_PROTO_L3_IPV4) {
1169             DEF(SOURCE_IPV4_ADDRESS);
1170             DEF(DESTINATION_IPV4_ADDRESS);
1171             if (l4 == IPFIX_PROTO_L4_TCP_UDP_SCTP) {
1172                 DEF(SOURCE_TRANSPORT_PORT);
1173                 DEF(DESTINATION_TRANSPORT_PORT);
1174             } else if (l4 == IPFIX_PROTO_L4_ICMP) {
1175                 DEF(ICMP_TYPE_IPV4);
1176                 DEF(ICMP_CODE_IPV4);
1177             }
1178         } else {  /* l3 == IPFIX_PROTO_L3_IPV6 */
1179             DEF(SOURCE_IPV6_ADDRESS);
1180             DEF(DESTINATION_IPV6_ADDRESS);
1181             DEF(FLOW_LABEL_IPV6);
1182             if (l4 == IPFIX_PROTO_L4_TCP_UDP_SCTP) {
1183                 DEF(SOURCE_TRANSPORT_PORT);
1184                 DEF(DESTINATION_TRANSPORT_PORT);
1185             } else if (l4 == IPFIX_PROTO_L4_ICMP) {
1186                 DEF(ICMP_TYPE_IPV6);
1187                 DEF(ICMP_CODE_IPV6);
1188             }
1189         }
1190     }
1191
1192     if (tunnel != IPFIX_PROTO_NOT_TUNNELED) {
1193         DEF(TUNNEL_SOURCE_IPV4_ADDRESS);
1194         DEF(TUNNEL_DESTINATION_IPV4_ADDRESS);
1195         DEF(TUNNEL_PROTOCOL_IDENTIFIER);
1196         DEF(TUNNEL_SOURCE_TRANSPORT_PORT);
1197         DEF(TUNNEL_DESTINATION_TRANSPORT_PORT);
1198         DEF(TUNNEL_TYPE);
1199         DEF(TUNNEL_KEY);
1200     }
1201
1202     /* 2. Virtual observation ID, which is not a part of flow key. */
1203     if (virtual_obs_id_set) {
1204         DEF(VIRTUAL_OBS_ID);
1205     }
1206
1207     /* 3. Flow aggregated data. */
1208
1209     DEF(FLOW_START_DELTA_MICROSECONDS);
1210     DEF(FLOW_END_DELTA_MICROSECONDS);
1211     DEF(PACKET_DELTA_COUNT);
1212     DEF(LAYER2_OCTET_DELTA_COUNT);
1213     DEF(FLOW_END_REASON);
1214
1215     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1216         DEF(OCTET_DELTA_COUNT);
1217         DEF(OCTET_DELTA_SUM_OF_SQUARES);
1218         DEF(MINIMUM_IP_TOTAL_LENGTH);
1219         DEF(MAXIMUM_IP_TOTAL_LENGTH);
1220     }
1221 #undef DEF
1222
1223     return count;
1224 }
1225
1226 static void
1227 ipfix_init_template_msg(void *msg_stub, uint32_t export_time_sec,
1228                         uint32_t seq_number, uint32_t obs_domain_id,
1229                         struct dp_packet *msg, size_t *set_hdr_offset)
1230 {
1231     struct ipfix_set_header *set_hdr;
1232
1233     dp_packet_use_stub(msg, msg_stub, sizeof msg_stub);
1234
1235     ipfix_init_header(export_time_sec, seq_number, obs_domain_id, msg);
1236     *set_hdr_offset = dp_packet_size(msg);
1237
1238     /* Add a Template Set. */
1239     set_hdr = dp_packet_put_zeros(msg, sizeof *set_hdr);
1240     set_hdr->set_id = htons(IPFIX_SET_ID_TEMPLATE);
1241 }
1242
1243 static size_t
1244 ipfix_send_template_msg(const struct collectors *collectors,
1245                         struct dp_packet *msg, size_t set_hdr_offset)
1246 {
1247     struct ipfix_set_header *set_hdr;
1248     size_t tx_errors;
1249
1250     /* Send template message. */
1251     set_hdr = (struct ipfix_set_header*)
1252               ((uint8_t*)dp_packet_data(msg) + set_hdr_offset);
1253     set_hdr->length = htons(dp_packet_size(msg) - set_hdr_offset);
1254
1255     tx_errors = ipfix_send_msg(collectors, msg);
1256
1257     dp_packet_uninit(msg);
1258
1259     return tx_errors;
1260 }
1261
1262 static void
1263 ipfix_send_template_msgs(struct dpif_ipfix_exporter *exporter,
1264                          uint32_t export_time_sec, uint32_t obs_domain_id)
1265 {
1266     uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)];
1267     struct dp_packet msg;
1268     size_t set_hdr_offset, tmpl_hdr_offset, error_pkts;
1269     struct ipfix_template_record_header *tmpl_hdr;
1270     uint16_t field_count;
1271     size_t tx_packets = 0;
1272     size_t tx_errors = 0;
1273     enum ipfix_proto_l2 l2;
1274     enum ipfix_proto_l3 l3;
1275     enum ipfix_proto_l4 l4;
1276     enum ipfix_proto_tunnel tunnel;
1277
1278     ipfix_init_template_msg(msg_stub, export_time_sec, exporter->seq_number,
1279                             obs_domain_id, &msg, &set_hdr_offset);
1280     /* Define one template for each possible combination of
1281      * protocols. */
1282     for (l2 = 0; l2 < NUM_IPFIX_PROTO_L2; l2++) {
1283         for (l3 = 0; l3 < NUM_IPFIX_PROTO_L3; l3++) {
1284             for (l4 = 0; l4 < NUM_IPFIX_PROTO_L4; l4++) {
1285                 if (l3 == IPFIX_PROTO_L3_UNKNOWN &&
1286                     l4 != IPFIX_PROTO_L4_UNKNOWN) {
1287                     continue;
1288                 }
1289                 for (tunnel = 0; tunnel < NUM_IPFIX_PROTO_TUNNEL; tunnel++) {
1290                     /* When the size of the template packet reaches
1291                      * MAX_MESSAGE_LEN(1024), send it out.
1292                      * And then reinitialize the msg to construct a new
1293                      * packet for the following templates.
1294                      */
1295                     if (dp_packet_size(&msg) >= MAX_MESSAGE_LEN) {
1296                         /* Send template message. */
1297                         error_pkts = ipfix_send_template_msg(exporter->collectors,
1298                                                              &msg, set_hdr_offset);
1299                         tx_errors += error_pkts;
1300                         tx_packets += collectors_count(exporter->collectors) - error_pkts;
1301
1302                         /* Reinitialize the template msg. */
1303                         ipfix_init_template_msg(msg_stub, export_time_sec,
1304                                                 exporter->seq_number,
1305                                                 obs_domain_id, &msg,
1306                                                 &set_hdr_offset);
1307                     }
1308
1309                     tmpl_hdr_offset = dp_packet_size(&msg);
1310                     tmpl_hdr = dp_packet_put_zeros(&msg, sizeof *tmpl_hdr);
1311                     tmpl_hdr->template_id = htons(
1312                         ipfix_get_template_id(l2, l3, l4, tunnel));
1313                     field_count = ipfix_define_template_fields(
1314                         l2, l3, l4, tunnel, exporter->virtual_obs_id != NULL,
1315                         &msg);
1316                     tmpl_hdr = (struct ipfix_template_record_header*)
1317                         ((uint8_t*)dp_packet_data(&msg) + tmpl_hdr_offset);
1318                     tmpl_hdr->field_count = htons(field_count);
1319                 }
1320             }
1321         }
1322     }
1323
1324     /* Send template message. */
1325     error_pkts = ipfix_send_template_msg(exporter->collectors, &msg, set_hdr_offset);
1326     tx_errors += error_pkts;
1327     tx_packets += collectors_count(exporter->collectors) - error_pkts;
1328
1329     exporter->stats.tx_pkts += tx_packets;
1330     exporter->stats.tx_errors += tx_errors;
1331
1332     /* XXX: Add Options Template Sets, at least to define a Flow Keys
1333      * Option Template. */
1334
1335 }
1336
1337 static inline uint32_t
1338 ipfix_hash_flow_key(const struct ipfix_flow_key *flow_key, uint32_t basis)
1339 {
1340     uint32_t hash;
1341     hash = hash_int(flow_key->obs_domain_id, basis);
1342     hash = hash_int(flow_key->template_id, hash);
1343     hash = hash_bytes(flow_key->flow_key_msg_part,
1344                       flow_key->flow_key_msg_part_size, hash);
1345     return hash;
1346 }
1347
1348 static bool
1349 ipfix_flow_key_equal(const struct ipfix_flow_key *a,
1350                      const struct ipfix_flow_key *b)
1351 {
1352     /* The template ID determines the flow key size, so not need to
1353      * compare it. */
1354     return (a->obs_domain_id == b->obs_domain_id
1355             && a->template_id == b->template_id
1356             && memcmp(a->flow_key_msg_part, b->flow_key_msg_part,
1357                       a->flow_key_msg_part_size) == 0);
1358 }
1359
1360 static struct ipfix_flow_cache_entry*
1361 ipfix_cache_find_entry(const struct dpif_ipfix_exporter *exporter,
1362                        const struct ipfix_flow_key *flow_key)
1363 {
1364     struct ipfix_flow_cache_entry *entry;
1365
1366     HMAP_FOR_EACH_WITH_HASH (entry, flow_key_map_node,
1367                              ipfix_hash_flow_key(flow_key, 0),
1368                              &exporter->cache_flow_key_map) {
1369         if (ipfix_flow_key_equal(&entry->flow_key, flow_key)) {
1370             return entry;
1371         }
1372     }
1373
1374     return NULL;
1375 }
1376
1377 static bool
1378 ipfix_cache_next_timeout_msec(const struct dpif_ipfix_exporter *exporter,
1379                               long long int *next_timeout_msec)
1380 {
1381     struct ipfix_flow_cache_entry *entry;
1382
1383     LIST_FOR_EACH (entry, cache_flow_start_timestamp_list_node,
1384                    &exporter->cache_flow_start_timestamp_list) {
1385         *next_timeout_msec = entry->flow_start_timestamp_usec / 1000LL
1386             + 1000LL * exporter->cache_active_timeout;
1387         return true;
1388     }
1389
1390     return false;
1391 }
1392
1393 static void
1394 ipfix_cache_aggregate_entries(struct ipfix_flow_cache_entry *from_entry,
1395                               struct ipfix_flow_cache_entry *to_entry)
1396 {
1397     uint64_t *to_start, *to_end, *from_start, *from_end;
1398     uint16_t *to_min_len, *to_max_len, *from_min_len, *from_max_len;
1399
1400     to_start = &to_entry->flow_start_timestamp_usec;
1401     to_end = &to_entry->flow_end_timestamp_usec;
1402     from_start = &from_entry->flow_start_timestamp_usec;
1403     from_end = &from_entry->flow_end_timestamp_usec;
1404
1405     if (*to_start > *from_start) {
1406         *to_start = *from_start;
1407     }
1408     if (*to_end < *from_end) {
1409         *to_end = *from_end;
1410     }
1411
1412     to_entry->packet_delta_count += from_entry->packet_delta_count;
1413     to_entry->layer2_octet_delta_count += from_entry->layer2_octet_delta_count;
1414
1415     to_entry->octet_delta_count += from_entry->octet_delta_count;
1416     to_entry->octet_delta_sum_of_squares +=
1417         from_entry->octet_delta_sum_of_squares;
1418
1419     to_min_len = &to_entry->minimum_ip_total_length;
1420     to_max_len = &to_entry->maximum_ip_total_length;
1421     from_min_len = &from_entry->minimum_ip_total_length;
1422     from_max_len = &from_entry->maximum_ip_total_length;
1423
1424     if (!*to_min_len || (*from_min_len && *to_min_len > *from_min_len)) {
1425         *to_min_len = *from_min_len;
1426     }
1427     if (*to_max_len < *from_max_len) {
1428         *to_max_len = *from_max_len;
1429     }
1430 }
1431
1432 /* Get statistics */
1433 static void
1434 ipfix_get_stats__(const struct dpif_ipfix_exporter *exporter,
1435                   ofproto_ipfix_stats *stats)
1436 {
1437     memset(stats, 0xff, sizeof *stats);
1438
1439     if (!exporter) {
1440         return;
1441     }
1442
1443     *stats = exporter->stats;
1444 }
1445
1446 static void
1447 ipfix_get_bridge_stats(const struct dpif_ipfix_bridge_exporter *exporter,
1448                        ofproto_ipfix_stats *stats)
1449 {
1450     ipfix_get_stats__(&exporter->exporter, stats);
1451 }
1452
1453 static void
1454 ipfix_get_flow_stats(const struct dpif_ipfix_flow_exporter *exporter,
1455                      ofproto_ipfix_stats *stats)
1456 {
1457     ipfix_get_stats__(&exporter->exporter, stats);
1458     stats->collector_set_id = exporter->options->collector_set_id;
1459 }
1460
1461 int
1462 dpif_ipfix_get_stats(const struct dpif_ipfix *di,
1463                      bool bridge_ipfix,
1464                      struct ovs_list *replies)
1465     OVS_EXCLUDED(mutex)
1466 {
1467     struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node;
1468     struct ofputil_ipfix_stats ois;
1469
1470     ovs_mutex_lock(&mutex);
1471     if (bridge_ipfix) {
1472         if (!di->bridge_exporter.options) {
1473             ovs_mutex_unlock(&mutex);
1474             return OFPERR_NXST_NOT_CONFIGURED;
1475         }
1476
1477         ipfix_get_bridge_stats(&di->bridge_exporter, &ois);
1478         ofputil_append_ipfix_stat(replies, &ois);
1479     } else {
1480         if (hmap_count(&di->flow_exporter_map) == 0) {
1481             ovs_mutex_unlock(&mutex);
1482             return OFPERR_NXST_NOT_CONFIGURED;
1483         }
1484
1485         HMAP_FOR_EACH (flow_exporter_node, node,
1486                        &di->flow_exporter_map) {
1487             ipfix_get_flow_stats(&flow_exporter_node->exporter, &ois);
1488             ofputil_append_ipfix_stat(replies, &ois);
1489         }
1490     }
1491     ovs_mutex_unlock(&mutex);
1492
1493     return 0;
1494 }
1495
1496 /* Update partial ipfix stats */
1497 static void
1498 ipfix_update_stats(struct dpif_ipfix_exporter *exporter,
1499                    bool new_flow,
1500                    size_t current_flows,
1501                    enum ipfix_sampled_packet_type sampled_pkt_type)
1502 {
1503     if (new_flow) {
1504         exporter->stats.total_flows++;
1505         exporter->stats.current_flows = current_flows;
1506     }
1507     exporter->stats.pkts++;
1508
1509     switch (sampled_pkt_type) {
1510     case IPFIX_SAMPLED_PKT_IPV4_OK:
1511         exporter->stats.ipv4_pkts++;
1512         break;
1513     case IPFIX_SAMPLED_PKT_IPV6_OK:
1514         exporter->stats.ipv6_pkts++;
1515         break;
1516     case IPFIX_SAMPLED_PKT_IPV4_ERROR:
1517         exporter->stats.ipv4_error_pkts++;
1518         exporter->stats.error_pkts++;
1519         break;
1520     case IPFIX_SAMPLED_PKT_IPV6_ERROR:
1521         exporter->stats.ipv6_error_pkts++;
1522         exporter->stats.error_pkts++;
1523         break;
1524     case IPFIX_SAMPLED_PKT_UNKNOWN:
1525         exporter->stats.error_pkts++;
1526         break;
1527     case IPFIX_SAMPLED_PKT_OTHERS:
1528     default:
1529         break;
1530     }
1531 }
1532
1533 /* Add an entry into a flow cache.  The entry is either aggregated into
1534  * an existing entry with the same flow key and free()d, or it is
1535  * inserted into the cache. And IPFIX stats will be updated */
1536 static void
1537 ipfix_cache_update(struct dpif_ipfix_exporter *exporter,
1538                    struct ipfix_flow_cache_entry *entry,
1539                    enum ipfix_sampled_packet_type sampled_pkt_type)
1540 {
1541     struct ipfix_flow_cache_entry *old_entry;
1542     size_t current_flows = 0;
1543
1544     old_entry = ipfix_cache_find_entry(exporter, &entry->flow_key);
1545
1546     if (old_entry == NULL) {
1547         hmap_insert(&exporter->cache_flow_key_map, &entry->flow_key_map_node,
1548                     ipfix_hash_flow_key(&entry->flow_key, 0));
1549
1550         /* As the latest entry added into the cache, it should
1551          * logically have the highest flow_start_timestamp_usec, so
1552          * append it at the tail. */
1553         ovs_list_push_back(&exporter->cache_flow_start_timestamp_list,
1554                        &entry->cache_flow_start_timestamp_list_node);
1555
1556         /* Enforce exporter->cache_max_flows limit. */
1557         current_flows = hmap_count(&exporter->cache_flow_key_map);
1558         ipfix_update_stats(exporter, true, current_flows, sampled_pkt_type);
1559         if (current_flows > exporter->cache_max_flows) {
1560             dpif_ipfix_cache_expire_now(exporter, false);
1561         }
1562     } else {
1563         ipfix_cache_aggregate_entries(entry, old_entry);
1564         free(entry);
1565         ipfix_update_stats(exporter, false, current_flows, sampled_pkt_type);
1566     }
1567 }
1568
1569 static enum ipfix_sampled_packet_type
1570 ipfix_cache_entry_init(struct ipfix_flow_cache_entry *entry,
1571                        const struct dp_packet *packet, const struct flow *flow,
1572                        uint64_t packet_delta_count, uint32_t obs_domain_id,
1573                        uint32_t obs_point_id, odp_port_t output_odp_port,
1574                        const struct dpif_ipfix_port *tunnel_port,
1575                        const struct flow_tnl *tunnel_key)
1576 {
1577     struct ipfix_flow_key *flow_key;
1578     struct dp_packet msg;
1579     enum ipfix_proto_l2 l2;
1580     enum ipfix_proto_l3 l3;
1581     enum ipfix_proto_l4 l4;
1582     enum ipfix_proto_tunnel tunnel = IPFIX_PROTO_NOT_TUNNELED;
1583     enum ipfix_sampled_packet_type sampled_pkt_type = IPFIX_SAMPLED_PKT_UNKNOWN;
1584     uint8_t ethernet_header_length;
1585     uint16_t ethernet_total_length;
1586
1587     flow_key = &entry->flow_key;
1588     dp_packet_use_stub(&msg, flow_key->flow_key_msg_part,
1589                        sizeof flow_key->flow_key_msg_part);
1590
1591     /* Choose the right template ID matching the protocols in the
1592      * sampled packet. */
1593     l2 = (flow->vlan_tci == 0) ? IPFIX_PROTO_L2_ETH : IPFIX_PROTO_L2_VLAN;
1594
1595     switch(ntohs(flow->dl_type)) {
1596     case ETH_TYPE_IP:
1597         l3 = IPFIX_PROTO_L3_IPV4;
1598         switch(flow->nw_proto) {
1599         case IPPROTO_TCP:
1600         case IPPROTO_UDP:
1601         case IPPROTO_SCTP:
1602             l4 = IPFIX_PROTO_L4_TCP_UDP_SCTP;
1603             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV4_OK;
1604             break;
1605         case IPPROTO_ICMP:
1606             l4 = IPFIX_PROTO_L4_ICMP;
1607             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV4_OK;
1608             break;
1609         default:
1610             l4 = IPFIX_PROTO_L4_UNKNOWN;
1611             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV4_ERROR;
1612         }
1613         break;
1614     case ETH_TYPE_IPV6:
1615         l3 = IPFIX_PROTO_L3_IPV6;
1616         switch(flow->nw_proto) {
1617         case IPPROTO_TCP:
1618         case IPPROTO_UDP:
1619         case IPPROTO_SCTP:
1620             l4 = IPFIX_PROTO_L4_TCP_UDP_SCTP;
1621             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV6_OK;
1622             break;
1623         case IPPROTO_ICMPV6:
1624             l4 = IPFIX_PROTO_L4_ICMP;
1625             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV6_OK;
1626             break;
1627         default:
1628             l4 = IPFIX_PROTO_L4_UNKNOWN;
1629             sampled_pkt_type = IPFIX_SAMPLED_PKT_IPV6_ERROR;
1630         }
1631         break;
1632     default:
1633         l3 = IPFIX_PROTO_L3_UNKNOWN;
1634         l4 = IPFIX_PROTO_L4_UNKNOWN;
1635         sampled_pkt_type = IPFIX_SAMPLED_PKT_OTHERS;
1636     }
1637
1638     if (tunnel_port && tunnel_key) {
1639        tunnel = IPFIX_PROTO_TUNNELED;
1640     }
1641
1642     flow_key->obs_domain_id = obs_domain_id;
1643     flow_key->template_id = ipfix_get_template_id(l2, l3, l4, tunnel);
1644
1645     /* The fields defined in the ipfix_data_record_* structs and sent
1646      * below must match exactly the templates defined in
1647      * ipfix_define_template_fields. */
1648
1649     ethernet_header_length = (l2 == IPFIX_PROTO_L2_VLAN)
1650         ? VLAN_ETH_HEADER_LEN : ETH_HEADER_LEN;
1651     ethernet_total_length = dp_packet_size(packet);
1652
1653     /* Common Ethernet entities. */
1654     {
1655         struct ipfix_data_record_flow_key_common *data_common;
1656
1657         data_common = dp_packet_put_zeros(&msg, sizeof *data_common);
1658         data_common->observation_point_id = htonl(obs_point_id);
1659         data_common->flow_direction =
1660             (output_odp_port == ODPP_NONE) ? INGRESS_FLOW : EGRESS_FLOW;
1661         data_common->source_mac_address = flow->dl_src;
1662         data_common->destination_mac_address = flow->dl_dst;
1663         data_common->ethernet_type = flow->dl_type;
1664         data_common->ethernet_header_length = ethernet_header_length;
1665     }
1666
1667     if (l2 == IPFIX_PROTO_L2_VLAN) {
1668         struct ipfix_data_record_flow_key_vlan *data_vlan;
1669         uint16_t vlan_id = vlan_tci_to_vid(flow->vlan_tci);
1670         uint8_t priority = vlan_tci_to_pcp(flow->vlan_tci);
1671
1672         data_vlan = dp_packet_put_zeros(&msg, sizeof *data_vlan);
1673         data_vlan->vlan_id = htons(vlan_id);
1674         data_vlan->dot1q_vlan_id = htons(vlan_id);
1675         data_vlan->dot1q_priority = priority;
1676     }
1677
1678     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1679         struct ipfix_data_record_flow_key_ip *data_ip;
1680
1681         data_ip = dp_packet_put_zeros(&msg, sizeof *data_ip);
1682         data_ip->ip_version = (l3 == IPFIX_PROTO_L3_IPV4) ? 4 : 6;
1683         data_ip->ip_ttl = flow->nw_ttl;
1684         data_ip->protocol_identifier = flow->nw_proto;
1685         data_ip->ip_diff_serv_code_point = flow->nw_tos >> 2;
1686         data_ip->ip_precedence = flow->nw_tos >> 5;
1687         data_ip->ip_class_of_service = flow->nw_tos;
1688
1689         if (l3 == IPFIX_PROTO_L3_IPV4) {
1690             struct ipfix_data_record_flow_key_ipv4 *data_ipv4;
1691
1692             data_ipv4 = dp_packet_put_zeros(&msg, sizeof *data_ipv4);
1693             data_ipv4->source_ipv4_address = flow->nw_src;
1694             data_ipv4->destination_ipv4_address = flow->nw_dst;
1695         } else {  /* l3 == IPFIX_PROTO_L3_IPV6 */
1696             struct ipfix_data_record_flow_key_ipv6 *data_ipv6;
1697
1698             data_ipv6 = dp_packet_put_zeros(&msg, sizeof *data_ipv6);
1699             memcpy(data_ipv6->source_ipv6_address, &flow->ipv6_src,
1700                    sizeof flow->ipv6_src);
1701             memcpy(data_ipv6->destination_ipv6_address, &flow->ipv6_dst,
1702                    sizeof flow->ipv6_dst);
1703             data_ipv6->flow_label_ipv6 = flow->ipv6_label;
1704         }
1705     }
1706
1707     if (l4 == IPFIX_PROTO_L4_TCP_UDP_SCTP) {
1708         struct ipfix_data_record_flow_key_transport *data_transport;
1709
1710         data_transport = dp_packet_put_zeros(&msg, sizeof *data_transport);
1711         data_transport->source_transport_port = flow->tp_src;
1712         data_transport->destination_transport_port = flow->tp_dst;
1713     } else if (l4 == IPFIX_PROTO_L4_ICMP) {
1714         struct ipfix_data_record_flow_key_icmp *data_icmp;
1715
1716         data_icmp = dp_packet_put_zeros(&msg, sizeof *data_icmp);
1717         data_icmp->icmp_type = ntohs(flow->tp_src) & 0xff;
1718         data_icmp->icmp_code = ntohs(flow->tp_dst) & 0xff;
1719     }
1720
1721     if (tunnel == IPFIX_PROTO_TUNNELED) {
1722         struct ipfix_data_record_flow_key_tunnel *data_tunnel;
1723         const uint8_t *tun_id;
1724
1725         data_tunnel = dp_packet_put_zeros(&msg, sizeof *data_tunnel +
1726                                              tunnel_port->tunnel_key_length);
1727         data_tunnel->tunnel_source_ipv4_address = tunnel_key->ip_src;
1728         data_tunnel->tunnel_destination_ipv4_address = tunnel_key->ip_dst;
1729         /* The tunnel_protocol_identifier is from tunnel_proto array, which
1730          * contains protocol_identifiers of each tunnel type.
1731          * For the tunnel type on the top of IPSec, which uses the protocol
1732          * identifier of the upper tunnel type is used, the tcp_src and tcp_dst
1733          * are decided based on the protocol identifiers.
1734          * E.g:
1735          * The protocol identifier of DPIF_IPFIX_TUNNEL_IPSEC_GRE is IPPROTO_GRE,
1736          * and both tp_src and tp_dst are zero.
1737          */
1738         data_tunnel->tunnel_protocol_identifier =
1739             tunnel_protocol[tunnel_port->tunnel_type];
1740         data_tunnel->tunnel_source_transport_port = tunnel_key->tp_src;
1741         data_tunnel->tunnel_destination_transport_port = tunnel_key->tp_dst;
1742         data_tunnel->tunnel_type = tunnel_port->tunnel_type;
1743         data_tunnel->tunnel_key_length = tunnel_port->tunnel_key_length;
1744         /* tun_id is in network order, and tunnel key is in low bits. */
1745         tun_id = (const uint8_t *) &tunnel_key->tun_id;
1746         memcpy(data_tunnel->tunnel_key,
1747                &tun_id[8 - tunnel_port->tunnel_key_length],
1748                tunnel_port->tunnel_key_length);
1749     }
1750
1751     flow_key->flow_key_msg_part_size = dp_packet_size(&msg);
1752
1753     {
1754         struct timeval now;
1755         uint64_t layer2_octet_delta_count;
1756
1757         /* Calculate the total matched octet count by considering as
1758          * an approximation that all matched packets have the same
1759          * length. */
1760         layer2_octet_delta_count = packet_delta_count * ethernet_total_length;
1761
1762         xgettimeofday(&now);
1763         entry->flow_end_timestamp_usec = now.tv_usec + 1000000LL * now.tv_sec;
1764         entry->flow_start_timestamp_usec = entry->flow_end_timestamp_usec;
1765         entry->packet_delta_count = packet_delta_count;
1766         entry->layer2_octet_delta_count = layer2_octet_delta_count;
1767     }
1768
1769     if (l3 != IPFIX_PROTO_L3_UNKNOWN) {
1770         uint16_t ip_total_length =
1771             ethernet_total_length - ethernet_header_length;
1772         uint64_t octet_delta_count;
1773
1774         /* Calculate the total matched octet count by considering as
1775          * an approximation that all matched packets have the same
1776          * length. */
1777         octet_delta_count = packet_delta_count * ip_total_length;
1778
1779         entry->octet_delta_count = octet_delta_count;
1780         entry->octet_delta_sum_of_squares = octet_delta_count * ip_total_length;
1781         entry->minimum_ip_total_length = ip_total_length;
1782         entry->maximum_ip_total_length = ip_total_length;
1783     } else {
1784         entry->octet_delta_sum_of_squares = 0;
1785         entry->minimum_ip_total_length = 0;
1786         entry->maximum_ip_total_length = 0;
1787     }
1788
1789     return sampled_pkt_type;
1790 }
1791
1792 /* Send each single data record in its own data set, to simplify the
1793  * implementation by avoiding having to group record by template ID
1794  * before sending. */
1795 static void
1796 ipfix_put_data_set(uint32_t export_time_sec,
1797                    struct ipfix_flow_cache_entry *entry,
1798                    enum ipfix_flow_end_reason flow_end_reason,
1799                    const char *virtual_obs_id,
1800                    uint8_t virtual_obs_len,
1801                    struct dp_packet *msg)
1802 {
1803     size_t set_hdr_offset;
1804     struct ipfix_set_header *set_hdr;
1805
1806     set_hdr_offset = dp_packet_size(msg);
1807
1808     /* Put a Data Set. */
1809     set_hdr = dp_packet_put_zeros(msg, sizeof *set_hdr);
1810     set_hdr->set_id = htons(entry->flow_key.template_id);
1811
1812     /* Copy the flow key part of the data record. */
1813
1814     dp_packet_put(msg, entry->flow_key.flow_key_msg_part,
1815                entry->flow_key.flow_key_msg_part_size);
1816
1817     /* Export virtual observation ID. */
1818     if (virtual_obs_id) {
1819         dp_packet_put(msg, &virtual_obs_len, sizeof(virtual_obs_len));
1820         dp_packet_put(msg, virtual_obs_id, virtual_obs_len);
1821     }
1822
1823     /* Put the non-key part of the data record. */
1824
1825     {
1826         struct ipfix_data_record_aggregated_common *data_aggregated_common;
1827         uint64_t export_time_usec, flow_start_delta_usec, flow_end_delta_usec;
1828
1829         /* Calculate the negative deltas relative to the export time
1830          * in seconds sent in the header, not the exact export
1831          * time. */
1832         export_time_usec = 1000000LL * export_time_sec;
1833         flow_start_delta_usec = export_time_usec
1834             - entry->flow_start_timestamp_usec;
1835         flow_end_delta_usec = export_time_usec
1836             - entry->flow_end_timestamp_usec;
1837
1838         data_aggregated_common = dp_packet_put_zeros(
1839             msg, sizeof *data_aggregated_common);
1840         data_aggregated_common->flow_start_delta_microseconds = htonl(
1841             flow_start_delta_usec);
1842         data_aggregated_common->flow_end_delta_microseconds = htonl(
1843             flow_end_delta_usec);
1844         data_aggregated_common->packet_delta_count = htonll(
1845             entry->packet_delta_count);
1846         data_aggregated_common->layer2_octet_delta_count = htonll(
1847             entry->layer2_octet_delta_count);
1848         data_aggregated_common->flow_end_reason = flow_end_reason;
1849     }
1850
1851     if (entry->octet_delta_sum_of_squares) {  /* IP packet. */
1852         struct ipfix_data_record_aggregated_ip *data_aggregated_ip;
1853
1854         data_aggregated_ip = dp_packet_put_zeros(
1855             msg, sizeof *data_aggregated_ip);
1856         data_aggregated_ip->octet_delta_count = htonll(
1857             entry->octet_delta_count);
1858         data_aggregated_ip->octet_delta_sum_of_squares = htonll(
1859             entry->octet_delta_sum_of_squares);
1860         data_aggregated_ip->minimum_ip_total_length = htonll(
1861             entry->minimum_ip_total_length);
1862         data_aggregated_ip->maximum_ip_total_length = htonll(
1863             entry->maximum_ip_total_length);
1864     }
1865
1866     set_hdr = (struct ipfix_set_header*)((uint8_t*)dp_packet_data(msg) + set_hdr_offset);
1867     set_hdr->length = htons(dp_packet_size(msg) - set_hdr_offset);
1868 }
1869
1870 /* Send an IPFIX message with a single data record. */
1871 static void
1872 ipfix_send_data_msg(struct dpif_ipfix_exporter *exporter,
1873                     uint32_t export_time_sec,
1874                     struct ipfix_flow_cache_entry *entry,
1875                     enum ipfix_flow_end_reason flow_end_reason)
1876 {
1877     uint64_t msg_stub[DIV_ROUND_UP(MAX_MESSAGE_LEN, 8)];
1878     struct dp_packet msg;
1879     size_t tx_errors;
1880
1881     dp_packet_use_stub(&msg, msg_stub, sizeof msg_stub);
1882
1883     ipfix_init_header(export_time_sec, exporter->seq_number++,
1884                       entry->flow_key.obs_domain_id, &msg);
1885     ipfix_put_data_set(export_time_sec, entry, flow_end_reason,
1886                        exporter->virtual_obs_id, exporter->virtual_obs_len,
1887                        &msg);
1888     tx_errors = ipfix_send_msg(exporter->collectors, &msg);
1889
1890     dp_packet_uninit(&msg);
1891
1892     exporter->stats.current_flows--;
1893     exporter->stats.tx_pkts += collectors_count(exporter->collectors) - tx_errors;
1894     exporter->stats.tx_errors += tx_errors;
1895 }
1896
1897 static void
1898 dpif_ipfix_sample(struct dpif_ipfix_exporter *exporter,
1899                   const struct dp_packet *packet, const struct flow *flow,
1900                   uint64_t packet_delta_count, uint32_t obs_domain_id,
1901                   uint32_t obs_point_id, odp_port_t output_odp_port,
1902                   const struct dpif_ipfix_port *tunnel_port,
1903                   const struct flow_tnl *tunnel_key)
1904 {
1905     struct ipfix_flow_cache_entry *entry;
1906     enum ipfix_sampled_packet_type sampled_packet_type;
1907
1908     /* Create a flow cache entry from the sample. */
1909     entry = xmalloc(sizeof *entry);
1910     sampled_packet_type = ipfix_cache_entry_init(entry, packet,
1911                                                  flow, packet_delta_count,
1912                                                  obs_domain_id, obs_point_id,
1913                                                  output_odp_port, tunnel_port,
1914                                                  tunnel_key);
1915     ipfix_cache_update(exporter, entry, sampled_packet_type);
1916 }
1917
1918 static bool
1919 bridge_exporter_enabled(struct dpif_ipfix *di)
1920 {
1921     return di->bridge_exporter.probability > 0;
1922 }
1923
1924 void
1925 dpif_ipfix_bridge_sample(struct dpif_ipfix *di, const struct dp_packet *packet,
1926                          const struct flow *flow,
1927                          odp_port_t input_odp_port, odp_port_t output_odp_port,
1928                          const struct flow_tnl *output_tunnel_key)
1929     OVS_EXCLUDED(mutex)
1930 {
1931     uint64_t packet_delta_count;
1932     const struct flow_tnl *tunnel_key = NULL;
1933     struct dpif_ipfix_port * tunnel_port = NULL;
1934
1935     ovs_mutex_lock(&mutex);
1936     if (!bridge_exporter_enabled(di)) {
1937         ovs_mutex_unlock(&mutex);
1938         return;
1939     }
1940
1941     /* Skip BFD packets:
1942      * Bidirectional Forwarding Detection(BFD) packets are for monitoring
1943      * the tunnel link status and consumed by ovs itself. No need to
1944      * smaple them.
1945      * CF  IETF RFC 5881, BFD control packet is the UDP packet with
1946      * destination port 3784, and BFD echo packet is the UDP packet with
1947      * destination port 3785.
1948      */
1949     if (is_ip_any(flow) &&
1950         flow->nw_proto == IPPROTO_UDP &&
1951         (flow->tp_dst == htons(BFD_CONTROL_DEST_PORT) ||
1952          flow->tp_dst == htons(BFD_ECHO_DEST_PORT))) {
1953         ovs_mutex_unlock(&mutex);
1954         return;
1955     }
1956
1957     /* Use the sampling probability as an approximation of the number
1958      * of matched packets. */
1959     packet_delta_count = UINT32_MAX / di->bridge_exporter.probability;
1960     if (di->bridge_exporter.options->enable_tunnel_sampling) {
1961         if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
1962             /* Input tunnel. */
1963             tunnel_key = &flow->tunnel;
1964             tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
1965         }
1966         if (output_odp_port != ODPP_NONE && output_tunnel_key) {
1967             /* Output tunnel, output_tunnel_key must be valid. */
1968             tunnel_key = output_tunnel_key;
1969             tunnel_port = dpif_ipfix_find_port(di, output_odp_port);
1970         }
1971     }
1972
1973     dpif_ipfix_sample(&di->bridge_exporter.exporter, packet, flow,
1974                       packet_delta_count,
1975                       di->bridge_exporter.options->obs_domain_id,
1976                       di->bridge_exporter.options->obs_point_id,
1977                       output_odp_port, tunnel_port, tunnel_key);
1978     ovs_mutex_unlock(&mutex);
1979 }
1980
1981 void
1982 dpif_ipfix_flow_sample(struct dpif_ipfix *di, const struct dp_packet *packet,
1983                        const struct flow *flow,
1984                        const union user_action_cookie *cookie,
1985                        odp_port_t input_odp_port,
1986                        const struct flow_tnl *output_tunnel_key)
1987     OVS_EXCLUDED(mutex)
1988 {
1989     struct dpif_ipfix_flow_exporter_map_node *node;
1990     const struct flow_tnl *tunnel_key = NULL;
1991     struct dpif_ipfix_port * tunnel_port = NULL;
1992     odp_port_t output_odp_port = cookie->flow_sample.output_odp_port;
1993     uint32_t collector_set_id = cookie->flow_sample.collector_set_id;
1994     uint16_t probability = cookie->flow_sample.probability;
1995
1996     /* Use the sampling probability as an approximation of the number
1997      * of matched packets. */
1998     uint64_t packet_delta_count = USHRT_MAX / probability;
1999
2000     ovs_mutex_lock(&mutex);
2001     node = dpif_ipfix_find_flow_exporter_map_node(di, collector_set_id);
2002     if (node) {
2003         if (node->exporter.options->enable_tunnel_sampling) {
2004             if (output_odp_port == ODPP_NONE && flow->tunnel.ip_dst) {
2005                 /* Input tunnel. */
2006                 tunnel_key = &flow->tunnel;
2007                 tunnel_port = dpif_ipfix_find_port(di, input_odp_port);
2008             }
2009             if (output_odp_port != ODPP_NONE && output_tunnel_key) {
2010                 /* Output tunnel, output_tunnel_key must be valid. */
2011                 tunnel_key = output_tunnel_key;
2012                 tunnel_port = dpif_ipfix_find_port(di, output_odp_port);
2013             }
2014         }
2015
2016         dpif_ipfix_sample(&node->exporter.exporter, packet, flow,
2017                           packet_delta_count,
2018                           cookie->flow_sample.obs_domain_id,
2019                           cookie->flow_sample.obs_point_id,
2020                           output_odp_port, tunnel_port, tunnel_key);
2021     }
2022     ovs_mutex_unlock(&mutex);
2023 }
2024
2025 static void
2026 dpif_ipfix_cache_expire(struct dpif_ipfix_exporter *exporter,
2027                         bool forced_end, const uint64_t export_time_usec,
2028                         const uint32_t export_time_sec)
2029 {
2030     struct ipfix_flow_cache_entry *entry, *next_entry;
2031     uint64_t max_flow_start_timestamp_usec;
2032     bool template_msg_sent = false;
2033     enum ipfix_flow_end_reason flow_end_reason;
2034
2035     if (ovs_list_is_empty(&exporter->cache_flow_start_timestamp_list)) {
2036         return;
2037     }
2038
2039     max_flow_start_timestamp_usec = export_time_usec -
2040         1000000LL * exporter->cache_active_timeout;
2041
2042     LIST_FOR_EACH_SAFE (entry, next_entry, cache_flow_start_timestamp_list_node,
2043                         &exporter->cache_flow_start_timestamp_list) {
2044         if (forced_end) {
2045             flow_end_reason = FORCED_END;
2046         } else if (entry->flow_start_timestamp_usec
2047                    <= max_flow_start_timestamp_usec) {
2048             flow_end_reason = ACTIVE_TIMEOUT;
2049         } else if (hmap_count(&exporter->cache_flow_key_map)
2050                    > exporter->cache_max_flows) {
2051             /* Enforce exporter->cache_max_flows. */
2052             flow_end_reason = LACK_OF_RESOURCES;
2053         } else {
2054             /* Remaining flows haven't expired yet. */
2055             break;
2056         }
2057
2058         ovs_list_remove(&entry->cache_flow_start_timestamp_list_node);
2059         hmap_remove(&exporter->cache_flow_key_map,
2060                     &entry->flow_key_map_node);
2061
2062         if (!template_msg_sent
2063             && (exporter->last_template_set_time + IPFIX_TEMPLATE_INTERVAL)
2064                 <= export_time_sec) {
2065             ipfix_send_template_msgs(exporter, export_time_sec,
2066                                      entry->flow_key.obs_domain_id);
2067             exporter->last_template_set_time = export_time_sec;
2068             template_msg_sent = true;
2069         }
2070
2071         /* XXX: Group multiple data records for the same obs domain id
2072          * into the same message. */
2073         ipfix_send_data_msg(exporter, export_time_sec, entry, flow_end_reason);
2074         free(entry);
2075     }
2076 }
2077
/* Reads the current time with xgettimeofday() and stores it in two forms:
 *
 *   - '*export_time_usec': the time in microseconds.
 *
 *   - '*export_time_sec': the time in seconds, rounded up to the next whole
 *     second unless the microsecond part is exactly zero.
 *
 * The rounding exists because the IPFIX flow start and end deltas are
 * negative deltas relative to the export time, so the export time used for
 * them is set up to one second ahead. */
static void
get_export_time_now(uint64_t *export_time_usec, uint32_t *export_time_sec)
{
    struct timeval now;

    xgettimeofday(&now);

    *export_time_usec = now.tv_usec + 1000000LL * now.tv_sec;
    *export_time_sec = now.tv_usec ? now.tv_sec + 1 : now.tv_sec;
}
2095
/* Runs dpif_ipfix_cache_expire() on 'exporter' using the current time, as
 * obtained from get_export_time_now(), as the export time.  'forced_end' is
 * passed through unchanged. */
static void
dpif_ipfix_cache_expire_now(struct dpif_ipfix_exporter *exporter,
                            bool forced_end)
{
    uint64_t now_usec;
    uint32_t now_sec;

    get_export_time_now(&now_usec, &now_sec);
    dpif_ipfix_cache_expire(exporter, forced_end, now_usec, now_sec);
}
2107
2108 void
2109 dpif_ipfix_run(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
2110 {
2111     uint64_t export_time_usec;
2112     uint32_t export_time_sec;
2113     struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node;
2114
2115     ovs_mutex_lock(&mutex);
2116     get_export_time_now(&export_time_usec, &export_time_sec);
2117     if (bridge_exporter_enabled(di)) {
2118       dpif_ipfix_cache_expire(
2119           &di->bridge_exporter.exporter, false, export_time_usec,
2120           export_time_sec);
2121     }
2122     HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) {
2123         dpif_ipfix_cache_expire(
2124             &flow_exporter_node->exporter.exporter, false, export_time_usec,
2125             export_time_sec);
2126     }
2127     ovs_mutex_unlock(&mutex);
2128 }
2129
2130 void
2131 dpif_ipfix_wait(struct dpif_ipfix *di) OVS_EXCLUDED(mutex)
2132 {
2133     long long int next_timeout_msec = LLONG_MAX;
2134     struct dpif_ipfix_flow_exporter_map_node *flow_exporter_node;
2135
2136     ovs_mutex_lock(&mutex);
2137     if (bridge_exporter_enabled(di)) {
2138         if (ipfix_cache_next_timeout_msec(
2139                 &di->bridge_exporter.exporter, &next_timeout_msec)) {
2140             poll_timer_wait_until(next_timeout_msec);
2141         }
2142     }
2143     HMAP_FOR_EACH (flow_exporter_node, node, &di->flow_exporter_map) {
2144         if (ipfix_cache_next_timeout_msec(
2145                 &flow_exporter_node->exporter.exporter, &next_timeout_msec)) {
2146             poll_timer_wait_until(next_timeout_msec);
2147         }
2148     }
2149     ovs_mutex_unlock(&mutex);
2150 }