X-Git-Url: http://git.cascardo.eti.br/?a=blobdiff_plain;f=lib%2Fdpif.h;h=97d5d064c184aea97ac97263a6902097a8069913;hb=HEAD;hp=7f986f957b69aa96449328591935708a6ee2751b;hpb=758c456df570a1af1d9e913d50a3478785663e66;p=cascardo%2Fovs.git diff --git a/lib/dpif.h b/lib/dpif.h index 7f986f957..97d5d064c 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,8 @@ * "internal" (for a simulated port used to connect to the TCP/IP stack), * and "gre" (for a GRE tunnel). * - * - A Netlink PID (see "Upcall Queuing and Ordering" below). + * - A Netlink PID for each upcall reading thread (see "Upcall Queuing and + * Ordering" below). * * The dpif interface has functions for adding and deleting ports. When a * datapath implements these (e.g. as the Linux and netdev datapaths do), then @@ -112,9 +113,9 @@ * * In Open vSwitch userspace, "struct flow" is the typical way to describe * a flow, but the datapath interface uses a different data format to - * allow ABI forward- and backward-compatibility. datapath/README + * allow ABI forward- and backward-compatibility. datapath/README.md * describes the rationale and design. Refer to OVS_KEY_ATTR_* and - * "struct ovs_key_*" in include/linux/openvswitch.h for details. + * "struct ovs_key_*" in include/odp-netlink.h for details. * lib/odp-util.h defines several functions for working with these flows. * * - A "mask" that, for each bit in the flow, specifies whether the datapath @@ -153,9 +154,8 @@ * within a flow. Some examples of actions are OVS_ACTION_ATTR_OUTPUT, * which transmits the packet out a port, and OVS_ACTION_ATTR_SET, which * modifies packet headers. 
Refer to OVS_ACTION_ATTR_* and "struct - * ovs_action_*" in include/linux/openvswitch.h for details. - * lib/odp-util.h defines several functions for working with datapath - * actions. + * ovs_action_*" in include/odp-netlink.h for details. lib/odp-util.h + * defines several functions for working with datapath actions. * * The actions list may be empty. This indicates that nothing should be * done to matching packets, that is, they should be dropped. @@ -205,10 +205,10 @@ * connection consists of two flows with 1-ms latency to set up each one. * * To receive upcalls, a client has to enable them with dpif_recv_set(). A - * datapath should generally support multiple clients at once (e.g. so that one - * may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" is - * also running) but need not support multiple clients enabling upcalls at - * once. + * datapath should generally support being opened multiple times (e.g. so that + * one may run "ovs-dpctl show" or "ovs-dpctl dump-flows" while "ovs-vswitchd" + * is also running) but need not support more than one of these clients + * enabling upcalls at once. * * * Upcall Queuing and Ordering @@ -261,7 +261,7 @@ * PID in "action" upcalls is that dpif_port_get_pid() returns a constant value * and all upcalls are appended to a single queue. * - * The ideal behavior is: + * The preferred behavior is: * * - Each port has a PID that identifies the queue used for "miss" upcalls * on that port. (Thus, if each port has its own queue for "miss" @@ -275,6 +275,18 @@ * * - Upcalls that specify the "special" Netlink PID are queued separately. * + * Multiple threads may want to read upcalls simultaneously from a single + * datapath. To support multiple threads well, one extends the above preferred + * behavior: + * + * - Each port has multiple PIDs. The datapath distributes "miss" upcalls + * across the PIDs, ensuring that a given flow is mapped in a stable way + * to a single PID. 
+ * + * - For "action" upcalls, the thread can specify its own Netlink PID or + * other threads' Netlink PID of the same port for offloading purpose + * (e.g. in a "round robin" manner). + * * * Packet Format * ============= @@ -356,11 +368,19 @@ * thread-safe: they may be called from different threads only on * different dpif objects. * - * - Functions that operate on struct dpif_port_dump or struct - * dpif_flow_dump are conditionally thread-safe with respect to those - * objects. That is, one may dump ports or flows from any number of - * threads at once, but each thread must use its own struct dpif_port_dump - * or dpif_flow_dump. + * - dpif_flow_dump_next() is conditionally thread-safe: It may be called + * from different threads with the same 'struct dpif_flow_dump', but all + * other parameters must be different for each thread. + * + * - dpif_flow_dump_done() is conditionally thread-safe: All threads that + * share the same 'struct dpif_flow_dump' must have finished using it. + * This function must then be called exactly once for a particular + * dpif_flow_dump to finish the corresponding flow dump operation. + * + * - Functions that operate on 'struct dpif_port_dump' are conditionally + * thread-safe with respect to those objects. That is, one may dump ports + * from any number of threads at once, but each thread must use its own + * struct dpif_port_dump. 
*/ #ifndef DPIF_H #define DPIF_H 1 @@ -369,8 +389,9 @@ #include #include #include "netdev.h" -#include "ofpbuf.h" +#include "dp-packet.h" #include "openflow/openflow.h" +#include "ovs-numa.h" #include "packets.h" #include "util.h" @@ -379,11 +400,13 @@ extern "C" { #endif struct dpif; +struct dpif_class; +struct dpif_flow; struct ds; struct flow; +struct flow_wildcards; struct nlattr; struct sset; -struct dpif_class; int dp_register_provider(const struct dpif_class *); int dp_unregister_provider(const char *type); @@ -399,7 +422,7 @@ int dpif_create(const char *name, const char *type, struct dpif **); int dpif_create_and_open(const char *name, const char *type, struct dpif **); void dpif_close(struct dpif *); -void dpif_run(struct dpif *); +bool dpif_run(struct dpif *); void dpif_wait(struct dpif *); const char *dpif_name(const struct dpif *); @@ -445,7 +468,8 @@ int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); int dpif_port_get_name(struct dpif *, odp_port_t port_no, char *name, size_t name_size); -uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no); +uint32_t dpif_port_get_pid(const struct dpif *, odp_port_t port_no, + uint32_t hash); struct dpif_port_dump { const struct dpif *dpif; @@ -481,41 +505,91 @@ struct dpif_flow_stats { uint16_t tcp_flags; }; -void dpif_flow_stats_extract(const struct flow *, const struct ofpbuf *packet, +void dpif_flow_stats_extract(const struct flow *, const struct dp_packet *packet, long long int used, struct dpif_flow_stats *); void dpif_flow_stats_format(const struct dpif_flow_stats *, struct ds *); enum dpif_flow_put_flags { DPIF_FP_CREATE = 1 << 0, /* Allow creating a new flow. */ DPIF_FP_MODIFY = 1 << 1, /* Allow modifying an existing flow. */ - DPIF_FP_ZERO_STATS = 1 << 2 /* Zero the stats of an existing flow. */ + DPIF_FP_ZERO_STATS = 1 << 2, /* Zero the stats of an existing flow. */ + DPIF_FP_PROBE = 1 << 3 /* Suppress error messages, if any. 
*/ }; +bool dpif_probe_feature(struct dpif *, const char *name, + const struct ofpbuf *key, const ovs_u128 *ufid); +void dpif_flow_hash(const struct dpif *, const void *key, size_t key_len, + ovs_u128 *hash); int dpif_flow_flush(struct dpif *); int dpif_flow_put(struct dpif *, enum dpif_flow_put_flags, const struct nlattr *key, size_t key_len, const struct nlattr *mask, size_t mask_len, const struct nlattr *actions, size_t actions_len, + const ovs_u128 *ufid, const unsigned pmd_id, struct dpif_flow_stats *); int dpif_flow_del(struct dpif *, const struct nlattr *key, size_t key_len, + const ovs_u128 *ufid, const unsigned pmd_id, struct dpif_flow_stats *); -int dpif_flow_get(const struct dpif *, +int dpif_flow_get(struct dpif *, const struct nlattr *key, size_t key_len, - struct ofpbuf **actionsp, struct dpif_flow_stats *); - -struct dpif_flow_dump { - const struct dpif *dpif; - int error; - void *state; + const ovs_u128 *ufid, const unsigned pmd_id, + struct ofpbuf *, struct dpif_flow *); + +/* Flow dumping interface + * ====================== + * + * This interface allows iteration through all of the flows currently installed + * in a datapath. It is somewhat complicated by two requirements: + * + * - Efficient support for dumping flows in parallel from multiple threads. + * + * - Allow callers to avoid making unnecessary copies of data returned by + * the interface across several flows in cases where the dpif + * implementation has to maintain a copy of that information anyhow. + * (That is, allow the client visibility into any underlying batching as + * part of its own batching.) + * + * + * Usage + * ----- + * + * 1. Call dpif_flow_dump_create(). + * 2. In each thread that participates in the dump (which may be just a single + * thread if parallelism isn't important): + * (a) Call dpif_flow_dump_thread_create(). + * (b) Call dpif_flow_dump_next() repeatedly until it returns 0. + * (c) Call dpif_flow_dump_thread_destroy(). + * 3. Call dpif_flow_dump_destroy(). 
+ * + * All error reporting is deferred to the call to dpif_flow_dump_destroy(). + */ +struct dpif_flow_dump *dpif_flow_dump_create(const struct dpif *, bool terse); +int dpif_flow_dump_destroy(struct dpif_flow_dump *); + +struct dpif_flow_dump_thread *dpif_flow_dump_thread_create( + struct dpif_flow_dump *); +void dpif_flow_dump_thread_destroy(struct dpif_flow_dump_thread *); + +#define PMD_ID_NULL OVS_CORE_UNSPEC + +/* A datapath flow as dumped by dpif_flow_dump_next(). */ +struct dpif_flow { + const struct nlattr *key; /* Flow key, as OVS_KEY_ATTR_* attrs. */ + size_t key_len; /* 'key' length in bytes. */ + const struct nlattr *mask; /* Flow mask, as OVS_KEY_ATTR_* attrs. */ + size_t mask_len; /* 'mask' length in bytes. */ + const struct nlattr *actions; /* Actions, as OVS_ACTION_ATTR_ */ + size_t actions_len; /* 'actions' length in bytes. */ + ovs_u128 ufid; /* Unique flow identifier. */ + bool ufid_present; /* True if 'ufid' was provided by datapath.*/ + unsigned pmd_id; /* Datapath poll mode driver id. */ + struct dpif_flow_stats stats; /* Flow statistics. */ }; -void dpif_flow_dump_start(struct dpif_flow_dump *, const struct dpif *); -bool dpif_flow_dump_next(struct dpif_flow_dump *, - const struct nlattr **key, size_t *key_len, - const struct nlattr **mask, size_t *mask_len, - const struct nlattr **actions, size_t *actions_len, - const struct dpif_flow_stats **); -int dpif_flow_dump_done(struct dpif_flow_dump *); +int dpif_flow_dump_next(struct dpif_flow_dump_thread *, + struct dpif_flow *flows, int max_flows); + +#define DPIF_FLOW_BUFSIZE 2048 /* Operation batching interface. * @@ -527,8 +601,35 @@ enum dpif_op_type { DPIF_OP_FLOW_PUT = 1, DPIF_OP_FLOW_DEL, DPIF_OP_EXECUTE, + DPIF_OP_FLOW_GET, }; +/* Add or modify a flow. + * + * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in + * the 'key_len' bytes starting at 'key'. 
The associated actions are specified + * by the Netlink attributes with types OVS_ACTION_ATTR_* in the 'actions_len' + * bytes starting at 'actions'. + * + * - If the flow's key does not exist in the dpif, then the flow will be + * added if 'flags' includes DPIF_FP_CREATE. Otherwise the operation will + * fail with ENOENT. + * + * If the operation succeeds, then 'stats', if nonnull, will be zeroed. + * + * - If the flow's key does exist in the dpif, then the flow's actions will + * be updated if 'flags' includes DPIF_FP_MODIFY. Otherwise the operation + * will fail with EEXIST. If the flow's actions are updated, then its + * statistics will be zeroed if 'flags' includes DPIF_FP_ZERO_STATS, and + * left as-is otherwise. + * + * If the operation succeeds, then 'stats', if nonnull, will be set to the + * flow's statistics before the update. + * + * - If the datapath implements multiple pmd threads, each with its own flow + * table, 'pmd_id' should be used to specify the particular polling + * thread for the operation. + */ struct dpif_flow_put { /* Input. */ enum dpif_flow_put_flags flags; /* DPIF_FP_*. */ @@ -538,35 +639,106 @@ struct dpif_flow_put { size_t mask_len; /* Length of 'mask' in bytes. */ const struct nlattr *actions; /* Actions to perform on flow. */ size_t actions_len; /* Length of 'actions' in bytes. */ + const ovs_u128 *ufid; /* Optional unique flow identifier. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ }; +/* Delete a flow. + * + * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in + * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If + * the flow was created using 'ufid', then 'ufid' must be specified to delete + * the flow. If both are specified, 'key' will be ignored for flow deletion. + * Succeeds with status 0 if the flow is deleted, or fails with ENOENT if the + * dpif does not contain such a flow. 
+ * + * Callers should always provide the 'key' to improve dpif logging in the event + * of errors or unexpected behaviour. + * + * If the datapath implements multiple polling threads, each with its own flow table, + * 'pmd_id' should be used to specify the particular polling thread for the + * operation. + * + * If the operation succeeds, then 'stats', if nonnull, will be set to the + * flow's statistics before its deletion. */ struct dpif_flow_del { /* Input. */ const struct nlattr *key; /* Flow to delete. */ size_t key_len; /* Length of 'key' in bytes. */ + const ovs_u128 *ufid; /* Unique identifier of flow to delete. */ + bool terse; /* OK to skip sending/receiving full flow + * info? */ + unsigned pmd_id; /* Datapath poll mode driver id. */ /* Output. */ struct dpif_flow_stats *stats; /* Optional flow statistics. */ }; +/* Executes actions on a specified packet. + * + * Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet + * frame in 'packet' and on the packet metadata in 'md'. May modify both + * 'packet' and 'md'. + * + * Some dpif providers do not implement every action. The Linux kernel + * datapath, in particular, does not implement ARP field modification. If + * 'needs_help' is true, the dpif layer executes in userspace all of the + * actions that it can, and for OVS_ACTION_ATTR_OUTPUT and + * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the dpif + * implementation. + * + * This works even if 'actions_len' is too long for a Netlink attribute. */ struct dpif_execute { - /* Raw support for execute passed along to the provider. */ + /* Input. */ const struct nlattr *actions; /* Actions to execute on packet. */ size_t actions_len; /* Length of 'actions' in bytes. */ - struct ofpbuf *packet; /* Packet to execute. */ - struct pkt_metadata md; /* Packet metadata. */ - - /* Some dpif providers do not implement every action. The Linux kernel - * datapath, in particular, does not implement ARP field modification. 
- * - * If this member is set to true, the dpif layer executes in userspace all - * of the actions that it can, and for OVS_ACTION_ATTR_OUTPUT and - * OVS_ACTION_ATTR_USERSPACE actions it passes the packet through to the - * dpif implementation. */ bool needs_help; + bool probe; /* Suppress error messages. */ + unsigned int mtu; /* Maximum transmission unit to fragment. + 0 if not a fragmented packet */ + + /* Input, but possibly modified as a side effect of execution. */ + struct dp_packet *packet; /* Packet to execute. */ +}; + +/* Queries the dpif for a flow entry. + * + * The flow is specified by the Netlink attributes with types OVS_KEY_ATTR_* in + * the 'key_len' bytes starting at 'key', or the unique identifier 'ufid'. If + * the flow was created using 'ufid', then 'ufid' must be specified to fetch + * the flow. If both are specified, 'key' will be ignored for the flow query. + * 'buffer' must point to an initialized buffer, with a recommended size of + * DPIF_FLOW_BUFSIZE bytes. + * + * On success, 'flow' will be populated with the mask, actions and stats for + * the datapath flow corresponding to 'key'. The mask and actions may point + * within '*buffer', or may point at RCU-protected data. Therefore, callers + * that wish to hold these over quiescent periods must make a copy of these + * fields before quiescing. + * + * Callers should always provide 'key' to improve dpif logging in the event of + * errors or unexpected behaviour. + * + * If the datapath implements multiple polling threads, each with its own flow table, + * 'pmd_id' should be used to specify the particular polling thread for the + * operation. + * + * Succeeds with status 0 if the flow is fetched, or fails with ENOENT if no + * such flow exists. Other failures are indicated with a positive errno value. + */ +struct dpif_flow_get { + /* Input. */ + const struct nlattr *key; /* Flow to get. */ + size_t key_len; /* Length of 'key' in bytes. 
*/ + const ovs_u128 *ufid; /* Unique identifier of flow to get. */ + unsigned pmd_id; /* Datapath poll mode driver id. */ + struct ofpbuf *buffer; /* Storage for output parameters. */ + + /* Output. */ + struct dpif_flow *flow; /* Resulting flow from datapath. */ }; int dpif_execute(struct dpif *, struct dpif_execute *); @@ -578,6 +750,7 @@ struct dpif_op { struct dpif_flow_put flow_put; struct dpif_flow_del flow_del; struct dpif_execute execute; + struct dpif_flow_get flow_get; } u; }; @@ -605,18 +778,73 @@ const char *dpif_upcall_type_to_string(enum dpif_upcall_type); struct dpif_upcall { /* All types. */ enum dpif_upcall_type type; - struct ofpbuf packet; /* Packet data. */ + struct dp_packet packet; /* Packet data. */ struct nlattr *key; /* Flow key. */ size_t key_len; /* Length of 'key' in bytes. */ + ovs_u128 ufid; /* Unique flow identifier for 'key'. */ + struct nlattr *mru; /* Maximum receive unit. */ /* DPIF_UC_ACTION only. */ struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ + struct nlattr *out_tun_key; /* Output tunnel key. */ + struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */ }; +/* A callback to notify higher layer of dpif about to be purged, so that + * higher layer could try reacting to this (e.g. grabbing all flow stats + * before they are gone). This function is currently implemented only by + * dpif-netdev. + * + * The caller needs to provide the 'aux' pointer passed down by higher + * layer from the dpif_register_dp_purge_cb() function and the 'pmd_id' of + * the polling thread. + */ + typedef void dp_purge_callback(void *aux, unsigned pmd_id); + +void dpif_register_dp_purge_cb(struct dpif *, dp_purge_callback *, void *aux); + +/* A callback to process an upcall, currently implemented only by dpif-netdev. 
+ * + * The caller provides the 'packet' and 'flow' to process, the corresponding + * 'ufid' as generated by dpif_flow_hash(), the polling thread id 'pmd_id', + * the 'type' of the upcall, and if 'type' is DPIF_UC_ACTION then the + * 'userdata' attached to the action. + * + * The callback must fill in 'actions' with the datapath actions to apply to + * 'packet'. 'wc' and 'put_actions' will either be both null or both nonnull. + * If they are nonnull, then the caller will install a flow entry to process + * all future packets that match 'flow' and 'wc'; the callback must store a + * wildcard mask suitable for that purpose into 'wc'. If the actions to store + * into the flow entry are the same as 'actions', then the callback may leave + * 'put_actions' empty; otherwise it must store the desired actions into + * 'put_actions'. + * + * Returns 0 if successful, ENOSPC if the flow limit has been reached and no + * flow should be installed, or otherwise a positive errno value. */ +typedef int upcall_callback(const struct dp_packet *packet, + const struct flow *flow, + ovs_u128 *ufid, + unsigned pmd_id, + enum dpif_upcall_type type, + const struct nlattr *userdata, + struct ofpbuf *actions, + struct flow_wildcards *wc, + struct ofpbuf *put_actions, + void *aux); + +void dpif_register_upcall_cb(struct dpif *, upcall_callback *, void *aux); + int dpif_recv_set(struct dpif *, bool enable); -int dpif_recv(struct dpif *, struct dpif_upcall *, struct ofpbuf *); +int dpif_handlers_set(struct dpif *, uint32_t n_handlers); +int dpif_poll_threads_set(struct dpif *, const char *cmask); +int dpif_recv(struct dpif *, uint32_t handler_id, struct dpif_upcall *, + struct ofpbuf *); void dpif_recv_purge(struct dpif *); -void dpif_recv_wait(struct dpif *); +void dpif_recv_wait(struct dpif *, uint32_t handler_id); +void dpif_enable_upcall(struct dpif *); +void dpif_disable_upcall(struct dpif *); + +void dpif_print_packet(struct dpif *, struct dpif_upcall *); /* Miscellaneous. 
*/ @@ -626,6 +854,8 @@ void dpif_get_netflow_ids(const struct dpif *, int dpif_queue_to_priority(const struct dpif *, uint32_t queue_id, uint32_t *priority); +char *dpif_get_dp_version(const struct dpif *); +bool dpif_supports_tnl_push_pop(const struct dpif *); #ifdef __cplusplus } #endif