/*
 * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20 #include "ofproto-dpif.h"
21 #include "ofproto-dpif-rid.h"
22 #include "ofproto-provider.h"
23 #include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_rid);

/* Protects all of the pool state below (maps, lists, 'next_id'). */
static struct ovs_mutex mutex;

/* Recirculation ID -> recirc_id_node, keyed by 'node->id' (see
 * recirc_alloc_id__()). */
static struct cmap id_map;
/* frozen_state hash -> recirc_id_node, keyed by 'node->hash'; lets
 * equivalent metadata share one recirculation ID. */
static struct cmap metadata_map;

/* Two-stage expiry queues: nodes whose refcount dropped to zero sit in
 * 'expiring', are moved to 'expired' by recirc_run(), and are freed on a
 * later round (see the comment in recirc_run()). */
static struct ovs_list expiring OVS_GUARDED_BY(mutex);
static struct ovs_list expired OVS_GUARDED_BY(mutex);

static uint32_t next_id OVS_GUARDED_BY(mutex); /* Possible next free id. */

/* IDs up to this value are reserved for long-term allocations; the wrap-around
 * path in recirc_alloc_id__() skips them. */
#define RECIRC_POOL_STATIC_IDS 1024

static void recirc_id_node_free(struct recirc_id_node *);
44 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
46 if (ovsthread_once_start(&once)) {
47 ovs_mutex_init(&mutex);
48 ovs_mutex_lock(&mutex);
49 next_id = 1; /* 0 is not a valid ID. */
51 cmap_init(&metadata_map);
54 ovs_mutex_unlock(&mutex);
56 ovsthread_once_done(&once);
61 /* This should be called by the revalidator once at each round (every 500ms or
66 static long long int last = 0;
67 long long int now = time_msec();
69 /* Do maintenance at most 4 times / sec. */
70 ovs_mutex_lock(&mutex);
71 if (now - last > 250) {
72 struct recirc_id_node *node;
76 /* Nodes in 'expiring' and 'expired' lists have the refcount of zero,
77 * which means that while they can still be found (by id), no new
78 * references can be taken on them. We have removed the entry from the
79 * 'metadata_map', at the time when refcount reached zero, causing any
80 * new translations to allocate a new ID. This allows the expiring
81 * entry to be safely deleted while any sudden new use of the similar
82 * recirculation will safely start using a new recirculation ID. When
83 * the refcount gets to zero, the node is also added to the 'expiring'
84 * list. At any time after that the nodes in the 'expiring' list can
85 * be moved to the 'expired' list, from which they are deleted at least
86 * 250ms afterwards. */
88 /* Delete the expired. These have been lingering for at least 250 ms,
89 * which should be enough for any ongoing recirculations to be
91 LIST_FOR_EACH_POP (node, exp_node, &expired) {
92 cmap_remove(&id_map, &node->id_node, node->id);
93 ovsrcu_postpone(recirc_id_node_free, node);
96 if (!list_is_empty(&expiring)) {
97 /* 'expired' is now empty, move nodes in 'expiring' to it. */
98 list_splice(&expired, list_front(&expiring), &expiring);
101 ovs_mutex_unlock(&mutex);
104 /* We use the id as the hash value, which works due to cmap internal rehashing.
105 * We also only insert nodes with unique IDs, so all possible hash collisions
106 * remain internal to the cmap. */
107 static struct recirc_id_node *
108 recirc_find__(uint32_t id)
111 struct cmap_node *node = cmap_find_protected(&id_map, id);
113 return node ? CONTAINER_OF(node, struct recirc_id_node, id_node) : NULL;
116 /* Lockless RCU protected lookup. If node is needed accross RCU quiescent
117 * state, caller should copy the contents. */
118 const struct recirc_id_node *
119 recirc_id_node_find(uint32_t id)
121 const struct cmap_node *node = cmap_find(&id_map, id);
124 ? CONTAINER_OF(node, const struct recirc_id_node, id_node)
129 frozen_state_hash(const struct frozen_state *state)
133 hash = uuid_hash(&state->ofproto_uuid);
134 hash = hash_int(state->table_id, hash);
135 if (flow_tnl_dst_is_set(state->metadata.tunnel)) {
136 /* We may leave remainder bytes unhashed, but that is unlikely as
137 * the tunnel is not in the datapath format. */
138 hash = hash_bytes64((const uint64_t *) state->metadata.tunnel,
139 flow_tnl_size(state->metadata.tunnel), hash);
141 hash = hash_boolean(state->conntracked, hash);
142 hash = hash_bytes64((const uint64_t *) &state->metadata.metadata,
143 sizeof state->metadata - sizeof state->metadata.tunnel,
145 if (state->stack && state->n_stack) {
146 hash = hash_bytes64((const uint64_t *) state->stack,
147 state->n_stack * sizeof *state->stack, hash);
149 hash = hash_int(state->mirrors, hash);
150 hash = hash_int(state->action_set_len, hash);
151 if (state->action_set_len) {
152 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->action_set),
153 state->action_set_len, hash);
155 if (state->ofpacts_len) {
156 hash = hash_bytes64(ALIGNED_CAST(const uint64_t *, state->ofpacts),
157 state->ofpacts_len, hash);
163 frozen_state_equal(const struct frozen_state *a, const struct frozen_state *b)
165 return (a->table_id == b->table_id
166 && uuid_equals(&a->ofproto_uuid, &b->ofproto_uuid)
167 && flow_tnl_equal(a->metadata.tunnel, b->metadata.tunnel)
168 && !memcmp(&a->metadata.metadata, &b->metadata.metadata,
169 sizeof a->metadata - sizeof a->metadata.tunnel)
170 && a->n_stack == b->n_stack
171 && !memcmp(a->stack, b->stack, a->n_stack * sizeof *a->stack)
172 && a->mirrors == b->mirrors
173 && a->conntracked == b->conntracked
174 && ofpacts_equal(a->ofpacts, a->ofpacts_len,
175 b->ofpacts, b->ofpacts_len)
176 && ofpacts_equal(a->action_set, a->action_set_len,
177 b->action_set, b->action_set_len));
180 /* Lockless RCU protected lookup. If node is needed accross RCU quiescent
181 * state, caller should take a reference. */
182 static struct recirc_id_node *
183 recirc_find_equal(const struct frozen_state *target, uint32_t hash)
185 struct recirc_id_node *node;
187 CMAP_FOR_EACH_WITH_HASH (node, metadata_node, hash, &metadata_map) {
188 if (frozen_state_equal(&node->state, target)) {
195 static struct recirc_id_node *
196 recirc_ref_equal(const struct frozen_state *target, uint32_t hash)
198 struct recirc_id_node *node;
201 node = recirc_find_equal(target, hash);
203 /* Try again if the node was released before we get the reference. */
204 } while (node && !ovs_refcount_try_ref_rcu(&node->refcount));
210 frozen_state_clone(struct frozen_state *new, const struct frozen_state *old,
211 struct flow_tnl *tunnel)
214 flow_tnl_copy__(tunnel, old->metadata.tunnel);
215 new->metadata.tunnel = tunnel;
217 new->stack = (new->n_stack
218 ? xmemdup(new->stack, new->n_stack * sizeof *new->stack)
220 new->ofpacts = (new->ofpacts_len
221 ? xmemdup(new->ofpacts, new->ofpacts_len)
223 new->action_set = (new->action_set_len
224 ? xmemdup(new->action_set, new->action_set_len)
229 frozen_state_free(struct frozen_state *state)
232 free(state->ofpacts);
233 free(state->action_set);
236 /* Allocate a unique recirculation id for the given set of flow metadata.
237 * The ID space is 2^^32, so there should never be a situation in which all
238 * the IDs are used up. We loop until we find a free one.
239 * hash is recomputed if it is passed in as 0. */
240 static struct recirc_id_node *
241 recirc_alloc_id__(const struct frozen_state *state, uint32_t hash)
243 ovs_assert(state->action_set_len <= state->ofpacts_len);
245 struct recirc_id_node *node = xzalloc(sizeof *node);
248 ovs_refcount_init(&node->refcount);
249 frozen_state_clone(CONST_CAST(struct frozen_state *, &node->state), state,
250 &node->state_metadata_tunnel);
252 ovs_mutex_lock(&mutex);
254 /* Claim the next ID. The ID space should be sparse enough for the
255 allocation to succeed at the first try. We do skip the first
256 RECIRC_POOL_STATIC_IDS IDs on the later rounds, though, as some of
257 the initial allocations may be for long term uses (like bonds). */
258 node->id = next_id++;
259 if (OVS_UNLIKELY(!node->id)) {
260 next_id = RECIRC_POOL_STATIC_IDS + 1;
261 node->id = next_id++;
263 /* Find if the id is free. */
264 if (OVS_LIKELY(!recirc_find__(node->id))) {
268 cmap_insert(&id_map, &node->id_node, node->id);
269 cmap_insert(&metadata_map, &node->metadata_node, node->hash);
270 ovs_mutex_unlock(&mutex);
274 /* Look up an existing ID for the given flow's metadata and optional actions.
277 recirc_find_id(const struct frozen_state *target)
279 uint32_t hash = frozen_state_hash(target);
280 struct recirc_id_node *node = recirc_find_equal(target, hash);
281 return node ? node->id : 0;
284 /* Allocate a unique recirculation id for the given set of flow metadata and
287 recirc_alloc_id_ctx(const struct frozen_state *state)
289 uint32_t hash = frozen_state_hash(state);
290 struct recirc_id_node *node = recirc_ref_equal(state, hash);
292 node = recirc_alloc_id__(state, hash);
297 /* Allocate a unique recirculation id. */
299 recirc_alloc_id(struct ofproto_dpif *ofproto)
301 struct flow_tnl tunnel;
302 tunnel.ip_dst = htonl(0);
303 tunnel.ipv6_dst = in6addr_any;
304 struct frozen_state state = {
305 .table_id = TBL_INTERNAL,
306 .ofproto_uuid = *ofproto_dpif_get_uuid(ofproto),
307 .metadata = { .tunnel = &tunnel, .in_port = OFPP_NONE },
309 return recirc_alloc_id__(&state, frozen_state_hash(&state))->id;
313 recirc_id_node_free(struct recirc_id_node *node)
315 frozen_state_free(CONST_CAST(struct frozen_state *, &node->state));
320 recirc_id_node_unref(const struct recirc_id_node *node_)
323 struct recirc_id_node *node = CONST_CAST(struct recirc_id_node *, node_);
325 if (node && ovs_refcount_unref(&node->refcount) == 1) {
326 ovs_mutex_lock(&mutex);
327 /* Prevent re-use of this node by removing the node from 'metadata_map'
329 cmap_remove(&metadata_map, &node->metadata_node, node->hash);
330 /* We keep the node in the 'id_map' so that it can be found as long
331 * as it lingers, and add it to the 'expiring' list. */
332 list_insert(&expiring, &node->exp_node);
333 ovs_mutex_unlock(&mutex);
338 recirc_free_id(uint32_t id)
340 const struct recirc_id_node *node;
342 node = recirc_id_node_find(id);
344 recirc_id_node_unref(node);
346 VLOG_ERR("Freeing nonexistent recirculation ID: %"PRIu32, id);
350 /* Called when 'ofproto' is destructed. Checks for and clears any
352 * No other thread may have access to the 'ofproto' being destructed.
353 * All related datapath flows must be deleted before calling this. */
355 recirc_free_ofproto(struct ofproto_dpif *ofproto, const char *ofproto_name)
357 struct recirc_id_node *n;
359 const struct uuid *ofproto_uuid = ofproto_dpif_get_uuid(ofproto);
360 CMAP_FOR_EACH (n, metadata_node, &metadata_map) {
361 if (uuid_equals(&n->state.ofproto_uuid, ofproto_uuid)) {
362 VLOG_ERR("recirc_id %"PRIu32
363 " left allocated when ofproto (%s)"
364 " is destructed", n->id, ofproto_name);