if (old) {
int i;
- for (i = 0; i < old->max; i++) {
- if (old->masks[i])
- new->masks[new->count++] = old->masks[i];
- }
+ /* Live masks are dense in [0, count) and always fit the new
+ * size, so copying the valid section is enough; this also
+ * avoids writing past the end of a smaller (shrunk) array.
+ */
+ for (i = 0; i < old->count; i++)
+ new->masks[i] = old->masks[i];
+
+ new->count = old->count;
}
rcu_assign_pointer(tbl->mask_array, new);
return 0;
}
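The copy above relies on the array being kept dense: insertion and deletion (see tbl_mask_array_delete_mask below) keep every live mask in a slot within [0, count), so carrying count over replaces the old compaction pass. A minimal sketch of the invariant, with plain pointers instead of RCU and hypothetical toy_* names:

	/* Sketch only: copy a dense pointer array, assuming the
	 * destination is zero-filled and its capacity >= src->count.
	 */
	struct toy_array {
		int count;	/* live entries, packed at the front */
		int max;	/* capacity */
		void *slots[];
	};

	static void toy_copy(struct toy_array *dst, const struct toy_array *src)
	{
		int i;

		for (i = 0; i < src->count; i++)
			dst->slots[i] = src->slots[i];
		dst->count = src->count;
	}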
+static void tbl_mask_array_delete_mask(struct mask_array *ma,
+ const struct sw_flow_mask *mask)
+{
+ int i = 0;
+
+ /* Delete a mask pointer from the valid section.
+ *
+ * Also move the last entry into its place, so there is no
+ * hole in the valid section.
+ *
+ * Notice the last entry still points to the original mask.
+ *
+ * <Note>: there is a small race window that may cause a mask
+ * to be missed in a search. Imagine one core walking the
+ * array, already past the index of the mask being deleted.
+ * Before it reaches the last entry, that entry is overwritten
+ * by another core adding a new mask, so the last entry now
+ * points to the new mask. The moved-up last entry can then be
+ * missed by the core walking the mask array.
+ *
+ * If the missed mask would have led to a successful lookup,
+ * hitting the race window can cause a packet to miss the
+ * kernel flow cache and be sent to user space.
+ * </Note>
+ */
+ for (i = 0; i < ma->count; i++)
+ if (mask == ovsl_dereference(ma->masks[i])) {
+ struct sw_flow_mask *last;
+
+ last = ovsl_dereference(ma->masks[ma->count - 1]);
+ rcu_assign_pointer(ma->masks[i], last);
+ ma->count--;
+ break;
+ }
+
+ /* Remove the deleted mask pointers from the invalid section. */
+ for (i = ma->count; i < ma->max; i++)
+ if (mask == ovsl_dereference(ma->masks[i]))
+ RCU_INIT_POINTER(ma->masks[i], NULL);
+}
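tbl_mask_array_delete_mask() is the classic swap-with-last deletion for an array scanned by lockless readers. A hedged userspace sketch of the same shape (single writer assumed; WRITE_ONCE stands in for rcu_assign_pointer, which in the kernel also provides the release ordering readers rely on; all names here are illustrative):

	static void toy_delete(void **slots, int *count, void *victim)
	{
		int i, n = *count;

		for (i = 0; i < n; i++) {
			if (slots[i] == victim) {
				/* Move the tail entry up; a concurrent
				 * reader may see it twice, or miss it in
				 * the benign race described above.
				 */
				WRITE_ONCE(slots[i], slots[n - 1]);
				*count = n - 1;
				break;
			}
		}
	}

Because the race only produces a miss, never a wrong match, the worst outcome is the upcall to user space noted in the comment.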
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti;
return NULL;
}
-
static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct table_instance *ti,
struct mask_array *ma,
struct sw_flow_mask *mask;
mask = rcu_dereference_ovsl(ma->masks[i]);
- if (mask) {
- flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
- if (flow) { /* Found */
- *index = i;
- return flow;
- }
+ if (!mask)
+ break;
+
+ flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+ if (flow) { /* Found */
+ *index = i;
+ return flow;
}
}
u32 skb_hash,
u32 *n_mask_hit)
{
- struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
- struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
- struct mask_cache_entry *entries, *ce, *del;
+ struct mask_array *ma = rcu_dereference(tbl->mask_array);
+ struct table_instance *ti = rcu_dereference(tbl->ti);
+ struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
u32 hash = skb_hash;
int seg;
return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
}
- del = NULL;
+ ce = NULL;
entries = this_cpu_ptr(tbl->mask_cache);
+ /* Find the cache entry 'ce' to operate on. */
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
- int index;
-
- index = hash & (MC_HASH_ENTRIES - 1);
- ce = &entries[index];
-
- if (ce->skb_hash == skb_hash) {
- struct sw_flow_mask *mask;
-
- mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
- if (mask) {
- flow = masked_flow_lookup(ti, key, mask,
- n_mask_hit);
- if (flow) /* Found */
- return flow;
-
+ int index = hash & (MC_HASH_ENTRIES - 1);
+ struct mask_cache_entry *e;
+
+ e = &entries[index];
+ if (e->skb_hash == skb_hash) {
+ struct sw_flow_mask *cache;
+ int i = e->mask_index;
+
+ if (likely(i < ma->max)) {
+ cache = rcu_dereference(ma->masks[i]);
+ if (cache) {
+ flow = masked_flow_lookup(ti, key,
+ cache, n_mask_hit);
+ if (flow)
+ return flow;
+ }
}
- del = ce;
+
+ /* Cache miss. This is the best cache
+ * replacement candidate.
+ */
+ e->skb_hash = 0;
+ ce = e;
break;
}
- if (!del || (del->skb_hash && !ce->skb_hash) ||
- (rcu_dereference_ovsl(ma->masks[del->mask_index]) &&
- !rcu_dereference_ovsl(ma->masks[ce->mask_index]))) {
- del = ce;
- }
+ if (!ce || e->skb_hash < ce->skb_hash)
+ ce = e; /* A better replacement cache candidate. */
hash >>= MC_HASH_SHIFT;
}
- flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &del->mask_index);
+ /* Cache miss, do full lookup. */
+ flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, &ce->mask_index);
if (flow)
- del->skb_hash = skb_hash;
+ ce->skb_hash = skb_hash;
return flow;
}
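For reference, the probe consumes skb_hash in MC_HASH_SHIFT-bit segments to address MC_HASH_SEGS per-CPU slots, and on a miss recycles the probed slot with the smallest skb_hash, which is why a failed entry is zeroed above to make it the preferred victim. A standalone sketch with toy constants and names standing in for the real MC_* values (u32 as in the kernel, a 32-bit unsigned int):

	#define TOY_ENTRIES 256	/* power of two, like MC_HASH_ENTRIES */
	#define TOY_SEGS 4	/* like MC_HASH_SEGS */
	#define TOY_SHIFT 8	/* like MC_HASH_SHIFT */

	struct toy_ce { u32 skb_hash; u32 mask_index; };

	/* Returns the matching entry on a hit, else the recycle victim. */
	static struct toy_ce *toy_probe(struct toy_ce *cache, u32 skb_hash,
					bool *hit)
	{
		struct toy_ce *victim = NULL;
		u32 hash = skb_hash;
		int seg;

		*hit = false;
		for (seg = 0; seg < TOY_SEGS; seg++) {
			struct toy_ce *e = &cache[hash & (TOY_ENTRIES - 1)];

			if (e->skb_hash == skb_hash) {
				*hit = true;
				return e;
			}
			if (!victim || e->skb_hash < victim->skb_hash)
				victim = e;
			hash >>= TOY_SHIFT;
		}
		return victim;
	}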
u32 __always_unused n_mask_hit;
u32 __always_unused index;
- n_mask_hit = 0;
return flow_lookup(tbl, ti, ma, key, &n_mask_hit, &index);
}
+struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
+ struct sw_flow_match *match)
+{
+ struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
+ struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
+ struct sw_flow *flow;
+ u32 __always_unused n_mask_hit;
+ int i;
+
+ /* Always called under ovs-mutex. */
+ for (i = 0; i < ma->count; i++) {
+ struct sw_flow_mask *mask;
+
+ mask = ovsl_dereference(ma->masks[i]);
+ if (mask) {
+ flow = masked_flow_lookup(ti, match->key, mask,
+ &n_mask_hit);
+ if (flow && ovs_flow_cmp_unmasked_key(flow, match))
+ return flow; /* Found */
+ }
+ }
+ return NULL;
+}
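ovs_flow_tbl_lookup_exact() uses ovsl_dereference(), so it is only safe with ovs_mutex held. A hypothetical caller for illustration (toy_find_exact is not a real function; ovs_lock()/ovs_unlock() are the datapath's locking helpers):

	static int toy_find_exact(struct flow_table *tbl,
				  struct sw_flow_match *match)
	{
		int err = 0;

		ovs_lock();
		if (!ovs_flow_tbl_lookup_exact(tbl, match))
			err = -ENOENT;
		ovs_unlock();

		return err;
	}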
+
int ovs_flow_tbl_num_masks(const struct flow_table *table)
{
struct mask_array *ma;
if (!mask->ref_count) {
struct mask_array *ma;
- int i;
ma = ovsl_dereference(tbl->mask_array);
- for (i = 0; i < ma->max; i++) {
- if (mask == ovsl_dereference(ma->masks[i])) {
- RCU_INIT_POINTER(ma->masks[i], NULL);
- ma->count--;
- goto free;
- }
+ /* Shrink the mask array if necessary. */
+ if (ma->max > MASK_ARRAY_SIZE_MIN * 2 &&
+ ma->count <= ma->max / 4) {
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+ ma = ovsl_dereference(tbl->mask_array);
}
- BUG();
-free:
+
+ tbl_mask_array_delete_mask(ma, mask);
call_rcu(&mask->rcu, rcu_free_sw_flow_mask_cb);
}
}
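Note the hysteresis: masks are appended until the array is full, while shrinking waits until it is no more than a quarter full, so a mask count hovering near a resize boundary cannot cause realloc thrashing. A worked example, assuming MASK_ARRAY_SIZE_MIN == 16:

	/* max = 64: shrink fires when count drops to 16 (64 / 4),
	 *           halving max to 32; 17..32 live masks still fit.
	 * max = 32: "32 > MASK_ARRAY_SIZE_MIN * 2" is false, so the
	 *           array never shrinks below twice the minimum.
	 */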
int i;
ma = ovsl_dereference(tbl->mask_array);
- for (i = 0; i < ma->max; i++) {
+ for (i = 0; i < ma->count; i++) {
struct sw_flow_mask *t;
t = ovsl_dereference(ma->masks[i]);
mask = flow_mask_find(tbl, new);
if (!mask) {
struct mask_array *ma;
- int i;
/* Allocate a new mask if none exists. */
mask = mask_alloc();
}
ma = ovsl_dereference(tbl->mask_array);
}
- for (i = 0; i < ma->max; i++) {
- const struct sw_flow_mask *t;
-
- t = ovsl_dereference(ma->masks[i]);
- if (!t) {
- rcu_assign_pointer(ma->masks[i], mask);
- ma->count++;
- break;
- }
- }
+
+ rcu_assign_pointer(ma->masks[ma->count], mask);
+ ma->count++;
} else {
BUG_ON(!mask->ref_count);
mask->ref_count++;