[cascardo/linux.git] / lib / rhashtable.c
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Based on the following paper:
 * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
 *
 * Code partially derived from nft_hash
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>

#define HASH_DEFAULT_SIZE       64UL
#define HASH_MIN_SIZE           4UL
#define BUCKET_LOCKS_PER_CPU   128UL

/* Base bits plus 1 bit for nulls marker */
#define HASH_RESERVED_SPACE     (RHT_BASE_BITS + 1)

enum {
        RHT_LOCK_NORMAL,
        RHT_LOCK_NESTED,
        RHT_LOCK_NESTED2,
};

/* The bucket lock is selected based on the hash and protects mutations
 * on a group of hash buckets.
 *
 * IMPORTANT: When holding the bucket lock of both the old and new table
 * during expansions and shrinking, the old bucket lock must always be
 * acquired first.
 */
static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash)
{
        return &tbl->locks[hash & tbl->locks_mask];
}

#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT))
#define ASSERT_BUCKET_LOCK(TBL, HASH) \
        BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH))

#ifdef CONFIG_PROVE_LOCKING
int lockdep_rht_mutex_is_held(struct rhashtable *ht)
{
        return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);

int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash)
{
        spinlock_t *lock = bucket_lock(tbl, hash);

        return (debug_locks) ? lockdep_is_held(lock) : 1;
}
EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held);
#endif

static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
{
        return (void *) he - ht->p.head_offset;
}

static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash)
{
        return hash & (tbl->size - 1);
}

static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr)
{
        u32 hash;

        if (unlikely(!ht->p.key_len))
                hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd);
        else
                hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len,
                                    ht->p.hash_rnd);

        return hash >> HASH_RESERVED_SPACE;
}

static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len)
{
        struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
        u32 hash;

        hash = ht->p.hashfn(key, len, ht->p.hash_rnd);
        hash >>= HASH_RESERVED_SPACE;

        return rht_bucket_index(tbl, hash);
}

static u32 head_hashfn(const struct rhashtable *ht,
                       const struct bucket_table *tbl,
                       const struct rhash_head *he)
{
        return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he)));
}

static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n)
{
        struct rhash_head __rcu **pprev;

        for (pprev = &tbl->buckets[n];
             !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n));
             pprev = &rht_dereference_bucket(*pprev, tbl, n)->next)
                ;

        return pprev;
}

static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl)
{
        unsigned int i, size;
#if defined(CONFIG_PROVE_LOCKING)
        unsigned int nr_pcpus = 2;
#else
        unsigned int nr_pcpus = num_possible_cpus();
#endif

        nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL);
        size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

        /* Never allocate more than one lock per bucket */
        size = min_t(unsigned int, size, tbl->size);

        if (sizeof(spinlock_t) != 0) {
#ifdef CONFIG_NUMA
                if (size * sizeof(spinlock_t) > PAGE_SIZE)
                        tbl->locks = vmalloc(size * sizeof(spinlock_t));
                else
#endif
                tbl->locks = kmalloc_array(size, sizeof(spinlock_t),
                                           GFP_KERNEL);
                if (!tbl->locks)
                        return -ENOMEM;
                for (i = 0; i < size; i++)
                        spin_lock_init(&tbl->locks[i]);
        }
        tbl->locks_mask = size - 1;

        return 0;
}

static void bucket_table_free(const struct bucket_table *tbl)
{
        if (tbl)
                kvfree(tbl->locks);

        kvfree(tbl);
}

static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
                                               size_t nbuckets)
{
        struct bucket_table *tbl;
        size_t size;
        int i;

        size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
        tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
        if (tbl == NULL)
                tbl = vzalloc(size);

        if (tbl == NULL)
                return NULL;

        tbl->size = nbuckets;

        if (alloc_bucket_locks(ht, tbl) < 0) {
                bucket_table_free(tbl);
                return NULL;
        }

        for (i = 0; i < nbuckets; i++)
                INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

        return tbl;
}

/**
 * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 * @ht:         hash table
 * @new_size:   new table size
 */
bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size)
{
        /* Expand table when exceeding 75% load */
        return atomic_read(&ht->nelems) > (new_size / 4 * 3) &&
               (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift);
}
EXPORT_SYMBOL_GPL(rht_grow_above_75);

/**
 * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 * @ht:         hash table
 * @new_size:   new table size
 */
bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size)
{
        /* Shrink table beneath 30% load */
        return atomic_read(&ht->nelems) < (new_size * 3 / 10) &&
               (atomic_read(&ht->shift) > ht->p.min_shift);
}
EXPORT_SYMBOL_GPL(rht_shrink_below_30);
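
/* Example (illustrative sketch, not part of this file): the two decision
 * functions above are typically plugged into struct rhashtable_params so
 * that rht_deferred_worker() resizes the table automatically. "struct
 * test_obj" is the hypothetical object type used in the rhashtable_init()
 * examples further below:
 *
 * struct rhashtable_params params = {
 *      .head_offset = offsetof(struct test_obj, node),
 *      .key_offset = offsetof(struct test_obj, key),
 *      .key_len = sizeof(int),
 *      .hashfn = jhash,
 *      .grow_decision = rht_grow_above_75,
 *      .shrink_decision = rht_shrink_below_30,
 * };
 */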

static void hashtable_chain_unzip(const struct rhashtable *ht,
                                  const struct bucket_table *new_tbl,
                                  struct bucket_table *old_tbl,
                                  size_t old_hash)
{
        struct rhash_head *he, *p, *next;
        spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL;
        unsigned int new_hash, new_hash2;

        ASSERT_BUCKET_LOCK(old_tbl, old_hash);

        /* Old bucket empty, no work needed. */
        p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl,
                                   old_hash);
        if (rht_is_a_nulls(p))
                return;

        new_hash = new_hash2 = head_hashfn(ht, new_tbl, p);
        new_bucket_lock = bucket_lock(new_tbl, new_hash);

        /* Advance the old bucket pointer one or more times until it
         * reaches a node that doesn't hash to the same new-table bucket
         * as the previous node. Call that previous node p.
         */
        rht_for_each_continue(he, p->next, old_tbl, old_hash) {
                new_hash2 = head_hashfn(ht, new_tbl, he);
                if (new_hash != new_hash2)
                        break;
                p = he;
        }
        rcu_assign_pointer(old_tbl->buckets[old_hash], p->next);

        spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);

        /* If we have encountered an entry that maps to a different bucket in
         * the new table, lock down that bucket as well as we might cut off
         * the end of the chain.
         */
        new_bucket_lock2 = bucket_lock(new_tbl, new_hash);
        if (new_bucket_lock != new_bucket_lock2)
                spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2);

        /* Find the subsequent node which does hash to the same
         * bucket as node P, or NULL if no such node exists.
         */
        INIT_RHT_NULLS_HEAD(next, ht, old_hash);
        if (!rht_is_a_nulls(he)) {
                rht_for_each_continue(he, he->next, old_tbl, old_hash) {
                        if (head_hashfn(ht, new_tbl, he) == new_hash) {
                                next = he;
                                break;
                        }
                }
        }

        /* Set p's next pointer to that subsequent node pointer,
         * bypassing the nodes which do not hash to p's bucket
         */
        rcu_assign_pointer(p->next, next);

        if (new_bucket_lock != new_bucket_lock2)
                spin_unlock_bh(new_bucket_lock2);
        spin_unlock_bh(new_bucket_lock);
}

static void link_old_to_new(struct bucket_table *new_tbl,
                            unsigned int new_hash, struct rhash_head *entry)
{
        spinlock_t *new_bucket_lock;

        new_bucket_lock = bucket_lock(new_tbl, new_hash);

        spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
        rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry);
        spin_unlock_bh(new_bucket_lock);
}

/**
 * rhashtable_expand - Expand hash table while allowing concurrent lookups
 * @ht:         the hash table to expand
 *
 * A secondary bucket array is allocated and the hash entries are migrated
 * while keeping them on both lists until the end of the RCU grace period.
 *
 * This function may only be called in a context where it is safe to call
 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
int rhashtable_expand(struct rhashtable *ht)
{
        struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
        struct rhash_head *he;
        spinlock_t *old_bucket_lock;
        unsigned int new_hash, old_hash;
        bool complete = false;

        ASSERT_RHT_MUTEX(ht);

        new_tbl = bucket_table_alloc(ht, old_tbl->size * 2);
        if (new_tbl == NULL)
                return -ENOMEM;

        atomic_inc(&ht->shift);

        /* Make insertions go into the new, empty table right away. Deletions
         * and lookups will be attempted in both tables until we synchronize.
         * The synchronize_rcu() guarantees that the new table has been picked
         * up by all writers, so no new additions go into the old table while
         * we relink.
         */
        rcu_assign_pointer(ht->future_tbl, new_tbl);
        synchronize_rcu();

        /* For each new bucket, search the corresponding old bucket for the
         * first entry that hashes to the new bucket, and link the end of
         * newly formed bucket chain (containing entries added to future
         * table) to that entry. Since all the entries which will end up in
         * the new bucket appear in the same old bucket, this constructs an
         * entirely valid new hash table, but with multiple buckets
         * "zipped" together into a single imprecise chain.
         */
        for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
                old_hash = rht_bucket_index(old_tbl, new_hash);
                old_bucket_lock = bucket_lock(old_tbl, old_hash);

                spin_lock_bh(old_bucket_lock);
                rht_for_each(he, old_tbl, old_hash) {
                        if (head_hashfn(ht, new_tbl, he) == new_hash) {
                                link_old_to_new(new_tbl, new_hash, he);
                                break;
                        }
                }
                spin_unlock_bh(old_bucket_lock);
        }

        /* Publish the new table pointer. Lookups may now traverse
         * the new table, but they will not benefit from any
         * additional efficiency until later steps unzip the buckets.
         */
        rcu_assign_pointer(ht->tbl, new_tbl);

        /* Unzip interleaved hash chains */
        while (!complete && !ht->being_destroyed) {
                /* Wait for readers. All new readers will see the new
                 * table, and thus no references to the old table will
                 * remain.
                 */
                synchronize_rcu();

                /* For each bucket in the old table (each of which
                 * contains items from multiple buckets of the new
                 * table): ...
                 */
                complete = true;
                for (old_hash = 0; old_hash < old_tbl->size; old_hash++) {
                        struct rhash_head *head;

                        old_bucket_lock = bucket_lock(old_tbl, old_hash);
                        spin_lock_bh(old_bucket_lock);

                        hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash);
                        head = rht_dereference_bucket(old_tbl->buckets[old_hash],
                                                      old_tbl, old_hash);
                        if (!rht_is_a_nulls(head))
                                complete = false;

                        spin_unlock_bh(old_bucket_lock);
                }
        }

        bucket_table_free(old_tbl);
        return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_expand);

/**
 * rhashtable_shrink - Shrink hash table while allowing concurrent lookups
 * @ht:         the hash table to shrink
 *
 * This function may only be called in a context where it is safe to call
 * synchronize_rcu(), e.g. not within a rcu_read_lock() section.
 *
 * The caller must ensure that no concurrent resizing occurs by holding
 * ht->mutex.
 *
 * It is valid to have concurrent insertions and deletions protected by per
 * bucket locks or concurrent RCU protected lookups and traversals.
 */
int rhashtable_shrink(struct rhashtable *ht)
{
        struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht);
        spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2;
        unsigned int new_hash;

        ASSERT_RHT_MUTEX(ht);

        new_tbl = bucket_table_alloc(ht, tbl->size / 2);
        if (new_tbl == NULL)
                return -ENOMEM;

        rcu_assign_pointer(ht->future_tbl, new_tbl);
        synchronize_rcu();

        /* Link the first entry in the old bucket to the end of the
         * bucket in the new table. As entries are concurrently being
         * added to the new table, lock down the new bucket. As we
         * always divide the size in half when shrinking, each bucket
         * in the new table maps to exactly two buckets in the old
         * table.
         *
         * As removals can occur concurrently on the old table, we need
         * to lock down both matching buckets in the old table.
         */
        for (new_hash = 0; new_hash < new_tbl->size; new_hash++) {
                old_bucket_lock1 = bucket_lock(tbl, new_hash);
                old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size);
                new_bucket_lock = bucket_lock(new_tbl, new_hash);

                spin_lock_bh(old_bucket_lock1);

                /* Depending on the bucket-to-lock mapping, the buckets in
                 * the lower and upper region may map to the same lock.
                 */
                if (old_bucket_lock1 != old_bucket_lock2) {
                        spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED);
                        spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2);
                } else {
                        spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);
                }

                rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
                                   tbl->buckets[new_hash]);
                rcu_assign_pointer(*bucket_tail(new_tbl, new_hash),
                                   tbl->buckets[new_hash + new_tbl->size]);

                spin_unlock_bh(new_bucket_lock);
                if (old_bucket_lock1 != old_bucket_lock2)
                        spin_unlock_bh(old_bucket_lock2);
                spin_unlock_bh(old_bucket_lock1);
        }

        /* Publish the new, valid hash table */
        rcu_assign_pointer(ht->tbl, new_tbl);
        atomic_dec(&ht->shift);

        /* Wait for readers. No new readers will have references to the
         * old hash table.
         */
        synchronize_rcu();

        bucket_table_free(tbl);

        return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_shrink);

static void rht_deferred_worker(struct work_struct *work)
{
        struct rhashtable *ht;
        struct bucket_table *tbl;

        ht = container_of(work, struct rhashtable, run_work);
        mutex_lock(&ht->mutex);
        tbl = rht_dereference(ht->tbl, ht);

        if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size))
                rhashtable_expand(ht);
        else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size))
                rhashtable_shrink(ht);

        mutex_unlock(&ht->mutex);
}

static void rhashtable_wakeup_worker(struct rhashtable *ht)
{
        struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
        struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
        size_t size = tbl->size;

        /* Only adjust the table if no resizing is currently in progress. */
        if (tbl == new_tbl &&
            ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) ||
             (ht->p.shrink_decision && ht->p.shrink_decision(ht, size))))
                schedule_work(&ht->run_work);
}

static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj,
                                struct bucket_table *tbl, u32 hash)
{
        struct rhash_head *head = rht_dereference_bucket(tbl->buckets[hash],
                                                         tbl, hash);

        if (rht_is_a_nulls(head))
                INIT_RHT_NULLS_HEAD(obj->next, ht, hash);
        else
                RCU_INIT_POINTER(obj->next, head);

        rcu_assign_pointer(tbl->buckets[hash], obj);

        atomic_inc(&ht->nelems);

        rhashtable_wakeup_worker(ht);
}

/**
 * rhashtable_insert - insert object into hash table
 * @ht:         hash table
 * @obj:        pointer to hash head inside object
 *
 * Will take a per bucket spinlock to protect against mutual mutations
 * on the same bucket. Multiple insertions may occur in parallel unless
 * they map to the same bucket lock.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj)
{
        struct bucket_table *tbl;
        spinlock_t *lock;
        unsigned hash;

        rcu_read_lock();

        tbl = rht_dereference_rcu(ht->future_tbl, ht);
        hash = head_hashfn(ht, tbl, obj);
        lock = bucket_lock(tbl, hash);

        spin_lock_bh(lock);
        __rhashtable_insert(ht, obj, tbl, hash);
        spin_unlock_bh(lock);

        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(rhashtable_insert);
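
/* Example usage (illustrative sketch, not part of this file; "my_ht" and
 * "struct test_obj" are hypothetical and assume a fixed length integer key
 * as in the rhashtable_init() examples below):
 *
 * struct test_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *
 * if (!obj)
 *      return -ENOMEM;
 * obj->key = 42;
 * rhashtable_insert(&my_ht, &obj->node);
 */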

/**
 * rhashtable_remove - remove object from hash table
 * @ht:         hash table
 * @obj:        pointer to hash head inside object
 *
 * Since the hash chain is singly linked, the removal operation needs to
 * walk the bucket chain upon removal. The removal operation is thus
 * considerably slower if the hash table is not correctly sized.
 *
 * Will automatically shrink the table via rhashtable_shrink() if the
 * shrink_decision function specified at rhashtable_init() returns true.
 *
 * The caller must ensure that no concurrent table mutations occur. It is
 * however valid to have concurrent lookups if they are RCU protected.
 */
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj)
{
        struct bucket_table *tbl;
        struct rhash_head __rcu **pprev;
        struct rhash_head *he;
        spinlock_t *lock;
        unsigned int hash;
        bool ret = false;

        rcu_read_lock();
        tbl = rht_dereference_rcu(ht->tbl, ht);
        hash = head_hashfn(ht, tbl, obj);

        lock = bucket_lock(tbl, hash);
        spin_lock_bh(lock);

restart:
        pprev = &tbl->buckets[hash];
        rht_for_each(he, tbl, hash) {
                if (he != obj) {
                        pprev = &he->next;
                        continue;
                }

                rcu_assign_pointer(*pprev, obj->next);

                ret = true;
                break;
        }

        /* The entry may be linked in either 'tbl', 'future_tbl', or both.
         * 'future_tbl' only exists for a short period of time during
         * resizing. Thus traversing both is fine and the added cost is
         * very rare.
         */
        if (tbl != rht_dereference_rcu(ht->future_tbl, ht)) {
                spin_unlock_bh(lock);

                tbl = rht_dereference_rcu(ht->future_tbl, ht);
                hash = head_hashfn(ht, tbl, obj);

                lock = bucket_lock(tbl, hash);
                spin_lock_bh(lock);
                goto restart;
        }

        spin_unlock_bh(lock);

        if (ret) {
                atomic_dec(&ht->nelems);
                rhashtable_wakeup_worker(ht);
        }

        rcu_read_unlock();

        return ret;
}
EXPORT_SYMBOL_GPL(rhashtable_remove);
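
/* Example usage (illustrative sketch, continuing the hypothetical "my_ht" and
 * "struct test_obj" from above, and assuming test_obj also embeds a
 * struct rcu_head named "rcu" so the object can be freed after readers drop
 * their references):
 *
 * if (rhashtable_remove(&my_ht, &obj->node))
 *      kfree_rcu(obj, rcu);
 */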

struct rhashtable_compare_arg {
        struct rhashtable *ht;
        const void *key;
};

static bool rhashtable_compare(void *ptr, void *arg)
{
        struct rhashtable_compare_arg *x = arg;
        struct rhashtable *ht = x->ht;

        return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len);
}

/**
 * rhashtable_lookup - lookup key in hash table
 * @ht:         hash table
 * @key:        pointer to key
 *
 * Computes the hash value for the key and traverses the bucket chain looking
 * for an entry with an identical key. The first matching entry is returned.
 *
 * This lookup function may only be used for a fixed key hash table (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 */
void *rhashtable_lookup(struct rhashtable *ht, const void *key)
{
        struct rhashtable_compare_arg arg = {
                .ht = ht,
                .key = key,
        };

        BUG_ON(!ht->p.key_len);

        return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg);
}
EXPORT_SYMBOL_GPL(rhashtable_lookup);
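
/* Example usage (illustrative sketch with the hypothetical "my_ht" and
 * "struct test_obj" from above; the lookup itself must run under
 * rcu_read_lock() and the returned object is only guaranteed to stay
 * valid within that read-side critical section):
 *
 * int key = 42;
 * struct test_obj *obj;
 *
 * rcu_read_lock();
 * obj = rhashtable_lookup(&my_ht, &key);
 * if (obj)
 *      pr_info("found key %d\n", obj->key);
 * rcu_read_unlock();
 */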

/**
 * rhashtable_lookup_compare - search hash table with compare function
 * @ht:         hash table
 * @key:        the pointer to the key
 * @compare:    compare function, must return true on match
 * @arg:        argument passed on to compare function
 *
 * Traverses the bucket chain behind the provided hash value and calls the
 * specified compare function for each entry.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Returns the first entry on which the compare function returned true.
 */
void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
                                bool (*compare)(void *, void *), void *arg)
{
        const struct bucket_table *tbl, *old_tbl;
        struct rhash_head *he;
        u32 hash;

        rcu_read_lock();

        old_tbl = rht_dereference_rcu(ht->tbl, ht);
        tbl = rht_dereference_rcu(ht->future_tbl, ht);
        hash = key_hashfn(ht, key, ht->p.key_len);
restart:
        rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) {
                if (!compare(rht_obj(ht, he), arg))
                        continue;
                rcu_read_unlock();
                return rht_obj(ht, he);
        }

        if (unlikely(tbl != old_tbl)) {
                tbl = old_tbl;
                goto restart;
        }
        rcu_read_unlock();

        return NULL;
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare);
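
/* Example usage (illustrative sketch): a caller can match on something other
 * than the memcmp()-based default used by rhashtable_lookup(). The object
 * type, field names and "my_ht" below are made up for the example; the key
 * passed for hashing must still cover key_len bytes at the configured
 * key_offset:
 *
 * static bool my_obj_compare(void *ptr, void *arg)
 * {
 *      const struct my_obj *obj = ptr;
 *
 *      return obj->id == *(const u32 *)arg;
 * }
 *
 * rcu_read_lock();
 * obj = rhashtable_lookup_compare(&my_ht, &id, my_obj_compare, &id);
 * rcu_read_unlock();
 */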

/**
 * rhashtable_lookup_insert - lookup and insert object into hash table
 * @ht:         hash table
 * @obj:        pointer to hash head inside object
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * This lookup function may only be used for a fixed key hash table (key_len
 * parameter set). It will BUG() if used inappropriately.
 *
 * It is safe to call this function from atomic context.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj)
{
        struct rhashtable_compare_arg arg = {
                .ht = ht,
                .key = rht_obj(ht, obj) + ht->p.key_offset,
        };

        BUG_ON(!ht->p.key_len);

        return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare,
                                                &arg);
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_insert);

/**
 * rhashtable_lookup_compare_insert - search and insert object to hash table
 *                                    with compare function
 * @ht:         hash table
 * @obj:        pointer to hash head inside object
 * @compare:    compare function, must return true on match
 * @arg:        argument passed on to compare function
 *
 * Locks down the bucket chain in both the old and new table if a resize
 * is in progress to ensure that writers can't remove from the old table
 * and can't insert to the new table during the atomic operation of search
 * and insertion. Searches for duplicates in both the old and new table if
 * a resize is in progress.
 *
 * Lookups may occur in parallel with hashtable mutations and resizing.
 *
 * Will trigger an automatic deferred table resizing if the size grows
 * beyond the watermark indicated by grow_decision() which can be passed
 * to rhashtable_init().
 */
bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
                                      struct rhash_head *obj,
                                      bool (*compare)(void *, void *),
                                      void *arg)
{
        struct bucket_table *new_tbl, *old_tbl;
        spinlock_t *new_bucket_lock, *old_bucket_lock;
        u32 new_hash, old_hash;
        bool success = true;

        BUG_ON(!ht->p.key_len);

        rcu_read_lock();

        old_tbl = rht_dereference_rcu(ht->tbl, ht);
        old_hash = head_hashfn(ht, old_tbl, obj);
        old_bucket_lock = bucket_lock(old_tbl, old_hash);
        spin_lock_bh(old_bucket_lock);

        new_tbl = rht_dereference_rcu(ht->future_tbl, ht);
        new_hash = head_hashfn(ht, new_tbl, obj);
        new_bucket_lock = bucket_lock(new_tbl, new_hash);
        if (unlikely(old_tbl != new_tbl))
                spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED);

        if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset,
                                      compare, arg)) {
                success = false;
                goto exit;
        }

        __rhashtable_insert(ht, obj, new_tbl, new_hash);

exit:
        if (unlikely(old_tbl != new_tbl))
                spin_unlock_bh(new_bucket_lock);
        spin_unlock_bh(old_bucket_lock);

        rcu_read_unlock();

        return success;
}
EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert);

static size_t rounded_hashtable_size(struct rhashtable_params *params)
{
        return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
                   1UL << params->min_shift);
}

/**
 * rhashtable_init - initialize a new hash table
 * @ht:         hash table to be initialized
 * @params:     configuration parameters
 *
 * Initializes a new hash table based on the provided configuration
 * parameters. A table can be configured either with a variable or
 * fixed length key:
 *
 * Configuration Example 1: Fixed length keys
 * struct test_obj {
 *      int                     key;
 *      void *                  my_member;
 *      struct rhash_head       node;
 * };
 *
 * struct rhashtable_params params = {
 *      .head_offset = offsetof(struct test_obj, node),
 *      .key_offset = offsetof(struct test_obj, key),
 *      .key_len = sizeof(int),
 *      .hashfn = jhash,
 *      .nulls_base = (1U << RHT_BASE_SHIFT),
 * };
 *
 * Configuration Example 2: Variable length keys
 * struct test_obj {
 *      [...]
 *      struct rhash_head       node;
 * };
 *
 * u32 my_hash_fn(const void *data, u32 seed)
 * {
 *      struct test_obj *obj = data;
 *
 *      return [... hash ...];
 * }
 *
 * struct rhashtable_params params = {
 *      .head_offset = offsetof(struct test_obj, node),
 *      .hashfn = jhash,
 *      .obj_hashfn = my_hash_fn,
 * };
 */
int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
{
        struct bucket_table *tbl;
        size_t size;

        size = HASH_DEFAULT_SIZE;

        if ((params->key_len && !params->hashfn) ||
            (!params->key_len && !params->obj_hashfn))
                return -EINVAL;

        if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
                return -EINVAL;

        params->min_shift = max_t(size_t, params->min_shift,
                                  ilog2(HASH_MIN_SIZE));

        if (params->nelem_hint)
                size = rounded_hashtable_size(params);

        memset(ht, 0, sizeof(*ht));
        mutex_init(&ht->mutex);
        memcpy(&ht->p, params, sizeof(*params));

        if (params->locks_mul)
                ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
        else
                ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

        tbl = bucket_table_alloc(ht, size);
        if (tbl == NULL)
                return -ENOMEM;

        atomic_set(&ht->nelems, 0);
        atomic_set(&ht->shift, ilog2(tbl->size));
        RCU_INIT_POINTER(ht->tbl, tbl);
        RCU_INIT_POINTER(ht->future_tbl, tbl);

        if (!ht->p.hash_rnd)
                get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd));

        if (ht->p.grow_decision || ht->p.shrink_decision)
                INIT_WORK(&ht->run_work, rht_deferred_worker);

        return 0;
}
EXPORT_SYMBOL_GPL(rhashtable_init);
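
/* Example usage (illustrative sketch): initializing a table with the fixed
 * length key parameters from Configuration Example 1 above. "my_ht" and the
 * surrounding error handling are hypothetical:
 *
 * static struct rhashtable my_ht;
 *
 * int err = rhashtable_init(&my_ht, &params);
 *
 * if (err)
 *      return err;
 */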

/**
 * rhashtable_destroy - destroy hash table
 * @ht:         the hash table to destroy
 *
 * Frees the bucket array. This function is not rcu safe, therefore the caller
 * has to make sure that no resizing may happen by unpublishing the hashtable
 * and waiting for the quiescent cycle before releasing the bucket array.
 */
void rhashtable_destroy(struct rhashtable *ht)
{
        ht->being_destroyed = true;

        if (ht->p.grow_decision || ht->p.shrink_decision)
                cancel_work_sync(&ht->run_work);

        mutex_lock(&ht->mutex);
        bucket_table_free(rht_dereference(ht->tbl, ht));
        mutex_unlock(&ht->mutex);
}
EXPORT_SYMBOL_GPL(rhashtable_destroy);