mbcache2: Use referenced bit instead of LRU
authorJan Kara <jack@suse.cz>
Mon, 22 Feb 2016 23:23:47 +0000 (18:23 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Mon, 22 Feb 2016 23:23:47 +0000 (18:23 -0500)
Currently we maintain perfect LRU list by moving entry to the tail of
the list when it gets used. However these operations on cache-global
list are relatively expensive.

In this patch we switch to lazy updates of LRU list. Whenever entry gets
used, we set a referenced bit in it. When reclaiming entries, we give
referenced entries another round in the LRU. Since the list is not a
real LRU anymore, rename it to just 'list'.

In my testing this logic gives about 30% boost to workloads with mostly
unique xattr blocks (e.g. xattr-bench with 10 files and 10000 unique
xattr values).

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
fs/mbcache2.c
include/linux/mbcache2.h

index 3e3198d..49f7a6f 100644 (file)
@@ -30,9 +30,9 @@ struct mb2_cache {
        int                     c_bucket_bits;
        /* Maximum entries in cache to avoid degrading hash too much */
        int                     c_max_entries;
-       /* Protects c_lru_list, c_entry_count */
-       spinlock_t              c_lru_list_lock;
-       struct list_head        c_lru_list;
+       /* Protects c_list, c_entry_count */
+       spinlock_t              c_list_lock;
+       struct list_head        c_list;
        /* Number of entries in cache */
        unsigned long           c_entry_count;
        struct shrinker         c_shrink;
@@ -45,6 +45,29 @@ static struct kmem_cache *mb2_entry_cache;
 static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
                                      unsigned int nr_to_scan);
 
+static inline bool mb2_cache_entry_referenced(struct mb2_cache_entry *entry)
+{
+       return entry->_e_hash_list_head & 1;
+}
+
+static inline void mb2_cache_entry_set_referenced(struct mb2_cache_entry *entry)
+{
+       entry->_e_hash_list_head |= 1;
+}
+
+static inline void mb2_cache_entry_clear_referenced(
+                                       struct mb2_cache_entry *entry)
+{
+       entry->_e_hash_list_head &= ~1;
+}
+
+static inline struct hlist_bl_head *mb2_cache_entry_head(
+                                       struct mb2_cache_entry *entry)
+{
+       return (struct hlist_bl_head *)
+                       (entry->_e_hash_list_head & ~1);
+}
+
 /*
  * Number of entries to reclaim synchronously when there are too many entries
  * in cache
@@ -80,13 +103,13 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
        if (!entry)
                return -ENOMEM;
 
-       INIT_LIST_HEAD(&entry->e_lru_list);
+       INIT_LIST_HEAD(&entry->e_list);
        /* One ref for hash, one ref returned */
        atomic_set(&entry->e_refcnt, 1);
        entry->e_key = key;
        entry->e_block = block;
        head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
-       entry->e_hash_list_head = head;
+       entry->_e_hash_list_head = (unsigned long)head;
        hlist_bl_lock(head);
        hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
                if (dup->e_key == key && dup->e_block == block) {
@@ -98,12 +121,12 @@ int mb2_cache_entry_create(struct mb2_cache *cache, gfp_t mask, u32 key,
        hlist_bl_add_head(&entry->e_hash_list, head);
        hlist_bl_unlock(head);
 
-       spin_lock(&cache->c_lru_list_lock);
-       list_add_tail(&entry->e_lru_list, &cache->c_lru_list);
+       spin_lock(&cache->c_list_lock);
+       list_add_tail(&entry->e_list, &cache->c_list);
        /* Grab ref for LRU list */
        atomic_inc(&entry->e_refcnt);
        cache->c_entry_count++;
-       spin_unlock(&cache->c_lru_list_lock);
+       spin_unlock(&cache->c_list_lock);
 
        return 0;
 }
@@ -124,7 +147,7 @@ static struct mb2_cache_entry *__entry_find(struct mb2_cache *cache,
        struct hlist_bl_head *head;
 
        if (entry)
-               head = entry->e_hash_list_head;
+               head = mb2_cache_entry_head(entry);
        else
                head = &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
        hlist_bl_lock(head);
@@ -203,13 +226,13 @@ void mb2_cache_entry_delete_block(struct mb2_cache *cache, u32 key,
                        /* We keep hash list reference to keep entry alive */
                        hlist_bl_del_init(&entry->e_hash_list);
                        hlist_bl_unlock(head);
-                       spin_lock(&cache->c_lru_list_lock);
-                       if (!list_empty(&entry->e_lru_list)) {
-                               list_del_init(&entry->e_lru_list);
+                       spin_lock(&cache->c_list_lock);
+                       if (!list_empty(&entry->e_list)) {
+                               list_del_init(&entry->e_list);
                                cache->c_entry_count--;
                                atomic_dec(&entry->e_refcnt);
                        }
-                       spin_unlock(&cache->c_lru_list_lock);
+                       spin_unlock(&cache->c_list_lock);
                        mb2_cache_entry_put(cache, entry);
                        return;
                }
@@ -222,15 +245,12 @@ EXPORT_SYMBOL(mb2_cache_entry_delete_block);
  * @cache - cache the entry belongs to
  * @entry - entry that got used
  *
- * Move entry in lru list to reflect the fact that it was used.
+ * Marks entry as used to give hit higher chances of surviving in cache.
  */
 void mb2_cache_entry_touch(struct mb2_cache *cache,
                           struct mb2_cache_entry *entry)
 {
-       spin_lock(&cache->c_lru_list_lock);
-       if (!list_empty(&entry->e_lru_list))
-               list_move_tail(&cache->c_lru_list, &entry->e_lru_list);
-       spin_unlock(&cache->c_lru_list_lock);
+       mb2_cache_entry_set_referenced(entry);
 }
 EXPORT_SYMBOL(mb2_cache_entry_touch);
 
@@ -251,18 +271,23 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
        struct hlist_bl_head *head;
        unsigned int shrunk = 0;
 
-       spin_lock(&cache->c_lru_list_lock);
-       while (nr_to_scan-- && !list_empty(&cache->c_lru_list)) {
-               entry = list_first_entry(&cache->c_lru_list,
-                                        struct mb2_cache_entry, e_lru_list);
-               list_del_init(&entry->e_lru_list);
+       spin_lock(&cache->c_list_lock);
+       while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+               entry = list_first_entry(&cache->c_list,
+                                        struct mb2_cache_entry, e_list);
+               if (mb2_cache_entry_referenced(entry)) {
+                       mb2_cache_entry_clear_referenced(entry);
+                       list_move_tail(&cache->c_list, &entry->e_list);
+                       continue;
+               }
+               list_del_init(&entry->e_list);
                cache->c_entry_count--;
                /*
                 * We keep LRU list reference so that entry doesn't go away
                 * from under us.
                 */
-               spin_unlock(&cache->c_lru_list_lock);
-               head = entry->e_hash_list_head;
+               spin_unlock(&cache->c_list_lock);
+               head = mb2_cache_entry_head(entry);
                hlist_bl_lock(head);
                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
                        hlist_bl_del_init(&entry->e_hash_list);
@@ -272,9 +297,9 @@ static unsigned long mb2_cache_shrink(struct mb2_cache *cache,
                if (mb2_cache_entry_put(cache, entry))
                        shrunk++;
                cond_resched();
-               spin_lock(&cache->c_lru_list_lock);
+               spin_lock(&cache->c_list_lock);
        }
-       spin_unlock(&cache->c_lru_list_lock);
+       spin_unlock(&cache->c_list_lock);
 
        return shrunk;
 }
@@ -318,8 +343,8 @@ struct mb2_cache *mb2_cache_create(int bucket_bits)
                goto err_out;
        cache->c_bucket_bits = bucket_bits;
        cache->c_max_entries = bucket_count << 4;
-       INIT_LIST_HEAD(&cache->c_lru_list);
-       spin_lock_init(&cache->c_lru_list_lock);
+       INIT_LIST_HEAD(&cache->c_list);
+       spin_lock_init(&cache->c_list_lock);
        cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
                                GFP_KERNEL);
        if (!cache->c_hash) {
@@ -361,13 +386,13 @@ void mb2_cache_destroy(struct mb2_cache *cache)
         * We don't bother with any locking. Cache must not be used at this
         * point.
         */
-       list_for_each_entry_safe(entry, next, &cache->c_lru_list, e_lru_list) {
+       list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
                if (!hlist_bl_unhashed(&entry->e_hash_list)) {
                        hlist_bl_del_init(&entry->e_hash_list);
                        atomic_dec(&entry->e_refcnt);
                } else
                        WARN_ON(1);
-               list_del(&entry->e_lru_list);
+               list_del(&entry->e_list);
                WARN_ON(atomic_read(&entry->e_refcnt) != 1);
                mb2_cache_entry_put(cache, entry);
        }
index b6f160f..c934843 100644 (file)
@@ -10,8 +10,8 @@
 struct mb2_cache;
 
 struct mb2_cache_entry {
-       /* LRU list - protected by cache->c_lru_list_lock */
-       struct list_head        e_lru_list;
+       /* List of entries in cache - protected by cache->c_list_lock */
+       struct list_head        e_list;
        /* Hash table list - protected by bitlock in e_hash_list_head */
        struct hlist_bl_node    e_hash_list;
        atomic_t                e_refcnt;
@@ -19,8 +19,11 @@ struct mb2_cache_entry {
        u32                     e_key;
        /* Block number of hashed block - stable during lifetime of the entry */
        sector_t                e_block;
-       /* Head of hash list (for list bit lock) - stable */
-       struct hlist_bl_head    *e_hash_list_head;
+       /*
+        * Head of hash list (for list bit lock) - stable. Combined with
+        * referenced bit of entry
+        */
+       unsigned long           _e_hash_list_head;
 };
 
 struct mb2_cache *mb2_cache_create(int bucket_bits);