Btrfs: fix crash caused by block group removal
[cascardo/linux.git] / fs / btrfs / extent-tree.c
index d565895..c57bf38 100644 (file)
@@ -607,6 +607,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                                cache->cached = BTRFS_CACHE_NO;
                        } else {
                                cache->cached = BTRFS_CACHE_STARTED;
+                               cache->has_caching_ctl = 1;
                        }
                }
                spin_unlock(&cache->lock);
@@ -627,6 +628,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                        cache->cached = BTRFS_CACHE_NO;
                } else {
                        cache->cached = BTRFS_CACHE_STARTED;
+                       cache->has_caching_ctl = 1;
                }
                spin_unlock(&cache->lock);
                wake_up(&caching_ctl->wait);
@@ -710,8 +712,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
        rcu_read_unlock();
 }
 
-/* simple helper to search for an existing extent at a given offset */
-int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
+/* simple helper to search for an existing data extent at a given offset */
+int btrfs_lookup_data_extent(struct btrfs_root *root, u64 start, u64 len)
 {
        int ret;
        struct btrfs_key key;
@@ -726,12 +728,6 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
        key.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
                                0, 0);
-       if (ret > 0) {
-               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
-               if (key.objectid == start &&
-                   key.type == BTRFS_METADATA_ITEM_KEY)
-                       ret = 0;
-       }
        btrfs_free_path(path);
        return ret;
 }
@@ -786,7 +782,6 @@ search_again:
        else
                key.type = BTRFS_EXTENT_ITEM_KEY;
 
-again:
        ret = btrfs_search_slot(trans, root->fs_info->extent_root,
                                &key, path, 0, 0);
        if (ret < 0)
@@ -802,13 +797,6 @@ again:
                            key.offset == root->nodesize)
                                ret = 0;
                }
-               if (ret) {
-                       key.objectid = bytenr;
-                       key.type = BTRFS_EXTENT_ITEM_KEY;
-                       key.offset = root->nodesize;
-                       btrfs_release_path(path);
-                       goto again;
-               }
        }
 
        if (ret == 0) {
@@ -3176,7 +3164,19 @@ next_block_group(struct btrfs_root *root,
                 struct btrfs_block_group_cache *cache)
 {
        struct rb_node *node;
+
        spin_lock(&root->fs_info->block_group_cache_lock);
+
+       /* If our block group was removed, we need a full search. */
+       if (RB_EMPTY_NODE(&cache->cache_node)) {
+               const u64 next_bytenr = cache->key.objectid + cache->key.offset;
+
+               spin_unlock(&root->fs_info->block_group_cache_lock);
+               btrfs_put_block_group(cache);
+               cache = btrfs_lookup_first_block_group(root->fs_info,
+                                                      next_bytenr);
+               return cache;
+       }
        node = rb_next(&cache->cache_node);
        btrfs_put_block_group(cache);
        if (node) {
@@ -3518,6 +3518,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
        found->chunk_alloc = 0;
        found->flush = 0;
        init_waitqueue_head(&found->wait);
+       INIT_LIST_HEAD(&found->ro_bgs);
 
        ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
                                    info->space_info_kobj, "%s",
@@ -8525,6 +8526,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
            min_allocable_bytes <= sinfo->total_bytes) {
                sinfo->bytes_readonly += num_bytes;
                cache->ro = 1;
+               list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
                ret = 0;
        }
 out:
@@ -8579,15 +8581,20 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 
 /*
  * helper to account the unused space of all the readonly block group in the
- * list. takes mirrors into account.
+ * space_info. takes mirrors into account.
  */
-static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
 {
        struct btrfs_block_group_cache *block_group;
        u64 free_bytes = 0;
        int factor;
 
-       list_for_each_entry(block_group, groups_list, list) {
+       /* It's df, we don't care if it's racy */
+       if (list_empty(&sinfo->ro_bgs))
+               return 0;
+
+       spin_lock(&sinfo->lock);
+       list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
                spin_lock(&block_group->lock);
 
                if (!block_group->ro) {
@@ -8608,26 +8615,6 @@ static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
 
                spin_unlock(&block_group->lock);
        }
-
-       return free_bytes;
-}
-
-/*
- * helper to account the unused space of all the readonly block group in the
- * space_info. takes mirrors into account.
- */
-u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
-{
-       int i;
-       u64 free_bytes = 0;
-
-       spin_lock(&sinfo->lock);
-
-       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
-               if (!list_empty(&sinfo->block_groups[i]))
-                       free_bytes += __btrfs_get_ro_block_group_free_space(
-                                               &sinfo->block_groups[i]);
-
        spin_unlock(&sinfo->lock);
 
        return free_bytes;
@@ -8647,6 +8634,7 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,
                    cache->bytes_super - btrfs_block_group_used(&cache->item);
        sinfo->bytes_readonly -= num_bytes;
        cache->ro = 0;
+       list_del_init(&cache->ro_list);
        spin_unlock(&cache->lock);
        spin_unlock(&sinfo->lock);
 }
@@ -9016,6 +9004,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->bg_list);
+       INIT_LIST_HEAD(&cache->ro_list);
        btrfs_init_free_space_ctl(cache);
 
        return cache;
@@ -9330,6 +9319,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        int ret;
        int index;
        int factor;
+       struct btrfs_caching_control *caching_ctl = NULL;
 
        root = root->fs_info->extent_root;
 
@@ -9414,6 +9404,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_lock(&root->fs_info->block_group_cache_lock);
        rb_erase(&block_group->cache_node,
                 &root->fs_info->block_group_cache_tree);
+       RB_CLEAR_NODE(&block_group->cache_node);
 
        if (root->fs_info->first_logical_byte == block_group->key.objectid)
                root->fs_info->first_logical_byte = (u64)-1;
@@ -9425,6 +9416,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         * are still on the list after taking the semaphore
         */
        list_del_init(&block_group->list);
+       list_del_init(&block_group->ro_list);
        if (list_empty(&block_group->space_info->block_groups[index])) {
                kobj = block_group->space_info->block_group_kobjs[index];
                block_group->space_info->block_group_kobjs[index] = NULL;
@@ -9436,8 +9428,32 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                kobject_put(kobj);
        }
 
+       if (block_group->has_caching_ctl)
+               caching_ctl = get_caching_control(block_group);
        if (block_group->cached == BTRFS_CACHE_STARTED)
                wait_block_group_cache_done(block_group);
+       if (block_group->has_caching_ctl) {
+               down_write(&root->fs_info->commit_root_sem);
+               if (!caching_ctl) {
+                       struct btrfs_caching_control *ctl;
+
+                       list_for_each_entry(ctl,
+                                   &root->fs_info->caching_block_groups, list)
+                               if (ctl->block_group == block_group) {
+                                       caching_ctl = ctl;
+                                       atomic_inc(&caching_ctl->count);
+                                       break;
+                               }
+               }
+               if (caching_ctl)
+                       list_del_init(&caching_ctl->list);
+               up_write(&root->fs_info->commit_root_sem);
+               if (caching_ctl) {
+                       /* Once for the caching bgs list and once for us. */
+                       put_caching_control(caching_ctl);
+                       put_caching_control(caching_ctl);
+               }
+       }
 
        btrfs_remove_free_space_cache(block_group);
 
@@ -9537,10 +9553,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 */
                start = block_group->key.objectid;
                end = start + block_group->key.offset - 1;
-               clear_extent_bits(&fs_info->freed_extents[0], start, end,
+               ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
                                  EXTENT_DIRTY, GFP_NOFS);
-               clear_extent_bits(&fs_info->freed_extents[1], start, end,
+               if (ret) {
+                       btrfs_set_block_group_rw(root, block_group);
+                       goto end_trans;
+               }
+               ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
                                  EXTENT_DIRTY, GFP_NOFS);
+               if (ret) {
+                       btrfs_set_block_group_rw(root, block_group);
+                       goto end_trans;
+               }
 
                /* Reset pinned so btrfs_put_block_group doesn't complain */
                block_group->pinned = 0;
@@ -9551,6 +9575,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                 */
                ret = btrfs_remove_chunk(trans, root,
                                         block_group->key.objectid);
+end_trans:
                btrfs_end_transaction(trans, root);
 next:
                btrfs_put_block_group(block_group);
@@ -9671,12 +9696,14 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 }
 
 /*
- * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(),
- * they are used to prevent the some tasks writing data into the page cache
- * by nocow before the subvolume is snapshoted, but flush the data into
- * the disk after the snapshot creation.
+ * btrfs_{start,end}_write_no_snapshoting() are similar to
+ * mnt_{want,drop}_write(), they are used to prevent some tasks from writing
+ * data into the page cache through nocow before the subvolume is snapshoted,
+ * but flush the data into disk after the snapshot creation, or to prevent
+ * operations while snapshoting is ongoing and that cause the snapshot to be
+ * inconsistent (writes followed by expanding truncates for example).
  */
-void btrfs_end_nocow_write(struct btrfs_root *root)
+void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
 {
        percpu_counter_dec(&root->subv_writers->counter);
        /*
@@ -9688,7 +9715,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root)
                wake_up(&root->subv_writers->wait);
 }
 
-int btrfs_start_nocow_write(struct btrfs_root *root)
+int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
 {
        if (atomic_read(&root->will_be_snapshoted))
                return 0;
@@ -9699,7 +9726,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root)
         */
        smp_mb();
        if (atomic_read(&root->will_be_snapshoted)) {
-               btrfs_end_nocow_write(root);
+               btrfs_end_write_no_snapshoting(root);
                return 0;
        }
        return 1;