Btrfs: fix crash caused by block group removal

[cascardo/linux.git] / fs / btrfs / extent-tree.c
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 47c1ba1..c57bf38 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -607,6 +607,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                                 cache->cached = BTRFS_CACHE_NO;
                         } else {
                                 cache->cached = BTRFS_CACHE_STARTED;
+                               cache->has_caching_ctl = 1;
                         }
                 }
                 spin_unlock(&cache->lock);
@@ -627,6 +628,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                         cache->cached = BTRFS_CACHE_NO;
                 } else {
                         cache->cached = BTRFS_CACHE_STARTED;
+                       cache->has_caching_ctl = 1;
                 }
                 spin_unlock(&cache->lock);
                 wake_up(&caching_ctl->wait);
@@ -3162,7 +3164,19 @@ next_block_group(struct btrfs_root *root,
                  struct btrfs_block_group_cache *cache)
  {
         struct rb_node *node;
+
         spin_lock(&root->fs_info->block_group_cache_lock);
+
+       /* If our block group was removed, we need a full search. */
+       if (RB_EMPTY_NODE(&cache->cache_node)) {
+               const u64 next_bytenr = cache->key.objectid + cache->key.offset;
+
+               spin_unlock(&root->fs_info->block_group_cache_lock);
+               btrfs_put_block_group(cache);
+               cache = btrfs_lookup_first_block_group(root->fs_info,
+                                                      next_bytenr);
+               return cache;
+       }
         node = rb_next(&cache->cache_node);
         btrfs_put_block_group(cache);
         if (node) {
@@ -3504,6 +3518,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
         found->chunk_alloc = 0;
         found->flush = 0;
         init_waitqueue_head(&found->wait);
+       INIT_LIST_HEAD(&found->ro_bgs);
  
         ret = kobject_init_and_add(&found->kobj, &space_info_ktype,
                                     info->space_info_kobj, "%s",
@@ -8511,6 +8526,7 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force)
             min_allocable_bytes <= sinfo->total_bytes) {
                 sinfo->bytes_readonly += num_bytes;
                 cache->ro = 1;
+               list_add_tail(&cache->ro_list, &sinfo->ro_bgs);
                 ret = 0;
         }
  out:
@@ -8565,15 +8581,20 @@ int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
  
  /*
   * helper to account the unused space of all the readonly block group in the
- * list. takes mirrors into account.
+ * space_info. takes mirrors into account.
   */
-static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
+u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
  {
         struct btrfs_block_group_cache *block_group;
         u64 free_bytes = 0;
         int factor;
  
-       list_for_each_entry(block_group, groups_list, list) {
+       /* This only feeds df; an unlocked, racy emptiness check is fine. */
+       if (list_empty(&sinfo->ro_bgs))
+               return 0;
+
+       spin_lock(&sinfo->lock);
+       list_for_each_entry(block_group, &sinfo->ro_bgs, ro_list) {
                 spin_lock(&block_group->lock);
  
                 if (!block_group->ro) {
@@ -8594,26 +8615,6 @@ static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
  
                 spin_unlock(&block_group->lock);
         }
-
-       return free_bytes;
-}
-
-/*
- * helper to account the unused space of all the readonly block group in the
- * space_info. takes mirrors into account.
- */
-u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
-{
-       int i;
-       u64 free_bytes = 0;
-
-       spin_lock(&sinfo->lock);
-
-       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
-               if (!list_empty(&sinfo->block_groups[i]))
-                       free_bytes += __btrfs_get_ro_block_group_free_space(
-                                               &sinfo->block_groups[i]);
-
         spin_unlock(&sinfo->lock);
  
         return free_bytes;
@@ -8633,6 +8634,7 @@ void btrfs_set_block_group_rw(struct btrfs_root *root,
                     cache->bytes_super - btrfs_block_group_used(&cache->item);
         sinfo->bytes_readonly -= num_bytes;
         cache->ro = 0;
+       list_del_init(&cache->ro_list);
         spin_unlock(&cache->lock);
         spin_unlock(&sinfo->lock);
  }
@@ -9002,6 +9004,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
         INIT_LIST_HEAD(&cache->list);
         INIT_LIST_HEAD(&cache->cluster_list);
         INIT_LIST_HEAD(&cache->bg_list);
+       INIT_LIST_HEAD(&cache->ro_list);
         btrfs_init_free_space_ctl(cache);
  
         return cache;
@@ -9316,6 +9319,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         int ret;
         int index;
         int factor;
+       struct btrfs_caching_control *caching_ctl = NULL;
  
         root = root->fs_info->extent_root;
  
@@ -9400,6 +9404,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         spin_lock(&root->fs_info->block_group_cache_lock);
         rb_erase(&block_group->cache_node,
                  &root->fs_info->block_group_cache_tree);
+       RB_CLEAR_NODE(&block_group->cache_node);
  
         if (root->fs_info->first_logical_byte == block_group->key.objectid)
                 root->fs_info->first_logical_byte = (u64)-1;
@@ -9411,6 +9416,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
          * are still on the list after taking the semaphore
          */
         list_del_init(&block_group->list);
+       list_del_init(&block_group->ro_list);
         if (list_empty(&block_group->space_info->block_groups[index])) {
                 kobj = block_group->space_info->block_group_kobjs[index];
                 block_group->space_info->block_group_kobjs[index] = NULL;
@@ -9422,8 +9428,32 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
                 kobject_put(kobj);
         }
  
+       if (block_group->has_caching_ctl)
+               caching_ctl = get_caching_control(block_group);
         if (block_group->cached == BTRFS_CACHE_STARTED)
                 wait_block_group_cache_done(block_group);
+       if (block_group->has_caching_ctl) {
+               down_write(&root->fs_info->commit_root_sem);
+               if (!caching_ctl) {
+                       struct btrfs_caching_control *ctl;
+
+                       list_for_each_entry(ctl,
+                                   &root->fs_info->caching_block_groups, list)
+                               if (ctl->block_group == block_group) {
+                                       caching_ctl = ctl;
+                                       atomic_inc(&caching_ctl->count);
+                                       break;
+                               }
+               }
+               if (caching_ctl)
+                       list_del_init(&caching_ctl->list);
+               up_write(&root->fs_info->commit_root_sem);
+               if (caching_ctl) {
+                       /* Once for the caching bgs list and once for us. */
+                       put_caching_control(caching_ctl);
+                       put_caching_control(caching_ctl);
+               }
+       }
  
         btrfs_remove_free_space_cache(block_group);
  
@@ -9523,10 +9553,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                  */
                 start = block_group->key.objectid;
                 end = start + block_group->key.offset - 1;
-               clear_extent_bits(&fs_info->freed_extents[0], start, end,
+               ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
                                   EXTENT_DIRTY, GFP_NOFS);
-               clear_extent_bits(&fs_info->freed_extents[1], start, end,
+               if (ret) {
+                       btrfs_set_block_group_rw(root, block_group);
+                       goto end_trans;
+               }
+               ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
                                   EXTENT_DIRTY, GFP_NOFS);
+               if (ret) {
+                       btrfs_set_block_group_rw(root, block_group);
+                       goto end_trans;
+               }
  
                 /* Reset pinned so btrfs_put_block_group doesn't complain */
                 block_group->pinned = 0;
@@ -9537,6 +9575,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
                  */
                 ret = btrfs_remove_chunk(trans, root,
                                          block_group->key.objectid);
+end_trans:
                 btrfs_end_transaction(trans, root);
  next:
                 btrfs_put_block_group(block_group);
@@ -9657,12 +9696,14 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
  }
  
  /*
- * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(),
- * they are used to prevent the some tasks writing data into the page cache
- * by nocow before the subvolume is snapshoted, but flush the data into
- * the disk after the snapshot creation.
+ * btrfs_{start,end}_write_no_snapshoting() are similar to
+ * mnt_{want,drop}_write(). They are used to prevent some tasks from writing
+ * data into the page cache through nocow before the subvolume is snapshotted
+ * and then flushing that data to disk after the snapshot is created, or to
+ * prevent operations that, if run while snapshotting is ongoing, would make
+ * the snapshot inconsistent (e.g. writes followed by expanding truncates).
   */
-void btrfs_end_nocow_write(struct btrfs_root *root)
+void btrfs_end_write_no_snapshoting(struct btrfs_root *root)
  {
         percpu_counter_dec(&root->subv_writers->counter);
         /*
@@ -9674,7 +9715,7 @@ void btrfs_end_nocow_write(struct btrfs_root *root)
                 wake_up(&root->subv_writers->wait);
  }
  
-int btrfs_start_nocow_write(struct btrfs_root *root)
+int btrfs_start_write_no_snapshoting(struct btrfs_root *root)
  {
         if (atomic_read(&root->will_be_snapshoted))
                 return 0;
@@ -9685,7 +9726,7 @@ int btrfs_start_nocow_write(struct btrfs_root *root)
          */
         smp_mb();
         if (atomic_read(&root->will_be_snapshoted)) {
-               btrfs_end_nocow_write(root);
+               btrfs_end_write_no_snapshoting(root);
                 return 0;
         }
         return 1;