btrfs: do not background blkdev_put()
[cascardo/linux.git] / fs / btrfs / volumes.c
index da9e003..9a6f0dc 100644 (file)
@@ -140,7 +140,6 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
-static void btrfs_close_one_device(struct btrfs_device *device);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
@@ -835,10 +834,6 @@ static void __free_device(struct work_struct *work)
        struct btrfs_device *device;
 
        device = container_of(work, struct btrfs_device, rcu_work);
-
-       if (device->bdev)
-               blkdev_put(device->bdev, device->mode);
-
        rcu_string_free(device->name);
        kfree(device);
 }
@@ -853,6 +848,54 @@ static void free_device(struct rcu_head *head)
        schedule_work(&device->rcu_work);
 }
 
+static void btrfs_close_bdev(struct btrfs_device *device)
+{
+       if (device->bdev && device->writeable) {
+               sync_blockdev(device->bdev);
+               invalidate_bdev(device->bdev);
+       }
+
+       if (device->bdev)
+               blkdev_put(device->bdev, device->mode);
+}
+
+static void btrfs_close_one_device(struct btrfs_device *device)
+{
+       struct btrfs_fs_devices *fs_devices = device->fs_devices;
+       struct btrfs_device *new_device;
+       struct rcu_string *name;
+
+       if (device->bdev)
+               fs_devices->open_devices--;
+
+       if (device->writeable &&
+           device->devid != BTRFS_DEV_REPLACE_DEVID) {
+               list_del_init(&device->dev_alloc_list);
+               fs_devices->rw_devices--;
+       }
+
+       if (device->missing)
+               fs_devices->missing_devices--;
+
+       btrfs_close_bdev(device);
+
+       new_device = btrfs_alloc_device(NULL, &device->devid,
+                                       device->uuid);
+       BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
+
+       /* Safe because we are under uuid_mutex */
+       if (device->name) {
+               name = rcu_string_strdup(device->name->str, GFP_NOFS);
+               BUG_ON(!name); /* -ENOMEM */
+               rcu_assign_pointer(new_device->name, name);
+       }
+
+       list_replace_rcu(&device->dev_list, &new_device->dev_list);
+       new_device->fs_devices = device->fs_devices;
+
+       call_rcu(&device->rcu, free_device);
+}
+
 static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device, *tmp;
@@ -1893,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
                btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device);
        }
 
+       btrfs_close_bdev(device);
+
        call_rcu(&device->rcu, free_device);
 
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
@@ -1986,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
                /* zero out the old super if it is writable */
                btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
        }
+
+       btrfs_close_bdev(srcdev);
+
        call_rcu(&srcdev->rcu, free_device);
 
        /*
@@ -2041,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
         * the device_list_mutex lock.
         */
        btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+
+       btrfs_close_bdev(tgtdev);
        call_rcu(&tgtdev->rcu, free_device);
 }
 
@@ -2399,14 +2449,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
                ret = init_first_rw_device(trans, root, device);
                unlock_chunks(root);
                if (ret) {
-                       btrfs_abort_transaction(trans, root, ret);
+                       btrfs_abort_transaction(trans, ret);
                        goto error_trans;
                }
        }
 
        ret = btrfs_add_device(trans, root, device);
        if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
+               btrfs_abort_transaction(trans, ret);
                goto error_trans;
        }
 
@@ -2415,7 +2465,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
                ret = btrfs_finish_sprout(trans, root);
                if (ret) {
-                       btrfs_abort_transaction(trans, root, ret);
+                       btrfs_abort_transaction(trans, ret);
                        goto error_trans;
                }
 
@@ -2801,7 +2851,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                                            &dev_extent_len);
                if (ret) {
                        mutex_unlock(&fs_devices->device_list_mutex);
-                       btrfs_abort_transaction(trans, root, ret);
+                       btrfs_abort_transaction(trans, ret);
                        goto out;
                }
 
@@ -2820,7 +2870,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                        ret = btrfs_update_device(trans, map->stripes[i].dev);
                        if (ret) {
                                mutex_unlock(&fs_devices->device_list_mutex);
-                               btrfs_abort_transaction(trans, root, ret);
+                               btrfs_abort_transaction(trans, ret);
                                goto out;
                        }
                }
@@ -2829,7 +2879,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
 
        ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
        if (ret) {
-               btrfs_abort_transaction(trans, root, ret);
+               btrfs_abort_transaction(trans, ret);
                goto out;
        }
 
@@ -2838,14 +2888,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
                ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
                if (ret) {
-                       btrfs_abort_transaction(trans, root, ret);
+                       btrfs_abort_transaction(trans, ret);
                        goto out;
                }
        }
 
        ret = btrfs_remove_block_group(trans, extent_root, chunk_offset, em);
        if (ret) {
-               btrfs_abort_transaction(trans, extent_root, ret);
+               btrfs_abort_transaction(trans, ret);
                goto out;
        }
 
@@ -2902,7 +2952,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
         * chunk tree entries
         */
        ret = btrfs_remove_chunk(trans, root, chunk_offset);
-       btrfs_end_transaction(trans, root);
+       btrfs_end_transaction(trans, extent_root);
        return ret;
 }
 
@@ -3421,7 +3471,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        u64 size_to_free;
        u64 chunk_type;
        struct btrfs_chunk *chunk;
-       struct btrfs_path *path;
+       struct btrfs_path *path = NULL;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_trans_handle *trans;
@@ -3455,13 +3505,33 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
                ret = btrfs_shrink_device(device, old_size - size_to_free);
                if (ret == -ENOSPC)
                        break;
-               BUG_ON(ret);
+               if (ret) {
+                       /* btrfs_shrink_device never returns ret > 0 */
+                       WARN_ON(ret > 0);
+                       goto error;
+               }
 
                trans = btrfs_start_transaction(dev_root, 0);
-               BUG_ON(IS_ERR(trans));
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+                       btrfs_info_in_rcu(fs_info,
+                "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu",
+                                         rcu_str_deref(device->name), ret,
+                                         old_size, old_size - size_to_free);
+                       goto error;
+               }
 
                ret = btrfs_grow_device(trans, device, old_size);
-               BUG_ON(ret);
+               if (ret) {
+                       btrfs_end_transaction(trans, dev_root);
+                       /* btrfs_grow_device never returns ret > 0 */
+                       WARN_ON(ret > 0);
+                       btrfs_info_in_rcu(fs_info,
+                "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu",
+                                         rcu_str_deref(device->name), ret,
+                                         old_size, old_size - size_to_free);
+                       goto error;
+               }
 
                btrfs_end_transaction(trans, dev_root);
        }
@@ -3885,7 +3955,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
        }
        spin_unlock(&fs_info->balance_lock);
 
-       if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+       if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
                btrfs_info(fs_info, "force skipping balance");
                return 0;
        }
@@ -4240,7 +4310,8 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
                                      BTRFS_UUID_TREE_OBJECTID);
        if (IS_ERR(uuid_root)) {
                ret = PTR_ERR(uuid_root);
-               btrfs_abort_transaction(trans, tree_root, ret);
+               btrfs_abort_transaction(trans, ret);
+               btrfs_end_transaction(trans, tree_root);
                return ret;
        }
 
@@ -4513,8 +4584,7 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
        btrfs_set_fs_incompat(info, RAID56);
 }
 
-#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r)            \
-                       - sizeof(struct btrfs_item)             \
+#define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r)             \
                        - sizeof(struct btrfs_chunk))           \
                        / sizeof(struct btrfs_stripe) + 1)
 
@@ -4693,12 +4763,12 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 
        if (type & BTRFS_BLOCK_GROUP_RAID5) {
                raid_stripe_len = find_raid56_stripe_len(ndevs - 1,
-                                btrfs_super_stripesize(info->super_copy));
+                                               extent_root->stripesize);
                data_stripes = num_stripes - 1;
        }
        if (type & BTRFS_BLOCK_GROUP_RAID6) {
                raid_stripe_len = find_raid56_stripe_len(ndevs - 2,
-                                btrfs_super_stripesize(info->super_copy));
+                                               extent_root->stripesize);
                data_stripes = num_stripes - 2;
        }
 
@@ -6258,27 +6328,23 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
        return dev;
 }
 
-static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
-                         struct extent_buffer *leaf,
-                         struct btrfs_chunk *chunk)
+/* Return -EIO if any error, otherwise return 0. */
+static int btrfs_check_chunk_valid(struct btrfs_root *root,
+                                  struct extent_buffer *leaf,
+                                  struct btrfs_chunk *chunk, u64 logical)
 {
-       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
-       struct map_lookup *map;
-       struct extent_map *em;
-       u64 logical;
        u64 length;
        u64 stripe_len;
-       u64 devid;
-       u8 uuid[BTRFS_UUID_SIZE];
-       int num_stripes;
-       int ret;
-       int i;
+       u16 num_stripes;
+       u16 sub_stripes;
+       u64 type;
 
-       logical = key->offset;
        length = btrfs_chunk_length(leaf, chunk);
        stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
        num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
-       /* Validation check */
+       sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+       type = btrfs_chunk_type(leaf, chunk);
+
        if (!num_stripes) {
                btrfs_err(root->fs_info, "invalid chunk num_stripes: %u",
                          num_stripes);
@@ -6289,6 +6355,11 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                          "invalid chunk logical %llu", logical);
                return -EIO;
        }
+       if (btrfs_chunk_sector_size(leaf, chunk) != root->sectorsize) {
+               btrfs_err(root->fs_info, "invalid chunk sectorsize %u",
+                         btrfs_chunk_sector_size(leaf, chunk));
+               return -EIO;
+       }
        if (!length || !IS_ALIGNED(length, root->sectorsize)) {
                btrfs_err(root->fs_info,
                        "invalid chunk length %llu", length);
@@ -6300,13 +6371,54 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                return -EIO;
        }
        if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
-           btrfs_chunk_type(leaf, chunk)) {
+           type) {
                btrfs_err(root->fs_info, "unrecognized chunk type: %llu",
                          ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
                            BTRFS_BLOCK_GROUP_PROFILE_MASK) &
                          btrfs_chunk_type(leaf, chunk));
                return -EIO;
        }
+       if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+           (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+           (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+           (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+           ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+            num_stripes != 1)) {
+               btrfs_err(root->fs_info,
+                       "invalid num_stripes:sub_stripes %u:%u for profile %llu",
+                       num_stripes, sub_stripes,
+                       type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
+                         struct extent_buffer *leaf,
+                         struct btrfs_chunk *chunk)
+{
+       struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+       struct map_lookup *map;
+       struct extent_map *em;
+       u64 logical;
+       u64 length;
+       u64 stripe_len;
+       u64 devid;
+       u8 uuid[BTRFS_UUID_SIZE];
+       int num_stripes;
+       int ret;
+       int i;
+
+       logical = key->offset;
+       length = btrfs_chunk_length(leaf, chunk);
+       stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+       num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+
+       ret = btrfs_check_chunk_valid(root, leaf, chunk, logical);
+       if (ret)
+               return ret;
 
        read_lock(&map_tree->map_tree.lock);
        em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
@@ -6353,7 +6465,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                                   BTRFS_UUID_SIZE);
                map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
                                                        uuid, NULL);
-               if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
+               if (!map->stripes[i].dev &&
+                   !btrfs_test_opt(root->fs_info, DEGRADED)) {
                        free_extent_map(em);
                        return -EIO;
                }
@@ -6421,7 +6534,7 @@ static struct btrfs_fs_devices *open_seed_devices(struct btrfs_root *root,
 
        fs_devices = find_fsid(fsid);
        if (!fs_devices) {
-               if (!btrfs_test_opt(root, DEGRADED))
+               if (!btrfs_test_opt(root->fs_info, DEGRADED))
                        return ERR_PTR(-ENOENT);
 
                fs_devices = alloc_fs_devices(fsid);
@@ -6483,7 +6596,7 @@ static int read_one_dev(struct btrfs_root *root,
 
        device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
        if (!device) {
-               if (!btrfs_test_opt(root, DEGRADED))
+               if (!btrfs_test_opt(root->fs_info, DEGRADED))
                        return -EIO;
 
                device = add_missing_dev(root, fs_devices, devid, dev_uuid);
@@ -6492,7 +6605,7 @@ static int read_one_dev(struct btrfs_root *root,
                btrfs_warn(root->fs_info, "devid %llu uuid %pU missing",
                                devid, dev_uuid);
        } else {
-               if (!device->bdev && !btrfs_test_opt(root, DEGRADED))
+               if (!device->bdev && !btrfs_test_opt(root->fs_info, DEGRADED))
                        return -EIO;
 
                if(!device->bdev && !device->missing) {
@@ -6554,6 +6667,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        u32 array_size;
        u32 len = 0;
        u32 cur_offset;
+       u64 type;
        struct btrfs_key key;
 
        ASSERT(BTRFS_SUPER_INFO_SIZE <= root->nodesize);
@@ -6563,8 +6677,8 @@ int btrfs_read_sys_array(struct btrfs_root *root)
         * overallocate but we can keep it as-is, only the first page is used.
         */
        sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
-       if (!sb)
-               return -ENOMEM;
+       if (IS_ERR(sb))
+               return PTR_ERR(sb);
        set_extent_buffer_uptodate(sb);
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
        /*
@@ -6620,6 +6734,15 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                                break;
                        }
 
+                       type = btrfs_chunk_type(sb, chunk);
+                       if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+                               btrfs_err(root->fs_info,
+                           "invalid chunk type %llu in sys_array at offset %u",
+                                       type, cur_offset);
+                               ret = -EIO;
+                               break;
+                       }
+
                        len = btrfs_chunk_item_size(num_stripes);
                        if (cur_offset + len > array_size)
                                goto out_short_read;
@@ -6638,12 +6761,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                sb_array_offset += len;
                cur_offset += len;
        }
+       clear_extent_buffer_uptodate(sb);
        free_extent_buffer_stale(sb);
        return ret;
 
 out_short_read:
        printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
                        len, cur_offset);
+       clear_extent_buffer_uptodate(sb);
        free_extent_buffer_stale(sb);
        return -EIO;
 }
@@ -6656,6 +6781,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
        struct btrfs_key found_key;
        int ret;
        int slot;
+       u64 total_dev = 0;
 
        root = root->fs_info->chunk_root;
 
@@ -6697,6 +6823,7 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
                        ret = read_one_dev(root, leaf, dev_item);
                        if (ret)
                                goto error;
+                       total_dev++;
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -6706,6 +6833,28 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
                }
                path->slots[0]++;
        }
+
+       /*
+        * After loading chunk tree, we've got all device information,
+        * do another round of validation checks.
+        */
+       if (total_dev != root->fs_info->fs_devices->total_devices) {
+               btrfs_err(root->fs_info,
+          "super_num_devices %llu mismatch with num_devices %llu found here",
+                         btrfs_super_num_devices(root->fs_info->super_copy),
+                         total_dev);
+               ret = -EINVAL;
+               goto error;
+       }
+       if (btrfs_super_total_bytes(root->fs_info->super_copy) <
+           root->fs_info->fs_devices->total_rw_bytes) {
+               btrfs_err(root->fs_info,
+       "super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
+                         btrfs_super_total_bytes(root->fs_info->super_copy),
+                         root->fs_info->fs_devices->total_rw_bytes);
+               ret = -EINVAL;
+               goto error;
+       }
        ret = 0;
 error:
        unlock_chunks(root);
@@ -7059,38 +7208,3 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
                fs_devices = fs_devices->seed;
        }
 }
-
-static void btrfs_close_one_device(struct btrfs_device *device)
-{
-       struct btrfs_fs_devices *fs_devices = device->fs_devices;
-       struct btrfs_device *new_device;
-       struct rcu_string *name;
-
-       if (device->bdev)
-               fs_devices->open_devices--;
-
-       if (device->writeable &&
-           device->devid != BTRFS_DEV_REPLACE_DEVID) {
-               list_del_init(&device->dev_alloc_list);
-               fs_devices->rw_devices--;
-       }
-
-       if (device->missing)
-               fs_devices->missing_devices--;
-
-       new_device = btrfs_alloc_device(NULL, &device->devid,
-                                       device->uuid);
-       BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
-
-       /* Safe because we are under uuid_mutex */
-       if (device->name) {
-               name = rcu_string_strdup(device->name->str, GFP_NOFS);
-               BUG_ON(!name); /* -ENOMEM */
-               rcu_assign_pointer(new_device->name, name);
-       }
-
-       list_replace_rcu(&device->dev_list, &new_device->dev_list);
-       new_device->fs_devices = device->fs_devices;
-
-       call_rcu(&device->rcu, free_device);
-}