Btrfs: free sys_array eb as soon as possible
[cascardo/linux.git] / fs / btrfs / volumes.c
index 8264b06..fd3e828 100644 (file)
@@ -118,6 +118,21 @@ const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
        [BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
 };
 
+/*
+ * Table to convert BTRFS_RAID_* to the error code if minimum number of devices
+ * condition is not met. Zero means there's no corresponding
+ * BTRFS_ERROR_DEV_*_NOT_MET value.
+ */
+const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
+       [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
+       [BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
+       [BTRFS_RAID_DUP]    = 0,
+       [BTRFS_RAID_RAID0]  = 0,
+       [BTRFS_RAID_SINGLE] = 0,
+       [BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
+       [BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
+};
+
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
                                struct btrfs_device *device);
@@ -699,7 +714,8 @@ static noinline int device_list_add(const char *path,
         * if there is new btrfs on an already registered device,
         * then remove the stale device entry.
         */
-       btrfs_free_stale_device(device);
+       if (ret > 0)
+               btrfs_free_stale_device(device);
 
        *fs_devices_ret = fs_devices;
 
@@ -1471,7 +1487,7 @@ again:
                extent = btrfs_item_ptr(leaf, path->slots[0],
                                        struct btrfs_dev_extent);
        } else {
-               btrfs_std_error(root->fs_info, ret, "Slot search failed");
+               btrfs_handle_fs_error(root->fs_info, ret, "Slot search failed");
                goto out;
        }
 
@@ -1479,7 +1495,7 @@ again:
 
        ret = btrfs_del_item(trans, root, path);
        if (ret) {
-               btrfs_std_error(root->fs_info, ret,
+               btrfs_handle_fs_error(root->fs_info, ret,
                            "Failed to remove dev extent item");
        } else {
                set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
@@ -1705,19 +1721,17 @@ out:
        return ret;
 }
 
-static int __check_raid_min_devices(struct btrfs_fs_info *fs_info)
+/*
+ * Verify that @num_devices satisfies the RAID profile constraints in the whole
+ * filesystem. It's up to the caller to adjust that number, e.g. to account
+ * for an ongoing device replace.
+ */
+static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
+               u64 num_devices)
 {
        u64 all_avail;
-       u64 num_devices;
        unsigned seq;
-
-       num_devices = fs_info->fs_devices->num_devices;
-       btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
-       if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
-               WARN_ON(num_devices < 1);
-               num_devices--;
-       }
-       btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+       int i;
 
        do {
                seq = read_seqbegin(&fs_info->profiles_lock);
@@ -1727,31 +1741,64 @@ static int __check_raid_min_devices(struct btrfs_fs_info *fs_info)
                            fs_info->avail_metadata_alloc_bits;
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
-       if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
-               return BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
-       }
+       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+               if (!(all_avail & btrfs_raid_group[i]))
+                       continue;
 
-       if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
-               return BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET;
-       }
+               if (num_devices < btrfs_raid_array[i].devs_min) {
+                       int ret = btrfs_raid_mindev_error[i];
 
-       if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) &&
-           fs_info->fs_devices->rw_devices <= 2) {
-               return BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET;
+                       if (ret)
+                               return ret;
+               }
        }
 
-       if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) &&
-           fs_info->fs_devices->rw_devices <= 3) {
-               return BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET;
+       return 0;
+}
+
+struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs,
+                                       struct btrfs_device *device)
+{
+       struct btrfs_device *next_device;
+
+       list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
+               if (next_device != device &&
+                       !next_device->missing && next_device->bdev)
+                       return next_device;
        }
 
-       return 0;
+       return NULL;
 }
 
-int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+/*
+ * Helper function to check if the given device is part of s_bdev / latest_bdev
+ * and replace it with the provided or the next active device. In the context
+ * where this function is called, there should always be another device (or
+ * this_dev) which is active.
+ */
+void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
+               struct btrfs_device *device, struct btrfs_device *this_dev)
 {
-       struct btrfs_device *device;
        struct btrfs_device *next_device;
+
+       if (this_dev)
+               next_device = this_dev;
+       else
+               next_device = btrfs_find_next_active_device(fs_info->fs_devices,
+                                                               device);
+       ASSERT(next_device);
+
+       if (fs_info->sb->s_bdev &&
+                       (fs_info->sb->s_bdev == device->bdev))
+               fs_info->sb->s_bdev = next_device->bdev;
+
+       if (fs_info->fs_devices->latest_bdev == device->bdev)
+               fs_info->fs_devices->latest_bdev = next_device->bdev;
+}
+
+int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid)
+{
+       struct btrfs_device *device;
        struct btrfs_fs_devices *cur_devices;
        u64 num_devices;
        int ret = 0;
@@ -1760,11 +1807,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
        mutex_lock(&uuid_mutex);
 
-       ret = __check_raid_min_devices(root->fs_info);
+       num_devices = root->fs_info->fs_devices->num_devices;
+       btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
+       if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
+               WARN_ON(num_devices < 1);
+               num_devices--;
+       }
+       btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
+
+       ret = btrfs_check_raid_min_devices(root->fs_info, num_devices - 1);
        if (ret)
                goto out;
 
-       ret = btrfs_find_device_by_user_input(root, 0, device_path,
+       ret = btrfs_find_device_by_devspec(root, devid, device_path,
                                &device);
        if (ret)
                goto out;
@@ -1830,12 +1885,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        if (device->missing)
                device->fs_devices->missing_devices--;
 
-       next_device = list_entry(root->fs_info->fs_devices->devices.next,
-                                struct btrfs_device, dev_list);
-       if (device->bdev == root->fs_info->sb->s_bdev)
-               root->fs_info->sb->s_bdev = next_device->bdev;
-       if (device->bdev == root->fs_info->fs_devices->latest_bdev)
-               root->fs_info->fs_devices->latest_bdev = next_device->bdev;
+       btrfs_assign_next_active_device(root->fs_info, device, NULL);
 
        if (device->bdev) {
                device->fs_devices->open_devices--;
@@ -1920,11 +1970,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
        if (srcdev->missing)
                fs_devices->missing_devices--;
 
-       if (srcdev->writeable) {
+       if (srcdev->writeable)
                fs_devices->rw_devices--;
-               /* zero out the old super if it is writable */
-               btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
-       }
 
        if (srcdev->bdev)
                fs_devices->open_devices--;
@@ -1935,6 +1982,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
+       if (srcdev->writeable) {
+               /* zero out the old super if it is writable */
+               btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
+       }
        call_rcu(&srcdev->rcu, free_device);
 
        /*
@@ -1964,32 +2015,33 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
                                      struct btrfs_device *tgtdev)
 {
-       struct btrfs_device *next_device;
-
        mutex_lock(&uuid_mutex);
        WARN_ON(!tgtdev);
        mutex_lock(&fs_info->fs_devices->device_list_mutex);
 
        btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
 
-       if (tgtdev->bdev) {
-               btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+       if (tgtdev->bdev)
                fs_info->fs_devices->open_devices--;
-       }
+
        fs_info->fs_devices->num_devices--;
 
-       next_device = list_entry(fs_info->fs_devices->devices.next,
-                                struct btrfs_device, dev_list);
-       if (tgtdev->bdev == fs_info->sb->s_bdev)
-               fs_info->sb->s_bdev = next_device->bdev;
-       if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
-               fs_info->fs_devices->latest_bdev = next_device->bdev;
-       list_del_rcu(&tgtdev->dev_list);
+       btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
 
-       call_rcu(&tgtdev->rcu, free_device);
+       list_del_rcu(&tgtdev->dev_list);
 
        mutex_unlock(&fs_info->fs_devices->device_list_mutex);
        mutex_unlock(&uuid_mutex);
+
+       /*
+        * The update_dev_time() call within btrfs_scratch_superblocks()
+        * may lead to a call to btrfs_show_devname(), which will try
+        * to take device_list_mutex. At this point the device
+        * is already off the device list, so we don't have to hold
+        * device_list_mutex here.
+        */
+       btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
+       call_rcu(&tgtdev->rcu, free_device);
 }
 
 static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
@@ -2049,23 +2101,26 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
        }
 }
 
-int btrfs_find_device_by_user_input(struct btrfs_root *root, u64 srcdevid,
-                                        char *srcdev_name,
+/*
+ * Lookup a device given by device id, or the path if the id is 0.
+ */
+int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid,
+                                        char *devpath,
                                         struct btrfs_device **device)
 {
        int ret;
 
-       if (srcdevid) {
+       if (devid) {
                ret = 0;
-               *device = btrfs_find_device(root->fs_info, srcdevid, NULL,
+               *device = btrfs_find_device(root->fs_info, devid, NULL,
                                            NULL);
                if (!*device)
                        ret = -ENOENT;
        } else {
-               if (!srcdev_name || !srcdev_name[0])
+               if (!devpath || !devpath[0])
                        return -EINVAL;
 
-               ret = btrfs_find_device_missing_or_by_path(root, srcdev_name,
+               ret = btrfs_find_device_missing_or_by_path(root, devpath,
                                                           device);
        }
        return ret;
@@ -2388,7 +2443,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
                ret = btrfs_relocate_sys_chunks(root);
                if (ret < 0)
-                       btrfs_std_error(root->fs_info, ret,
+                       btrfs_handle_fs_error(root->fs_info, ret,
                                    "Failed to relocate sys chunks after "
                                    "device initialization. This can be fixed "
                                    "using the \"btrfs balance\" command.");
@@ -2633,7 +2688,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
        if (ret < 0)
                goto out;
        else if (ret > 0) { /* Logic error or corruption */
-               btrfs_std_error(root->fs_info, -ENOENT,
+               btrfs_handle_fs_error(root->fs_info, -ENOENT,
                            "Failed lookup while freeing chunk.");
                ret = -ENOENT;
                goto out;
@@ -2641,7 +2696,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
 
        ret = btrfs_del_item(trans, root, path);
        if (ret < 0)
-               btrfs_std_error(root->fs_info, ret,
+               btrfs_handle_fs_error(root->fs_info, ret,
                            "Failed to delete chunk item.");
 out:
        btrfs_free_path(path);
@@ -2827,7 +2882,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset)
                                                     chunk_offset);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_std_error(root->fs_info, ret, NULL);
+               btrfs_handle_fs_error(root->fs_info, ret, NULL);
                return ret;
        }
 
@@ -3372,6 +3427,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        u32 count_meta = 0;
        u32 count_sys = 0;
        int chunk_reserved = 0;
+       u64 bytes_used = 0;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -3510,7 +3566,13 @@ again:
                        goto loop;
                }
 
-               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
+               ASSERT(fs_info->data_sinfo);
+               spin_lock(&fs_info->data_sinfo->lock);
+               bytes_used = fs_info->data_sinfo->bytes_used;
+               spin_unlock(&fs_info->data_sinfo->lock);
+
+               if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
+                   !chunk_reserved && !bytes_used) {
                        trans = btrfs_start_transaction(chunk_root, 0);
                        if (IS_ERR(trans)) {
                                mutex_unlock(&fs_info->delete_unused_bgs_mutex);
@@ -3602,7 +3664,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        unset_balance_control(fs_info);
        ret = del_balance_item(fs_info->tree_root);
        if (ret)
-               btrfs_std_error(fs_info, ret, NULL);
+               btrfs_handle_fs_error(fs_info, ret, NULL);
 
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
@@ -3663,10 +3725,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                num_devices--;
        }
        btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
-       allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-       if (num_devices == 1)
-               allowed |= BTRFS_BLOCK_GROUP_DUP;
-       else if (num_devices > 1)
+       allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
+       if (num_devices > 1)
                allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
        if (num_devices > 2)
                allowed |= BTRFS_BLOCK_GROUP_RAID5;
@@ -5248,7 +5308,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        stripe_nr = div64_u64(stripe_nr, stripe_len);
 
        stripe_offset = stripe_nr * stripe_len;
-       BUG_ON(offset < stripe_offset);
+       if (offset < stripe_offset) {
+               btrfs_crit(fs_info, "stripe math has gone wrong, "
+                          "stripe_offset=%llu, offset=%llu, start=%llu, "
+                          "logical=%llu, stripe_len=%llu",
+                          stripe_offset, offset, em->start, logical,
+                          stripe_len);
+               free_extent_map(em);
+               return -EINVAL;
+       }
 
        /* stripe_offset is the offset of this block in its stripe*/
        stripe_offset = offset - stripe_offset;
@@ -5489,7 +5557,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                                &stripe_index);
                mirror_num = stripe_index + 1;
        }
-       BUG_ON(stripe_index >= map->num_stripes);
+       if (stripe_index >= map->num_stripes) {
+               btrfs_crit(fs_info, "stripe index math went horribly wrong, "
+                          "got stripe_index=%u, num_stripes=%u",
+                          stripe_index, map->num_stripes);
+               ret = -EINVAL;
+               goto out;
+       }
 
        num_alloc_stripes = num_stripes;
        if (dev_replace_is_ongoing) {
@@ -6212,7 +6286,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                        "invalid chunk length %llu", length);
                return -EIO;
        }
-       if (!is_power_of_2(stripe_len)) {
+       if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
                btrfs_err(root->fs_info, "invalid chunk stripe length: %llu",
                          stripe_len);
                return -EIO;
@@ -6556,13 +6630,13 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                sb_array_offset += len;
                cur_offset += len;
        }
-       free_extent_buffer(sb);
+       free_extent_buffer_stale(sb);
        return ret;
 
 out_short_read:
        printk(KERN_ERR "BTRFS: sys_array too short to read %u bytes at offset %u\n",
                        len, cur_offset);
-       free_extent_buffer(sb);
+       free_extent_buffer_stale(sb);
        return -EIO;
 }