Btrfs: fix our overcommit math
[cascardo/linux.git] / fs / btrfs / extent-tree.c
index ba58024..a8de1c3 100644 (file)
@@ -3504,7 +3504,8 @@ static int should_alloc_chunk(struct btrfs_root *root,
         * and purposes it's used space.  Don't worry about locking the
         * global_rsv, it doesn't change except when the transaction commits.
         */
-       num_allocated += global_rsv->size;
+       if (sinfo->flags & BTRFS_BLOCK_GROUP_METADATA)
+               num_allocated += global_rsv->size;
 
        /*
         * in limited mode, we want to have some free space up to
@@ -3518,15 +3519,8 @@ static int should_alloc_chunk(struct btrfs_root *root,
                if (num_bytes - num_allocated < thresh)
                        return 1;
        }
-       thresh = btrfs_super_total_bytes(root->fs_info->super_copy);
 
-       /* 256MB or 2% of the FS */
-       thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 2));
-       /* system chunks need a much small threshold */
-       if (sinfo->flags & BTRFS_BLOCK_GROUP_SYSTEM)
-               thresh = 32 * 1024 * 1024;
-
-       if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 8))
+       if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
                return 0;
        return 1;
 }
@@ -3669,6 +3663,46 @@ out:
        return ret;
 }
 
+static int can_overcommit(struct btrfs_root *root,
+                         struct btrfs_space_info *space_info, u64 bytes,
+                         int flush)
+{
+       u64 profile = btrfs_get_alloc_profile(root, 0);
+       u64 avail;
+       u64 used;
+
+       used = space_info->bytes_used + space_info->bytes_reserved +
+               space_info->bytes_pinned + space_info->bytes_readonly +
+               space_info->bytes_may_use;
+
+       spin_lock(&root->fs_info->free_chunk_lock);
+       avail = root->fs_info->free_chunk_space;
+       spin_unlock(&root->fs_info->free_chunk_lock);
+
+       /*
+        * If we have dup, raid1 or raid10 then only half of the free
+        * space is actually useable.
+        */
+       if (profile & (BTRFS_BLOCK_GROUP_DUP |
+                      BTRFS_BLOCK_GROUP_RAID1 |
+                      BTRFS_BLOCK_GROUP_RAID10))
+               avail >>= 1;
+
+       /*
+        * If we aren't flushing don't let us overcommit too much, say
+        * 1/8th of the space.  If we can flush, let it overcommit up to
+        * 1/2 of the space.
+        */
+       if (flush)
+               avail >>= 3;
+       else
+               avail >>= 1;
+
+       if (used + bytes < space_info->total_bytes + avail)
+               return 1;
+       return 0;
+}
+
 /*
  * shrink metadata reservation for delalloc
  */
@@ -3703,11 +3737,15 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
                writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages,
                                               WB_REASON_FS_FREE_SPACE);
 
+               /*
+                * We need to wait for the async pages to actually start before
+                * we do anything.
+                */
+               wait_event(root->fs_info->async_submit_wait,
+                          !atomic_read(&root->fs_info->async_delalloc_pages));
+
                spin_lock(&space_info->lock);
-               if (space_info->bytes_used + space_info->bytes_reserved +
-                   space_info->bytes_pinned + space_info->bytes_readonly +
-                   space_info->bytes_may_use + orig <=
-                   space_info->total_bytes) {
+               if (can_overcommit(root, space_info, orig, !trans)) {
                        spin_unlock(&space_info->lock);
                        break;
                }
@@ -3923,7 +3961,6 @@ again:
        }
 
        if (ret) {
-               u64 profile = btrfs_get_alloc_profile(root, 0);
                u64 avail;
 
                /*
@@ -3944,30 +3981,7 @@ again:
                        goto again;
                }
 
-               spin_lock(&root->fs_info->free_chunk_lock);
-               avail = root->fs_info->free_chunk_space;
-
-               /*
-                * If we have dup, raid1 or raid10 then only half of the free
-                * space is actually useable.
-                */
-               if (profile & (BTRFS_BLOCK_GROUP_DUP |
-                              BTRFS_BLOCK_GROUP_RAID1 |
-                              BTRFS_BLOCK_GROUP_RAID10))
-                       avail >>= 1;
-
-               /*
-                * If we aren't flushing don't let us overcommit too much, say
-                * 1/8th of the space.  If we can flush, let it overcommit up to
-                * 1/2 of the space.
-                */
-               if (flush)
-                       avail >>= 3;
-               else
-                       avail >>= 1;
-                spin_unlock(&root->fs_info->free_chunk_lock);
-
-               if (used + num_bytes < space_info->total_bytes + avail) {
+               if (can_overcommit(root, space_info, orig_bytes, flush)) {
                        space_info->bytes_may_use += orig_bytes;
                        trace_btrfs_space_reservation(root->fs_info,
                                "space_info", space_info->flags, orig_bytes, 1);
@@ -4114,13 +4128,15 @@ static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src,
        return 0;
 }
 
-void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
+void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type)
 {
        memset(rsv, 0, sizeof(*rsv));
        spin_lock_init(&rsv->lock);
+       rsv->type = type;
 }
 
-struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
+struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root,
+                                             unsigned short type)
 {
        struct btrfs_block_rsv *block_rsv;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -4129,7 +4145,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
        if (!block_rsv)
                return NULL;
 
-       btrfs_init_block_rsv(block_rsv);
+       btrfs_init_block_rsv(block_rsv, type);
        block_rsv->space_info = __find_space_info(fs_info,
                                                  BTRFS_BLOCK_GROUP_METADATA);
        return block_rsv;
@@ -4138,6 +4154,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 void btrfs_free_block_rsv(struct btrfs_root *root,
                          struct btrfs_block_rsv *rsv)
 {
+       if (!rsv)
+               return;
        btrfs_block_rsv_release(root, rsv, (u64)-1);
        kfree(rsv);
 }
@@ -4416,10 +4434,10 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
        struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
        /*
-        * two for root back/forward refs, two for directory entries
-        * and one for root of the snapshot.
+        * two for root back/forward refs, two for directory entries,
+        * one for root of the snapshot and one for parent inode.
         */
-       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5);
+       u64 num_bytes = btrfs_calc_trans_metadata_size(root, 6);
        dst_rsv->space_info = src_rsv->space_info;
        return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
 }
@@ -6314,7 +6332,7 @@ use_block_rsv(struct btrfs_trans_handle *trans,
        ret = block_rsv_use_bytes(block_rsv, blocksize);
        if (!ret)
                return block_rsv;
-       if (ret) {
+       if (ret && !block_rsv->failfast) {
                static DEFINE_RATELIMIT_STATE(_rs,
                                DEFAULT_RATELIMIT_INTERVAL,
                                /*DEFAULT_RATELIMIT_BURST*/ 2);