Btrfs: fix race of using total_bytes_pinned
[cascardo/linux.git] / fs / btrfs / extent-tree.c
index 5590af9..813537f 100644 (file)
 #include <linux/ratelimit.h>
 #include <linux/percpu_counter.h>
 #include "hash.h"
-#include "ctree.h"
+#include "tree-log.h"
 #include "disk-io.h"
 #include "print-tree.h"
-#include "transaction.h"
 #include "volumes.h"
 #include "raid56.h"
 #include "locking.h"
 #include "free-space-cache.h"
 #include "math.h"
 #include "sysfs.h"
+#include "qgroup.h"
 
 #undef SCRAMBLE_DELAYED_REFS
 
@@ -81,7 +81,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extra_op);
+                               struct btrfs_delayed_extent_op *extra_op,
+                               int no_quota);
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
                                    struct extent_buffer *leaf,
                                    struct btrfs_extent_item *ei);
@@ -94,7 +95,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     u64 parent, u64 root_objectid,
                                     u64 flags, struct btrfs_disk_key *key,
-                                    int level, struct btrfs_key *ins);
+                                    int level, struct btrfs_key *ins,
+                                    int no_quota);
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
                          struct btrfs_root *extent_root, u64 flags,
                          int force);
@@ -103,7 +105,8 @@ static int find_next_key(struct btrfs_path *path, int level,
 static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
                            int dump_block_groups);
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve);
+                                      u64 num_bytes, int reserve,
+                                      int delalloc);
 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
                               u64 num_bytes);
 int btrfs_pin_extent(struct btrfs_root *root,
@@ -1271,7 +1274,7 @@ fail:
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
-                                          int refs_to_drop)
+                                          int refs_to_drop, int *last_ref)
 {
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref1 = NULL;
@@ -1307,6 +1310,7 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
+               *last_ref = 1;
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1764,7 +1768,8 @@ void update_inline_extent_backref(struct btrfs_root *root,
                                  struct btrfs_path *path,
                                  struct btrfs_extent_inline_ref *iref,
                                  int refs_to_mod,
-                                 struct btrfs_delayed_extent_op *extent_op)
+                                 struct btrfs_delayed_extent_op *extent_op,
+                                 int *last_ref)
 {
        struct extent_buffer *leaf;
        struct btrfs_extent_item *ei;
@@ -1808,6 +1813,7 @@ void update_inline_extent_backref(struct btrfs_root *root,
                else
                        btrfs_set_shared_data_ref_count(leaf, sref, refs);
        } else {
+               *last_ref = 1;
                size =  btrfs_extent_inline_ref_size(type);
                item_size = btrfs_item_size_nr(leaf, path->slots[0]);
                ptr = (unsigned long)iref;
@@ -1839,7 +1845,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
        if (ret == 0) {
                BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
                update_inline_extent_backref(root, path, iref,
-                                            refs_to_add, extent_op);
+                                            refs_to_add, extent_op, NULL);
        } else if (ret == -ENOENT) {
                setup_inline_extent_backref(root, path, iref, parent,
                                            root_objectid, owner, offset,
@@ -1872,17 +1878,19 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
-                                int refs_to_drop, int is_data)
+                                int refs_to_drop, int is_data, int *last_ref)
 {
        int ret = 0;
 
        BUG_ON(!is_data && refs_to_drop != 1);
        if (iref) {
                update_inline_extent_backref(root, path, iref,
-                                            -refs_to_drop, NULL);
+                                            -refs_to_drop, NULL, last_ref);
        } else if (is_data) {
-               ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+               ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
+                                            last_ref);
        } else {
+               *last_ref = 1;
                ret = btrfs_del_item(trans, root, path);
        }
        return ret;
@@ -1946,7 +1954,8 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root,
                         u64 bytenr, u64 num_bytes, u64 parent,
-                        u64 root_objectid, u64 owner, u64 offset, int for_cow)
+                        u64 root_objectid, u64 owner, u64 offset,
+                        int no_quota)
 {
        int ret;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1958,12 +1967,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
                                        num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+                                       BTRFS_ADD_DELAYED_REF, NULL, no_quota);
        } else {
                ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
                                        num_bytes,
                                        parent, root_objectid, owner, offset,
-                                       BTRFS_ADD_DELAYED_REF, NULL, for_cow);
+                                       BTRFS_ADD_DELAYED_REF, NULL, no_quota);
        }
        return ret;
 }
@@ -1973,31 +1982,64 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                  u64 bytenr, u64 num_bytes,
                                  u64 parent, u64 root_objectid,
                                  u64 owner, u64 offset, int refs_to_add,
+                                 int no_quota,
                                  struct btrfs_delayed_extent_op *extent_op)
 {
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_extent_item *item;
+       struct btrfs_key key;
        u64 refs;
        int ret;
+       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_ADD_EXCL;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
+       if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
+               no_quota = 1;
+
        path->reada = 1;
        path->leave_spinning = 1;
        /* this will setup the path even if it fails to insert the back ref */
-       ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
-                                          path, bytenr, num_bytes, parent,
+       ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
+                                          bytenr, num_bytes, parent,
                                           root_objectid, owner, offset,
                                           refs_to_add, extent_op);
-       if (ret != -EAGAIN)
+       if ((ret < 0 && ret != -EAGAIN) || (!ret && no_quota))
                goto out;
+       /*
+        * Ok we were able to insert an inline extent and it appears to be a new
+        * reference, deal with the qgroup accounting.
+        */
+       if (!ret && !no_quota) {
+               ASSERT(root->fs_info->quota_enabled);
+               leaf = path->nodes[0];
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               item = btrfs_item_ptr(leaf, path->slots[0],
+                                     struct btrfs_extent_item);
+               if (btrfs_extent_refs(leaf, item) > (u64)refs_to_add)
+                       type = BTRFS_QGROUP_OPER_ADD_SHARED;
+               btrfs_release_path(path);
+
+               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+                                             bytenr, num_bytes, type, 0);
+               goto out;
+       }
 
+       /*
+        * Ok we had -EAGAIN which means we didn't have space to insert and
+        * inline extent ref, so just update the reference count and add a
+        * normal backref.
+        */
        leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
        item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, item);
+       if (refs)
+               type = BTRFS_QGROUP_OPER_ADD_SHARED;
        btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, item);
@@ -2005,9 +2047,15 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
+       if (!no_quota) {
+               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+                                             bytenr, num_bytes, type, 0);
+               if (ret)
+                       goto out;
+       }
+
        path->reada = 1;
        path->leave_spinning = 1;
-
        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
                                    path, bytenr, parent, root_objectid,
@@ -2041,8 +2089,7 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 
        if (node->type == BTRFS_SHARED_DATA_REF_KEY)
                parent = ref->parent;
-       else
-               ref_root = ref->root;
+       ref_root = ref->root;
 
        if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
                if (extent_op)
@@ -2056,13 +2103,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
                                             node->num_bytes, parent,
                                             ref_root, ref->objectid,
                                             ref->offset, node->ref_mod,
-                                            extent_op);
+                                            node->no_quota, extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
                ret = __btrfs_free_extent(trans, root, node->bytenr,
                                          node->num_bytes, parent,
                                          ref_root, ref->objectid,
                                          ref->offset, node->ref_mod,
-                                         extent_op);
+                                         extent_op, node->no_quota);
        } else {
                BUG();
        }
@@ -2199,8 +2246,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 
        if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
                parent = ref->parent;
-       else
-               ref_root = ref->root;
+       ref_root = ref->root;
 
        ins.objectid = node->bytenr;
        if (skinny_metadata) {
@@ -2218,15 +2264,18 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                parent, ref_root,
                                                extent_op->flags_to_set,
                                                &extent_op->key,
-                                               ref->level, &ins);
+                                               ref->level, &ins,
+                                               node->no_quota);
        } else if (node->action == BTRFS_ADD_DELAYED_REF) {
                ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
                                             node->num_bytes, parent, ref_root,
-                                            ref->level, 0, 1, extent_op);
+                                            ref->level, 0, 1, node->no_quota,
+                                            extent_op);
        } else if (node->action == BTRFS_DROP_DELAYED_REF) {
                ret = __btrfs_free_extent(trans, root, node->bytenr,
                                          node->num_bytes, parent, ref_root,
-                                         ref->level, 0, 1, extent_op);
+                                         ref->level, 0, 1, extent_op,
+                                         node->no_quota);
        } else {
                BUG();
        }
@@ -2574,42 +2623,6 @@ static u64 find_middle(struct rb_root *root)
 }
 #endif
 
-int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
-                                        struct btrfs_fs_info *fs_info)
-{
-       struct qgroup_update *qgroup_update;
-       int ret = 0;
-
-       if (list_empty(&trans->qgroup_ref_list) !=
-           !trans->delayed_ref_elem.seq) {
-               /* list without seq or seq without list */
-               btrfs_err(fs_info,
-                       "qgroup accounting update error, list is%s empty, seq is %#x.%x",
-                       list_empty(&trans->qgroup_ref_list) ? "" : " not",
-                       (u32)(trans->delayed_ref_elem.seq >> 32),
-                       (u32)trans->delayed_ref_elem.seq);
-               BUG();
-       }
-
-       if (!trans->delayed_ref_elem.seq)
-               return 0;
-
-       while (!list_empty(&trans->qgroup_ref_list)) {
-               qgroup_update = list_first_entry(&trans->qgroup_ref_list,
-                                                struct qgroup_update, list);
-               list_del(&qgroup_update->list);
-               if (!ret)
-                       ret = btrfs_qgroup_account_ref(
-                                       trans, fs_info, qgroup_update->node,
-                                       qgroup_update->extent_op);
-               kfree(qgroup_update);
-       }
-
-       btrfs_put_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
-
-       return ret;
-}
-
 static inline u64 heads_to_leaves(struct btrfs_root *root, u64 heads)
 {
        u64 num_bytes;
@@ -2662,15 +2675,94 @@ int btrfs_should_throttle_delayed_refs(struct btrfs_trans_handle *trans,
        u64 num_entries =
                atomic_read(&trans->transaction->delayed_refs.num_entries);
        u64 avg_runtime;
+       u64 val;
 
        smp_mb();
        avg_runtime = fs_info->avg_delayed_ref_runtime;
+       val = num_entries * avg_runtime;
        if (num_entries * avg_runtime >= NSEC_PER_SEC)
                return 1;
+       if (val >= NSEC_PER_SEC / 2)
+               return 2;
 
        return btrfs_check_space_for_delayed_refs(trans, root);
 }
 
+struct async_delayed_refs {
+       struct btrfs_root *root;
+       int count;
+       int error;
+       int sync;
+       struct completion wait;
+       struct btrfs_work work;
+};
+
+static void delayed_ref_async_start(struct btrfs_work *work)
+{
+       struct async_delayed_refs *async;
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       async = container_of(work, struct async_delayed_refs, work);
+
+       trans = btrfs_join_transaction(async->root);
+       if (IS_ERR(trans)) {
+               async->error = PTR_ERR(trans);
+               goto done;
+       }
+
+       /*
+        * trans->sync means that when we call end_transaciton, we won't
+        * wait on delayed refs
+        */
+       trans->sync = true;
+       ret = btrfs_run_delayed_refs(trans, async->root, async->count);
+       if (ret)
+               async->error = ret;
+
+       ret = btrfs_end_transaction(trans, async->root);
+       if (ret && !async->error)
+               async->error = ret;
+done:
+       if (async->sync)
+               complete(&async->wait);
+       else
+               kfree(async);
+}
+
+int btrfs_async_run_delayed_refs(struct btrfs_root *root,
+                                unsigned long count, int wait)
+{
+       struct async_delayed_refs *async;
+       int ret;
+
+       async = kmalloc(sizeof(*async), GFP_NOFS);
+       if (!async)
+               return -ENOMEM;
+
+       async->root = root->fs_info->tree_root;
+       async->count = count;
+       async->error = 0;
+       if (wait)
+               async->sync = 1;
+       else
+               async->sync = 0;
+       init_completion(&async->wait);
+
+       btrfs_init_work(&async->work, delayed_ref_async_start,
+                       NULL, NULL);
+
+       btrfs_queue_work(root->fs_info->extent_workers, &async->work);
+
+       if (wait) {
+               wait_for_completion(&async->wait);
+               ret = async->error;
+               kfree(async);
+               return ret;
+       }
+       return 0;
+}
+
 /*
  * this starts processing the delayed reference count updates and
  * extent insertions we have queued up so far.  count can be
@@ -2698,8 +2790,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;
 
-       btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
-
        delayed_refs = &trans->transaction->delayed_refs;
        if (count == 0) {
                count = atomic_read(&delayed_refs->num_entries) * 2;
@@ -2758,6 +2848,9 @@ again:
                goto again;
        }
 out:
+       ret = btrfs_delayed_qgroup_accounting(trans, root->fs_info);
+       if (ret)
+               return ret;
        assert_qgroups_uptodate(trans);
        return 0;
 }
@@ -2964,7 +3057,7 @@ out:
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root,
                           struct extent_buffer *buf,
-                          int full_backref, int inc, int for_cow)
+                          int full_backref, int inc, int no_quota)
 {
        u64 bytenr;
        u64 num_bytes;
@@ -2979,11 +3072,15 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
        int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
                            u64, u64, u64, u64, u64, u64, int);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+       if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+               return 0;
+#endif
        ref_root = btrfs_header_owner(buf);
        nritems = btrfs_header_nritems(buf);
        level = btrfs_header_level(buf);
 
-       if (!root->ref_cows && level == 0)
+       if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state) && level == 0)
                return 0;
 
        if (inc)
@@ -3014,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                        key.offset -= btrfs_file_extent_offset(buf, fi);
                        ret = process_func(trans, root, bytenr, num_bytes,
                                           parent, ref_root, key.objectid,
-                                          key.offset, for_cow);
+                                          key.offset, no_quota);
                        if (ret)
                                goto fail;
                } else {
@@ -3022,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
                        num_bytes = btrfs_level_size(root, level - 1);
                        ret = process_func(trans, root, bytenr, num_bytes,
                                           parent, ref_root, level - 1, 0,
-                                          for_cow);
+                                          no_quota);
                        if (ret)
                                goto fail;
                }
@@ -3033,15 +3130,15 @@ fail:
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref, int for_cow)
+                 struct extent_buffer *buf, int full_backref, int no_quota)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 1, for_cow);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
 }
 
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-                 struct extent_buffer *buf, int full_backref, int for_cow)
+                 struct extent_buffer *buf, int full_backref, int no_quota)
 {
-       return __btrfs_mod_ref(trans, root, buf, full_backref, 0, for_cow);
+       return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -3164,7 +3261,8 @@ again:
 
        spin_lock(&block_group->lock);
        if (block_group->cached != BTRFS_CACHE_FINISHED ||
-           !btrfs_test_opt(root, SPACE_CACHE)) {
+           !btrfs_test_opt(root, SPACE_CACHE) ||
+           block_group->delalloc_bytes) {
                /*
                 * don't bother trying to write stuff out _if_
                 * a) we're not cached,
@@ -3401,10 +3499,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
                return ret;
        }
 
-       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+       for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
                INIT_LIST_HEAD(&found->block_groups[i]);
-               kobject_init(&found->block_group_kobjs[i], &btrfs_raid_ktype);
-       }
        init_rwsem(&found->groups_sem);
        spin_lock_init(&found->lock);
        found->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
@@ -4204,6 +4300,104 @@ static int flush_space(struct btrfs_root *root,
 
        return ret;
 }
+
+static inline u64
+btrfs_calc_reclaim_metadata_size(struct btrfs_root *root,
+                                struct btrfs_space_info *space_info)
+{
+       u64 used;
+       u64 expected;
+       u64 to_reclaim;
+
+       to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024,
+                               16 * 1024 * 1024);
+       spin_lock(&space_info->lock);
+       if (can_overcommit(root, space_info, to_reclaim,
+                          BTRFS_RESERVE_FLUSH_ALL)) {
+               to_reclaim = 0;
+               goto out;
+       }
+
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_pinned + space_info->bytes_readonly +
+              space_info->bytes_may_use;
+       if (can_overcommit(root, space_info, 1024 * 1024,
+                          BTRFS_RESERVE_FLUSH_ALL))
+               expected = div_factor_fine(space_info->total_bytes, 95);
+       else
+               expected = div_factor_fine(space_info->total_bytes, 90);
+
+       if (used > expected)
+               to_reclaim = used - expected;
+       else
+               to_reclaim = 0;
+       to_reclaim = min(to_reclaim, space_info->bytes_may_use +
+                                    space_info->bytes_reserved);
+out:
+       spin_unlock(&space_info->lock);
+
+       return to_reclaim;
+}
+
+static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
+                                       struct btrfs_fs_info *fs_info, u64 used)
+{
+       return (used >= div_factor_fine(space_info->total_bytes, 98) &&
+               !btrfs_fs_closing(fs_info) &&
+               !test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state));
+}
+
+static int btrfs_need_do_async_reclaim(struct btrfs_space_info *space_info,
+                                      struct btrfs_fs_info *fs_info)
+{
+       u64 used;
+
+       spin_lock(&space_info->lock);
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_pinned + space_info->bytes_readonly +
+              space_info->bytes_may_use;
+       if (need_do_async_reclaim(space_info, fs_info, used)) {
+               spin_unlock(&space_info->lock);
+               return 1;
+       }
+       spin_unlock(&space_info->lock);
+
+       return 0;
+}
+
+static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
+{
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_space_info *space_info;
+       u64 to_reclaim;
+       int flush_state;
+
+       fs_info = container_of(work, struct btrfs_fs_info, async_reclaim_work);
+       space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
+
+       to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info->fs_root,
+                                                     space_info);
+       if (!to_reclaim)
+               return;
+
+       flush_state = FLUSH_DELAYED_ITEMS_NR;
+       do {
+               flush_space(fs_info->fs_root, space_info, to_reclaim,
+                           to_reclaim, flush_state);
+               flush_state++;
+               if (!btrfs_need_do_async_reclaim(space_info, fs_info))
+                       return;
+       } while (flush_state <= COMMIT_TRANS);
+
+       if (btrfs_need_do_async_reclaim(space_info, fs_info))
+               queue_work(system_unbound_wq, work);
+}
+
+void btrfs_init_async_reclaim_work(struct work_struct *work)
+{
+       INIT_WORK(work, btrfs_async_reclaim_metadata_space);
+}
+
 /**
  * reserve_metadata_bytes - try to reserve bytes from the block_rsv's space
  * @root - the root we're allocating for
@@ -4311,8 +4505,13 @@ again:
        if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
                flushing = true;
                space_info->flush = 1;
+       } else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
+               used += orig_bytes;
+               if (need_do_async_reclaim(space_info, root->fs_info, used) &&
+                   !work_busy(&root->fs_info->async_reclaim_work))
+                       queue_work(system_unbound_wq,
+                                  &root->fs_info->async_reclaim_work);
        }
-
        spin_unlock(&space_info->lock);
 
        if (!ret || flush == BTRFS_RESERVE_NO_FLUSH)
@@ -4369,7 +4568,7 @@ static struct btrfs_block_rsv *get_block_rsv(
 {
        struct btrfs_block_rsv *block_rsv = NULL;
 
-       if (root->ref_cows)
+       if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                block_rsv = trans->block_rsv;
 
        if (root == root->fs_info->csum_root && trans->adding_csums)
@@ -5416,6 +5615,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
  * @cache:     The cache we are manipulating
  * @num_bytes: The number of bytes in question
  * @reserve:   One of the reservation enums
+ * @delalloc:   The blocks are allocated for the delalloc write
  *
  * This is called by the allocator when it reserves space, or by somebody who is
  * freeing space that was never actually used on disk.  For example if you
@@ -5434,7 +5634,7 @@ int btrfs_exclude_logged_extents(struct btrfs_root *log,
  * succeeds.
  */
 static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
-                                      u64 num_bytes, int reserve)
+                                      u64 num_bytes, int reserve, int delalloc)
 {
        struct btrfs_space_info *space_info = cache->space_info;
        int ret = 0;
@@ -5453,12 +5653,18 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
                                                num_bytes, 0);
                                space_info->bytes_may_use -= num_bytes;
                        }
+
+                       if (delalloc)
+                               cache->delalloc_bytes += num_bytes;
                }
        } else {
                if (cache->ro)
                        space_info->bytes_readonly += num_bytes;
                cache->reserved -= num_bytes;
                space_info->bytes_reserved -= num_bytes;
+
+               if (delalloc)
+                       cache->delalloc_bytes -= num_bytes;
        }
        spin_unlock(&cache->lock);
        spin_unlock(&space_info->lock);
@@ -5472,7 +5678,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        struct btrfs_caching_control *next;
        struct btrfs_caching_control *caching_ctl;
        struct btrfs_block_group_cache *cache;
-       struct btrfs_space_info *space_info;
 
        down_write(&fs_info->commit_root_sem);
 
@@ -5495,9 +5700,6 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
 
        up_write(&fs_info->commit_root_sem);
 
-       list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
-               percpu_counter_set(&space_info->total_bytes_pinned, 0);
-
        update_global_block_rsv(fs_info);
 }
 
@@ -5535,6 +5737,7 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
                spin_lock(&cache->lock);
                cache->pinned -= len;
                space_info->bytes_pinned -= len;
+               percpu_counter_add(&space_info->total_bytes_pinned, -len);
                if (cache->ro) {
                        space_info->bytes_readonly += len;
                        readonly = true;
@@ -5621,7 +5824,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                u64 bytenr, u64 num_bytes, u64 parent,
                                u64 root_objectid, u64 owner_objectid,
                                u64 owner_offset, int refs_to_drop,
-                               struct btrfs_delayed_extent_op *extent_op)
+                               struct btrfs_delayed_extent_op *extent_op,
+                               int no_quota)
 {
        struct btrfs_key key;
        struct btrfs_path *path;
@@ -5637,9 +5841,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        int num_to_del = 1;
        u32 item_size;
        u64 refs;
+       int last_ref = 0;
+       enum btrfs_qgroup_operation_type type = BTRFS_QGROUP_OPER_SUB_EXCL;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
+       if (!info->quota_enabled || !is_fstree(root_objectid))
+               no_quota = 1;
+
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -5687,7 +5896,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        BUG_ON(iref);
                        ret = remove_extent_backref(trans, extent_root, path,
                                                    NULL, refs_to_drop,
-                                                   is_data);
+                                                   is_data, &last_ref);
                        if (ret) {
                                btrfs_abort_transaction(trans, extent_root, ret);
                                goto out;
@@ -5806,7 +6015,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        refs = btrfs_extent_refs(leaf, ei);
        if (refs < refs_to_drop) {
                btrfs_err(info, "trying to drop %d refs but we only have %Lu "
-                         "for bytenr %Lu\n", refs_to_drop, refs, bytenr);
+                         "for bytenr %Lu", refs_to_drop, refs, bytenr);
                ret = -EINVAL;
                btrfs_abort_transaction(trans, extent_root, ret);
                goto out;
@@ -5814,6 +6023,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        refs -= refs_to_drop;
 
        if (refs > 0) {
+               type = BTRFS_QGROUP_OPER_SUB_SHARED;
                if (extent_op)
                        __run_delayed_extent_op(extent_op, leaf, ei);
                /*
@@ -5829,7 +6039,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                if (found_extent) {
                        ret = remove_extent_backref(trans, extent_root, path,
                                                    iref, refs_to_drop,
-                                                   is_data);
+                                                   is_data, &last_ref);
                        if (ret) {
                                btrfs_abort_transaction(trans, extent_root, ret);
                                goto out;
@@ -5850,6 +6060,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
+               last_ref = 1;
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                if (ret) {
@@ -5872,6 +6083,20 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        goto out;
                }
        }
+       btrfs_release_path(path);
+
+       /* Deal with the quota accounting */
+       if (!ret && last_ref && !no_quota) {
+               int mod_seq = 0;
+
+               if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+                   type == BTRFS_QGROUP_OPER_SUB_SHARED)
+                       mod_seq = 1;
+
+               ret = btrfs_qgroup_record_ref(trans, info, root_objectid,
+                                             bytenr, num_bytes, type,
+                                             mod_seq);
+       }
 out:
        btrfs_free_path(path);
        return ret;
@@ -5987,7 +6212,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
 
                btrfs_add_free_space(cache, buf->start, buf->len);
-               btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE);
+               btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0);
                trace_btrfs_reserved_extent_free(root, buf->start, buf->len);
                pin = 0;
        }
@@ -6008,11 +6233,15 @@ out:
 /* Can return -ENOMEM */
 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-                     u64 owner, u64 offset, int for_cow)
+                     u64 owner, u64 offset, int no_quota)
 {
        int ret;
        struct btrfs_fs_info *fs_info = root->fs_info;
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+       if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+               return 0;
+#endif
        add_pinned_bytes(root->fs_info, num_bytes, owner, root_objectid);
 
        /*
@@ -6028,13 +6257,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
                                        num_bytes,
                                        parent, root_objectid, (int)owner,
-                                       BTRFS_DROP_DELAYED_REF, NULL, for_cow);
+                                       BTRFS_DROP_DELAYED_REF, NULL, no_quota);
        } else {
                ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
                                                num_bytes,
                                                parent, root_objectid, owner,
                                                offset, BTRFS_DROP_DELAYED_REF,
-                                               NULL, for_cow);
+                                               NULL, no_quota);
        }
        return ret;
 }
@@ -6142,6 +6371,70 @@ enum btrfs_loop_type {
        LOOP_NO_EMPTY_SIZE = 3,
 };
 
+static inline void
+btrfs_lock_block_group(struct btrfs_block_group_cache *cache,
+                      int delalloc)
+{
+       if (delalloc)
+               down_read(&cache->data_rwsem);
+}
+
+static inline void
+btrfs_grab_block_group(struct btrfs_block_group_cache *cache,
+                      int delalloc)
+{
+       btrfs_get_block_group(cache);
+       if (delalloc)
+               down_read(&cache->data_rwsem);
+}
+
+static struct btrfs_block_group_cache *
+btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
+                  struct btrfs_free_cluster *cluster,
+                  int delalloc)
+{
+       struct btrfs_block_group_cache *used_bg;
+       bool locked = false;
+again:
+       spin_lock(&cluster->refill_lock);
+       if (locked) {
+               if (used_bg == cluster->block_group)
+                       return used_bg;
+
+               up_read(&used_bg->data_rwsem);
+               btrfs_put_block_group(used_bg);
+       }
+
+       used_bg = cluster->block_group;
+       if (!used_bg)
+               return NULL;
+
+       if (used_bg == block_group)
+               return used_bg;
+
+       btrfs_get_block_group(used_bg);
+
+       if (!delalloc)
+               return used_bg;
+
+       if (down_read_trylock(&used_bg->data_rwsem))
+               return used_bg;
+
+       spin_unlock(&cluster->refill_lock);
+       down_read(&used_bg->data_rwsem);
+       locked = true;
+       goto again;
+}
+
+static inline void
+btrfs_release_block_group(struct btrfs_block_group_cache *cache,
+                        int delalloc)
+{
+       if (delalloc)
+               up_read(&cache->data_rwsem);
+       btrfs_put_block_group(cache);
+}
+
 /*
  * walks the btree of allocated extents and find a hole of a given size.
  * The key ins is changed to record the hole:
@@ -6156,7 +6449,7 @@ enum btrfs_loop_type {
 static noinline int find_free_extent(struct btrfs_root *orig_root,
                                     u64 num_bytes, u64 empty_size,
                                     u64 hint_byte, struct btrfs_key *ins,
-                                    u64 flags)
+                                    u64 flags, int delalloc)
 {
        int ret = 0;
        struct btrfs_root *root = orig_root->fs_info->extent_root;
@@ -6244,6 +6537,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
                                up_read(&space_info->groups_sem);
                        } else {
                                index = get_block_group_index(block_group);
+                               btrfs_lock_block_group(block_group, delalloc);
                                goto have_block_group;
                        }
                } else if (block_group) {
@@ -6258,7 +6552,7 @@ search:
                u64 offset;
                int cached;
 
-               btrfs_get_block_group(block_group);
+               btrfs_grab_block_group(block_group, delalloc);
                search_start = block_group->key.objectid;
 
                /*
@@ -6306,16 +6600,16 @@ have_block_group:
                         * the refill lock keeps out other
                         * people trying to start a new cluster
                         */
-                       spin_lock(&last_ptr->refill_lock);
-                       used_block_group = last_ptr->block_group;
-                       if (used_block_group != block_group &&
-                           (!used_block_group ||
-                            used_block_group->ro ||
-                            !block_group_bits(used_block_group, flags)))
+                       used_block_group = btrfs_lock_cluster(block_group,
+                                                             last_ptr,
+                                                             delalloc);
+                       if (!used_block_group)
                                goto refill_cluster;
 
-                       if (used_block_group != block_group)
-                               btrfs_get_block_group(used_block_group);
+                       if (used_block_group != block_group &&
+                           (used_block_group->ro ||
+                            !block_group_bits(used_block_group, flags)))
+                               goto release_cluster;
 
                        offset = btrfs_alloc_from_cluster(used_block_group,
                                                last_ptr,
@@ -6329,16 +6623,15 @@ have_block_group:
                                                used_block_group,
                                                search_start, num_bytes);
                                if (used_block_group != block_group) {
-                                       btrfs_put_block_group(block_group);
+                                       btrfs_release_block_group(block_group,
+                                                                 delalloc);
                                        block_group = used_block_group;
                                }
                                goto checks;
                        }
 
                        WARN_ON(last_ptr->block_group != used_block_group);
-                       if (used_block_group != block_group)
-                               btrfs_put_block_group(used_block_group);
-refill_cluster:
+release_cluster:
                        /* If we are on LOOP_NO_EMPTY_SIZE, we can't
                         * set up a new clusters, so lets just skip it
                         * and let the allocator find whatever block
@@ -6355,8 +6648,10 @@ refill_cluster:
                         * succeeding in the unclustered
                         * allocation.  */
                        if (loop >= LOOP_NO_EMPTY_SIZE &&
-                           last_ptr->block_group != block_group) {
+                           used_block_group != block_group) {
                                spin_unlock(&last_ptr->refill_lock);
+                               btrfs_release_block_group(used_block_group,
+                                                         delalloc);
                                goto unclustered_alloc;
                        }
 
@@ -6366,6 +6661,10 @@ refill_cluster:
                         */
                        btrfs_return_cluster_to_free_space(NULL, last_ptr);
 
+                       if (used_block_group != block_group)
+                               btrfs_release_block_group(used_block_group,
+                                                         delalloc);
+refill_cluster:
                        if (loop >= LOOP_NO_EMPTY_SIZE) {
                                spin_unlock(&last_ptr->refill_lock);
                                goto unclustered_alloc;
@@ -6473,7 +6772,7 @@ checks:
                BUG_ON(offset > search_start);
 
                ret = btrfs_update_reserved_bytes(block_group, num_bytes,
-                                                 alloc_type);
+                                                 alloc_type, delalloc);
                if (ret == -EAGAIN) {
                        btrfs_add_free_space(block_group, offset, num_bytes);
                        goto loop;
@@ -6485,13 +6784,13 @@ checks:
 
                trace_btrfs_reserve_extent(orig_root, block_group,
                                           search_start, num_bytes);
-               btrfs_put_block_group(block_group);
+               btrfs_release_block_group(block_group, delalloc);
                break;
 loop:
                failed_cluster_refill = false;
                failed_alloc = false;
                BUG_ON(index != get_block_group_index(block_group));
-               btrfs_put_block_group(block_group);
+               btrfs_release_block_group(block_group, delalloc);
        }
        up_read(&space_info->groups_sem);
 
@@ -6514,8 +6813,14 @@ loop:
                loop++;
                if (loop == LOOP_ALLOC_CHUNK) {
                        struct btrfs_trans_handle *trans;
+                       int exist = 0;
+
+                       trans = current->journal_info;
+                       if (trans)
+                               exist = 1;
+                       else
+                               trans = btrfs_join_transaction(root);
 
-                       trans = btrfs_join_transaction(root);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
                                goto out;
@@ -6532,7 +6837,8 @@ loop:
                                                        root, ret);
                        else
                                ret = 0;
-                       btrfs_end_transaction(trans, root);
+                       if (!exist)
+                               btrfs_end_transaction(trans, root);
                        if (ret)
                                goto out;
                }
@@ -6597,7 +6903,7 @@ again:
 int btrfs_reserve_extent(struct btrfs_root *root,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
-                        struct btrfs_key *ins, int is_data)
+                        struct btrfs_key *ins, int is_data, int delalloc)
 {
        bool final_tried = false;
        u64 flags;
@@ -6607,7 +6913,7 @@ int btrfs_reserve_extent(struct btrfs_root *root,
 again:
        WARN_ON(num_bytes < root->sectorsize);
        ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
-                              flags);
+                              flags, delalloc);
 
        if (ret == -ENOSPC) {
                if (!final_tried && ins->offset) {
@@ -6632,7 +6938,8 @@ again:
 }
 
 static int __btrfs_free_reserved_extent(struct btrfs_root *root,
-                                       u64 start, u64 len, int pin)
+                                       u64 start, u64 len,
+                                       int pin, int delalloc)
 {
        struct btrfs_block_group_cache *cache;
        int ret = 0;
@@ -6651,7 +6958,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
                pin_down_extent(root, cache, start, len, 1);
        else {
                btrfs_add_free_space(cache, start, len);
-               btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
+               btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc);
        }
        btrfs_put_block_group(cache);
 
@@ -6661,15 +6968,15 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
 }
 
 int btrfs_free_reserved_extent(struct btrfs_root *root,
-                                       u64 start, u64 len)
+                              u64 start, u64 len, int delalloc)
 {
-       return __btrfs_free_reserved_extent(root, start, len, 0);
+       return __btrfs_free_reserved_extent(root, start, len, 0, delalloc);
 }
 
 int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
                                       u64 start, u64 len)
 {
-       return __btrfs_free_reserved_extent(root, start, len, 1);
+       return __btrfs_free_reserved_extent(root, start, len, 1, 0);
 }
 
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -6733,6 +7040,13 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
+       /* Always set parent to 0 here since its exclusive anyway. */
+       ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+                                     ins->objectid, ins->offset,
+                                     BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+       if (ret)
+               return ret;
+
        ret = update_block_group(root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6747,7 +7061,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     u64 parent, u64 root_objectid,
                                     u64 flags, struct btrfs_disk_key *key,
-                                    int level, struct btrfs_key *ins)
+                                    int level, struct btrfs_key *ins,
+                                    int no_quota)
 {
        int ret;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6757,6 +7072,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        struct btrfs_path *path;
        struct extent_buffer *leaf;
        u32 size = sizeof(*extent_item) + sizeof(*iref);
+       u64 num_bytes = ins->offset;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
@@ -6790,6 +7106,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 
        if (skinny_metadata) {
                iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+               num_bytes = root->leafsize;
        } else {
                block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
                btrfs_set_tree_block_key(leaf, block_info, key);
@@ -6811,6 +7128,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
+       if (!no_quota) {
+               ret = btrfs_qgroup_record_ref(trans, fs_info, root_objectid,
+                                             ins->objectid, num_bytes,
+                                             BTRFS_QGROUP_OPER_ADD_EXCL, 0);
+               if (ret)
+                       return ret;
+       }
+
        ret = update_block_group(root, ins->objectid, root->leafsize, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -6866,7 +7191,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                return -EINVAL;
 
        ret = btrfs_update_reserved_bytes(block_group, ins->offset,
-                                         RESERVE_ALLOC_NO_ACCOUNT);
+                                         RESERVE_ALLOC_NO_ACCOUNT, 0);
        BUG_ON(ret); /* logic error */
        ret = alloc_reserved_file_extent(trans, root, 0, root_objectid,
                                         0, owner, offset, ins, 1);
@@ -6994,12 +7319,21 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+       if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) {
+               buf = btrfs_init_new_buffer(trans, root, root->alloc_bytenr,
+                                           blocksize, level);
+               if (!IS_ERR(buf))
+                       root->alloc_bytenr += blocksize;
+               return buf;
+       }
+#endif
        block_rsv = use_block_rsv(trans, root, blocksize);
        if (IS_ERR(block_rsv))
                return ERR_CAST(block_rsv);
 
        ret = btrfs_reserve_extent(root, blocksize, blocksize,
-                                  empty_size, hint, &ins, 0);
+                                  empty_size, hint, &ins, 0, 0);
        if (ret) {
                unuse_block_rsv(root->fs_info, block_rsv, blocksize);
                return ERR_PTR(ret);
@@ -7735,7 +8069,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                }
        }
 
-       if (root->in_radix) {
+       if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
                btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
        } else {
                free_extent_buffer(root->node);
@@ -8327,8 +8661,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                list_del(&space_info->list);
                for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
                        struct kobject *kobj;
-                       kobj = &space_info->block_group_kobjs[i];
-                       if (kobj->parent) {
+                       kobj = space_info->block_group_kobjs[i];
+                       space_info->block_group_kobjs[i] = NULL;
+                       if (kobj) {
                                kobject_del(kobj);
                                kobject_put(kobj);
                        }
@@ -8352,17 +8687,26 @@ static void __link_block_group(struct btrfs_space_info *space_info,
        up_write(&space_info->groups_sem);
 
        if (first) {
-               struct kobject *kobj = &space_info->block_group_kobjs[index];
+               struct raid_kobject *rkobj;
                int ret;
 
-               kobject_get(&space_info->kobj); /* put in release */
-               ret = kobject_add(kobj, &space_info->kobj, "%s",
-                                 get_raid_name(index));
+               rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+               if (!rkobj)
+                       goto out_err;
+               rkobj->raid_type = index;
+               kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
+               ret = kobject_add(&rkobj->kobj, &space_info->kobj,
+                                 "%s", get_raid_name(index));
                if (ret) {
-                       pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
-                       kobject_put(&space_info->kobj);
+                       kobject_put(&rkobj->kobj);
+                       goto out_err;
                }
+               space_info->block_group_kobjs[index] = &rkobj->kobj;
        }
+
+       return;
+out_err:
+       pr_warn("BTRFS: failed to add kobject for block cache. ignoring.\n");
 }
 
 static struct btrfs_block_group_cache *
@@ -8392,6 +8736,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
                                               start);
        atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
+       init_rwsem(&cache->data_rwsem);
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->new_bg_list);
@@ -8611,7 +8956,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 
        extent_root = root->fs_info->extent_root;
 
-       root->fs_info->last_trans_log_full_commit = trans->transid;
+       btrfs_set_log_full_commit(root->fs_info, trans);
 
        cache = btrfs_create_block_group_cache(root, chunk_offset, size);
        if (!cache)
@@ -8697,6 +9042,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        struct btrfs_root *tree_root = root->fs_info->tree_root;
        struct btrfs_key key;
        struct inode *inode;
+       struct kobject *kobj = NULL;
        int ret;
        int index;
        int factor;
@@ -8796,11 +9142,15 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
         */
        list_del_init(&block_group->list);
        if (list_empty(&block_group->space_info->block_groups[index])) {
-               kobject_del(&block_group->space_info->block_group_kobjs[index]);
-               kobject_put(&block_group->space_info->block_group_kobjs[index]);
+               kobj = block_group->space_info->block_group_kobjs[index];
+               block_group->space_info->block_group_kobjs[index] = NULL;
                clear_avail_alloc_bits(root->fs_info, block_group->flags);
        }
        up_write(&block_group->space_info->groups_sem);
+       if (kobj) {
+               kobject_del(kobj);
+               kobject_put(kobj);
+       }
 
        if (block_group->cached == BTRFS_CACHE_STARTED)
                wait_block_group_cache_done(block_group);