btrfs: qgroup: Fix a race in delayed_ref which leads to abort trans
author Qu Wenruo <quwenruo@cn.fujitsu.com>
Mon, 26 Oct 2015 06:11:18 +0000 (14:11 +0800)
committer Chris Mason <clm@fb.com>
Tue, 27 Oct 2015 02:44:39 +0000 (19:44 -0700)
Between btrfs_alloc_reserved_file_extent() and
btrfs_add_delayed_qgroup_reserve(), there is a window in which delayed
refs may be run and the delayed ref head may be freed before
btrfs_add_delayed_qgroup_reserve() is called.

This will cause btrfs_add_delayed_qgroup_reserve() to return -ENOENT,
and cause the transaction to be aborted.

This patch records the qgroup reserved space info in the delayed_ref_head
at btrfs_add_delayed_ref() time, to eliminate the race window.

Reported-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Signed-off-by: Chris Mason <clm@fb.com>
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/extent-tree.c
fs/btrfs/inode.c

index 4001585..a2e73f6 100644 (file)
@@ -3430,7 +3430,8 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     u64 root_objectid, u64 owner,
-                                    u64 offset, struct btrfs_key *ins);
+                                    u64 offset, u64 ram_bytes,
+                                    struct btrfs_key *ins);
 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   u64 root_objectid, u64 owner, u64 offset,
index 1c3588a..e06dd75 100644 (file)
@@ -535,7 +535,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
                     struct btrfs_trans_handle *trans,
                     struct btrfs_delayed_ref_node *ref,
                     struct btrfs_qgroup_extent_record *qrecord,
-                    u64 bytenr, u64 num_bytes, int action, int is_data)
+                    u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved,
+                    int action, int is_data)
 {
        struct btrfs_delayed_ref_head *existing;
        struct btrfs_delayed_ref_head *head_ref = NULL;
@@ -544,6 +545,9 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        int count_mod = 1;
        int must_insert_reserved = 0;
 
+       /* If reserved is provided, it must be a data extent. */
+       BUG_ON(!is_data && reserved);
+
        /*
         * the head node stores the sum of all the mods, so dropping a ref
         * should drop the sum in the head node by one.
@@ -593,6 +597,11 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 
        /* Record qgroup extent info if provided */
        if (qrecord) {
+               if (ref_root && reserved) {
+                       head_ref->qgroup_ref_root = ref_root;
+                       head_ref->qgroup_reserved = reserved;
+               }
+
                qrecord->bytenr = bytenr;
                qrecord->num_bytes = num_bytes;
                qrecord->old_roots = NULL;
@@ -611,6 +620,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        existing = htree_insert(&delayed_refs->href_root,
                                &head_ref->href_node);
        if (existing) {
+               WARN_ON(ref_root && reserved && existing->qgroup_ref_root
+                       && existing->qgroup_reserved);
                update_existing_head_ref(delayed_refs, &existing->node, ref);
                /*
                 * we've updated the existing ref, free the newly
@@ -777,7 +788,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
-                                       bytenr, num_bytes, action, 0);
+                                       bytenr, num_bytes, 0, 0, action, 0);
 
        add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                             num_bytes, parent, ref_root, level, action);
@@ -800,7 +811,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, int action,
+                              u64 owner, u64 offset, u64 reserved, int action,
                               struct btrfs_delayed_extent_op *extent_op)
 {
        struct btrfs_delayed_data_ref *ref;
@@ -839,7 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
         * the spin lock
         */
        head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record,
-                                       bytenr, num_bytes, action, 1);
+                                       bytenr, num_bytes, ref_root, reserved,
+                                       action, 1);
 
        add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
                                   num_bytes, parent, ref_root, owner, offset,
@@ -894,7 +906,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
        spin_lock(&delayed_refs->lock);
 
        add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr,
-                            num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
+                            num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD,
                             extent_op->is_data);
 
        spin_unlock(&delayed_refs->lock);
index f9cf234..00ed02c 100644 (file)
@@ -248,7 +248,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
                               struct btrfs_trans_handle *trans,
                               u64 bytenr, u64 num_bytes,
                               u64 parent, u64 ref_root,
-                              u64 owner, u64 offset, int action,
+                              u64 owner, u64 offset, u64 reserved, int action,
                               struct btrfs_delayed_extent_op *extent_op);
 int btrfs_add_delayed_qgroup_reserve(struct btrfs_fs_info *fs_info,
                                     struct btrfs_trans_handle *trans,
index c1f8c7e..f50c7c2 100644 (file)
@@ -2087,8 +2087,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
                                        BTRFS_ADD_DELAYED_REF, NULL);
        } else {
                ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
-                                       num_bytes,
-                                       parent, root_objectid, owner, offset,
+                                       num_bytes, parent, root_objectid,
+                                       owner, offset, 0,
                                        BTRFS_ADD_DELAYED_REF, NULL);
        }
        return ret;
@@ -6832,8 +6832,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
                                                num_bytes,
                                                parent, root_objectid, owner,
-                                               offset, BTRFS_DROP_DELAYED_REF,
-                                               NULL);
+                                               offset, 0,
+                                               BTRFS_DROP_DELAYED_REF, NULL);
        }
        return ret;
 }
@@ -7759,7 +7759,8 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     u64 root_objectid, u64 owner,
-                                    u64 offset, struct btrfs_key *ins)
+                                    u64 offset, u64 ram_bytes,
+                                    struct btrfs_key *ins)
 {
        int ret;
 
@@ -7768,7 +7769,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
                                         ins->offset, 0,
                                         root_objectid, owner, offset,
-                                        BTRFS_ADD_DELAYED_EXTENT, NULL);
+                                        ram_bytes, BTRFS_ADD_DELAYED_EXTENT,
+                                        NULL);
        return ret;
 }
 
index 6f030c2..4439fbb 100644 (file)
@@ -2127,17 +2127,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
        ins.type = BTRFS_EXTENT_ITEM_KEY;
        ret = btrfs_alloc_reserved_file_extent(trans, root,
                                        root->root_key.objectid,
-                                       btrfs_ino(inode), file_pos, &ins);
-       if (ret < 0)
-               goto out;
+                                       btrfs_ino(inode), file_pos,
+                                       ram_bytes, &ins);
        /*
-        * Release the reserved range from inode dirty range map, and
-        * move it to delayed ref codes, as now accounting only happens at
-        * commit_transaction() time.
+        * Release the reserved range from inode dirty range map, as it is
+        * already moved into delayed_ref_head
         */
        btrfs_qgroup_release_data(inode, file_pos, ram_bytes);
-       ret = btrfs_add_delayed_qgroup_reserve(root->fs_info, trans,
-                       root->objectid, disk_bytenr, ram_bytes);
 out:
        btrfs_free_path(path);