Merge branch 'misc-cleanups-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git...
author Chris Mason <clm@fb.com>
Mon, 11 Jan 2016 14:08:37 +0000 (06:08 -0800)
committer Chris Mason <clm@fb.com>
Mon, 11 Jan 2016 14:08:37 +0000 (06:08 -0800)
Signed-off-by: Chris Mason <clm@fb.com>
39 files changed:
fs/btrfs/Makefile
fs/btrfs/acl.c
fs/btrfs/async-thread.c
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-ref.c
fs/btrfs/delayed-ref.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h [deleted file]
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/free-space-tree.c [new file with mode: 0644]
fs/btrfs/free-space-tree.h [new file with mode: 0644]
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tests/btrfs-tests.h
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/free-space-tests.c
fs/btrfs/tests/free-space-tree-tests.c [new file with mode: 0644]
fs/btrfs/tests/qgroup-tests.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-defrag.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
include/trace/events/btrfs.h

index 6d1d0b9..128ce17 100644 (file)
@@ -9,11 +9,12 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           export.o tree-log.o free-space-cache.o zlib.o lzo.o \
           compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
           reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-          uuid-tree.o props.o hash.o
+          uuid-tree.o props.o hash.o free-space-tree.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
        tests/extent-buffer-tests.o tests/btrfs-tests.o \
-       tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
+       tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
+       tests/free-space-tree-tests.o
index 9a0124a..dbbb8ed 100644 (file)
@@ -48,7 +48,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
 
        size = __btrfs_getxattr(inode, name, "", 0);
        if (size > 0) {
-               value = kzalloc(size, GFP_NOFS);
+               value = kzalloc(size, GFP_KERNEL);
                if (!value)
                        return ERR_PTR(-ENOMEM);
                size = __btrfs_getxattr(inode, name, value, size);
@@ -102,7 +102,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
 
        if (acl) {
                size = posix_acl_xattr_size(acl->a_count);
-               value = kmalloc(size, GFP_NOFS);
+               value = kmalloc(size, GFP_KERNEL);
                if (!value) {
                        ret = -ENOMEM;
                        goto out;
index 3e36e4a..88d9af3 100644 (file)
@@ -97,7 +97,7 @@ static struct __btrfs_workqueue *
 __btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active,
                         int thresh)
 {
-       struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+       struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
 
        if (!ret)
                return NULL;
@@ -148,7 +148,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name,
                                              int limit_active,
                                              int thresh)
 {
-       struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
+       struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
 
        if (!ret)
                return NULL;
index 0ef5cc1..61205e3 100644 (file)
@@ -192,6 +192,10 @@ struct btrfs_inode {
        /* File creation time. */
        struct timespec i_otime;
 
+       /* Hook into fs_info->delayed_iputs */
+       struct list_head delayed_iput;
+       long delayed_iput_count;
+
        struct inode vfs_inode;
 };
 
index cb7720f..769e0ff 100644 (file)
@@ -2248,7 +2248,6 @@ static void reada_for_search(struct btrfs_root *root,
        u64 target;
        u64 nread = 0;
        u64 gen;
-       int direction = path->reada;
        struct extent_buffer *eb;
        u32 nr;
        u32 blocksize;
@@ -2276,16 +2275,16 @@ static void reada_for_search(struct btrfs_root *root,
        nr = slot;
 
        while (1) {
-               if (direction < 0) {
+               if (path->reada == READA_BACK) {
                        if (nr == 0)
                                break;
                        nr--;
-               } else if (direction > 0) {
+               } else if (path->reada == READA_FORWARD) {
                        nr++;
                        if (nr >= nritems)
                                break;
                }
-               if (path->reada < 0 && objectid) {
+               if (path->reada == READA_BACK && objectid) {
                        btrfs_node_key(node, &disk_key, nr);
                        if (btrfs_disk_key_objectid(&disk_key) != objectid)
                                break;
@@ -2493,7 +2492,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
        btrfs_set_path_blocking(p);
 
        free_extent_buffer(tmp);
-       if (p->reada)
+       if (p->reada != READA_NONE)
                reada_for_search(root, p, level, slot, key->objectid);
 
        btrfs_release_path(p);
index 6202557..c5f40dc 100644 (file)
@@ -97,6 +97,9 @@ struct btrfs_ordered_sum;
 /* for storing items that use the BTRFS_UUID_KEY* types */
 #define BTRFS_UUID_TREE_OBJECTID 9ULL
 
+/* tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
@@ -175,7 +178,7 @@ struct btrfs_ordered_sum;
 /* csum types */
 #define BTRFS_CSUM_TYPE_CRC32  0
 
-static int btrfs_csum_sizes[] = { 4 };
+static const int btrfs_csum_sizes[] = { 4 };
 
 /* four bytes for CRC32 */
 #define BTRFS_EMPTY_DIR_SIZE 0
@@ -501,6 +504,8 @@ struct btrfs_super_block {
  * Compat flags that we support.  If any incompat flags are set other than the
  * ones specified below then we will fail to mount
  */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE        (1ULL << 0)
+
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF   (1ULL << 0)
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL  (1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS    (1ULL << 2)
@@ -527,7 +532,10 @@ struct btrfs_super_block {
 #define BTRFS_FEATURE_COMPAT_SUPP              0ULL
 #define BTRFS_FEATURE_COMPAT_SAFE_SET          0ULL
 #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR                0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SUPP           0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP                   \
+       (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE)
+
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET       0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR     0ULL
 
@@ -591,14 +599,15 @@ struct btrfs_node {
  * The slots array records the index of the item or block pointer
  * used while walking the tree.
  */
+enum { READA_NONE = 0, READA_BACK, READA_FORWARD };
 struct btrfs_path {
        struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
        int slots[BTRFS_MAX_LEVEL];
        /* if there is real range locking, this locks field will change */
-       int locks[BTRFS_MAX_LEVEL];
-       int reada;
+       u8 locks[BTRFS_MAX_LEVEL];
+       u8 reada;
        /* keep some upper locks as we walk down */
-       int lowest_level;
+       u8 lowest_level;
 
        /*
         * set by btrfs_split_item, tells search_slot to keep all locks
@@ -1089,6 +1098,13 @@ struct btrfs_block_group_item {
        __le64 flags;
 } __attribute__ ((__packed__));
 
+struct btrfs_free_space_info {
+       __le32 extent_count;
+       __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
 #define BTRFS_QGROUP_LEVEL_SHIFT               48
 static inline u64 btrfs_qgroup_level(u64 qgroupid)
 {
@@ -1297,6 +1313,9 @@ struct btrfs_caching_control {
        atomic_t count;
 };
 
+/* Once caching_thread() finds this much free space, it will wake up waiters. */
+#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2)
+
 struct btrfs_io_ctl {
        void *cur, *orig;
        struct page *page;
@@ -1322,8 +1341,20 @@ struct btrfs_block_group_cache {
        u64 delalloc_bytes;
        u64 bytes_super;
        u64 flags;
-       u64 sectorsize;
        u64 cache_generation;
+       u32 sectorsize;
+
+       /*
+        * If the free space extent count exceeds this number, convert the block
+        * group to bitmaps.
+        */
+       u32 bitmap_high_thresh;
+
+       /*
+        * If the free space extent count drops below this number, convert the
+        * block group back to extents.
+        */
+       u32 bitmap_low_thresh;
 
        /*
         * It is just used for the delayed data space allocation because
@@ -1379,6 +1410,15 @@ struct btrfs_block_group_cache {
        struct list_head io_list;
 
        struct btrfs_io_ctl io_ctl;
+
+       /* Lock for free space tree operations. */
+       struct mutex free_space_lock;
+
+       /*
+        * Does the block group need to be added to the free space tree?
+        * Protected by free_space_lock.
+        */
+       int needs_free_space;
 };
 
 /* delayed seq elem */
@@ -1430,6 +1470,7 @@ struct btrfs_fs_info {
        struct btrfs_root *csum_root;
        struct btrfs_root *quota_root;
        struct btrfs_root *uuid_root;
+       struct btrfs_root *free_space_root;
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
@@ -1817,6 +1858,8 @@ struct btrfs_fs_info {
         * and will be latter freed. Protected by fs_info->chunk_mutex.
         */
        struct list_head pinned_chunks;
+
+       int creating_free_space_tree;
 };
 
 struct btrfs_subvolume_writers {
@@ -2093,6 +2136,27 @@ struct btrfs_ioctl_defrag_range_args {
  */
 #define BTRFS_BLOCK_GROUP_ITEM_KEY 192
 
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents. A free space bitmap is keyed on
+ * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
+ * (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
 #define BTRFS_DEV_EXTENT_KEY   204
 #define BTRFS_DEV_ITEM_KEY     216
 #define BTRFS_CHUNK_ITEM_KEY   228
@@ -2185,6 +2249,7 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_RESCAN_UUID_TREE   (1 << 23)
 #define BTRFS_MOUNT_FRAGMENT_DATA      (1 << 24)
 #define BTRFS_MOUNT_FRAGMENT_METADATA  (1 << 25)
+#define BTRFS_MOUNT_FREE_SPACE_TREE    (1 << 26)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 #define BTRFS_DEFAULT_MAX_INLINE       (8192)
@@ -2507,6 +2572,11 @@ BTRFS_SETGET_FUNCS(disk_block_group_flags,
 BTRFS_SETGET_STACK_FUNCS(block_group_flags,
                        struct btrfs_block_group_item, flags, 64);
 
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+                  extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
 /* struct btrfs_inode_ref */
 BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
 BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
@@ -3574,6 +3644,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root);
 void check_system_chunk(struct btrfs_trans_handle *trans,
                        struct btrfs_root *root,
                        const u64 type);
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+                      struct btrfs_fs_info *info, u64 start, u64 end);
+
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
@@ -3738,6 +3811,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
        kfree(fs_info->csum_root);
        kfree(fs_info->quota_root);
        kfree(fs_info->uuid_root);
+       kfree(fs_info->free_space_root);
        kfree(fs_info->super_copy);
        kfree(fs_info->super_for_commit);
        security_free_mnt_opts(&fs_info->security_opts);
@@ -3907,7 +3981,6 @@ void btrfs_extent_item_to_extent_map(struct inode *inode,
 /* inode.c */
 struct btrfs_delalloc_work {
        struct inode *inode;
-       int wait;
        int delay_iput;
        struct completion completion;
        struct list_head list;
@@ -3915,7 +3988,7 @@ struct btrfs_delalloc_work {
 };
 
 struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-                                                   int wait, int delay_iput);
+                                                   int delay_iput);
 void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
 
 struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
@@ -4248,6 +4321,30 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info,
        }
 }
 
+#define btrfs_clear_fs_incompat(__fs_info, opt) \
+       __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info,
+                                            u64 flag)
+{
+       struct btrfs_super_block *disk_super;
+       u64 features;
+
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_incompat_flags(disk_super);
+       if (features & flag) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_incompat_flags(disk_super);
+               if (features & flag) {
+                       features &= ~flag;
+                       btrfs_set_super_incompat_flags(disk_super, features);
+                       btrfs_info(fs_info, "clearing %llu feature flag",
+                                        flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+}
+
 #define btrfs_fs_incompat(fs_info, opt) \
        __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
 
@@ -4258,6 +4355,64 @@ static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
        return !!(btrfs_super_incompat_flags(disk_super) & flag);
 }
 
+#define btrfs_set_fs_compat_ro(__fs_info, opt) \
+       __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info,
+                                           u64 flag)
+{
+       struct btrfs_super_block *disk_super;
+       u64 features;
+
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_compat_ro_flags(disk_super);
+       if (!(features & flag)) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_compat_ro_flags(disk_super);
+               if (!(features & flag)) {
+                       features |= flag;
+                       btrfs_set_super_compat_ro_flags(disk_super, features);
+                       btrfs_info(fs_info, "setting %llu ro feature flag",
+                                  flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+}
+
+#define btrfs_clear_fs_compat_ro(__fs_info, opt) \
+       __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info,
+                                             u64 flag)
+{
+       struct btrfs_super_block *disk_super;
+       u64 features;
+
+       disk_super = fs_info->super_copy;
+       features = btrfs_super_compat_ro_flags(disk_super);
+       if (features & flag) {
+               spin_lock(&fs_info->super_lock);
+               features = btrfs_super_compat_ro_flags(disk_super);
+               if (features & flag) {
+                       features &= ~flag;
+                       btrfs_set_super_compat_ro_flags(disk_super, features);
+                       btrfs_info(fs_info, "clearing %llu ro feature flag",
+                                  flag);
+               }
+               spin_unlock(&fs_info->super_lock);
+       }
+}
+
+#define btrfs_fs_compat_ro(fs_info, opt) \
+       __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
+{
+       struct btrfs_super_block *disk_super;
+       disk_super = fs_info->super_copy;
+       return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
+}
+
 /*
  * Call btrfs_abort_transaction as early as possible when an error condition is
  * detected, that way the exact line number is reported.
index e06dd75..914ac13 100644 (file)
@@ -493,12 +493,12 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs,
                                memcpy(&existing_ref->extent_op->key,
                                       &ref->extent_op->key,
                                       sizeof(ref->extent_op->key));
-                               existing_ref->extent_op->update_key = 1;
+                               existing_ref->extent_op->update_key = true;
                        }
                        if (ref->extent_op->update_flags) {
                                existing_ref->extent_op->flags_to_set |=
                                        ref->extent_op->flags_to_set;
-                               existing_ref->extent_op->update_flags = 1;
+                               existing_ref->extent_op->update_flags = true;
                        }
                        btrfs_free_delayed_extent_op(ref->extent_op);
                }
index 00ed02c..c24b653 100644 (file)
@@ -75,11 +75,11 @@ struct btrfs_delayed_ref_node {
 
 struct btrfs_delayed_extent_op {
        struct btrfs_disk_key key;
+       u8 level;
+       bool update_key;
+       bool update_flags;
+       bool is_data;
        u64 flags_to_set;
-       int level;
-       unsigned int update_key:1;
-       unsigned int update_flags:1;
-       unsigned int is_data:1;
 };
 
 /*
index e8eebe7..c67c129 100644 (file)
@@ -42,6 +42,7 @@
 #include "locking.h"
 #include "tree-log.h"
 #include "free-space-cache.h"
+#include "free-space-tree.h"
 #include "inode-map.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
@@ -362,7 +363,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
        }
 
        lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
-                        0, &cached_state);
+                        &cached_state);
        if (extent_buffer_uptodate(eb) &&
            btrfs_header_generation(eb) == parent_transid) {
                ret = 0;
@@ -1650,6 +1651,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
        if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
                return fs_info->uuid_root ? fs_info->uuid_root :
                                            ERR_PTR(-ENOENT);
+       if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID)
+               return fs_info->free_space_root ? fs_info->free_space_root :
+                                                 ERR_PTR(-ENOENT);
 again:
        root = btrfs_lookup_fs_root(fs_info, location->objectid);
        if (root) {
@@ -2148,6 +2152,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
        free_root_extent_buffers(info->uuid_root);
        if (chunk_root)
                free_root_extent_buffers(info->chunk_root);
+       free_root_extent_buffers(info->free_space_root);
 }
 
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
@@ -2448,6 +2453,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
                fs_info->uuid_root = root;
        }
 
+       if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID;
+               root = btrfs_read_tree_root(tree_root, &location);
+               if (IS_ERR(root))
+                       return PTR_ERR(root);
+               set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+               fs_info->free_space_root = root;
+       }
+
        return 0;
 }
 
@@ -2668,6 +2682,7 @@ int open_ctree(struct super_block *sb,
        if (btrfs_check_super_csum(bh->b_data)) {
                printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                err = -EINVAL;
+               brelse(bh);
                goto fail_alloc;
        }
 
@@ -3051,6 +3066,18 @@ retry_root_backup:
        if (sb->s_flags & MS_RDONLY)
                return 0;
 
+       if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) &&
+           !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               pr_info("BTRFS: creating free space tree\n");
+               ret = btrfs_create_free_space_tree(fs_info);
+               if (ret) {
+                       pr_warn("BTRFS: failed to create free space tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       }
+
        down_read(&fs_info->cleanup_work_sem);
        if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) ||
            (ret = btrfs_orphan_cleanup(fs_info->tree_root))) {
@@ -3076,6 +3103,18 @@ retry_root_backup:
 
        btrfs_qgroup_rescan_resume(fs_info);
 
+       if (btrfs_test_opt(tree_root, CLEAR_CACHE) &&
+           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
+               pr_info("BTRFS: clearing free space tree\n");
+               ret = btrfs_clear_free_space_tree(fs_info);
+               if (ret) {
+                       pr_warn("BTRFS: failed to clear free space tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       }
+
        if (!fs_info->uuid_root) {
                pr_info("BTRFS: creating UUID tree\n");
                ret = btrfs_create_uuid_tree(fs_info);
@@ -3902,11 +3941,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
        return !ret;
 }
 
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
-{
-       return set_extent_buffer_uptodate(buf);
-}
-
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 {
        struct btrfs_root *root;
index a407d1b..8e79d00 100644 (file)
@@ -116,7 +116,6 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root)
 void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
 int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
                          int atomic);
-int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, char *result);
index 1ea56d8..03d29ea 100644 (file)
@@ -33,6 +33,7 @@
 #include "raid56.h"
 #include "locking.h"
 #include "free-space-cache.h"
+#include "free-space-tree.h"
 #include "math.h"
 #include "sysfs.h"
 #include "qgroup.h"
@@ -357,8 +358,8 @@ static void fragment_free_space(struct btrfs_root *root,
  * we need to check the pinned_extents for any extents that can't be used yet
  * since their free space will be released as soon as the transaction commits.
  */
-static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
-                             struct btrfs_fs_info *info, u64 start, u64 end)
+u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+                      struct btrfs_fs_info *info, u64 start, u64 end)
 {
        u64 extent_start, extent_end, size, total_added = 0;
        int ret;
@@ -395,11 +396,10 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
        return total_added;
 }
 
-static noinline void caching_thread(struct btrfs_work *work)
+static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
 {
        struct btrfs_block_group_cache *block_group;
        struct btrfs_fs_info *fs_info;
-       struct btrfs_caching_control *caching_ctl;
        struct btrfs_root *extent_root;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
@@ -407,17 +407,16 @@ static noinline void caching_thread(struct btrfs_work *work)
        u64 total_found = 0;
        u64 last = 0;
        u32 nritems;
-       int ret = -ENOMEM;
+       int ret;
        bool wakeup = true;
 
-       caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
        fs_info = block_group->fs_info;
        extent_root = fs_info->extent_root;
 
        path = btrfs_alloc_path();
        if (!path)
-               goto out;
+               return -ENOMEM;
 
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
@@ -438,20 +437,16 @@ static noinline void caching_thread(struct btrfs_work *work)
         */
        path->skip_locking = 1;
        path->search_commit_root = 1;
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        key.objectid = last;
        key.offset = 0;
        key.type = BTRFS_EXTENT_ITEM_KEY;
-again:
-       mutex_lock(&caching_ctl->mutex);
-       /* need to make sure the commit_root doesn't disappear */
-       down_read(&fs_info->commit_root_sem);
 
 next:
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
-               goto err;
+               goto out;
 
        leaf = path->nodes[0];
        nritems = btrfs_header_nritems(leaf);
@@ -477,12 +472,14 @@ next:
                                up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
-                               goto again;
+                               mutex_lock(&caching_ctl->mutex);
+                               down_read(&fs_info->commit_root_sem);
+                               goto next;
                        }
 
                        ret = btrfs_next_leaf(extent_root, path);
                        if (ret < 0)
-                               goto err;
+                               goto out;
                        if (ret)
                                break;
                        leaf = path->nodes[0];
@@ -521,7 +518,7 @@ next:
                        else
                                last = key.objectid + key.offset;
 
-                       if (total_found > SZ_2M) {
+                       if (total_found > CACHING_CTL_WAKE_UP) {
                                total_found = 0;
                                if (wakeup)
                                        wake_up(&caching_ctl->wait);
@@ -534,9 +531,37 @@ next:
        total_found += add_new_free_space(block_group, fs_info, last,
                                          block_group->key.objectid +
                                          block_group->key.offset);
+       caching_ctl->progress = (u64)-1;
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static noinline void caching_thread(struct btrfs_work *work)
+{
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_fs_info *fs_info;
+       struct btrfs_caching_control *caching_ctl;
+       struct btrfs_root *extent_root;
+       int ret;
+
+       caching_ctl = container_of(work, struct btrfs_caching_control, work);
+       block_group = caching_ctl->block_group;
+       fs_info = block_group->fs_info;
+       extent_root = fs_info->extent_root;
+
+       mutex_lock(&caching_ctl->mutex);
+       down_read(&fs_info->commit_root_sem);
+
+       if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+               ret = load_free_space_tree(caching_ctl);
+       else
+               ret = load_extent_tree_free(caching_ctl);
+
        spin_lock(&block_group->lock);
        block_group->caching_ctl = NULL;
-       block_group->cached = BTRFS_CACHE_FINISHED;
+       block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED;
        spin_unlock(&block_group->lock);
 
 #ifdef CONFIG_BTRFS_DEBUG
@@ -555,20 +580,11 @@ next:
 #endif
 
        caching_ctl->progress = (u64)-1;
-err:
-       btrfs_free_path(path);
-       up_read(&fs_info->commit_root_sem);
-
-       free_excluded_extents(extent_root, block_group);
 
+       up_read(&fs_info->commit_root_sem);
+       free_excluded_extents(fs_info->extent_root, block_group);
        mutex_unlock(&caching_ctl->mutex);
-out:
-       if (ret) {
-               spin_lock(&block_group->lock);
-               block_group->caching_ctl = NULL;
-               block_group->cached = BTRFS_CACHE_ERROR;
-               spin_unlock(&block_group->lock);
-       }
+
        wake_up(&caching_ctl->wait);
 
        put_caching_control(caching_ctl);
@@ -680,8 +696,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                }
        } else {
                /*
-                * We are not going to do the fast caching, set cached to the
-                * appropriate value and wakeup any waiters.
+                * We're either using the free space tree or no caching at all.
+                * Set cached to the appropriate value and wakeup any waiters.
                 */
                spin_lock(&cache->lock);
                if (load_cache_only) {
@@ -2115,7 +2131,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       path->reada = 1;
+       path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        /* this will setup the path even if it fails to insert the back ref */
        ret = insert_inline_extent_backref(trans, fs_info->extent_root, path,
@@ -2141,7 +2157,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 
-       path->reada = 1;
+       path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        /* now insert the actual backref */
        ret = insert_extent_backref(trans, root->fs_info->extent_root,
@@ -2254,7 +2270,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
        }
 
 again:
-       path->reada = 1;
+       path->reada = READA_FORWARD;
        path->leave_spinning = 1;
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key,
                                path, 0, 1);
@@ -2910,6 +2926,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
        if (trans->aborted)
                return 0;
 
+       if (root->fs_info->creating_free_space_tree)
+               return 0;
+
        if (root == root->fs_info->extent_root)
                root = root->fs_info->tree_root;
 
@@ -2988,9 +3007,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        extent_op->flags_to_set = flags;
-       extent_op->update_flags = 1;
-       extent_op->update_key = 0;
-       extent_op->is_data = is_data ? 1 : 0;
+       extent_op->update_flags = true;
+       extent_op->update_key = false;
+       extent_op->is_data = is_data ? true : false;
        extent_op->level = level;
 
        ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
@@ -3684,11 +3703,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                return -ENOMEM;
 
        /*
-        * We don't need the lock here since we are protected by the transaction
-        * commit.  We want to do the cache_save_setup first and then run the
+        * Even though we are in the critical section of the transaction commit,
+        * we can still have concurrent tasks adding elements to this
+        * transaction's list of dirty block groups. These tasks correspond to
+        * endio free space workers started when writeback finishes for a
+        * space cache, which run inode.c:btrfs_finish_ordered_io(), and can
+        * allocate new block groups as a result of COWing nodes of the root
+        * tree when updating the free space inode. The writeback for the space
+        * caches is triggered by an earlier call to
+        * btrfs_start_dirty_block_groups() and iterations of the following
+        * loop.
+        * Also we want to do the cache_save_setup first and then run the
         * delayed refs to make sure we have the best chance at doing this all
         * in one shot.
         */
+       spin_lock(&cur_trans->dirty_bgs_lock);
        while (!list_empty(&cur_trans->dirty_bgs)) {
                cache = list_first_entry(&cur_trans->dirty_bgs,
                                         struct btrfs_block_group_cache,
@@ -3700,11 +3729,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 * finish and then do it all again
                 */
                if (!list_empty(&cache->io_list)) {
+                       spin_unlock(&cur_trans->dirty_bgs_lock);
                        list_del_init(&cache->io_list);
                        btrfs_wait_cache_io(root, trans, cache,
                                            &cache->io_ctl, path,
                                            cache->key.objectid);
                        btrfs_put_block_group(cache);
+                       spin_lock(&cur_trans->dirty_bgs_lock);
                }
 
                /*
@@ -3712,6 +3743,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                 * on any pending IO
                 */
                list_del_init(&cache->dirty_list);
+               spin_unlock(&cur_trans->dirty_bgs_lock);
                should_put = 1;
 
                cache_save_setup(cache, trans, path);
@@ -3743,7 +3775,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                /* if its not on the io list, we need to put the block group */
                if (should_put)
                        btrfs_put_block_group(cache);
+               spin_lock(&cur_trans->dirty_bgs_lock);
        }
+       spin_unlock(&cur_trans->dirty_bgs_lock);
 
        while (!list_empty(io)) {
                cache = list_first_entry(io, struct btrfs_block_group_cache,
@@ -6435,7 +6469,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       path->reada = 1;
+       path->reada = READA_FORWARD;
        path->leave_spinning = 1;
 
        is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
@@ -6658,6 +6692,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
+               ret = add_to_free_space_tree(trans, root->fs_info, bytenr,
+                                            num_bytes);
+               if (ret) {
+                       btrfs_abort_transaction(trans, extent_root, ret);
+                       goto out;
+               }
+
                ret = update_block_group(trans, root, bytenr, num_bytes, 0);
                if (ret) {
                        btrfs_abort_transaction(trans, extent_root, ret);
@@ -7669,6 +7710,11 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
+       ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+                                         ins->offset);
+       if (ret)
+               return ret;
+
        ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
@@ -7749,6 +7795,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
+       ret = remove_from_free_space_tree(trans, fs_info, ins->objectid,
+                                         num_bytes);
+       if (ret)
+               return ret;
+
        ret = update_block_group(trans, root, ins->objectid, root->nodesize,
                                 1);
        if (ret) { /* -ENOENT, logic error */
@@ -7831,7 +7882,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
 
        btrfs_set_lock_blocking(buf);
-       btrfs_set_buffer_uptodate(buf);
+       set_extent_buffer_uptodate(buf);
 
        if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
                buf->log_index = root->log_transid % 2;
@@ -7977,12 +8028,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
                else
                        memset(&extent_op->key, 0, sizeof(extent_op->key));
                extent_op->flags_to_set = flags;
-               if (skinny_metadata)
-                       extent_op->update_key = 0;
-               else
-                       extent_op->update_key = 1;
-               extent_op->update_flags = 1;
-               extent_op->is_data = 0;
+               extent_op->update_key = skinny_metadata ? false : true;
+               extent_op->update_flags = true;
+               extent_op->is_data = false;
                extent_op->level = level;
 
                ret = btrfs_add_delayed_tree_ref(root->fs_info, trans,
@@ -9653,6 +9701,8 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
        cache->full_stripe_len = btrfs_full_stripe_len(root,
                                               &root->fs_info->mapping_tree,
                                               start);
+       set_free_space_tree_thresholds(cache);
+
        atomic_set(&cache->count, 1);
        spin_lock_init(&cache->lock);
        init_rwsem(&cache->data_rwsem);
@@ -9664,6 +9714,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size)
        INIT_LIST_HEAD(&cache->io_list);
        btrfs_init_free_space_ctl(cache);
        atomic_set(&cache->trimming, 0);
+       mutex_init(&cache->free_space_lock);
 
        return cache;
 }
@@ -9688,7 +9739,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
        if (btrfs_test_opt(root, SPACE_CACHE) &&
@@ -9874,6 +9925,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
                                               key.objectid, key.offset);
                if (ret)
                        btrfs_abort_transaction(trans, extent_root, ret);
+               add_block_group_free_space(trans, root->fs_info, block_group);
+               /* already aborted the transaction if it failed. */
 next:
                list_del_init(&block_group->bg_list);
        }
@@ -9904,6 +9957,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        cache->flags = type;
        cache->last_byte_to_unpin = (u64)-1;
        cache->cached = BTRFS_CACHE_FINISHED;
+       cache->needs_free_space = 1;
        ret = exclude_super_stripes(root, cache);
        if (ret) {
                /*
@@ -10274,6 +10328,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
        unlock_chunks(root);
 
+       ret = remove_block_group_free_space(trans, root->fs_info, block_group);
+       if (ret)
+               goto out;
+
        btrfs_put_block_group(block_group);
        btrfs_put_block_group(block_group);
 
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
deleted file mode 100644 (file)
index e69de29..0000000
index 43a5c5b..2e7c97a 100644 (file)
@@ -1285,20 +1285,6 @@ search_again:
 }
 
 /* wrappers around set/clear extent bit */
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
-                    gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL,
-                             NULL, mask);
-}
-
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                   unsigned bits, gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, bits, NULL,
-                             NULL, mask);
-}
-
 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                           unsigned bits, gfp_t mask,
                           struct extent_changeset *changeset)
@@ -1323,17 +1309,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                                  cached, mask, NULL);
 }
 
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                     unsigned bits, gfp_t mask)
-{
-       int wake = 0;
-
-       if (bits & EXTENT_LOCKED)
-               wake = 1;
-
-       return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
-}
-
 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                             unsigned bits, gfp_t mask,
                             struct extent_changeset *changeset)
@@ -1348,63 +1323,18 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                                  changeset);
 }
 
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
-                       struct extent_state **cached_state, gfp_t mask)
-{
-       return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_UPTODATE,
-                             NULL, cached_state, mask);
-}
-
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
-                     struct extent_state **cached_state, gfp_t mask)
-{
-       return set_extent_bit(tree, start, end,
-                             EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
-                             NULL, cached_state, mask);
-}
-
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end,
-                               EXTENT_DIRTY | EXTENT_DELALLOC |
-                               EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask);
-}
-
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-                    gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_NEW, NULL,
-                             NULL, mask);
-}
-
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       struct extent_state **cached_state, gfp_t mask)
-{
-       return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL,
-                             cached_state, mask);
-}
-
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                         struct extent_state **cached_state, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
-                               cached_state, mask);
-}
-
 /*
  * either insert or lock state struct between start and end use mask to tell
  * us if waiting is desired.
  */
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                    unsigned bits, struct extent_state **cached_state)
+                    struct extent_state **cached_state)
 {
        int err;
        u64 failed_start;
 
        while (1) {
-               err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits,
+               err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
                                       EXTENT_LOCKED, &failed_start,
                                       cached_state, GFP_NOFS, NULL);
                if (err == -EEXIST) {
@@ -1417,11 +1347,6 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
        return err;
 }
 
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
-       return lock_extent_bits(tree, start, end, 0, NULL);
-}
-
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
 {
        int err;
@@ -1438,20 +1363,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
        return 1;
 }
 
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
-                        struct extent_state **cached, gfp_t mask)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
-                               mask);
-}
-
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
-{
-       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL,
-                               GFP_NOFS);
-}
-
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
 {
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1464,10 +1376,9 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       return 0;
 }
 
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
 {
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1481,13 +1392,12 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       return 0;
 }
 
 /*
  * helper function to set both pages and extents in the tree writeback
  */
-static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
+static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
 {
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
@@ -1500,7 +1410,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
                page_cache_release(page);
                index++;
        }
-       return 0;
 }
 
 /* find the first state struct with 'bits' set after 'start', and
@@ -1800,7 +1709,7 @@ again:
        BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */
 
        /* step three, lock the state bits for the whole range */
-       lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state);
+       lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state);
 
        /* then test to make sure it is all still delalloc */
        ret = test_range_bit(tree, delalloc_start, delalloc_end,
@@ -1820,7 +1729,7 @@ out_failed:
        return found;
 }
 
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
                                 struct page *locked_page,
                                 unsigned clear_bits,
                                 unsigned long page_ops)
@@ -1835,7 +1744,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
 
        clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
        if (page_ops == 0)
-               return 0;
+               return;
 
        if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0)
                mapping_set_error(inode->i_mapping, -EIO);
@@ -1869,7 +1778,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
                index += ret;
                cond_resched();
        }
-       return 0;
 }
 
 /*
@@ -2516,7 +2424,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
 
 /* lots and lots of room for performance fixes in the end_bio funcs */
 
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
 {
        int uptodate = (err == 0);
        struct extent_io_tree *tree;
@@ -2537,7 +2445,6 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
                ret = ret < 0 ? ret : -EIO;
                mapping_set_error(page->mapping, ret);
        }
-       return 0;
 }
 
 /*
@@ -2579,9 +2486,7 @@ static void end_bio_extent_writepage(struct bio *bio)
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
 
-               if (end_extent_writepage(page, bio->bi_error, start, end))
-                       continue;
-
+               end_extent_writepage(page, bio->bi_error, start, end);
                end_page_writeback(page);
        }
 
@@ -4326,7 +4231,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
        if (start > end)
                return 0;
 
-       lock_extent_bits(tree, start, end, 0, &cached_state);
+       lock_extent_bits(tree, start, end, &cached_state);
        wait_on_page_writeback(page);
        clear_extent_bit(tree, start, end,
                         EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
@@ -4536,7 +4441,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                last_for_get_extent = isize;
        }
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1,
                         &cached_state);
 
        em = get_extent_skip_holes(inode, start, last_for_get_extent,
@@ -4797,24 +4702,14 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
        return new;
 }
 
-struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-                                               u64 start)
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+                                                 u64 start, unsigned long len)
 {
        struct extent_buffer *eb;
-       unsigned long len;
        unsigned long num_pages;
        unsigned long i;
 
-       if (!fs_info) {
-               /*
-                * Called only from tests that don't always have a fs_info
-                * available, but we know that nodesize is 4096
-                */
-               len = 4096;
-       } else {
-               len = fs_info->tree_root->nodesize;
-       }
-       num_pages = num_extent_pages(0, len);
+       num_pages = num_extent_pages(start, len);
 
        eb = __alloc_extent_buffer(fs_info, start, len);
        if (!eb)
@@ -4837,6 +4732,24 @@ err:
        return NULL;
 }
 
+/*
+ * Allocate a dummy extent buffer at @start, choosing its length here.
+ *
+ * With a NULL @fs_info the length is 4096: that path is used only by tests
+ * that have no fs_info available, where the nodesize is known to be 4096.
+ * Otherwise the length is fs_info->tree_root->nodesize.  The allocation
+ * itself is delegated to __alloc_dummy_extent_buffer().
+ */
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+                                               u64 start)
+{
+       /* Test-only default, replaced whenever a real fs_info is given. */
+       unsigned long len = 4096;
+
+       if (fs_info)
+               len = fs_info->tree_root->nodesize;
+
+       return __alloc_dummy_extent_buffer(fs_info, start, len);
+}
+
 static void check_buffer_tree_ref(struct extent_buffer *eb)
 {
        int refs;
@@ -5227,7 +5140,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb)
        return was_dirty;
 }
 
-int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+void clear_extent_buffer_uptodate(struct extent_buffer *eb)
 {
        unsigned long i;
        struct page *page;
@@ -5240,10 +5153,9 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb)
                if (page)
                        ClearPageUptodate(page);
        }
-       return 0;
 }
 
-int set_extent_buffer_uptodate(struct extent_buffer *eb)
+void set_extent_buffer_uptodate(struct extent_buffer *eb)
 {
        unsigned long i;
        struct page *page;
@@ -5255,7 +5167,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb)
                page = eb->pages[i];
                SetPageUptodate(page);
        }
-       return 0;
 }
 
 int extent_buffer_uptodate(struct extent_buffer *eb)
@@ -5594,6 +5505,155 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
        }
 }
 
+/*
+ * The extent buffer bitmap operations are done with byte granularity because
+ * bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+/* Index of the byte that holds bit number @nr. */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+/* A value with the low BITS_PER_BYTE bits set. */
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+/*
+ * Mask for the first byte of a run: BYTE_MASK shifted up by the position of
+ * @start within its byte, then truncated back to a single byte.
+ */
+#define BITMAP_FIRST_BYTE_MASK(start) \
+       ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+/*
+ * Mask for the last byte of a run ending at bit @nbits: BYTE_MASK shifted
+ * down so only the bits below the end remain (all of them when @nbits is a
+ * multiple of BITS_PER_BYTE).
+ */
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+       (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+/*
+ * eb_bitmap_offset() - locate the byte of a bitmap item that holds a given bit
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number
+ * @page_index: set to the index of the extent buffer page containing the bit
+ * @page_offset: set to the byte offset inside the page given by page_index
+ *
+ * Turns a (bitmap item, bit number) pair into a page index plus an offset in
+ * that page, so callers do not have to repeat the arithmetic.
+ */
+static inline void eb_bitmap_offset(struct extent_buffer *eb,
+                                   unsigned long start, unsigned long nr,
+                                   unsigned long *page_index,
+                                   size_t *page_offset)
+{
+       /* Offset of the extent buffer's start within a page. */
+       const size_t eb_in_page = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+       /*
+        * Wanted byte = offset of the extent buffer + offset of the bitmap
+        * item in the extent buffer + index of the byte in the bitmap item.
+        */
+       const size_t byte_pos = eb_in_page + start + BIT_BYTE(nr);
+
+       *page_index = byte_pos >> PAGE_CACHE_SHIFT;
+       *page_offset = byte_pos & (PAGE_CACHE_SIZE - 1);
+}
+
+/**
+ * extent_buffer_test_bit - read a single bit of a bitmap item
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @nr: bit number to test
+ *
+ * Returns 1 if the bit is set and 0 otherwise.  Warns when the page holding
+ * the bit is not uptodate.
+ */
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+                          unsigned long nr)
+{
+       /* Position of the bit inside the byte that contains it. */
+       const unsigned long bit_in_byte = nr & (BITS_PER_BYTE - 1);
+       unsigned long pg_idx;
+       size_t byte_off;
+       struct page *pg;
+       char *bytes;
+
+       eb_bitmap_offset(eb, start, nr, &pg_idx, &byte_off);
+       pg = eb->pages[pg_idx];
+       WARN_ON(!PageUptodate(pg));
+       bytes = page_address(pg);
+
+       return 1U & (bytes[byte_off] >> bit_in_byte);
+}
+
+/**
+ * extent_buffer_bitmap_set - set a run of bits in a bitmap item
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ *
+ * The run is processed one byte at a time, stepping to the next page of the
+ * extent buffer whenever bits remain past the end of the current page.
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                             unsigned long pos, unsigned long len)
+{
+       const unsigned int end_bit = pos + len;
+       /* The first byte may be partial: only bits from @pos upwards count. */
+       int chunk = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+       unsigned int mask = BITMAP_FIRST_BYTE_MASK(pos);
+       unsigned long pg_idx;
+       size_t byte_off;
+       struct page *pg;
+       char *bytes;
+
+       eb_bitmap_offset(eb, start, pos, &pg_idx, &byte_off);
+       pg = eb->pages[pg_idx];
+       WARN_ON(!PageUptodate(pg));
+       bytes = page_address(pg);
+
+       /* After the first byte, each step covers a whole byte. */
+       for (; len >= chunk; chunk = BITS_PER_BYTE, mask = ~0U) {
+               bytes[byte_off] |= mask;
+               len -= chunk;
+               byte_off++;
+               if (byte_off < PAGE_CACHE_SIZE || len == 0)
+                       continue;
+
+               /* Bits remain and this page is exhausted: move on. */
+               byte_off = 0;
+               pg = eb->pages[++pg_idx];
+               WARN_ON(!PageUptodate(pg));
+               bytes = page_address(pg);
+       }
+
+       /* Trailing partial byte: keep only the bits below the end of the run. */
+       if (len)
+               bytes[byte_off] |= mask & BITMAP_LAST_BYTE_MASK(end_bit);
+}
+
+
+/**
+ * extent_buffer_bitmap_clear - clear a run of bits in a bitmap item
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ *
+ * The run is processed one byte at a time, stepping to the next page of the
+ * extent buffer whenever bits remain past the end of the current page.
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                               unsigned long pos, unsigned long len)
+{
+       const unsigned int end_bit = pos + len;
+       /* The first byte may be partial: only bits from @pos upwards count. */
+       int chunk = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+       unsigned int mask = BITMAP_FIRST_BYTE_MASK(pos);
+       unsigned long pg_idx;
+       size_t byte_off;
+       struct page *pg;
+       char *bytes;
+
+       eb_bitmap_offset(eb, start, pos, &pg_idx, &byte_off);
+       pg = eb->pages[pg_idx];
+       WARN_ON(!PageUptodate(pg));
+       bytes = page_address(pg);
+
+       /* After the first byte, each step covers a whole byte. */
+       for (; len >= chunk; chunk = BITS_PER_BYTE, mask = ~0U) {
+               bytes[byte_off] &= ~mask;
+               len -= chunk;
+               byte_off++;
+               if (byte_off < PAGE_CACHE_SIZE || len == 0)
+                       continue;
+
+               /* Bits remain and this page is exhausted: move on. */
+               byte_off = 0;
+               pg = eb->pages[++pg_idx];
+               WARN_ON(!PageUptodate(pg));
+               bytes = page_address(pg);
+       }
+
+       /* Trailing partial byte: touch only the bits below the end of the run. */
+       if (len)
+               bytes[byte_off] &= ~(mask & BITMAP_LAST_BYTE_MASK(end_bit));
+}
+
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
 {
        unsigned long distance = (src > dst) ? src - dst : dst - src;
index f4c1ae1..0377413 100644 (file)
@@ -199,12 +199,14 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                               struct extent_io_tree *tree, struct page *page,
                               gfp_t mask);
 int try_release_extent_buffer(struct page *page);
-int lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                    unsigned bits, struct extent_state **cached);
-int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end);
-int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
-                        struct extent_state **cached, gfp_t mask);
+                    struct extent_state **cached);
+
+/*
+ * Lock the extent range between @start and @end in @tree.  Thin wrapper
+ * around lock_extent_bits() that passes no cached extent state.
+ */
+static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       struct extent_state **no_cached_state = NULL;
+
+       return lock_extent_bits(tree, start, end, no_cached_state);
+}
+
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
                          get_extent_t *get_extent, int mirror_num);
@@ -221,39 +223,105 @@ void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   unsigned bits, int filled,
                   struct extent_state *cached_state);
-int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                     unsigned bits, gfp_t mask);
 int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                             unsigned bits, gfp_t mask,
                             struct extent_changeset *changeset);
 int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                     unsigned bits, int wake, int delete,
                     struct extent_state **cached, gfp_t mask);
-int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
-                   unsigned bits, gfp_t mask);
+
+/*
+ * Clear EXTENT_LOCKED between @start and @end in @tree.  Calls
+ * clear_extent_bit() with wake = 1, delete = 0, no cached state and a
+ * GFP_NOFS mask, and returns its result.
+ */
+static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
+{
+       const int wake = 1;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, wake, delete,
+                               NULL, GFP_NOFS);
+}
+
+/*
+ * Clear EXTENT_LOCKED between @start and @end in @tree, forwarding the
+ * caller's @cached state pointer and allocation @mask.  Calls
+ * clear_extent_bit() with wake = 1 and delete = 0 and returns its result.
+ */
+static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
+               u64 end, struct extent_state **cached, gfp_t mask)
+{
+       const int wake = 1;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_LOCKED, wake, delete,
+                               cached, mask);
+}
+
+/*
+ * Clear @bits between @start and @end in @tree without a cached state.
+ * The wake argument of clear_extent_bit() is 1 only when EXTENT_LOCKED is
+ * among the bits being cleared; delete is always 0.
+ */
+static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
+               u64 end, unsigned bits, gfp_t mask)
+{
+       const int wake = (bits & EXTENT_LOCKED) ? 1 : 0;
+
+       return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask);
+}
+
 int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
                           unsigned bits, gfp_t mask,
                           struct extent_changeset *changeset);
 int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                   unsigned bits, u64 *failed_start,
                   struct extent_state **cached_state, gfp_t mask);
-int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                       struct extent_state **cached_state, gfp_t mask);
-int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
-                         struct extent_state **cached_state, gfp_t mask);
-int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
-                  gfp_t mask);
-int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
-                    gfp_t mask);
-int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
-                      gfp_t mask);
+
+/*
+ * Set @bits between @start and @end in @tree.  Calls set_extent_bit() with
+ * neither a failed_start output nor a cached state and returns its result.
+ */
+static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
+               u64 end, unsigned bits, gfp_t mask)
+{
+       u64 *no_failed_start = NULL;
+       struct extent_state **no_cached_state = NULL;
+
+       return set_extent_bit(tree, start, end, bits, no_failed_start,
+                             no_cached_state, mask);
+}
+
+/*
+ * Clear EXTENT_UPTODATE between @start and @end in @tree, forwarding
+ * @cached_state and @mask.  Calls clear_extent_bit() with wake = 0 and
+ * delete = 0 and returns its result.
+ */
+static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
+               u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+       const int wake = 0;
+       const int delete = 0;
+
+       return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, wake,
+                               delete, cached_state, mask);
+}
+
+/*
+ * Set EXTENT_DIRTY between @start and @end in @tree.  Calls set_extent_bit()
+ * with neither a failed_start output nor a cached state.
+ */
+static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
+               u64 end, gfp_t mask)
+{
+       const unsigned bits = EXTENT_DIRTY;
+
+       return set_extent_bit(tree, start, end, bits, NULL, NULL, mask);
+}
+
+/*
+ * Clear EXTENT_DIRTY, EXTENT_DELALLOC and EXTENT_DO_ACCOUNTING between
+ * @start and @end in @tree.  Calls clear_extent_bit() with wake = 0,
+ * delete = 0 and no cached state.
+ */
+static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
+               u64 end, gfp_t mask)
+{
+       const unsigned bits = EXTENT_DIRTY | EXTENT_DELALLOC |
+                             EXTENT_DO_ACCOUNTING;
+
+       return clear_extent_bit(tree, start, end, bits, 0, 0, NULL, mask);
+}
+
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                       unsigned bits, unsigned clear_bits,
                       struct extent_state **cached_state, gfp_t mask);
-int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
-                       struct extent_state **cached_state, gfp_t mask);
-int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
-                     struct extent_state **cached_state, gfp_t mask);
+
+/*
+ * Set EXTENT_DELALLOC and EXTENT_UPTODATE between @start and @end in @tree,
+ * forwarding @cached_state and @mask to set_extent_bit().  No failed_start
+ * output is requested.
+ */
+static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
+               u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+       const unsigned bits = EXTENT_DELALLOC | EXTENT_UPTODATE;
+
+       return set_extent_bit(tree, start, end, bits, NULL, cached_state,
+                             mask);
+}
+
+/*
+ * Set EXTENT_DELALLOC, EXTENT_UPTODATE and EXTENT_DEFRAG between @start and
+ * @end in @tree, forwarding @cached_state and @mask to set_extent_bit().
+ * No failed_start output is requested.
+ */
+static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
+               u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+       const unsigned bits = EXTENT_DELALLOC | EXTENT_UPTODATE |
+                             EXTENT_DEFRAG;
+
+       return set_extent_bit(tree, start, end, bits, NULL, cached_state,
+                             mask);
+}
+
+/*
+ * Set EXTENT_NEW between @start and @end in @tree.  Calls set_extent_bit()
+ * with neither a failed_start output nor a cached state.
+ */
+static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
+               u64 end, gfp_t mask)
+{
+       const unsigned bits = EXTENT_NEW;
+
+       return set_extent_bit(tree, start, end, bits, NULL, NULL, mask);
+}
+
+/*
+ * Set EXTENT_UPTODATE between @start and @end in @tree, forwarding
+ * @cached_state and @mask to set_extent_bit().  No failed_start output is
+ * requested.
+ */
+static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
+               u64 end, struct extent_state **cached_state, gfp_t mask)
+{
+       const unsigned bits = EXTENT_UPTODATE;
+
+       return set_extent_bit(tree, start, end, bits, NULL, cached_state,
+                             mask);
+}
+
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
                          u64 *start_ret, u64 *end_ret, unsigned bits,
                          struct extent_state **cached_state);
@@ -282,8 +350,10 @@ void set_page_extent_mapped(struct page *page);
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
                                          u64 start);
+struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+                                                 u64 start, unsigned long len);
 struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
-               u64 start);
+                                               u64 start);
 struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
 struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
                                         u64 start);
@@ -328,19 +398,25 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                           unsigned long src_offset, unsigned long len);
 void memset_extent_buffer(struct extent_buffer *eb, char c,
                          unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+                          unsigned long pos);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+                             unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+                               unsigned long pos, unsigned long len);
 void clear_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_dirty(struct extent_buffer *eb);
-int set_extent_buffer_uptodate(struct extent_buffer *eb);
-int clear_extent_buffer_uptodate(struct extent_buffer *eb);
+void set_extent_buffer_uptodate(struct extent_buffer *eb);
+void clear_extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_under_io(struct extent_buffer *eb);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long min_len, char **map,
                      unsigned long *map_start,
                      unsigned long *map_len);
-int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
+void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
                                 struct page *locked_page,
                                 unsigned bits_to_clear,
                                 unsigned long page_ops);
@@ -357,7 +433,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
                      int mirror_num);
 int clean_io_failure(struct inode *inode, u64 start, struct page *page,
                     unsigned int pg_offset);
-int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
+void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
                         int mirror_num);
 
index 58ece65..a67e1c8 100644 (file)
@@ -202,7 +202,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
        }
 
        if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
-               path->reada = 2;
+               path->reada = READA_FORWARD;
 
        WARN_ON(bio->bi_vcnt <= 0);
 
@@ -328,7 +328,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 
        if (search_commit) {
                path->skip_locking = 1;
-               path->reada = 2;
+               path->reada = READA_FORWARD;
                path->search_commit_root = 1;
        }
 
index 0f09526..364e0f1 100644 (file)
@@ -1394,7 +1394,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
        if (start_pos < inode->i_size) {
                struct btrfs_ordered_extent *ordered;
                lock_extent_bits(&BTRFS_I(inode)->io_tree,
-                                start_pos, last_pos, 0, cached_state);
+                                start_pos, last_pos, cached_state);
                ordered = btrfs_lookup_ordered_range(inode, start_pos,
                                                     last_pos - start_pos + 1);
                if (ordered &&
@@ -2398,7 +2398,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
                truncate_pagecache_range(inode, lockstart, lockend);
 
                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, &cached_state);
+                                &cached_state);
                ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
 
                /*
@@ -2705,7 +2705,7 @@ static long btrfs_fallocate(struct file *file, int mode,
                 * transaction
                 */
                lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
-                                locked_end, 0, &cached_state);
+                                locked_end, &cached_state);
                ordered = btrfs_lookup_first_ordered_extent(inode,
                                                            alloc_end - 1);
                if (ordered &&
@@ -2852,7 +2852,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
        lockend--;
        len = lockend - lockstart + 1;
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         &cached_state);
 
        while (start < inode->i_size) {
index e5d7ec8..8f835bf 100644 (file)
@@ -1255,7 +1255,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
                goto out;
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
-                        0, &cached_state);
+                        &cached_state);
 
        io_ctl_set_generation(io_ctl, trans->transid);
 
@@ -2009,7 +2009,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
        return true;
 }
 
-static struct btrfs_free_space_op free_space_op = {
+static const struct btrfs_free_space_op free_space_op = {
        .recalc_thresholds      = recalculate_thresholds,
        .use_bitmap             = use_bitmap,
 };
index f251865..33178c4 100644 (file)
@@ -37,7 +37,7 @@ struct btrfs_free_space_ctl {
        int total_bitmaps;
        int unit;
        u64 start;
-       struct btrfs_free_space_op *op;
+       const struct btrfs_free_space_op *op;
        void *private;
        struct mutex cache_writeout_mutex;
        struct list_head trimming_ranges;
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
new file mode 100644 (file)
index 0000000..393e36b
--- /dev/null
@@ -0,0 +1,1591 @@
+/*
+ * Copyright (C) 2015 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "locking.h"
+#include "free-space-tree.h"
+#include "transaction.h"
+
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+                                       struct btrfs_fs_info *fs_info,
+                                       struct btrfs_block_group_cache *block_group,
+                                       struct btrfs_path *path);
+
+/*
+ * Compute the extent-count thresholds at which the free space of a block
+ * group switches between the extent and the bitmap representation.
+ *
+ * bitmap_high_thresh is the number of bare items (struct btrfs_item) that
+ * take as much room as all the bitmap items needed to cover the block group.
+ * bitmap_low_thresh sits 100 extents below it, clamped at zero.
+ */
+void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache)
+{
+       const size_t item_size = sizeof(struct btrfs_item);
+       u32 range_per_bitmap;
+       u64 nr_bitmaps, bitmaps_bytes;
+
+       /* Bytes of the block group described by one full bitmap item. */
+       range_per_bitmap = cache->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+       /* Round up so a partially covered tail still gets a bitmap. */
+       nr_bitmaps = div_u64(cache->key.offset + range_per_bitmap - 1,
+                            range_per_bitmap);
+       bitmaps_bytes = nr_bitmaps * (item_size + BTRFS_FREE_SPACE_BITMAP_SIZE);
+
+       /*
+        * Bitmaps take over once the disk space needed for extents exceeds
+        * the space the bitmaps would need.
+        */
+       cache->bitmap_high_thresh = div_u64(bitmaps_bytes, item_size);
+
+       /*
+        * Keep a small gap between the two thresholds so that the formats
+        * don't thrash back and forth.
+        */
+       if (cache->bitmap_high_thresh <= 100)
+               cache->bitmap_low_thresh = 0;
+       else
+               cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
+}
+
+/*
+ * Insert the BTRFS_FREE_SPACE_INFO_KEY item of @block_group into the free
+ * space tree, with an extent count of zero and no flags set.
+ *
+ * @path is released before returning, on success and on failure alike.
+ *
+ * Returns 0 on success or the error from btrfs_insert_empty_item().
+ */
+static int add_new_free_space_info(struct btrfs_trans_handle *trans,
+                                  struct btrfs_fs_info *fs_info,
+                                  struct btrfs_block_group_cache *block_group,
+                                  struct btrfs_path *path)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_free_space_info *info;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       int ret;
+
+       /* The info item is keyed by the block group's start and length. */
+       key.objectid = block_group->key.objectid;
+       key.type = BTRFS_FREE_SPACE_INFO_KEY;
+       key.offset = block_group->key.offset;
+
+       ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
+       if (ret == 0) {
+               leaf = path->nodes[0];
+               info = btrfs_item_ptr(leaf, path->slots[0],
+                                     struct btrfs_free_space_info);
+               btrfs_set_free_space_extent_count(leaf, info, 0);
+               btrfs_set_free_space_flags(leaf, info, 0);
+               btrfs_mark_buffer_dirty(leaf);
+       }
+
+       btrfs_release_path(path);
+       return ret;
+}
+
+/*
+ * Look up the BTRFS_FREE_SPACE_INFO_KEY item of @block_group.
+ *
+ * @trans and @cow are passed straight to btrfs_search_slot().
+ *
+ * On success the returned pointer refers to the item inside the leaf at
+ * path->nodes[0]; @path is not released here, so the pointer is only
+ * meaningful while the caller keeps the path.
+ *
+ * Returns an ERR_PTR() carrying the btrfs_search_slot() error on failure, or
+ * ERR_PTR(-ENOENT) (after warning and asserting) if no item with the exact
+ * key exists.
+ */
+struct btrfs_free_space_info *
+search_free_space_info(struct btrfs_trans_handle *trans,
+                      struct btrfs_fs_info *fs_info,
+                      struct btrfs_block_group_cache *block_group,
+                      struct btrfs_path *path, int cow)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key;
+       int ret;
+
+       key.objectid = block_group->key.objectid;
+       key.type = BTRFS_FREE_SPACE_INFO_KEY;
+       key.offset = block_group->key.offset;
+
+       ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
+       if (ret < 0)
+               return ERR_PTR(ret);
+       if (ret != 0) {
+               /* The btrfs message helpers terminate the line themselves. */
+               btrfs_warn(fs_info, "missing free space info for %llu",
+                          block_group->key.objectid);
+               ASSERT(0);
+               return ERR_PTR(-ENOENT);
+       }
+
+       return btrfs_item_ptr(path->nodes[0], path->slots[0],
+                             struct btrfs_free_space_info);
+}
+
+/*
+ * Like btrfs_search_slot(), but position @p on the greatest key that is less
+ * than @key.
+ *
+ * Callers are expected to pass a key that does not exist in the tree and that
+ * has at least one smaller key in the leaf that was found; anything else
+ * trips an assertion and yields -EIO.
+ *
+ * Returns 0 on success, -EIO as described above, or the negative error from
+ * btrfs_search_slot().
+ */
+static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root,
+                                 struct btrfs_key *key, struct btrfs_path *p,
+                                 int ins_len, int cow)
+{
+       int ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
+
+       if (ret < 0)
+               return ret;
+
+       /* An exact match or an empty left side leaves no previous slot. */
+       if (ret == 0 || p->slots[0] == 0) {
+               ASSERT(0);
+               return -EIO;
+       }
+
+       p->slots[0]--;
+       return 0;
+}
+
+/*
+ * Number of bytes needed for a bitmap that holds one bit for each
+ * @sectorsize unit of @size, rounded up to a whole byte.
+ */
+static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize)
+{
+       u32 nr_bits = (u32)div_u64(size, sectorsize);
+
+       return DIV_ROUND_UP(nr_bits, BITS_PER_BYTE);
+}
+
+/*
+ * Allocate a zero-filled (__GFP_ZERO) buffer of @bitmap_size bytes with
+ * __vmalloc().  Returns NULL on allocation failure.
+ *
+ * The callers in this file release the buffer with vfree().
+ */
+static unsigned long *alloc_bitmap(u32 bitmap_size)
+{
+       gfp_t gfp = GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO;
+
+       return __vmalloc(bitmap_size, gfp, PAGE_KERNEL);
+}
+
+/*
+ * Convert the free space of @block_group from extent items to bitmap items.
+ *
+ * Every BTRFS_FREE_SPACE_EXTENT_KEY item of the block group is folded into an
+ * in-memory bitmap (one bit per sector) and deleted, the info item gets
+ * BTRFS_FREE_SPACE_USING_BITMAPS set, and the bitmap is then written back as
+ * BTRFS_FREE_SPACE_BITMAP_KEY items.
+ *
+ * @path is used for all tree operations and is released after each completed
+ * step; it may still be held when an error is returned.
+ *
+ * Returns 0 on success.  On any failure the transaction is aborted and a
+ * negative errno is returned: -ENOMEM if the bitmap cannot be allocated, -EIO
+ * if the tree contents are not what is expected, or the error of the tree
+ * operation that failed.
+ */
+int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_free_space_info *info;
+       struct btrfs_key key, found_key;
+       struct extent_buffer *leaf;
+       unsigned long *bitmap;
+       char *bitmap_cursor;
+       u64 start, end;
+       u64 bitmap_range, i;
+       u32 bitmap_size, flags, expected_extent_count;
+       u32 extent_count = 0;
+       int done = 0, nr;
+       int ret;
+
+       bitmap_size = free_space_bitmap_size(block_group->key.offset,
+                                            block_group->sectorsize);
+       bitmap = alloc_bitmap(bitmap_size);
+       if (!bitmap) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       start = block_group->key.objectid;
+       end = block_group->key.objectid + block_group->key.offset;
+
+       /* Start from the last item that can belong to this block group. */
+       key.objectid = end - 1;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+
+       /*
+        * Walk the items backwards, one leaf per iteration, until the info
+        * item is reached.  The extent items seen in a leaf are recorded in
+        * the bitmap and then deleted in one batch.
+        */
+       while (!done) {
+               ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+               if (ret)
+                       goto out;
+
+               leaf = path->nodes[0];
+               nr = 0;
+               /*
+                * slots[0] is kept one past the item being examined, so that
+                * when the loop ends it is the first of the nr items to
+                * delete.
+                */
+               path->slots[0]++;
+               while (path->slots[0] > 0) {
+                       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+                       if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+                               ASSERT(found_key.objectid == block_group->key.objectid);
+                               ASSERT(found_key.offset == block_group->key.offset);
+                               done = 1;
+                               break;
+                       } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
+                               u64 first, last;
+
+                               ASSERT(found_key.objectid >= start);
+                               ASSERT(found_key.objectid < end);
+                               ASSERT(found_key.objectid + found_key.offset <= end);
+
+                               /* Sector range of the extent inside the group. */
+                               first = div_u64(found_key.objectid - start,
+                                               block_group->sectorsize);
+                               last = div_u64(found_key.objectid + found_key.offset - start,
+                                              block_group->sectorsize);
+                               bitmap_set(bitmap, first, last - first);
+
+                               extent_count++;
+                               nr++;
+                               path->slots[0]--;
+                       } else {
+                               /*
+                                * Unexpected item type.  The slot is not
+                                * advanced here, so without an explicit error
+                                * this loop would never terminate when
+                                * ASSERT() is compiled out.
+                                */
+                               ASSERT(0);
+                               ret = -EIO;
+                               goto out;
+                       }
+               }
+
+               ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+       }
+
+       /* Switch the info item over to the bitmap format. */
+       info = search_free_space_info(trans, fs_info, block_group, path, 1);
+       if (IS_ERR(info)) {
+               ret = PTR_ERR(info);
+               goto out;
+       }
+       leaf = path->nodes[0];
+       flags = btrfs_free_space_flags(leaf, info);
+       flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
+       btrfs_set_free_space_flags(leaf, info, flags);
+       expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(path);
+
+       /* The extents just removed must match the recorded count. */
+       if (extent_count != expected_extent_count) {
+               btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+                         block_group->key.objectid, extent_count,
+                         expected_extent_count);
+               ASSERT(0);
+               ret = -EIO;
+               goto out;
+       }
+
+       /*
+        * Write the bitmap back in pieces: each bitmap item covers at most
+        * bitmap_range bytes of the block group.
+        */
+       bitmap_cursor = (char *)bitmap;
+       bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
+       i = start;
+       while (i < end) {
+               unsigned long ptr;
+               u64 extent_size;
+               u32 data_size;
+
+               extent_size = min(end - i, bitmap_range);
+               data_size = free_space_bitmap_size(extent_size,
+                                                  block_group->sectorsize);
+
+               key.objectid = i;
+               key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
+               key.offset = extent_size;
+
+               ret = btrfs_insert_empty_item(trans, root, path, &key,
+                                             data_size);
+               if (ret)
+                       goto out;
+
+               leaf = path->nodes[0];
+               ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+               write_extent_buffer(leaf, bitmap_cursor, ptr,
+                                   data_size);
+               btrfs_mark_buffer_dirty(leaf);
+               btrfs_release_path(path);
+
+               i += extent_size;
+               bitmap_cursor += data_size;
+       }
+
+       ret = 0;
+out:
+       vfree(bitmap);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       return ret;
+}
+
+/*
+ * Convert the free space of @block_group from bitmap items to extent items.
+ *
+ * Every BTRFS_FREE_SPACE_BITMAP_KEY item of the block group is copied into an
+ * in-memory bitmap (one bit per sector) and deleted, the info item gets
+ * BTRFS_FREE_SPACE_USING_BITMAPS cleared, and one
+ * BTRFS_FREE_SPACE_EXTENT_KEY item is inserted for each run of set bits.
+ *
+ * @path is used for all tree operations and is released after each completed
+ * step; it may still be held when an error is returned.
+ *
+ * Returns 0 on success.  On any failure the transaction is aborted and a
+ * negative errno is returned: -ENOMEM if the bitmap cannot be allocated, -EIO
+ * if the tree contents are not what is expected, or the error of the tree
+ * operation that failed.
+ */
+int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_free_space_info *info;
+       struct btrfs_key key, found_key;
+       struct extent_buffer *leaf;
+       unsigned long *bitmap;
+       u64 start, end;
+       /* Initialize to silence GCC. */
+       u64 extent_start = 0;
+       u64 offset;
+       u32 bitmap_size, flags, expected_extent_count;
+       int prev_bit = 0, bit, bitnr;
+       u32 extent_count = 0;
+       int done = 0, nr;
+       int ret;
+
+       bitmap_size = free_space_bitmap_size(block_group->key.offset,
+                                            block_group->sectorsize);
+       bitmap = alloc_bitmap(bitmap_size);
+       if (!bitmap) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       start = block_group->key.objectid;
+       end = block_group->key.objectid + block_group->key.offset;
+
+       /* Start from the last item that can belong to this block group. */
+       key.objectid = end - 1;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+
+       /*
+        * Walk the items backwards, one leaf per iteration, until the info
+        * item is reached.  The bitmap items seen in a leaf are copied into
+        * the in-memory bitmap and then deleted in one batch.
+        */
+       while (!done) {
+               ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+               if (ret)
+                       goto out;
+
+               leaf = path->nodes[0];
+               nr = 0;
+               /*
+                * slots[0] is kept one past the item being examined, so that
+                * when the loop ends it is the first of the nr items to
+                * delete.
+                */
+               path->slots[0]++;
+               while (path->slots[0] > 0) {
+                       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+                       if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+                               ASSERT(found_key.objectid == block_group->key.objectid);
+                               ASSERT(found_key.offset == block_group->key.offset);
+                               done = 1;
+                               break;
+                       } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+                               unsigned long ptr;
+                               char *bitmap_cursor;
+                               u32 bitmap_pos, data_size;
+
+                               ASSERT(found_key.objectid >= start);
+                               ASSERT(found_key.objectid < end);
+                               ASSERT(found_key.objectid + found_key.offset <= end);
+
+                               /* Byte offset of this item in the full bitmap. */
+                               bitmap_pos = div_u64(found_key.objectid - start,
+                                                    block_group->sectorsize *
+                                                    BITS_PER_BYTE);
+                               bitmap_cursor = ((char *)bitmap) + bitmap_pos;
+                               data_size = free_space_bitmap_size(found_key.offset,
+                                                                  block_group->sectorsize);
+
+                               ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
+                               read_extent_buffer(leaf, bitmap_cursor, ptr,
+                                                  data_size);
+
+                               nr++;
+                               path->slots[0]--;
+                       } else {
+                               /*
+                                * Unexpected item type.  The slot is not
+                                * advanced here, so without an explicit error
+                                * this loop would never terminate when
+                                * ASSERT() is compiled out.
+                                */
+                               ASSERT(0);
+                               ret = -EIO;
+                               goto out;
+                       }
+               }
+
+               ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+       }
+
+       /* Switch the info item over to the extent format. */
+       info = search_free_space_info(trans, fs_info, block_group, path, 1);
+       if (IS_ERR(info)) {
+               ret = PTR_ERR(info);
+               goto out;
+       }
+       leaf = path->nodes[0];
+       flags = btrfs_free_space_flags(leaf, info);
+       flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
+       btrfs_set_free_space_flags(leaf, info, flags);
+       expected_extent_count = btrfs_free_space_extent_count(leaf, info);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(path);
+
+       /*
+        * Scan the bitmap one sector at a time.  A 0 -> 1 transition opens an
+        * extent, a 1 -> 0 transition closes it and inserts its item.
+        */
+       offset = start;
+       bitnr = 0;
+       while (offset < end) {
+               bit = !!test_bit(bitnr, bitmap);
+               if (prev_bit == 0 && bit == 1) {
+                       extent_start = offset;
+               } else if (prev_bit == 1 && bit == 0) {
+                       key.objectid = extent_start;
+                       key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+                       key.offset = offset - extent_start;
+
+                       ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+                       if (ret)
+                               goto out;
+                       btrfs_release_path(path);
+
+                       extent_count++;
+               }
+               prev_bit = bit;
+               offset += block_group->sectorsize;
+               bitnr++;
+       }
+       /* An extent still open at the end runs up to the block group end. */
+       if (prev_bit == 1) {
+               key.objectid = extent_start;
+               key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+               key.offset = end - extent_start;
+
+               ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+
+               extent_count++;
+       }
+
+       /* The extents just created must match the recorded count. */
+       if (extent_count != expected_extent_count) {
+               btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+                         block_group->key.objectid, extent_count,
+                         expected_extent_count);
+               ASSERT(0);
+               ret = -EIO;
+               goto out;
+       }
+
+       ret = 0;
+out:
+       vfree(bitmap);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       return ret;
+}
+
+/*
+ * Add @new_extents (which may be negative) to the extent count stored in the
+ * info item of @block_group, then switch the on-disk format if the new count
+ * crossed a threshold: to bitmaps above bitmap_high_thresh, back to extents
+ * below bitmap_low_thresh.
+ *
+ * Nothing is touched when @new_extents is zero.
+ *
+ * Returns 0 on success, the error from the info item lookup, or the result
+ * of the format conversion.
+ */
+static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
+                                         struct btrfs_fs_info *fs_info,
+                                         struct btrfs_block_group_cache *block_group,
+                                         struct btrfs_path *path,
+                                         int new_extents)
+{
+       struct btrfs_free_space_info *info;
+       struct extent_buffer *leaf;
+       u32 using_bitmaps;
+       u32 extent_count;
+
+       if (new_extents == 0)
+               return 0;
+
+       info = search_free_space_info(trans, fs_info, block_group, path, 1);
+       if (IS_ERR(info))
+               return PTR_ERR(info);
+
+       leaf = path->nodes[0];
+       using_bitmaps = btrfs_free_space_flags(leaf, info) &
+                       BTRFS_FREE_SPACE_USING_BITMAPS;
+       extent_count = btrfs_free_space_extent_count(leaf, info);
+       extent_count += new_extents;
+
+       btrfs_set_free_space_extent_count(leaf, info, extent_count);
+       btrfs_mark_buffer_dirty(leaf);
+       btrfs_release_path(path);
+
+       /* Too many extents: bitmaps are now the cheaper representation. */
+       if (!using_bitmaps && extent_count > block_group->bitmap_high_thresh)
+               return convert_free_space_to_bitmaps(trans, fs_info,
+                                                    block_group, path);
+
+       /* Few enough extents again: go back to extent items. */
+       if (using_bitmaps && extent_count < block_group->bitmap_low_thresh)
+               return convert_free_space_to_extents(trans, fs_info,
+                                                    block_group, path);
+
+       return 0;
+}
+
+/*
+ * Read the bit for the sector at byte address @offset from the bitmap item
+ * that @path currently points to.
+ *
+ * The item must be a BTRFS_FREE_SPACE_BITMAP_KEY item whose range contains
+ * @offset; both conditions are only checked with ASSERT().
+ *
+ * Returns 1 if the bit is set, 0 otherwise.
+ */
+int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+                       struct btrfs_path *path, u64 offset)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       int slot = path->slots[0];
+       struct btrfs_key key;
+       u64 found_start, found_end;
+       unsigned long bitmap_ptr, bit_nr;
+
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+       /* The key holds the start and the length of the covered range. */
+       found_start = key.objectid;
+       found_end = key.objectid + key.offset;
+       ASSERT(offset >= found_start && offset < found_end);
+
+       /* One bit per sector, counted from the start of the item's range. */
+       bit_nr = div_u64(offset - found_start, block_group->sectorsize);
+       bitmap_ptr = btrfs_item_ptr_offset(leaf, slot);
+
+       return !!extent_buffer_test_bit(leaf, bitmap_ptr, bit_nr);
+}
+
+/*
+ * Set (@bit != 0) or clear (@bit == 0) the bits for the byte range
+ * [*start, *start + *size) in the bitmap item that @path points to.
+ *
+ * Only the part of the range covered by this item is handled.  On return
+ * *start and *size describe what is left over for the following bitmap item;
+ * *size is zero once the whole range has been processed.
+ *
+ * The item must be a BTRFS_FREE_SPACE_BITMAP_KEY item containing *start; this
+ * is only checked with ASSERT().  The leaf is marked dirty.
+ */
+static void free_space_set_bits(struct btrfs_block_group_cache *block_group,
+                               struct btrfs_path *path, u64 *start, u64 *size,
+                               int bit)
+{
+       struct extent_buffer *leaf = path->nodes[0];
+       int slot = path->slots[0];
+       struct btrfs_key key;
+       u64 end = *start + *size;
+       u64 found_start, found_end;
+       unsigned long bitmap_ptr, first, nr_bits;
+
+       btrfs_item_key_to_cpu(leaf, &key, slot);
+       ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+
+       found_start = key.objectid;
+       found_end = key.objectid + key.offset;
+       ASSERT(*start >= found_start && *start < found_end);
+       ASSERT(end > found_start);
+
+       /* Clamp the range to what this bitmap item covers. */
+       if (found_end < end)
+               end = found_end;
+
+       bitmap_ptr = btrfs_item_ptr_offset(leaf, slot);
+       first = div_u64(*start - found_start, block_group->sectorsize);
+       nr_bits = div_u64(end - found_start, block_group->sectorsize);
+       nr_bits -= first;
+
+       if (!bit)
+               extent_buffer_bitmap_clear(leaf, bitmap_ptr, first, nr_bits);
+       else
+               extent_buffer_bitmap_set(leaf, bitmap_ptr, first, nr_bits);
+       btrfs_mark_buffer_dirty(leaf);
+
+       /* Hand the unprocessed remainder back to the caller. */
+       *size -= end - *start;
+       *start = end;
+}
+
+/*
+ * Move @p from the current bitmap item to the one that follows it.
+ *
+ * btrfs_next_item() can't be used in modify_free_space_bitmap() because
+ * btrfs_next_leaf() doesn't get the path for writing.  The fancy tree walking
+ * of btrfs_next_leaf() isn't needed anyway, since the start of the next
+ * bitmap is known from the current key: when the current item is the last
+ * one in its leaf, the path is released and the tree is searched again.
+ *
+ * Returns 0 on success or the error from btrfs_search_prev_slot().
+ */
+static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
+                                 struct btrfs_root *root, struct btrfs_path *p)
+{
+       struct btrfs_key next;
+
+       if (p->slots[0] + 1 >= btrfs_header_nritems(p->nodes[0])) {
+               /* Last item of the leaf: look the next bitmap up by key. */
+               btrfs_item_key_to_cpu(p->nodes[0], &next, p->slots[0]);
+               btrfs_release_path(p);
+
+               /* Greatest key whose objectid is the end of this bitmap. */
+               next.objectid += next.offset;
+               next.type = (u8)-1;
+               next.offset = (u64)-1;
+
+               return btrfs_search_prev_slot(trans, root, &next, p, 0, 1);
+       }
+
+       /* The next item lives in the same leaf. */
+       p->slots[0]++;
+       return 0;
+}
+
+/*
+ * Add or remove the byte range [start, start + size) in the free space
+ * bitmaps of @block_group, then adjust the block group's free space extent
+ * count to match.
+ *
+ * If remove is 1, then we are removing free space, thus clearing bits in the
+ * bitmap. If remove is 0, then we are adding free space, thus setting bits in
+ * the bitmap.
+ *
+ * The bits of the blocks immediately before and after the range are read to
+ * work out how the number of free space extents changes (splitting or merging
+ * with neighbors).
+ *
+ * Returns 0 on success or a negative errno from the tree searches or from
+ * update_free_space_extent_count(). On the error paths taken before the
+ * final update, @path is not released by this function.
+ */
+static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
+                                   struct btrfs_fs_info *fs_info,
+                                   struct btrfs_block_group_cache *block_group,
+                                   struct btrfs_path *path,
+                                   u64 start, u64 size, int remove)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key;
+       u64 end = start + size;
+       u64 cur_start, cur_size;
+       /* 1 or 0 for the neighboring block's bit, -1 if there is no neighbor. */
+       int prev_bit, next_bit;
+       int new_extents;
+       int ret;
+
+       /*
+        * Read the bit for the block immediately before the extent of space if
+        * that block is within the block group.
+        */
+       if (start > block_group->key.objectid) {
+               u64 prev_block = start - block_group->sectorsize;
+
+               /* Find the bitmap item that contains prev_block. */
+               key.objectid = prev_block;
+               key.type = (u8)-1;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+               if (ret)
+                       goto out;
+
+               prev_bit = free_space_test_bit(block_group, path, prev_block);
+
+               /* The previous block may have been in the previous bitmap. */
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (start >= key.objectid + key.offset) {
+                       ret = free_space_next_bitmap(trans, root, path);
+                       if (ret)
+                               goto out;
+               }
+       } else {
+               /* Find the bitmap item that contains start. */
+               key.objectid = start;
+               key.type = (u8)-1;
+               key.offset = (u64)-1;
+
+               ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
+               if (ret)
+                       goto out;
+
+               prev_bit = -1;
+       }
+
+       /*
+        * Iterate over all of the bitmaps overlapped by the extent of space,
+        * clearing/setting bits as required.
+        */
+       cur_start = start;
+       cur_size = size;
+       while (1) {
+               /* Consumes the part of the range covered by this bitmap. */
+               free_space_set_bits(block_group, path, &cur_start, &cur_size,
+                                   !remove);
+               if (cur_size == 0)
+                       break;
+               ret = free_space_next_bitmap(trans, root, path);
+               if (ret)
+                       goto out;
+       }
+
+       /*
+        * Read the bit for the block immediately after the extent of space if
+        * that block is within the block group.
+        */
+       if (end < block_group->key.objectid + block_group->key.offset) {
+               /* The next block may be in the next bitmap. */
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+               if (end >= key.objectid + key.offset) {
+                       ret = free_space_next_bitmap(trans, root, path);
+                       if (ret)
+                               goto out;
+               }
+
+               next_bit = free_space_test_bit(block_group, path, end);
+       } else {
+               next_bit = -1;
+       }
+
+       /*
+        * Work out the change in the number of free space extents. Removing
+        * takes one extent away but leaves a new one for each free neighbor;
+        * adding creates one extent but merges with each free neighbor.
+        */
+       if (remove) {
+               new_extents = -1;
+               if (prev_bit == 1) {
+                       /* Leftover on the left. */
+                       new_extents++;
+               }
+               if (next_bit == 1) {
+                       /* Leftover on the right. */
+                       new_extents++;
+               }
+       } else {
+               new_extents = 1;
+               if (prev_bit == 1) {
+                       /* Merging with neighbor on the left. */
+                       new_extents--;
+               }
+               if (next_bit == 1) {
+                       /* Merging with neighbor on the right. */
+                       new_extents--;
+               }
+       }
+
+       /* The path is reused for the info item lookup, so drop it first. */
+       btrfs_release_path(path);
+       ret = update_free_space_extent_count(trans, fs_info, block_group, path,
+                                            new_extents);
+
+out:
+       return ret;
+}
+
+/*
+ * Remove [start, start + size) from a block group that tracks its free space
+ * with extent items.
+ *
+ * The range is expected to sit inside one existing free space extent item
+ * (asserted below). That item is deleted and up to two replacement items are
+ * inserted for whatever is left over in front of and behind the range:
+ *
+ *   - nothing left over:          one extent fewer (-1)
+ *   - leftover on one side only:  extent count unchanged (0)
+ *   - leftovers on both sides:    one extent more (+1); the block group may
+ *                                 need to be converted to bitmaps as a result
+ *
+ * Returns the first non-zero result of a tree operation, otherwise whatever
+ * update_free_space_extent_count() returns.
+ */
+static int remove_free_space_extent(struct btrfs_trans_handle *trans,
+                                   struct btrfs_fs_info *fs_info,
+                                   struct btrfs_block_group_cache *block_group,
+                                   struct btrfs_path *path,
+                                   u64 start, u64 size)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key;
+       u64 range_end = start + size;
+       u64 ext_start, ext_end;
+       int delta = -1;
+       int ret;
+
+       /* Look up the item that should contain the range. */
+       key.objectid = start;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+       ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+       if (ret)
+               return ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+
+       ext_start = key.objectid;
+       ext_end = key.objectid + key.offset;
+       ASSERT(start >= ext_start && range_end <= ext_end);
+
+       /* The old item always goes away, whatever is left over. */
+       ret = btrfs_del_item(trans, root, path);
+       if (ret)
+               return ret;
+
+       /* Free space remaining in front of the removed range. */
+       if (ext_start < start) {
+               key.objectid = ext_start;
+               key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+               key.offset = start - ext_start;
+
+               btrfs_release_path(path);
+               ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+               if (ret)
+                       return ret;
+               delta++;
+       }
+
+       /* Free space remaining behind the removed range. */
+       if (ext_end > range_end) {
+               key.objectid = range_end;
+               key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+               key.offset = ext_end - range_end;
+
+               btrfs_release_path(path);
+               ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
+               if (ret)
+                       return ret;
+               delta++;
+       }
+
+       btrfs_release_path(path);
+       return update_free_space_extent_count(trans, fs_info, block_group,
+                                             path, delta);
+}
+
+/*
+ * Remove [start, start + size) from the free space recorded for
+ * @block_group, using whichever representation (bitmaps or extent items) the
+ * block group's free space info item says is in use.
+ *
+ * A block group that still has needs_free_space set is added to the free
+ * space tree first. Returns 0 on success or the error of the failing step.
+ */
+int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path, u64 start, u64 size)
+{
+       struct btrfs_free_space_info *fsi;
+       u32 flags;
+       int ret;
+
+       if (block_group->needs_free_space) {
+               ret = __add_block_group_free_space(trans, fs_info, block_group,
+                                                  path);
+               if (ret)
+                       return ret;
+       }
+
+       /* Only the flags are needed; drop the path again right away. */
+       fsi = search_free_space_info(NULL, fs_info, block_group, path, 0);
+       if (IS_ERR(fsi))
+               return PTR_ERR(fsi);
+       flags = btrfs_free_space_flags(path->nodes[0], fsi);
+       btrfs_release_path(path);
+
+       if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS))
+               return remove_free_space_extent(trans, fs_info, block_group,
+                                               path, start, size);
+
+       return modify_free_space_bitmap(trans, fs_info, block_group, path,
+                                       start, size, 1);
+}
+
+/*
+ * Entry point for removing [start, start + size) from the free space tree.
+ *
+ * Does nothing (returns 0) when the FREE_SPACE_TREE compat_ro flag is not
+ * set. Otherwise the block group containing @start is looked up and the
+ * removal is done under that block group's free_space_lock. Any failure
+ * aborts the transaction and is returned to the caller.
+ */
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info,
+                               u64 start, u64 size)
+{
+       struct btrfs_block_group_cache *bg;
+       struct btrfs_path *path;
+       int ret;
+
+       if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       bg = btrfs_lookup_block_group(fs_info, start);
+       if (bg) {
+               mutex_lock(&bg->free_space_lock);
+               ret = __remove_from_free_space_tree(trans, fs_info, bg, path,
+                                                   start, size);
+               mutex_unlock(&bg->free_space_lock);
+
+               btrfs_put_block_group(bg);
+       } else {
+               /* Every range handed to us should belong to a block group. */
+               ASSERT(0);
+               ret = -ENOENT;
+       }
+
+out:
+       btrfs_free_path(path);
+       if (ret)
+               btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+       return ret;
+}
+
+/*
+ * Add [start, start + size) to a block group that tracks its free space with
+ * extent items, merging it with directly adjacent free extents.
+ *
+ * Four outcomes are possible:
+ *
+ * 1. No adjacent neighbor: a new item is inserted and the extent count goes
+ *    up by one. The block group may need to be converted to bitmaps as a
+ *    result.
+ * 2. Neighbor directly before the range: that item is removed and replaced
+ *    by one item covering both. Extent count unchanged.
+ * 3. Neighbor directly after the range: same as 2, on the other side.
+ *    Extent count unchanged.
+ * 4. Neighbors on both sides: both are removed and a single item covering
+ *    all three ranges is inserted. Extent count goes down by one.
+ *
+ * Returns the first non-zero result of a tree operation, otherwise whatever
+ * update_free_space_extent_count() returns.
+ */
+static int add_free_space_extent(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_group_cache *block_group,
+                                struct btrfs_path *path,
+                                u64 start, u64 size)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key, merged;
+       u64 range_end = start + size;
+       u64 nb_start, nb_end;
+       int delta = 1;
+       int ret;
+
+       /* Start from the bare range and grow it as neighbors are absorbed. */
+       merged.objectid = start;
+       merged.type = BTRFS_FREE_SPACE_EXTENT_KEY;
+       merged.offset = size;
+
+       /* A range starting at the block group start has no left neighbor. */
+       if (start == block_group->key.objectid)
+               goto search_right;
+
+       key.objectid = start - 1;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+       ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+       if (ret)
+               return ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+               /* We ran into the info item: there is no extent on the left. */
+               ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+               btrfs_release_path(path);
+               goto search_right;
+       }
+
+       nb_start = key.objectid;
+       nb_end = key.objectid + key.offset;
+       ASSERT(nb_start >= block_group->key.objectid &&
+              nb_end > block_group->key.objectid);
+       ASSERT(nb_start < start && nb_end <= start);
+
+       if (nb_end == start) {
+               /* Touching on the left: absorb it (cases 2 and 4). */
+               ret = btrfs_del_item(trans, root, path);
+               if (ret)
+                       return ret;
+               merged.objectid = nb_start;
+               merged.offset += key.offset;
+               delta--;
+       }
+       btrfs_release_path(path);
+
+search_right:
+       /* A range ending at the block group end has no right neighbor. */
+       if (range_end == block_group->key.objectid + block_group->key.offset)
+               goto insert_merged;
+
+       key.objectid = range_end;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+       ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+       if (ret)
+               return ret;
+
+       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+       if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
+               ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
+               btrfs_release_path(path);
+               goto insert_merged;
+       }
+
+       nb_start = key.objectid;
+       nb_end = key.objectid + key.offset;
+       ASSERT(nb_start >= block_group->key.objectid &&
+              nb_end > block_group->key.objectid);
+       ASSERT((nb_start < start && nb_end <= start) ||
+              (nb_start >= range_end && nb_end > range_end));
+
+       if (nb_start == range_end) {
+               /* Touching on the right: absorb it (cases 3 and 4). */
+               ret = btrfs_del_item(trans, root, path);
+               if (ret)
+                       return ret;
+               merged.offset += key.offset;
+               delta--;
+       }
+       btrfs_release_path(path);
+
+insert_merged:
+       /* All four cases end with exactly one item being inserted. */
+       ret = btrfs_insert_empty_item(trans, root, path, &merged, 0);
+       if (ret)
+               return ret;
+
+       btrfs_release_path(path);
+       return update_free_space_extent_count(trans, fs_info, block_group,
+                                             path, delta);
+}
+
+/*
+ * Record [start, start + size) as free space for @block_group, using
+ * whichever representation (bitmaps or extent items) the block group's free
+ * space info item says is in use.
+ *
+ * A block group that still has needs_free_space set is added to the free
+ * space tree first. Returns 0 on success or the error of the failing step.
+ */
+int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info,
+                            struct btrfs_block_group_cache *block_group,
+                            struct btrfs_path *path, u64 start, u64 size)
+{
+       struct btrfs_free_space_info *fsi;
+       u32 flags;
+       int ret;
+
+       if (block_group->needs_free_space) {
+               ret = __add_block_group_free_space(trans, fs_info, block_group,
+                                                  path);
+               if (ret)
+                       return ret;
+       }
+
+       /* Only the flags are needed; drop the path again right away. */
+       fsi = search_free_space_info(NULL, fs_info, block_group, path, 0);
+       if (IS_ERR(fsi))
+               return PTR_ERR(fsi);
+       flags = btrfs_free_space_flags(path->nodes[0], fsi);
+       btrfs_release_path(path);
+
+       if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS))
+               return add_free_space_extent(trans, fs_info, block_group, path,
+                                            start, size);
+
+       return modify_free_space_bitmap(trans, fs_info, block_group, path,
+                                       start, size, 0);
+}
+
+/*
+ * Entry point for adding [start, start + size) to the free space tree.
+ *
+ * Does nothing (returns 0) when the FREE_SPACE_TREE compat_ro flag is not
+ * set. Otherwise the block group containing @start is looked up and the
+ * range is added under that block group's free_space_lock. Any failure
+ * aborts the transaction and is returned to the caller.
+ */
+int add_to_free_space_tree(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          u64 start, u64 size)
+{
+       struct btrfs_block_group_cache *bg;
+       struct btrfs_path *path;
+       int ret;
+
+       if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       bg = btrfs_lookup_block_group(fs_info, start);
+       if (bg) {
+               mutex_lock(&bg->free_space_lock);
+               ret = __add_to_free_space_tree(trans, fs_info, bg, path,
+                                              start, size);
+               mutex_unlock(&bg->free_space_lock);
+
+               btrfs_put_block_group(bg);
+       } else {
+               /* Every range handed to us should belong to a block group. */
+               ASSERT(0);
+               ret = -ENOENT;
+       }
+
+out:
+       btrfs_free_path(path);
+       if (ret)
+               btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+       return ret;
+}
+
+/*
+ * Build the free space tree entries for one block group from the extent
+ * tree: every gap between allocated extents, plus whatever follows the last
+ * one, is recorded as free space. Writes to the free space tree made here may
+ * themselves cause extent tree operations; those go through the normal
+ * add/remove hooks.
+ *
+ * Two paths are used: one walks the extent tree, the other is handed to the
+ * free space tree helpers. Returns 0 on success or a negative errno.
+ */
+static int populate_free_space_tree(struct btrfs_trans_handle *trans,
+                                   struct btrfs_fs_info *fs_info,
+                                   struct btrfs_block_group_cache *block_group)
+{
+       struct btrfs_root *extent_root = fs_info->extent_root;
+       struct btrfs_path *extent_path, *fst_path;
+       struct btrfs_key key;
+       u64 free_start, bg_end;
+       int ret;
+
+       extent_path = btrfs_alloc_path();
+       if (!extent_path)
+               return -ENOMEM;
+       extent_path->reada = 1;
+
+       fst_path = btrfs_alloc_path();
+       if (!fst_path) {
+               btrfs_free_path(extent_path);
+               return -ENOMEM;
+       }
+
+       ret = add_new_free_space_info(trans, fs_info, block_group, fst_path);
+       if (ret)
+               goto out;
+
+       mutex_lock(&block_group->free_space_lock);
+
+       /*
+        * Walk the extent and metadata items of this block group. Both item
+        * types sort below BLOCK_GROUP_ITEM, so an extent can show up before
+        * the item of the block group that contains it; start the search at
+        * the first possible extent key instead of at the block group item.
+        */
+       key.objectid = block_group->key.objectid;
+       key.type = BTRFS_EXTENT_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot_for_read(extent_root, &key, extent_path, 1, 0);
+       if (ret < 0)
+               goto out_locked;
+       ASSERT(ret == 0);
+
+       free_start = block_group->key.objectid;
+       bg_end = block_group->key.objectid + block_group->key.offset;
+       for (;;) {
+               btrfs_item_key_to_cpu(extent_path->nodes[0], &key,
+                                     extent_path->slots[0]);
+
+               if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
+                       /* Another block group's item: we walked too far. */
+                       if (key.objectid != block_group->key.objectid)
+                               break;
+               } else if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+                          key.type == BTRFS_METADATA_ITEM_KEY) {
+                       if (key.objectid >= bg_end)
+                               break;
+
+                       /* The gap in front of this extent is free space. */
+                       if (free_start < key.objectid) {
+                               ret = __add_to_free_space_tree(trans, fs_info,
+                                                              block_group,
+                                                              fst_path,
+                                                              free_start,
+                                                              key.objectid -
+                                                              free_start);
+                               if (ret)
+                                       goto out_locked;
+                       }
+
+                       /*
+                        * Continue after this extent. For metadata items the
+                        * length used is the node size, not key.offset.
+                        */
+                       free_start = key.objectid;
+                       if (key.type == BTRFS_METADATA_ITEM_KEY)
+                               free_start += fs_info->tree_root->nodesize;
+                       else
+                               free_start += key.offset;
+               }
+
+               ret = btrfs_next_item(extent_root, extent_path);
+               if (ret < 0)
+                       goto out_locked;
+               if (ret)
+                       break;
+       }
+
+       /* Whatever follows the last extent is free as well. */
+       ret = 0;
+       if (free_start < bg_end) {
+               ret = __add_to_free_space_tree(trans, fs_info, block_group,
+                                              fst_path, free_start,
+                                              bg_end - free_start);
+       }
+
+out_locked:
+       mutex_unlock(&block_group->free_space_lock);
+out:
+       btrfs_free_path(fst_path);
+       btrfs_free_path(extent_path);
+       return ret;
+}
+
+/*
+ * Create the free space tree root and populate it for every block group in
+ * fs_info->block_group_cache_tree, all inside one transaction.
+ *
+ * fs_info->creating_free_space_tree is set for the duration of the
+ * population and cleared again on both the success and the failure path. The
+ * FREE_SPACE_TREE compat_ro flag is set only once every block group has been
+ * populated. On failure the transaction is aborted and ended.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_block_group_cache *block_group;
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *fst_root;
+       struct rb_node *node;
+       int ret;
+
+       trans = btrfs_start_transaction(tree_root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       fs_info->creating_free_space_tree = 1;
+
+       fst_root = btrfs_create_tree(trans, fs_info,
+                                    BTRFS_FREE_SPACE_TREE_OBJECTID);
+       if (IS_ERR(fst_root)) {
+               ret = PTR_ERR(fst_root);
+               goto abort;
+       }
+       fs_info->free_space_root = fst_root;
+
+       for (node = rb_first(&fs_info->block_group_cache_tree); node;
+            node = rb_next(node)) {
+               block_group = rb_entry(node, struct btrfs_block_group_cache,
+                                      cache_node);
+               ret = populate_free_space_tree(trans, fs_info, block_group);
+               if (ret)
+                       goto abort;
+       }
+
+       btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       fs_info->creating_free_space_tree = 0;
+
+       return btrfs_commit_transaction(trans, tree_root);
+
+abort:
+       fs_info->creating_free_space_tree = 0;
+       btrfs_abort_transaction(trans, tree_root, ret);
+       btrfs_end_transaction(trans, tree_root);
+       return ret;
+}
+
+/*
+ * Delete every item from @root, one leaf's worth per iteration.
+ *
+ * Each pass searches for the smallest possible key, deletes all items of the
+ * leaf the search ends up in and releases the path. The loop stops once the
+ * search lands on a leaf without items. Returns 0 on success, a negative
+ * errno from the search, or the non-zero result of btrfs_del_items().
+ */
+static int clear_free_space_tree(struct btrfs_trans_handle *trans,
+                                struct btrfs_root *root)
+{
+       struct btrfs_path *path;
+       struct btrfs_key first_key;
+       int nritems;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       path->leave_spinning = 1;
+
+       first_key.objectid = 0;
+       first_key.type = 0;
+       first_key.offset = 0;
+
+       do {
+               ret = btrfs_search_slot(trans, root, &first_key, path, -1, 1);
+               if (ret < 0)
+                       break;
+
+               nritems = btrfs_header_nritems(path->nodes[0]);
+               if (!nritems) {
+                       /* Nothing left to delete: the tree is empty. */
+                       ret = 0;
+                       break;
+               }
+
+               /* Wipe the whole leaf, starting from its first slot. */
+               path->slots[0] = 0;
+               ret = btrfs_del_items(trans, root, path, 0, nritems);
+               if (ret)
+                       break;
+
+               btrfs_release_path(path);
+       } while (1);
+
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Remove the free space tree from the filesystem in one transaction.
+ *
+ * The FREE_SPACE_TREE compat_ro flag and fs_info->free_space_root are cleared
+ * up front. Then all items are deleted, the root item is removed from the
+ * tree root, the root node block is cleaned and freed, and the in-memory root
+ * structure is released. On failure the transaction is aborted and ended.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *fst_root = fs_info->free_space_root;
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       trans = btrfs_start_transaction(tree_root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
+       fs_info->free_space_root = NULL;
+
+       ret = clear_free_space_tree(trans, fst_root);
+       if (ret)
+               goto abort;
+
+       ret = btrfs_del_root(trans, tree_root, &fst_root->root_key);
+       if (ret)
+               goto abort;
+
+       list_del(&fst_root->dirty_list);
+
+       /* Clean the (now empty) root node under its lock, then free it. */
+       btrfs_tree_lock(fst_root->node);
+       clean_tree_block(trans, tree_root->fs_info, fst_root->node);
+       btrfs_tree_unlock(fst_root->node);
+       btrfs_free_tree_block(trans, fst_root, fst_root->node, 0, 1);
+
+       /* Drop the extent buffer references and the root structure itself. */
+       free_extent_buffer(fst_root->node);
+       free_extent_buffer(fst_root->commit_root);
+       kfree(fst_root);
+
+       return btrfs_commit_transaction(trans, tree_root);
+
+abort:
+       btrfs_abort_transaction(trans, tree_root, ret);
+       btrfs_end_transaction(trans, tree_root);
+       return ret;
+}
+
+/*
+ * Add a block group to the free space tree for the first time: insert its
+ * free space info item and record the entire block group range
+ * [key.objectid, key.objectid + key.offset) as free.
+ *
+ * NOTE(review): the callers visible in this file invoke this with
+ * block_group->free_space_lock held - confirm for any new caller.
+ *
+ * Returns 0 on success or the error of the failing step.
+ */
+static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
+                                       struct btrfs_fs_info *fs_info,
+                                       struct btrfs_block_group_cache *block_group,
+                                       struct btrfs_path *path)
+{
+       int ret;
+
+       /*
+        * Clear the flag before calling __add_to_free_space_tree(): that
+        * function calls back into this one while the flag is still set.
+        */
+       block_group->needs_free_space = 0;
+
+       ret = add_new_free_space_info(trans, fs_info, block_group, path);
+       if (ret)
+               return ret;
+
+       return __add_to_free_space_tree(trans, fs_info, block_group, path,
+                                       block_group->key.objectid,
+                                       block_group->key.offset);
+}
+
+/*
+ * Add @block_group to the free space tree if that has not happened yet.
+ *
+ * Does nothing when the FREE_SPACE_TREE compat_ro flag is not set, or when
+ * needs_free_space is already clear by the time free_space_lock is taken.
+ * Any failure aborts the transaction and is returned to the caller.
+ */
+int add_block_group_free_space(struct btrfs_trans_handle *trans,
+                              struct btrfs_fs_info *fs_info,
+                              struct btrfs_block_group_cache *block_group)
+{
+       struct btrfs_path *path = NULL;
+       int ret = 0;
+
+       if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+               return 0;
+
+       mutex_lock(&block_group->free_space_lock);
+       if (block_group->needs_free_space) {
+               path = btrfs_alloc_path();
+               if (path)
+                       ret = __add_block_group_free_space(trans, fs_info,
+                                                          block_group, path);
+               else
+                       ret = -ENOMEM;
+       }
+
+       /* path is still NULL here when nothing had to be done. */
+       btrfs_free_path(path);
+       mutex_unlock(&block_group->free_space_lock);
+       if (ret)
+               btrfs_abort_transaction(trans, fs_info->free_space_root, ret);
+       return ret;
+}
+
+/*
+ * Delete every free space tree item belonging to @block_group: its extent or
+ * bitmap items and, last, its free space info item.
+ *
+ * Items are removed back to front. Each pass looks up the last remaining item
+ * of the block group, walks backwards through that leaf counting the items
+ * that belong to the block group, and deletes the run with a single
+ * btrfs_del_items() call. The pass that reaches the info item is the last.
+ *
+ * Nothing is done (0 is returned) when the FREE_SPACE_TREE compat_ro flag is
+ * not set or when the block group was never added to the tree
+ * (needs_free_space still set). On failure the transaction is aborted and
+ * the error is returned; an item of an unexpected type yields -EIO.
+ */
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group)
+{
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_path *path;
+       struct btrfs_key key, found_key;
+       struct extent_buffer *leaf;
+       u64 start, end;
+       int done = 0, nr;
+       int ret;
+
+       if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
+               return 0;
+
+       if (block_group->needs_free_space) {
+               /* We never added this block group to the free space tree. */
+               return 0;
+       }
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       start = block_group->key.objectid;
+       end = block_group->key.objectid + block_group->key.offset;
+
+       /* Search from the very last byte of the block group backwards. */
+       key.objectid = end - 1;
+       key.type = (u8)-1;
+       key.offset = (u64)-1;
+
+       while (!done) {
+               ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
+               if (ret)
+                       goto out;
+
+               leaf = path->nodes[0];
+               nr = 0;
+               /*
+                * slots[0] is kept one past the item being examined, so that
+                * after the walk it is the first slot of the run to delete.
+                */
+               path->slots[0]++;
+               while (path->slots[0] > 0) {
+                       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);
+
+                       if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
+                               ASSERT(found_key.objectid == block_group->key.objectid);
+                               ASSERT(found_key.offset == block_group->key.offset);
+                               done = 1;
+                               nr++;
+                               path->slots[0]--;
+                               break;
+                       } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
+                                  found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
+                               ASSERT(found_key.objectid >= start);
+                               ASSERT(found_key.objectid < end);
+                               ASSERT(found_key.objectid + found_key.offset <= end);
+                               nr++;
+                               path->slots[0]--;
+                       } else {
+                               /*
+                                * An item of any other type means the tree is
+                                * inconsistent. Bail out explicitly: if
+                                * ASSERT() is a no-op in this build, falling
+                                * through would leave path->slots[0] unchanged
+                                * and this loop would never terminate.
+                                */
+                               ASSERT(0);
+                               ret = -EIO;
+                               goto out;
+                       }
+               }
+
+               ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
+               if (ret)
+                       goto out;
+               btrfs_release_path(path);
+       }
+
+       ret = 0;
+out:
+       btrfs_free_path(path);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+       return ret;
+}
+
+/*
+ * Load the free space of a block group stored in bitmap format.
+ *
+ * @path must point at the block group's free space info item. The bitmap
+ * items after it are scanned one bit per block_group->sectorsize bytes, and
+ * every run of set bits is handed to add_new_free_space(). Runs may continue
+ * from one bitmap item into the next; a run still open after the last bitmap
+ * is closed at the end of the block group.
+ *
+ * Waiters on caching_ctl->wait are woken each time more than
+ * CACHING_CTL_WAKE_UP has been accumulated, and caching_ctl->progress is
+ * advanced per bitmap item and set to (u64)-1 on success.
+ *
+ * Returns 0 on success, a negative errno from btrfs_next_item(), or -EIO if
+ * the number of runs found differs from @expected_extent_count.
+ */
+static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
+                                  struct btrfs_path *path,
+                                  u32 expected_extent_count)
+{
+       struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key;
+       u64 bg_end = block_group->key.objectid + block_group->key.offset;
+       /* Only read after a 0 -> 1 transition set it; zeroed to silence GCC. */
+       u64 run_start = 0;
+       u64 bitmap_end, pos;
+       u64 total_found = 0;
+       u32 extent_count = 0;
+       int prev_bit = 0, bit;
+       int ret;
+
+       for (;;) {
+               ret = btrfs_next_item(root, path);
+               if (ret < 0)
+                       return ret;
+               if (ret)
+                       break;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+               /* The next info item starts the next block group. */
+               if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+                       break;
+
+               ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
+               ASSERT(key.objectid < bg_end &&
+                      key.objectid + key.offset <= bg_end);
+
+               caching_ctl->progress = key.objectid;
+
+               bitmap_end = key.objectid + key.offset;
+               for (pos = key.objectid; pos < bitmap_end;
+                    pos += block_group->sectorsize) {
+                       bit = free_space_test_bit(block_group, path, pos);
+                       if (prev_bit == 0 && bit == 1) {
+                               /* A run of free space begins here. */
+                               run_start = pos;
+                       } else if (prev_bit == 1 && bit == 0) {
+                               /* The run ended just before this block. */
+                               total_found += add_new_free_space(block_group,
+                                                                 fs_info,
+                                                                 run_start,
+                                                                 pos);
+                               if (total_found > CACHING_CTL_WAKE_UP) {
+                                       total_found = 0;
+                                       wake_up(&caching_ctl->wait);
+                               }
+                               extent_count++;
+                       }
+                       prev_bit = bit;
+               }
+       }
+
+       /* Close a run that extends to the end of the block group. */
+       if (prev_bit == 1) {
+               total_found += add_new_free_space(block_group, fs_info,
+                                                 run_start, bg_end);
+               extent_count++;
+       }
+
+       if (extent_count != expected_extent_count) {
+               btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+                         block_group->key.objectid, extent_count,
+                         expected_extent_count);
+               ASSERT(0);
+               return -EIO;
+       }
+
+       caching_ctl->progress = (u64)-1;
+       return 0;
+}
+
+/*
+ * Load the free space of a block group stored as extent items.
+ *
+ * @path must point at the block group's free space info item. Every free
+ * space extent item after it, up to the next info item or the end of the
+ * tree, is handed to add_new_free_space().
+ *
+ * Waiters on caching_ctl->wait are woken each time more than
+ * CACHING_CTL_WAKE_UP has been accumulated, and caching_ctl->progress is
+ * advanced per item and set to (u64)-1 on success.
+ *
+ * Returns 0 on success, a negative errno from btrfs_next_item(), or -EIO if
+ * the number of items found differs from @expected_extent_count.
+ */
+static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
+                                  struct btrfs_path *path,
+                                  u32 expected_extent_count)
+{
+       struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct btrfs_root *root = fs_info->free_space_root;
+       struct btrfs_key key;
+       u64 bg_end = block_group->key.objectid + block_group->key.offset;
+       u64 total_found = 0;
+       u32 extent_count = 0;
+       int ret;
+
+       for (;;) {
+               ret = btrfs_next_item(root, path);
+               if (ret < 0)
+                       return ret;
+               if (ret)
+                       break;
+
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+               /* The next info item starts the next block group. */
+               if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
+                       break;
+
+               ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
+               ASSERT(key.objectid < bg_end &&
+                      key.objectid + key.offset <= bg_end);
+
+               caching_ctl->progress = key.objectid;
+
+               total_found += add_new_free_space(block_group, fs_info,
+                                                 key.objectid,
+                                                 key.objectid + key.offset);
+               if (total_found > CACHING_CTL_WAKE_UP) {
+                       total_found = 0;
+                       wake_up(&caching_ctl->wait);
+               }
+               extent_count++;
+       }
+
+       if (extent_count != expected_extent_count) {
+               btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u",
+                         block_group->key.objectid, extent_count,
+                         expected_extent_count);
+               ASSERT(0);
+               return -EIO;
+       }
+
+       caching_ctl->progress = (u64)-1;
+       return 0;
+}
+
+/*
+ * Load the free space of caching_ctl->block_group from the free space tree.
+ *
+ * The block group's free space info item is looked up to obtain the expected
+ * extent count and the format flags; the matching loader (bitmaps or
+ * extents) then walks the items that follow it.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
+{
+       struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct btrfs_free_space_info *fsi;
+       struct btrfs_path *path;
+       u32 extent_count, flags;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /*
+        * caching_thread() avoids deadlocking on the extent tree; for the
+        * same reason we must not deadlock on the free space tree, so read
+        * the commit root without taking tree locks.
+        */
+       path->skip_locking = 1;
+       path->search_commit_root = 1;
+       path->reada = 1;
+
+       fsi = search_free_space_info(NULL, fs_info, block_group, path, 0);
+       if (IS_ERR(fsi)) {
+               ret = PTR_ERR(fsi);
+       } else {
+               flags = btrfs_free_space_flags(path->nodes[0], fsi);
+               extent_count = btrfs_free_space_extent_count(path->nodes[0],
+                                                            fsi);
+
+               /*
+                * The path is deliberately left on the info item: the loaders
+                * iterate forward through the tree from there.
+                */
+               if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS))
+                       ret = load_free_space_extents(caching_ctl, path,
+                                                     extent_count);
+               else
+                       ret = load_free_space_bitmaps(caching_ctl, path,
+                                                     extent_count);
+       }
+
+       btrfs_free_path(path);
+       return ret;
+}
diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h
new file mode 100644 (file)
index 0000000..54ffced
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2015 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __BTRFS_FREE_SPACE_TREE
+#define __BTRFS_FREE_SPACE_TREE
+
+/*
+ * The default size for new free space bitmap items. The last bitmap in a block
+ * group may be truncated, and none of the free space tree code assumes that
+ * existing bitmaps are this size.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_SIZE 256
+/* The same default expressed in bits (the size above is in bytes). */
+#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE)
+
+void set_free_space_tree_thresholds(struct btrfs_block_group_cache *block_group);
+int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info);
+int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info);
+/*
+ * Load the free space tree items of caching_ctl->block_group, using the
+ * extent or the bitmap loader as selected by the flags of the block group's
+ * free space info item. Returns 0 or a negative errno.
+ */
+int load_free_space_tree(struct btrfs_caching_control *caching_ctl);
+int add_block_group_free_space(struct btrfs_trans_handle *trans,
+                              struct btrfs_fs_info *fs_info,
+                              struct btrfs_block_group_cache *block_group);
+int remove_block_group_free_space(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group);
+int add_to_free_space_tree(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          u64 start, u64 size);
+int remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+                               struct btrfs_fs_info *fs_info,
+                               u64 start, u64 size);
+
+/* Exposed for testing. */
+/*
+ * Returns the block group's free space info item, or an ERR_PTR() value on
+ * failure; on success the path is left pointing at that item.
+ * load_free_space_tree() calls it with a NULL trans and cow == 0.
+ */
+struct btrfs_free_space_info *
+search_free_space_info(struct btrfs_trans_handle *trans,
+                      struct btrfs_fs_info *fs_info,
+                      struct btrfs_block_group_cache *block_group,
+                      struct btrfs_path *path, int cow);
+int __add_to_free_space_tree(struct btrfs_trans_handle *trans,
+                            struct btrfs_fs_info *fs_info,
+                            struct btrfs_block_group_cache *block_group,
+                            struct btrfs_path *path, u64 start, u64 size);
+int __remove_from_free_space_tree(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path, u64 start, u64 size);
+int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path);
+int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *block_group,
+                                 struct btrfs_path *path);
+int free_space_test_bit(struct btrfs_block_group_cache *block_group,
+                       struct btrfs_path *path, u64 offset);
+
+#endif /* __BTRFS_FREE_SPACE_TREE */
index e5eb40c..8b57c17 100644 (file)
@@ -48,7 +48,7 @@ static int caching_kthread(void *data)
        /* Since the commit root is read-only, we can safely skip locking. */
        path->skip_locking = 1;
        path->search_commit_root = 1;
-       path->reada = 2;
+       path->reada = READA_FORWARD;
 
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.offset = 0;
@@ -334,7 +334,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
        return true;
 }
 
-static struct btrfs_free_space_op free_ino_op = {
+static const struct btrfs_free_space_op free_ino_op = {
        .recalc_thresholds      = recalculate_thresholds,
        .use_bitmap             = use_bitmap,
 };
@@ -356,7 +356,7 @@ static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl,
        return false;
 }
 
-static struct btrfs_free_space_op pinned_free_ino_op = {
+static const struct btrfs_free_space_op pinned_free_ino_op = {
        .recalc_thresholds      = pinned_recalc_thresholds,
        .use_bitmap             = pinned_use_bitmap,
 };
index a379ff6..65997d6 100644 (file)
@@ -66,6 +66,13 @@ struct btrfs_iget_args {
        struct btrfs_root *root;
 };
 
+/*
+ * State for one direct IO write. btrfs_direct_IO() fills in an instance and
+ * publishes a pointer to it through current->journal_info, which is where
+ * btrfs_submit_direct() picks it up again.
+ */
+struct btrfs_dio_data {
+       u64 outstanding_extents;
+       /*
+        * Bytes of the reservation not yet used up. Starts as the request size
+        * rounded up to the sector size and is reduced by each length handled;
+        * whatever is left is released if the direct IO fails.
+        */
+       u64 reserve;
+       /*
+        * Half-open range [start, end) of ordered extents for which no bio has
+        * been submitted yet. start == end means there is nothing to clean up;
+        * btrfs_submit_direct() collapses the range that way before submitting,
+        * and btrfs_direct_IO() fails any remaining range on error.
+        */
+       u64 unsubmitted_oe_range_start;
+       u64 unsubmitted_oe_range_end;
+};
+
 static const struct inode_operations btrfs_dir_inode_operations;
 static const struct inode_operations btrfs_symlink_inode_operations;
 static const struct inode_operations btrfs_dir_ro_inode_operations;
@@ -74,17 +81,16 @@ static const struct inode_operations btrfs_file_inode_operations;
 static const struct address_space_operations btrfs_aops;
 static const struct address_space_operations btrfs_symlink_aops;
 static const struct file_operations btrfs_dir_file_operations;
-static struct extent_io_ops btrfs_extent_io_ops;
+static const struct extent_io_ops btrfs_extent_io_ops;
 
 static struct kmem_cache *btrfs_inode_cachep;
-static struct kmem_cache *btrfs_delalloc_work_cachep;
 struct kmem_cache *btrfs_trans_handle_cachep;
 struct kmem_cache *btrfs_transaction_cachep;
 struct kmem_cache *btrfs_path_cachep;
 struct kmem_cache *btrfs_free_space_cachep;
 
 #define S_SHIFT 12
-static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
@@ -1989,7 +1995,7 @@ again:
        page_start = page_offset(page);
        page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
 
-       lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
+       lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
                         &cached_state);
 
        /* already ordered? We're done */
@@ -2482,7 +2488,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
        lock_start = backref->file_pos;
        lock_end = backref->file_pos + backref->num_bytes - 1;
        lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
-                        0, &cached);
+                        &cached);
 
        ordered = btrfs_lookup_first_ordered_extent(inode, lock_end);
        if (ordered) {
@@ -2874,7 +2880,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
 
        lock_extent_bits(io_tree, ordered_extent->file_offset,
                         ordered_extent->file_offset + ordered_extent->len - 1,
-                        0, &cached_state);
+                        &cached_state);
 
        ret = test_range_bit(io_tree, ordered_extent->file_offset,
                        ordered_extent->file_offset + ordered_extent->len - 1,
@@ -3106,55 +3112,47 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      start, (size_t)(end - start + 1));
 }
 
-struct delayed_iput {
-       struct list_head list;
-       struct inode *inode;
-};
-
-/* JDM: If this is fs-wide, why can't we add a pointer to
- * btrfs_inode instead and avoid the allocation? */
 void btrfs_add_delayed_iput(struct inode *inode)
 {
        struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
-       struct delayed_iput *delayed;
+       struct btrfs_inode *binode = BTRFS_I(inode);
 
        if (atomic_add_unless(&inode->i_count, -1, 1))
                return;
 
-       delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
-       delayed->inode = inode;
-
        spin_lock(&fs_info->delayed_iput_lock);
-       list_add_tail(&delayed->list, &fs_info->delayed_iputs);
+       if (binode->delayed_iput_count == 0) {
+               ASSERT(list_empty(&binode->delayed_iput));
+               list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs);
+       } else {
+               binode->delayed_iput_count++;
+       }
        spin_unlock(&fs_info->delayed_iput_lock);
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_root *root)
 {
-       LIST_HEAD(list);
        struct btrfs_fs_info *fs_info = root->fs_info;
-       struct delayed_iput *delayed;
-       int empty;
-
-       spin_lock(&fs_info->delayed_iput_lock);
-       empty = list_empty(&fs_info->delayed_iputs);
-       spin_unlock(&fs_info->delayed_iput_lock);
-       if (empty)
-               return;
 
        down_read(&fs_info->delayed_iput_sem);
-
        spin_lock(&fs_info->delayed_iput_lock);
-       list_splice_init(&fs_info->delayed_iputs, &list);
-       spin_unlock(&fs_info->delayed_iput_lock);
-
-       while (!list_empty(&list)) {
-               delayed = list_entry(list.next, struct delayed_iput, list);
-               list_del(&delayed->list);
-               iput(delayed->inode);
-               kfree(delayed);
+       while (!list_empty(&fs_info->delayed_iputs)) {
+               struct btrfs_inode *inode;
+
+               inode = list_first_entry(&fs_info->delayed_iputs,
+                               struct btrfs_inode, delayed_iput);
+               if (inode->delayed_iput_count) {
+                       inode->delayed_iput_count--;
+                       list_move_tail(&inode->delayed_iput,
+                                       &fs_info->delayed_iputs);
+               } else {
+                       list_del_init(&inode->delayed_iput);
+               }
+               spin_unlock(&fs_info->delayed_iput_lock);
+               iput(&inode->vfs_inode);
+               spin_lock(&fs_info->delayed_iput_lock);
        }
-
+       spin_unlock(&fs_info->delayed_iput_lock);
        up_read(&root->fs_info->delayed_iput_sem);
 }
 
@@ -3351,7 +3349,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                ret = -ENOMEM;
                goto out;
        }
-       path->reada = -1;
+       path->reada = READA_BACK;
 
        key.objectid = BTRFS_ORPHAN_OBJECTID;
        key.type = BTRFS_ORPHAN_ITEM_KEY;
@@ -4317,7 +4315,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = -1;
+       path->reada = READA_BACK;
 
        /*
         * We want to drop from the next block forward in case this new size is
@@ -4668,7 +4666,7 @@ again:
        }
        wait_on_page_writeback(page);
 
-       lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
        set_page_extent_mapped(page);
 
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
@@ -4799,7 +4797,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
        while (1) {
                struct btrfs_ordered_extent *ordered;
 
-               lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
+               lock_extent_bits(io_tree, hole_start, block_end - 1,
                                 &cached_state);
                ordered = btrfs_lookup_ordered_range(inode, hole_start,
                                                     block_end - hole_start);
@@ -5111,7 +5109,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
                end = state->end;
                spin_unlock(&io_tree->lock);
 
-               lock_extent_bits(io_tree, start, end, 0, &cached_state);
+               lock_extent_bits(io_tree, start, end, &cached_state);
 
                /*
                 * If still has DELALLOC flag, the extent didn't reach disk,
@@ -5752,7 +5750,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        if (!path)
                return -ENOMEM;
 
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        if (key_type == BTRFS_DIR_INDEX_KEY) {
                INIT_LIST_HEAD(&ins_list);
@@ -6783,7 +6781,7 @@ again:
                 * Chances are we'll be called again, so go ahead and do
                 * readahead
                 */
-               path->reada = 1;
+               path->reada = READA_FORWARD;
        }
 
        ret = btrfs_lookup_file_extent(trans, root, path,
@@ -7378,7 +7376,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
 
        while (1) {
                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                0, cached_state);
+                                cached_state);
                /*
                 * We're concerned with the entire range that we're going to be
                 * doing DIO to, so we need to make sure theres no ordered
@@ -7406,25 +7404,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
                        btrfs_start_ordered_extent(inode, ordered, 1);
                        btrfs_put_ordered_extent(ordered);
                } else {
-                       /* Screw you mmap */
-                       ret = btrfs_fdatawrite_range(inode, lockstart, lockend);
-                       if (ret)
-                               break;
-                       ret = filemap_fdatawait_range(inode->i_mapping,
-                                                     lockstart,
-                                                     lockend);
-                       if (ret)
-                               break;
-
                        /*
-                        * If we found a page that couldn't be invalidated just
-                        * fall back to buffered.
+                        * We could trigger writeback for this range (and wait
+                        * for it to complete) and then invalidate the pages for
+                        * this range (through invalidate_inode_pages2_range()),
+                        * but that can lead us to a deadlock with a concurrent
+                        * call to readpages() (a buffered read or a defrag call
+                        * triggered a readahead) on a page lock due to an
+                        * ordered dio extent we created before but did not have
+                        * yet a corresponding bio submitted (whence it can not
+                        * complete), which makes readpages() wait for that
+                        * ordered extent to complete while holding a lock on
+                        * that page.
                         */
-                       ret = invalidate_inode_pages2_range(inode->i_mapping,
-                                       lockstart >> PAGE_CACHE_SHIFT,
-                                       lockend >> PAGE_CACHE_SHIFT);
-                       if (ret)
-                               break;
+                       ret = -ENOTBLK;
+                       break;
                }
 
                cond_resched();
@@ -7480,11 +7474,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
        return em;
 }
 
-struct btrfs_dio_data {
-       u64 outstanding_extents;
-       u64 reserve;
-};
-
 static void adjust_dio_outstanding_extents(struct inode *inode,
                                           struct btrfs_dio_data *dio_data,
                                           const u64 len)
@@ -7668,6 +7657,7 @@ unlock:
                btrfs_free_reserved_data_space(inode, start, len);
                WARN_ON(dio_data->reserve < len);
                dio_data->reserve -= len;
+               dio_data->unsubmitted_oe_range_end = start + len;
                current->journal_info = dio_data;
        }
 
@@ -7990,22 +7980,22 @@ static void btrfs_endio_direct_read(struct bio *bio)
        bio_put(bio);
 }
 
-static void btrfs_endio_direct_write(struct bio *bio)
+static void btrfs_endio_direct_write_update_ordered(struct inode *inode,
+                                                   const u64 offset,
+                                                   const u64 bytes,
+                                                   const int uptodate)
 {
-       struct btrfs_dio_private *dip = bio->bi_private;
-       struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_ordered_extent *ordered = NULL;
-       u64 ordered_offset = dip->logical_offset;
-       u64 ordered_bytes = dip->bytes;
-       struct bio *dio_bio;
+       u64 ordered_offset = offset;
+       u64 ordered_bytes = bytes;
        int ret;
 
 again:
        ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
                                                   &ordered_offset,
                                                   ordered_bytes,
-                                                  !bio->bi_error);
+                                                  uptodate);
        if (!ret)
                goto out_test;
 
@@ -8018,13 +8008,22 @@ out_test:
         * our bio might span multiple ordered extents.  If we haven't
         * completed the accounting for the whole dio, go back and try again
         */
-       if (ordered_offset < dip->logical_offset + dip->bytes) {
-               ordered_bytes = dip->logical_offset + dip->bytes -
-                       ordered_offset;
+       if (ordered_offset < offset + bytes) {
+               ordered_bytes = offset + bytes - ordered_offset;
                ordered = NULL;
                goto again;
        }
-       dio_bio = dip->dio_bio;
+}
+
+static void btrfs_endio_direct_write(struct bio *bio)
+{
+       struct btrfs_dio_private *dip = bio->bi_private;
+       struct bio *dio_bio = dip->dio_bio;
+
+       btrfs_endio_direct_write_update_ordered(dip->inode,
+                                               dip->logical_offset,
+                                               dip->bytes,
+                                               !bio->bi_error);
 
        kfree(dip);
 
@@ -8332,6 +8331,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
                dip->subio_endio = btrfs_subio_endio_read;
        }
 
+       /*
+        * Reset the range for unsubmitted ordered extents (to a 0 length range)
+        * even if we fail to submit a bio, because in such case we do the
+        * corresponding error handling below and it must not be done a second
+        * time by btrfs_direct_IO().
+        */
+       if (write) {
+               struct btrfs_dio_data *dio_data = current->journal_info;
+
+               dio_data->unsubmitted_oe_range_end = dip->logical_offset +
+                       dip->bytes;
+               dio_data->unsubmitted_oe_range_start =
+                       dio_data->unsubmitted_oe_range_end;
+       }
+
        ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
        if (!ret)
                return;
@@ -8360,24 +8374,15 @@ free_ordered:
                dip = NULL;
                io_bio = NULL;
        } else {
-               if (write) {
-                       struct btrfs_ordered_extent *ordered;
-
-                       ordered = btrfs_lookup_ordered_extent(inode,
-                                                             file_offset);
-                       set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
-                       /*
-                        * Decrements our ref on the ordered extent and removes
-                        * the ordered extent from the inode's ordered tree,
-                        * doing all the proper resource cleanup such as for the
-                        * reserved space and waking up any waiters for this
-                        * ordered extent (through btrfs_remove_ordered_extent).
-                        */
-                       btrfs_finish_ordered_io(ordered);
-               } else {
+               if (write)
+                       btrfs_endio_direct_write_update_ordered(inode,
+                                               file_offset,
+                                               dio_bio->bi_iter.bi_size,
+                                               0);
+               else
                        unlock_extent(&BTRFS_I(inode)->io_tree, file_offset,
                              file_offset + dio_bio->bi_iter.bi_size - 1);
-               }
+
                dio_bio->bi_error = -EIO;
                /*
                 * Releases and cleans up our dio_bio, no need to bio_put()
@@ -8477,6 +8482,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                 * originally calculated.  Abuse current->journal_info for this.
                 */
                dio_data.reserve = round_up(count, root->sectorsize);
+               dio_data.unsubmitted_oe_range_start = (u64)offset;
+               dio_data.unsubmitted_oe_range_end = (u64)offset;
                current->journal_info = &dio_data;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
@@ -8495,6 +8502,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
                        if (dio_data.reserve)
                                btrfs_delalloc_release_space(inode, offset,
                                                             dio_data.reserve);
+                       /*
+                        * On error we might have left some ordered extents
+                        * without submitting corresponding bios for them, so
+                        * clean them up to avoid other tasks getting them
+                        * and waiting for them to complete forever.
+                        */
+                       if (dio_data.unsubmitted_oe_range_start <
+                           dio_data.unsubmitted_oe_range_end)
+                               btrfs_endio_direct_write_update_ordered(inode,
+                                       dio_data.unsubmitted_oe_range_start,
+                                       dio_data.unsubmitted_oe_range_end -
+                                       dio_data.unsubmitted_oe_range_start,
+                                       0);
                } else if (ret >= 0 && (size_t)ret < count)
                        btrfs_delalloc_release_space(inode, offset,
                                                     count - (size_t)ret);
@@ -8532,15 +8552,28 @@ int btrfs_readpage(struct file *file, struct page *page)
 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
 {
        struct extent_io_tree *tree;
-
+       struct inode *inode = page->mapping->host;
+       int ret;
 
        if (current->flags & PF_MEMALLOC) {
                redirty_page_for_writepage(wbc, page);
                unlock_page(page);
                return 0;
        }
+
+       /*
+        * If we are under memory pressure we will call this directly from the
+        * VM, we need to make sure we have the inode referenced for the ordered
+        * extent.  If not just return like we didn't do anything.
+        */
+       if (!igrab(inode)) {
+               redirty_page_for_writepage(wbc, page);
+               return AOP_WRITEPAGE_ACTIVATE;
+       }
        tree = &BTRFS_I(page->mapping->host)->io_tree;
-       return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+       ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc);
+       btrfs_add_delayed_iput(inode);
+       return ret;
 }
 
 static int btrfs_writepages(struct address_space *mapping,
@@ -8612,7 +8645,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
        }
 
        if (!inode_evicting)
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               lock_extent_bits(tree, page_start, page_end, &cached_state);
        ordered = btrfs_lookup_ordered_extent(inode, page_start);
        if (ordered) {
                /*
@@ -8650,7 +8683,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                btrfs_put_ordered_extent(ordered);
                if (!inode_evicting) {
                        cached_state = NULL;
-                       lock_extent_bits(tree, page_start, page_end, 0,
+                       lock_extent_bits(tree, page_start, page_end,
                                         &cached_state);
                }
        }
@@ -8748,7 +8781,7 @@ again:
        }
        wait_on_page_writeback(page);
 
-       lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state);
+       lock_extent_bits(io_tree, page_start, page_end, &cached_state);
        set_page_extent_mapped(page);
 
        /*
@@ -9022,6 +9055,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->dir_index = 0;
        ei->last_unlink_trans = 0;
        ei->last_log_commit = 0;
+       ei->delayed_iput_count = 0;
 
        spin_lock_init(&ei->lock);
        ei->outstanding_extents = 0;
@@ -9046,6 +9080,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
        INIT_LIST_HEAD(&ei->delalloc_inodes);
+       INIT_LIST_HEAD(&ei->delayed_iput);
        RB_CLEAR_NODE(&ei->rb_node);
 
        return inode;
@@ -9150,8 +9185,6 @@ void btrfs_destroy_cachep(void)
                kmem_cache_destroy(btrfs_path_cachep);
        if (btrfs_free_space_cachep)
                kmem_cache_destroy(btrfs_free_space_cachep);
-       if (btrfs_delalloc_work_cachep)
-               kmem_cache_destroy(btrfs_delalloc_work_cachep);
 }
 
 int btrfs_init_cachep(void)
@@ -9186,13 +9219,6 @@ int btrfs_init_cachep(void)
        if (!btrfs_free_space_cachep)
                goto fail;
 
-       btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
-                       sizeof(struct btrfs_delalloc_work), 0,
-                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
-                       NULL);
-       if (!btrfs_delalloc_work_cachep)
-               goto fail;
-
        return 0;
 fail:
        btrfs_destroy_cachep();
@@ -9416,14 +9442,10 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
        delalloc_work = container_of(work, struct btrfs_delalloc_work,
                                     work);
        inode = delalloc_work->inode;
-       if (delalloc_work->wait) {
-               btrfs_wait_ordered_range(inode, 0, (u64)-1);
-       } else {
+       filemap_flush(inode->i_mapping);
+       if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                               &BTRFS_I(inode)->runtime_flags))
                filemap_flush(inode->i_mapping);
-               if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
-                            &BTRFS_I(inode)->runtime_flags))
-                       filemap_flush(inode->i_mapping);
-       }
 
        if (delalloc_work->delay_iput)
                btrfs_add_delayed_iput(inode);
@@ -9433,18 +9455,17 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work)
 }
 
 struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
-                                                   int wait, int delay_iput)
+                                                   int delay_iput)
 {
        struct btrfs_delalloc_work *work;
 
-       work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+       work = kmalloc(sizeof(*work), GFP_NOFS);
        if (!work)
                return NULL;
 
        init_completion(&work->completion);
        INIT_LIST_HEAD(&work->list);
        work->inode = inode;
-       work->wait = wait;
        work->delay_iput = delay_iput;
        WARN_ON_ONCE(!inode);
        btrfs_init_work(&work->work, btrfs_flush_delalloc_helper,
@@ -9456,7 +9477,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
 void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
 {
        wait_for_completion(&work->completion);
-       kmem_cache_free(btrfs_delalloc_work_cachep, work);
+       kfree(work);
 }
 
 /*
@@ -9492,7 +9513,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
                }
                spin_unlock(&root->delalloc_lock);
 
-               work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+               work = btrfs_alloc_delalloc_work(inode, delay_iput);
                if (!work) {
                        if (delay_iput)
                                btrfs_add_delayed_iput(inode);
@@ -10021,7 +10042,7 @@ static const struct file_operations btrfs_dir_file_operations = {
        .fsync          = btrfs_sync_file,
 };
 
-static struct extent_io_ops btrfs_extent_io_ops = {
+static const struct extent_io_ops btrfs_extent_io_ops = {
        .fill_delalloc = run_delalloc_range,
        .submit_bio_hook = btrfs_submit_bio_hook,
        .merge_bio_hook = btrfs_merge_bio_hook,
index 4b9b928..e392dd6 100644 (file)
@@ -655,22 +655,28 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
                return -EINVAL;
 
+       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+       if (!pending_snapshot)
+               return -ENOMEM;
+
+       pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
+                       GFP_NOFS);
+       pending_snapshot->path = btrfs_alloc_path();
+       if (!pending_snapshot->root_item || !pending_snapshot->path) {
+               ret = -ENOMEM;
+               goto free_pending;
+       }
+
        atomic_inc(&root->will_be_snapshoted);
        smp_mb__after_atomic();
        btrfs_wait_for_no_snapshoting_writes(root);
 
        ret = btrfs_start_delalloc_inodes(root, 0);
        if (ret)
-               goto out;
+               goto dec_and_free;
 
        btrfs_wait_ordered_extents(root, -1);
 
-       pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
-       if (!pending_snapshot) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
        btrfs_init_block_rsv(&pending_snapshot->block_rsv,
                             BTRFS_BLOCK_RSV_TEMP);
        /*
@@ -686,7 +692,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                                        &pending_snapshot->qgroup_reserved,
                                        false);
        if (ret)
-               goto free;
+               goto dec_and_free;
 
        pending_snapshot->dentry = dentry;
        pending_snapshot->root = root;
@@ -737,11 +743,14 @@ fail:
        btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
                                         &pending_snapshot->block_rsv,
                                         pending_snapshot->qgroup_reserved);
-free:
-       kfree(pending_snapshot);
-out:
+dec_and_free:
        if (atomic_dec_and_test(&root->will_be_snapshoted))
                wake_up_atomic_t(&root->will_be_snapshoted);
+free_pending:
+       kfree(pending_snapshot->root_item);
+       btrfs_free_path(pending_snapshot->path);
+       kfree(pending_snapshot);
+
        return ret;
 }
 
@@ -992,7 +1001,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
                u64 end = start + len - 1;
 
                /* get the big lock and read metadata off disk */
-               lock_extent_bits(io_tree, start, end, 0, &cached);
+               lock_extent_bits(io_tree, start, end, &cached);
                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
                unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
 
@@ -1140,7 +1149,7 @@ again:
                page_end = page_start + PAGE_CACHE_SIZE - 1;
                while (1) {
                        lock_extent_bits(tree, page_start, page_end,
-                                        0, &cached_state);
+                                        &cached_state);
                        ordered = btrfs_lookup_ordered_extent(inode,
                                                              page_start);
                        unlock_extent_cached(tree, page_start, page_end,
@@ -1200,7 +1209,7 @@ again:
        page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
 
        lock_extent_bits(&BTRFS_I(inode)->io_tree,
-                        page_start, page_end - 1, 0, &cached_state);
+                        page_start, page_end - 1, &cached_state);
        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
                          page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
@@ -3477,7 +3486,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
                return ret;
        }
 
-       path->reada = 2;
+       path->reada = READA_FORWARD;
        /* clone data */
        key.objectid = btrfs_ino(src);
        key.type = BTRFS_EXTENT_DATA_KEY;
@@ -4146,7 +4155,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                return -ENOMEM;
 
        space_args.total_spaces = 0;
-       dest = kmalloc(alloc_size, GFP_NOFS);
+       dest = kmalloc(alloc_size, GFP_KERNEL);
        if (!dest)
                return -ENOMEM;
        dest_orig = dest;
@@ -4672,7 +4681,7 @@ locked:
                goto out_bargs;
        }
 
-       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+       bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
        if (!bctl) {
                ret = -ENOMEM;
                goto out_bargs;
@@ -4758,7 +4767,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
                goto out;
        }
 
-       bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
+       bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
        if (!bargs) {
                ret = -ENOMEM;
                goto out;
@@ -5018,7 +5027,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
+       qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
        if (!qsa)
                return -ENOMEM;
 
@@ -5148,7 +5157,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
                goto out;
        }
 
-       args64 = kmalloc(sizeof(*args64), GFP_NOFS);
+       args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
        if (!args64) {
                ret = -ENOMEM;
                goto out;
@@ -5285,7 +5294,7 @@ out_unlock:
 static int btrfs_ioctl_get_supported_features(struct file *file,
                                              void __user *arg)
 {
-       static struct btrfs_ioctl_feature_flags features[3] = {
+       static const struct btrfs_ioctl_feature_flags features[3] = {
                INIT_FEATURE_FLAGS(SUPP),
                INIT_FEATURE_FLAGS(SAFE_SET),
                INIT_FEATURE_FLAGS(SAFE_CLEAR)
index b4ca545..ef6d8fc 100644 (file)
@@ -708,8 +708,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
                err = -ENOMEM;
                goto out;
        }
-       path1->reada = 1;
-       path2->reada = 2;
+       path1->reada = READA_FORWARD;
+       path2->reada = READA_FORWARD;
 
        node = alloc_backref_node(cache);
        if (!node) {
@@ -2130,7 +2130,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        reloc_root = root->reloc_root;
        root_item = &reloc_root->root_item;
@@ -3527,7 +3527,7 @@ static int find_data_references(struct reloc_control *rc,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        root = read_fs_root(rc->extent_root->fs_info, ref_root);
        if (IS_ERR(root)) {
@@ -3917,7 +3917,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 1;
+       path->reada = READA_FORWARD;
 
        ret = prepare_to_relocate(rc);
        if (ret) {
@@ -4343,7 +4343,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = -1;
+       path->reada = READA_BACK;
 
        key.objectid = BTRFS_TREE_RELOC_OBJECTID;
        key.type = BTRFS_ROOT_ITEM_KEY;
index b57a15a..0c981eb 100644 (file)
@@ -3505,7 +3505,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
        if (!path)
                return -ENOMEM;
 
-       path->reada = 2;
+       path->reada = READA_FORWARD;
        path->search_commit_root = 1;
        path->skip_locking = 1;
 
@@ -3733,27 +3733,27 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
        if (fs_info->scrub_workers_refcnt == 0) {
                if (is_dev_replace)
                        fs_info->scrub_workers =
-                               btrfs_alloc_workqueue("btrfs-scrub", flags,
+                               btrfs_alloc_workqueue("scrub", flags,
                                                      1, 4);
                else
                        fs_info->scrub_workers =
-                               btrfs_alloc_workqueue("btrfs-scrub", flags,
+                               btrfs_alloc_workqueue("scrub", flags,
                                                      max_active, 4);
                if (!fs_info->scrub_workers)
                        goto fail_scrub_workers;
 
                fs_info->scrub_wr_completion_workers =
-                       btrfs_alloc_workqueue("btrfs-scrubwrc", flags,
+                       btrfs_alloc_workqueue("scrubwrc", flags,
                                              max_active, 2);
                if (!fs_info->scrub_wr_completion_workers)
                        goto fail_scrub_wr_completion_workers;
 
                fs_info->scrub_nocow_workers =
-                       btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0);
+                       btrfs_alloc_workqueue("scrubnc", flags, 1, 0);
                if (!fs_info->scrub_nocow_workers)
                        goto fail_scrub_nocow_workers;
                fs_info->scrub_parity_workers =
-                       btrfs_alloc_workqueue("btrfs-scrubparity", flags,
+                       btrfs_alloc_workqueue("scrubparity", flags,
                                              max_active, 2);
                if (!fs_info->scrub_parity_workers)
                        goto fail_scrub_parity_workers;
@@ -4209,7 +4209,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len,
 
        io_tree = &BTRFS_I(inode)->io_tree;
 
-       lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
+       lock_extent_bits(io_tree, lockstart, lockend, &cached_state);
        ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
        if (ordered) {
                btrfs_put_ordered_extent(ordered);
index 0b6ab61..86f7fdc 100644 (file)
@@ -295,10 +295,11 @@ enum {
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
-       Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
-       Opt_check_integrity, Opt_check_integrity_including_extent_data,
+       Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
+       Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
+       Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
+       Opt_skip_balance, Opt_check_integrity,
+       Opt_check_integrity_including_extent_data,
        Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
@@ -309,7 +310,7 @@ enum {
        Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
        {Opt_degraded, "degraded"},
        {Opt_subvol, "subvol=%s"},
        {Opt_subvolid, "subvolid=%s"},
@@ -340,6 +341,7 @@ static match_table_t tokens = {
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_space_cache, "space_cache"},
+       {Opt_space_cache_version, "space_cache=%s"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
@@ -383,7 +385,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        bool compress_force = false;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
-       if (cache_gen)
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
+               btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+       else if (cache_gen)
                btrfs_set_opt(info->mount_opt, SPACE_CACHE);
 
        if (!options)
@@ -617,15 +621,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                             "turning off discard");
                        break;
                case Opt_space_cache:
-                       btrfs_set_and_info(root, SPACE_CACHE,
-                                          "enabling disk space caching");
+               case Opt_space_cache_version:
+                       if (token == Opt_space_cache ||
+                           strcmp(args[0].from, "v1") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               FREE_SPACE_TREE);
+                               btrfs_set_and_info(root, SPACE_CACHE,
+                                                  "enabling disk space caching");
+                       } else if (strcmp(args[0].from, "v2") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               SPACE_CACHE);
+                               btrfs_set_and_info(root, FREE_SPACE_TREE,
+                                                  "enabling free space tree");
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                        break;
                case Opt_rescan_uuid_tree:
                        btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
                        break;
                case Opt_no_space_cache:
-                       btrfs_clear_and_info(root, SPACE_CACHE,
-                                            "disabling disk space caching");
+                       if (btrfs_test_opt(root, SPACE_CACHE)) {
+                               btrfs_clear_and_info(root, SPACE_CACHE,
+                                                    "disabling disk space caching");
+                       }
+                       if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
+                               btrfs_clear_and_info(root, FREE_SPACE_TREE,
+                                                    "disabling free space tree");
+                       }
                        break;
                case Opt_inode_cache:
                        btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
@@ -754,8 +778,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                }
        }
 out:
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, CLEAR_CACHE)) {
+               btrfs_err(root->fs_info, "cannot disable free space tree");
+               ret = -EINVAL;
+
+       }
        if (!ret && btrfs_test_opt(root, SPACE_CACHE))
                btrfs_info(root->fs_info, "disk space caching is enabled");
+       if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+               btrfs_info(root->fs_info, "using free space tree");
        kfree(orig);
        return ret;
 }
@@ -1162,6 +1195,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",noacl");
        if (btrfs_test_opt(root, SPACE_CACHE))
                seq_puts(seq, ",space_cache");
+       else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+               seq_puts(seq, ",space_cache=v2");
        else
                seq_puts(seq, ",nospace_cache");
        if (btrfs_test_opt(root, RESCAN_UUID_TREE))
@@ -1956,6 +1991,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
  * there are other factors that may change the result (like a new metadata
  * chunk).
  *
+ * If metadata is exhausted, f_bavail will be 0.
+ *
  * FIXME: not accurate for mixed block groups, total and free/used are ok,
  * available appears slightly larger.
  */
@@ -1967,11 +2004,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct btrfs_space_info *found;
        u64 total_used = 0;
        u64 total_free_data = 0;
+       u64 total_free_meta = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)fs_info->fsid;
        unsigned factor = 1;
        struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
        int ret;
+       u64 thresh = 0;
 
        /*
         * holding chunk_mutex to avoid allocating new chunks, holding
@@ -1997,6 +2036,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
                                }
                        }
                }
+               if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+                       total_free_meta += found->disk_total - found->disk_used;
 
                total_used += found->disk_used;
        }
@@ -2019,6 +2060,24 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_bavail += div_u64(total_free_data, factor);
        buf->f_bavail = buf->f_bavail >> bits;
 
+       /*
+        * We calculate the remaining metadata space minus global reserve. If
+        * this is (supposedly) smaller than zero, there's no space. But this
+        * does not hold in practice: the exhausted state occurs while there is
+        * still some positive delta. So we apply some guesswork and compare the
+        * delta to a 4M threshold.  (Practically observed delta was ~2M.)
+        *
+        * We probably cannot calculate the exact threshold value because this
+        * depends on the internal reservations requested by various
+        * operations, so some operations that consume a few metadata will
+        * succeed even if the Avail is zero. But this is better than the other
+        * way around.
+        */
+       thresh = 4 * 1024 * 1024;
+
+       if (total_free_meta - thresh < block_rsv->size)
+               buf->f_bavail = 0;
+
        buf->f_type = BTRFS_SUPER_MAGIC;
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_namelen = BTRFS_NAME_LEN;
@@ -2225,6 +2284,9 @@ static int btrfs_run_sanity_tests(void)
        if (ret)
                goto out;
        ret = btrfs_test_qgroups();
+       if (ret)
+               goto out;
+       ret = btrfs_test_free_space_tree();
 out:
        btrfs_destroy_test_fs();
        return ret;
index 9626252..b1d920b 100644 (file)
@@ -21,6 +21,9 @@
 #include <linux/magic.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../free-space-cache.h"
+#include "../free-space-tree.h"
+#include "../transaction.h"
 #include "../volumes.h"
 #include "../disk-io.h"
 #include "../qgroup.h"
@@ -122,6 +125,9 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void)
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
        INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       extent_io_tree_init(&fs_info->freed_extents[0], NULL);
+       extent_io_tree_init(&fs_info->freed_extents[1], NULL);
+       fs_info->pinned_extents = &fs_info->freed_extents[0];
        return fs_info;
 }
 
@@ -169,3 +175,55 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
        kfree(root);
 }
 
+/*
+ * Allocate a dummy block group cache for the self-tests.
+ *
+ * @length: value stored in cache->key.offset.
+ *
+ * Allocates the cache, its free_space_ctl and a dummy fs_info.  If any of
+ * the allocations fails, whatever was allocated so far is freed and NULL is
+ * returned.  The result is meant to be released with
+ * btrfs_free_dummy_block_group().
+ */
+struct btrfs_block_group_cache *
+btrfs_alloc_dummy_block_group(unsigned long length)
+{
+       struct btrfs_block_group_cache *cache;
+
+       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       if (!cache)
+               return NULL;
+       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+                                       GFP_NOFS);
+       if (!cache->free_space_ctl) {
+               kfree(cache);
+               return NULL;
+       }
+       cache->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!cache->fs_info) {
+               kfree(cache->free_space_ctl);
+               kfree(cache);
+               return NULL;
+       }
+
+       cache->key.objectid = 0;
+       cache->key.offset = length;
+       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       cache->sectorsize = 4096;
+       cache->full_stripe_len = 4096;
+
+       INIT_LIST_HEAD(&cache->list);
+       INIT_LIST_HEAD(&cache->cluster_list);
+       INIT_LIST_HEAD(&cache->bg_list);
+       btrfs_init_free_space_ctl(cache);
+       mutex_init(&cache->free_space_lock);
+
+       return cache;
+}
+
+/*
+ * Free a dummy block group cache; a NULL @cache is ignored.
+ *
+ * Calls __btrfs_remove_free_space_cache() on the free_space_ctl, then frees
+ * the ctl and the cache itself.
+ *
+ * NOTE(review): cache->fs_info is not released here -- confirm whether it is
+ * freed elsewhere or leaked.
+ */
+void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache)
+{
+       if (!cache)
+               return;
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+       kfree(cache->free_space_ctl);
+       kfree(cache);
+}
+
+/*
+ * Initialize a caller-provided transaction handle as a dummy: zero it, set
+ * transid to 1, initialize qgroup_ref_list and mark the handle type as
+ * __TRANS_DUMMY.
+ */
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans)
+{
+       memset(trans, 0, sizeof(*trans));
+       trans->transid = 1;
+       INIT_LIST_HEAD(&trans->qgroup_ref_list);
+       trans->type = __TRANS_DUMMY;
+}
index fd39542..054b8c7 100644 (file)
 #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__)
 
 struct btrfs_root;
+struct btrfs_trans_handle;
 
 int btrfs_test_free_space_cache(void);
 int btrfs_test_extent_buffer_operations(void);
 int btrfs_test_extent_io(void);
 int btrfs_test_inodes(void);
 int btrfs_test_qgroups(void);
+int btrfs_test_free_space_tree(void);
 int btrfs_init_test_fs(void);
 void btrfs_destroy_test_fs(void);
 struct inode *btrfs_new_test_inode(void);
 struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void);
 void btrfs_free_dummy_root(struct btrfs_root *root);
+struct btrfs_block_group_cache *
+btrfs_alloc_dummy_block_group(unsigned long length);
+void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache);
+void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans);
 #else
 static inline int btrfs_test_free_space_cache(void)
 {
@@ -63,6 +69,10 @@ static inline int btrfs_test_qgroups(void)
 {
        return 0;
 }
+/* Stub for the #else branch: runs nothing and reports success. */
+static inline int btrfs_test_free_space_tree(void)
+{
+       return 0;
+}
 #endif
 
 #endif
index 6ec2bce..e29fa29 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <linux/pagemap.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/sizes.h>
 #include "btrfs-tests.h"
 #include "../extent_io.h"
@@ -77,6 +78,8 @@ static int test_find_delalloc(void)
        u64 found;
        int ret = -EINVAL;
 
+       test_msg("Running find delalloc tests\n");
+
        inode = btrfs_new_test_inode();
        if (!inode) {
                test_msg("Failed to allocate test inode\n");
@@ -269,8 +272,139 @@ out:
        return ret;
 }
 
+/*
+ * Check the extent buffer bitmap helpers against the generic bitmap helpers.
+ *
+ * @bitmap: reference bitmap; @len bytes of it are written
+ * @eb:     extent buffer under test; @len bytes of it are written
+ * @len:    number of bytes to exercise
+ *
+ * Each step applies the same operation to @bitmap and to @eb and then
+ * compares the two, either with memcmp_extent_buffer() or bit by bit.
+ * Returns 0 if every step matches, -EINVAL on the first mismatch.
+ *
+ * NOTE(review): the reference side uses the word-based bitmap_set() and
+ * test_bit() while the result is compared byte-wise against the extent
+ * buffer; presumably this relies on a little-endian bit layout -- confirm
+ * on big-endian.
+ */
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
+                            unsigned long len)
+{
+       unsigned long i, x;
+
+       memset(bitmap, 0, len);
+       memset_extent_buffer(eb, 0, 0, len);
+       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               test_msg("Bitmap was not zeroed\n");
+               return -EINVAL;
+       }
+
+       bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
+       extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
+       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               test_msg("Setting all bits failed\n");
+               return -EINVAL;
+       }
+
+       bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
+       extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
+       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               test_msg("Clearing all bits failed\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Set sizeof(long) bytes worth of bits starting sizeof(long) / 2
+        * bytes before byte offset PAGE_CACHE_SIZE, so the range crosses
+        * that offset.
+        */
+       bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+                  sizeof(long) * BITS_PER_BYTE);
+       extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+                                sizeof(long) * BITS_PER_BYTE);
+       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               test_msg("Setting straddling pages failed\n");
+               return -EINVAL;
+       }
+
+       /* The inverse: set every bit, then clear the same crossing range. */
+       bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
+       bitmap_clear(bitmap,
+                    (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+                    sizeof(long) * BITS_PER_BYTE);
+       extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
+       extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+                                  sizeof(long) * BITS_PER_BYTE);
+       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
+               test_msg("Clearing straddling pages failed\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Generate a wonky pseudo-random bit pattern for the sake of not using
+        * something repetitive that could miss some hypothetical off-by-n bug.
+        *
+        * x is masked to 32 bits on every step, so where long is wider than
+        * 32 bits the upper bits of each bitmap word stay zero.
+        */
+       x = 0;
+       for (i = 0; i < len / sizeof(long); i++) {
+               x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL;
+               bitmap[i] = x;
+       }
+       write_extent_buffer(eb, bitmap, 0, len);
+
+       for (i = 0; i < len * BITS_PER_BYTE; i++) {
+               int bit, bit1;
+
+               /* Address the bit purely by its bit number from offset 0. */
+               bit = !!test_bit(i, bitmap);
+               bit1 = !!extent_buffer_test_bit(eb, 0, i);
+               if (bit1 != bit) {
+                       test_msg("Testing bit pattern failed\n");
+                       return -EINVAL;
+               }
+
+               /* Same bit again, split into a byte offset and a bit within it. */
+               bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
+                                               i % BITS_PER_BYTE);
+               if (bit1 != bit) {
+                       test_msg("Testing bit pattern with offset failed\n");
+                       return -EINVAL;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Run __test_eb_bitmaps() twice over PAGE_CACHE_SIZE * 4 bytes: first on a
+ * dummy extent buffer allocated with a start of 0, then on one allocated with
+ * a start of PAGE_CACHE_SIZE / 2.
+ *
+ * Returns 0 on success, -ENOMEM if the bitmap or an extent buffer cannot be
+ * allocated, otherwise the error returned by __test_eb_bitmaps().
+ */
+static int test_eb_bitmaps(void)
+{
+       unsigned long len = PAGE_CACHE_SIZE * 4;
+       unsigned long *bitmap;
+       struct extent_buffer *eb;
+       int ret;
+
+       test_msg("Running extent buffer bitmap tests\n");
+
+       bitmap = kmalloc(len, GFP_NOFS);
+       if (!bitmap) {
+               test_msg("Couldn't allocate test bitmap\n");
+               return -ENOMEM;
+       }
+
+       eb = __alloc_dummy_extent_buffer(NULL, 0, len);
+       if (!eb) {
+               test_msg("Couldn't allocate test extent buffer\n");
+               kfree(bitmap);
+               return -ENOMEM;
+       }
+
+       ret = __test_eb_bitmaps(bitmap, eb, len);
+       if (ret)
+               goto out;
+
+       /* Do it over again with an extent buffer which isn't page-aligned. */
+       free_extent_buffer(eb);
+       eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len);
+       if (!eb) {
+               /* No extent buffer is held here, so only the bitmap is freed. */
+               test_msg("Couldn't allocate test extent buffer\n");
+               kfree(bitmap);
+               return -ENOMEM;
+       }
+
+       ret = __test_eb_bitmaps(bitmap, eb, len);
+out:
+       free_extent_buffer(eb);
+       kfree(bitmap);
+       return ret;
+}
+
+/*
+ * Entry point for the extent I/O self-tests: runs test_find_delalloc() and,
+ * only if that returns 0, test_eb_bitmaps().  Returns the first non-zero
+ * result, or 0 if both pass.
+ */
 int btrfs_test_extent_io(void)
 {
-       test_msg("Running find delalloc tests\n");
-       return test_find_delalloc();
+       int ret;
+
+       test_msg("Running extent I/O tests\n");
+
+       ret = test_find_delalloc();
+       if (ret)
+               goto out;
+
+       ret = test_eb_bitmaps();
+out:
+       test_msg("Extent I/O tests finished\n");
+       return ret;
 }
index cc287d4..c9ad97b 100644 (file)
 #include "../free-space-cache.h"
 
 #define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
-static struct btrfs_block_group_cache *init_test_block_group(void)
-{
-       struct btrfs_block_group_cache *cache;
-
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
-       if (!cache)
-               return NULL;
-       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
-       if (!cache->free_space_ctl) {
-               kfree(cache);
-               return NULL;
-       }
-       cache->fs_info = btrfs_alloc_dummy_fs_info();
-       if (!cache->fs_info) {
-               kfree(cache->free_space_ctl);
-               kfree(cache);
-               return NULL;
-       }
-
-       cache->key.objectid = 0;
-       cache->key.offset = SZ_1G;
-       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-       cache->sectorsize = 4096;
-       cache->full_stripe_len = 4096;
-
-       spin_lock_init(&cache->lock);
-       INIT_LIST_HEAD(&cache->list);
-       INIT_LIST_HEAD(&cache->cluster_list);
-       INIT_LIST_HEAD(&cache->bg_list);
-
-       btrfs_init_free_space_ctl(cache);
-
-       return cache;
-}
 
 /*
  * This test just does basic sanity checking, making sure we can add an extent
@@ -440,9 +405,11 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
        int ret;
        u64 offset;
        u64 max_extent_size;
-
-       bool (*use_bitmap_op)(struct btrfs_free_space_ctl *,
-                             struct btrfs_free_space *);
+       const struct btrfs_free_space_op test_free_space_ops = {
+               .recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds,
+               .use_bitmap = test_use_bitmap,
+       };
+       const struct btrfs_free_space_op *orig_free_space_ops;
 
        test_msg("Running space stealing from bitmap to extent\n");
 
@@ -464,8 +431,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
         * that forces use of bitmaps as soon as we have at least 1
         * extent entry.
         */
-       use_bitmap_op = cache->free_space_ctl->op->use_bitmap;
-       cache->free_space_ctl->op->use_bitmap = test_use_bitmap;
+       orig_free_space_ops = cache->free_space_ctl->op;
+       cache->free_space_ctl->op = &test_free_space_ops;
 
        /*
         * Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[
@@ -851,7 +818,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
        if (ret)
                return ret;
 
-       cache->free_space_ctl->op->use_bitmap = use_bitmap_op;
+       cache->free_space_ctl->op = orig_free_space_ops;
        __btrfs_remove_free_space_cache(cache->free_space_ctl);
 
        return 0;
@@ -865,7 +832,7 @@ int btrfs_test_free_space_cache(void)
 
        test_msg("Running btrfs free space cache tests\n");
 
-       cache = init_test_block_group();
+       cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024);
        if (!cache) {
                test_msg("Couldn't run the tests\n");
                return 0;
@@ -896,9 +863,7 @@ int btrfs_test_free_space_cache(void)
 
        ret = test_steal_space_from_bitmap_to_extent(cache);
 out:
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-       kfree(cache->free_space_ctl);
-       kfree(cache);
+       btrfs_free_dummy_block_group(cache);
        btrfs_free_dummy_root(root);
        test_msg("Free space cache tests finished\n");
        return ret;
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
new file mode 100644 (file)
index 0000000..d05fe1a
--- /dev/null
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) 2015 Facebook.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../disk-io.h"
+#include "../free-space-tree.h"
+#include "../transaction.h"
+
+/*
+ * One expected run of free space, compared by the checkers below against the
+ * contents of the free space tree.
+ */
+struct free_space_extent {
+       u64 start;      /* first byte of the free range */
+       u64 length;     /* size of the free range in bytes */
+};
+
+/*
+ * The test cases align their operations to this in order to hit some of the
+ * edge cases in the bitmap code.
+ *
+ * NOTE(review): the 4096 factor is presumably the sector size of the dummy
+ * block group, which would make this the byte range covered by one bitmap
+ * item -- confirm against BTRFS_FREE_SPACE_BITMAP_BITS and the dummy setup.
+ */
+#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096)
+
+/*
+ * Verify that the free space tree items for @cache describe exactly the
+ * @num_extents entries of @extents, in order.
+ *
+ * The block group's free space info item is looked up first and must sit in
+ * slot 0 of its leaf; its recorded extent count must equal @num_extents.
+ * The items that follow it in the same leaf are then checked:
+ *
+ *  - bitmap format: every following item must be a bitmap item.  The bits
+ *    are walked in steps of cache->sectorsize and each run of set bits is
+ *    compared with the next expected extent.  A run still open after the
+ *    last bitmap is closed at the end of the block group.
+ *  - extent format: the leaf must hold exactly @num_extents items after the
+ *    info item, each an extent item whose key matches the expected extent.
+ *
+ * Returns 0 if the tree matches, the lookup error if the info item cannot be
+ * found, or -EINVAL on any mismatch.  @path is released before returning.
+ */
+static int __check_free_space_extents(struct btrfs_trans_handle *trans,
+                                     struct btrfs_fs_info *fs_info,
+                                     struct btrfs_block_group_cache *cache,
+                                     struct btrfs_path *path,
+                                     struct free_space_extent *extents,
+                                     unsigned int num_extents)
+{
+       struct btrfs_free_space_info *info;
+       struct btrfs_key key;
+       int prev_bit = 0, bit;
+       u64 extent_start = 0, offset, end;
+       u32 flags, extent_count;
+       unsigned int i;
+       int ret;
+
+       info = search_free_space_info(trans, fs_info, cache, path, 0);
+       if (IS_ERR(info)) {
+               test_msg("Could not find free space info\n");
+               ret = PTR_ERR(info);
+               goto out;
+       }
+       flags = btrfs_free_space_flags(path->nodes[0], info);
+       extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
+
+       if (extent_count != num_extents) {
+               test_msg("Extent count is wrong\n");
+               ret = -EINVAL;
+               goto out;
+       }
+       if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) {
+               if (path->slots[0] != 0)
+                       goto invalid;
+               end = cache->key.objectid + cache->key.offset;
+               i = 0;
+               /* Visit every item after the info item in this leaf. */
+               while (++path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
+                       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+                       if (key.type != BTRFS_FREE_SPACE_BITMAP_KEY)
+                               goto invalid;
+                       offset = key.objectid;
+                       while (offset < key.objectid + key.offset) {
+                               bit = free_space_test_bit(cache, path, offset);
+                               if (prev_bit == 0 && bit == 1) {
+                                       /* 0 -> 1: a free run starts here. */
+                                       extent_start = offset;
+                               } else if (prev_bit == 1 && bit == 0) {
+                                       /*
+                                        * 1 -> 0: the run ended just before
+                                        * offset.  The bounds check comes
+                                        * first so extents[i] is never read
+                                        * past the end of the array.
+                                        */
+                                       if (i >= num_extents ||
+                                           extent_start != extents[i].start ||
+                                           offset - extent_start != extents[i].length)
+                                               goto invalid;
+                                       i++;
+                               }
+                               prev_bit = bit;
+                               offset += cache->sectorsize;
+                       }
+               }
+               /* A run still open here extends to the end of the group. */
+               if (prev_bit == 1) {
+                       if (i >= num_extents ||
+                           extent_start != extents[i].start ||
+                           end - extent_start != extents[i].length)
+                               goto invalid;
+                       i++;
+               }
+               if (i != num_extents)
+                       goto invalid;
+       } else {
+               /* Info item plus exactly one item per expected extent. */
+               if (btrfs_header_nritems(path->nodes[0]) != num_extents + 1 ||
+                   path->slots[0] != 0)
+                       goto invalid;
+               for (i = 0; i < num_extents; i++) {
+                       path->slots[0]++;
+                       btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+                       if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY ||
+                           key.objectid != extents[i].start ||
+                           key.offset != extents[i].length)
+                               goto invalid;
+               }
+       }
+
+       ret = 0;
+out:
+       btrfs_release_path(path);
+       return ret;
+invalid:
+       test_msg("Free space tree is invalid\n");
+       ret = -EINVAL;
+       goto out;
+}
+
+/*
+ * Check the free space tree against @extents in its current on-disk format,
+ * then convert the block group to the other format (bitmaps <-> extents)
+ * and check again.
+ *
+ * Returns 0 if both checks pass, otherwise the first error encountered.
+ */
+static int check_free_space_extents(struct btrfs_trans_handle *trans,
+                                   struct btrfs_fs_info *fs_info,
+                                   struct btrfs_block_group_cache *cache,
+                                   struct btrfs_path *path,
+                                   struct free_space_extent *extents,
+                                   unsigned int num_extents)
+{
+       struct btrfs_free_space_info *fsi;
+       u32 orig_flags;
+       int err;
+
+       /* Remember which format is in use before anything is converted. */
+       fsi = search_free_space_info(trans, fs_info, cache, path, 0);
+       if (IS_ERR(fsi)) {
+               test_msg("Could not find free space info\n");
+               btrfs_release_path(path);
+               return PTR_ERR(fsi);
+       }
+       orig_flags = btrfs_free_space_flags(path->nodes[0], fsi);
+       btrfs_release_path(path);
+
+       err = __check_free_space_extents(trans, fs_info, cache, path, extents,
+                                        num_extents);
+       if (err)
+               return err;
+
+       /* Switch to the opposite format and verify that one as well. */
+       if (!(orig_flags & BTRFS_FREE_SPACE_USING_BITMAPS)) {
+               err = convert_free_space_to_bitmaps(trans, fs_info, cache, path);
+               if (err) {
+                       test_msg("Could not convert to bitmaps\n");
+                       return err;
+               }
+       } else {
+               err = convert_free_space_to_extents(trans, fs_info, cache, path);
+               if (err) {
+                       test_msg("Could not convert to extents\n");
+                       return err;
+               }
+       }
+
+       return __check_free_space_extents(trans, fs_info, cache, path, extents,
+                                         num_extents);
+}
+
+/*
+ * A block group that has just had its free space added should consist of a
+ * single free extent spanning the whole group.
+ */
+static int test_empty_block_group(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info,
+                                 struct btrfs_block_group_cache *cache,
+                                 struct btrfs_path *path)
+{
+       struct free_space_extent whole_group = {
+               .start = cache->key.objectid,
+               .length = cache->key.offset,
+       };
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &whole_group, 1);
+}
+
+/*
+ * Remove the entire range of the block group from the free space tree and
+ * expect no free extents to remain.
+ */
+static int test_remove_all(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       struct free_space_extent extents[] = {};
+       const u64 bg_start = cache->key.objectid;
+       const u64 bg_len = cache->key.offset;
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, bg_len);
+       if (!err)
+               return check_free_space_extents(trans, fs_info, cache, path,
+                                               extents, ARRAY_SIZE(extents));
+
+       test_msg("Could not remove free space\n");
+       return err;
+}
+
+/*
+ * Remove the first BITMAP_RANGE bytes of the block group and expect a single
+ * free extent covering everything after it.
+ */
+static int test_remove_beginning(struct btrfs_trans_handle *trans,
+                                struct btrfs_fs_info *fs_info,
+                                struct btrfs_block_group_cache *cache,
+                                struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       const u64 bg_len = cache->key.offset;
+       struct free_space_extent remaining = {
+               .start = bg_start + BITMAP_RANGE,
+               .length = bg_len - BITMAP_RANGE,
+       };
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, BITMAP_RANGE);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &remaining, 1);
+}
+
+/*
+ * Remove the last BITMAP_RANGE bytes of the block group and expect a single
+ * free extent covering everything before it.
+ */
+static int test_remove_end(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       const u64 bg_len = cache->key.offset;
+       struct free_space_extent remaining = {
+               .start = bg_start,
+               .length = bg_len - BITMAP_RANGE,
+       };
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start + bg_len - BITMAP_RANGE,
+                                           BITMAP_RANGE);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &remaining, 1);
+}
+
+/*
+ * Remove the second BITMAP_RANGE-sized piece of the block group and expect
+ * two free extents: the first piece, and everything after the hole.
+ */
+static int test_remove_middle(struct btrfs_trans_handle *trans,
+                             struct btrfs_fs_info *fs_info,
+                             struct btrfs_block_group_cache *cache,
+                             struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       const u64 bg_len = cache->key.offset;
+       struct free_space_extent expected[] = {
+               { .start = bg_start, .length = BITMAP_RANGE },
+               { .start = bg_start + 2 * BITMAP_RANGE,
+                 .length = bg_len - 2 * BITMAP_RANGE },
+       };
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start + BITMAP_RANGE,
+                                           BITMAP_RANGE);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Empty the block group, free the first BITMAP_RANGE piece, then free the
+ * piece immediately after it.  The two must show up as one free extent of
+ * 2 * BITMAP_RANGE bytes.
+ */
+static int test_merge_left(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       /* Start offsets to free, in the order they are added. */
+       const u64 add_order[] = {
+               bg_start,
+               bg_start + BITMAP_RANGE,
+       };
+       struct free_space_extent merged = {
+               .start = bg_start,
+               .length = 2 * BITMAP_RANGE,
+       };
+       unsigned int n;
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, cache->key.offset);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       for (n = 0; n < ARRAY_SIZE(add_order); n++) {
+               err = __add_to_free_space_tree(trans, fs_info, cache, path,
+                                              add_order[n], BITMAP_RANGE);
+               if (err) {
+                       test_msg("Could not add free space\n");
+                       return err;
+               }
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &merged, 1);
+}
+
+/*
+ * Empty the block group, free the third BITMAP_RANGE piece, then free the
+ * piece immediately before it.  The two must show up as one free extent of
+ * 2 * BITMAP_RANGE bytes starting at the second piece.
+ */
+static int test_merge_right(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       /* Start offsets to free, in the order they are added. */
+       const u64 add_order[] = {
+               bg_start + 2 * BITMAP_RANGE,
+               bg_start + BITMAP_RANGE,
+       };
+       struct free_space_extent merged = {
+               .start = bg_start + BITMAP_RANGE,
+               .length = 2 * BITMAP_RANGE,
+       };
+       unsigned int n;
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, cache->key.offset);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       for (n = 0; n < ARRAY_SIZE(add_order); n++) {
+               err = __add_to_free_space_tree(trans, fs_info, cache, path,
+                                              add_order[n], BITMAP_RANGE);
+               if (err) {
+                       test_msg("Could not add free space\n");
+                       return err;
+               }
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &merged, 1);
+}
+
+/*
+ * Empty the block group, free the first and third BITMAP_RANGE pieces, then
+ * free the piece between them.  All three must show up as one free extent of
+ * 3 * BITMAP_RANGE bytes.
+ */
+static int test_merge_both(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       /* Start offsets to free, in the order they are added. */
+       const u64 add_order[] = {
+               bg_start,
+               bg_start + 2 * BITMAP_RANGE,
+               bg_start + BITMAP_RANGE,
+       };
+       struct free_space_extent merged = {
+               .start = bg_start,
+               .length = 3 * BITMAP_RANGE,
+       };
+       unsigned int n;
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, cache->key.offset);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       for (n = 0; n < ARRAY_SIZE(add_order); n++) {
+               err = __add_to_free_space_tree(trans, fs_info, cache, path,
+                                              add_order[n], BITMAP_RANGE);
+               if (err) {
+                       test_msg("Could not add free space\n");
+                       return err;
+               }
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       &merged, 1);
+}
+
+/*
+ * Empty the block group, then free three BITMAP_RANGE pieces that do not
+ * touch each other (the first, fifth and third, in that order).  They must
+ * stay three separate free extents.
+ */
+static int test_merge_none(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path)
+{
+       const u64 bg_start = cache->key.objectid;
+       /* Start offsets to free, in the order they are added. */
+       const u64 add_order[] = {
+               bg_start,
+               bg_start + 4 * BITMAP_RANGE,
+               bg_start + 2 * BITMAP_RANGE,
+       };
+       struct free_space_extent expected[] = {
+               { .start = bg_start, .length = BITMAP_RANGE },
+               { .start = bg_start + 2 * BITMAP_RANGE, .length = BITMAP_RANGE },
+               { .start = bg_start + 4 * BITMAP_RANGE, .length = BITMAP_RANGE },
+       };
+       unsigned int n;
+       int err;
+
+       err = __remove_from_free_space_tree(trans, fs_info, cache, path,
+                                           bg_start, cache->key.offset);
+       if (err) {
+               test_msg("Could not remove free space\n");
+               return err;
+       }
+
+       for (n = 0; n < ARRAY_SIZE(add_order); n++) {
+               err = __add_to_free_space_tree(trans, fs_info, cache, path,
+                                              add_order[n], BITMAP_RANGE);
+               if (err) {
+                       test_msg("Could not add free space\n");
+                       return err;
+               }
+       }
+
+       return check_free_space_extents(trans, fs_info, cache, path,
+                                       expected, ARRAY_SIZE(expected));
+}
+
+/*
+ * Signature shared by every test case in this file; run_test() calls it with
+ * the dummy transaction, fs_info, block group and path it has set up.
+ */
+typedef int (*test_func_t)(struct btrfs_trans_handle *trans,
+                          struct btrfs_fs_info *fs_info,
+                          struct btrfs_block_group_cache *cache,
+                          struct btrfs_path *path);
+
+/*
+ * Run one test case against a freshly built dummy free space tree.
+ *
+ * @test_func: the test case to run
+ * @bitmaps:   if nonzero, convert the block group's free space to the bitmap
+ *             format before calling @test_func
+ *
+ * Sets up a dummy root (used as both the tree root and the free space root),
+ * a dummy fs_info, an empty level-0 leaf and a dummy block group of
+ * 8 * BITMAP_RANGE bytes, adds the block group's free space, runs the test,
+ * then removes the block group's free space and checks that the tree is
+ * left with no items.
+ *
+ * Returns 0 on success or a negative errno.  Everything allocated here is
+ * released on every exit path.
+ */
+static int run_test(test_func_t test_func, int bitmaps)
+{
+       struct btrfs_root *root = NULL;
+       struct btrfs_block_group_cache *cache = NULL;
+       struct btrfs_trans_handle trans;
+       struct btrfs_path *path = NULL;
+       int ret;
+
+       root = btrfs_alloc_dummy_root();
+       if (IS_ERR(root)) {
+               test_msg("Couldn't allocate dummy root\n");
+               /*
+                * Nothing else has been allocated yet and root holds an
+                * error pointer, not NULL, so return directly rather than
+                * handing it to the cleanup code at "out".
+                */
+               return PTR_ERR(root);
+       }
+
+       root->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!root->fs_info) {
+               test_msg("Couldn't allocate dummy fs info\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       btrfs_set_super_compat_ro_flags(root->fs_info->super_copy,
+                                       BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE);
+       root->fs_info->free_space_root = root;
+       root->fs_info->tree_root = root;
+
+       /* Start from an empty leaf as the only node of the tree. */
+       root->node = alloc_test_extent_buffer(root->fs_info, 4096);
+       if (!root->node) {
+               test_msg("Couldn't allocate dummy buffer\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       btrfs_set_header_level(root->node, 0);
+       btrfs_set_header_nritems(root->node, 0);
+       root->alloc_bytenr += 8192;
+
+       cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE);
+       if (!cache) {
+               test_msg("Couldn't allocate dummy block group cache\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       /*
+       * NOTE(review): these thresholds presumably stop the tree from
+       * switching formats on its own, so that only the explicit
+       * conversions in this file change it -- confirm in
+       * free-space-tree.c.
+       */
+       cache->bitmap_low_thresh = 0;
+       cache->bitmap_high_thresh = (u32)-1;
+       cache->needs_free_space = 1;
+
+       btrfs_init_dummy_trans(&trans);
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               test_msg("Couldn't allocate path\n");
+               /* Go through "out" so the root and block group are freed. */
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = add_block_group_free_space(&trans, root->fs_info, cache);
+       if (ret) {
+               test_msg("Could not add block group free space\n");
+               goto out;
+       }
+
+       if (bitmaps) {
+               ret = convert_free_space_to_bitmaps(&trans, root->fs_info,
+                                                   cache, path);
+               if (ret) {
+                       test_msg("Could not convert block group to bitmaps\n");
+                       goto out;
+               }
+       }
+
+       ret = test_func(&trans, root->fs_info, cache, path);
+       if (ret)
+               goto out;
+
+       ret = remove_block_group_free_space(&trans, root->fs_info, cache);
+       if (ret) {
+               test_msg("Could not remove block group free space\n");
+               goto out;
+       }
+
+       /* Removing the block group must leave no items behind. */
+       if (btrfs_header_nritems(root->node) != 0) {
+               test_msg("Free space tree has leftover items\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = 0;
+out:
+       btrfs_free_path(path);
+       btrfs_free_dummy_block_group(cache);
+       btrfs_free_dummy_root(root);
+       return ret;
+}
+
+/*
+ * Run @test_func starting from the extent format and, if that passes, again
+ * starting from the bitmap format.  Returns the first failure, or 0.
+ */
+static int run_test_both_formats(test_func_t test_func)
+{
+       int err = run_test(test_func, 0);
+
+       if (!err)
+               err = run_test(test_func, 1);
+       return err;
+}
+
+/*
+ * Entry point for the free space tree self-tests: run every test case in
+ * both formats, stopping at the first failure.
+ *
+ * Returns 0 if all tests pass, otherwise the error from the failing test.
+ */
+int btrfs_test_free_space_tree(void)
+{
+       test_func_t tests[] = {
+               test_empty_block_group,
+               test_remove_all,
+               test_remove_beginning,
+               test_remove_end,
+               test_remove_middle,
+               test_merge_left,
+               test_merge_right,
+               test_merge_both,
+               test_merge_none,
+       };
+       unsigned int idx;
+       int err = 0;
+
+       test_msg("Running free space tree tests\n");
+       for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
+               err = run_test_both_formats(tests[idx]);
+               if (err) {
+                       test_msg("%pf failed\n", tests[idx]);
+                       break;
+               }
+       }
+
+       return err;
+}
index 846d277..8ea5d34 100644 (file)
 #include "../qgroup.h"
 #include "../backref.h"
 
-static void init_dummy_trans(struct btrfs_trans_handle *trans)
-{
-       memset(trans, 0, sizeof(*trans));
-       trans->transid = 1;
-       INIT_LIST_HEAD(&trans->qgroup_ref_list);
-       trans->type = __TRANS_DUMMY;
-}
-
 static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
                                  u64 num_bytes, u64 parent, u64 root_objectid)
 {
@@ -44,7 +36,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr,
        u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info);
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        ins.objectid = bytenr;
        ins.type = BTRFS_EXTENT_ITEM_KEY;
@@ -94,7 +86,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes,
        u64 refs;
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -144,7 +136,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr,
        struct btrfs_path *path;
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -178,7 +170,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr,
        u64 refs;
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -232,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root)
        struct ulist *new_roots = NULL;
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        test_msg("Qgroup basic add\n");
        ret = btrfs_create_qgroup(NULL, fs_info, 5);
@@ -326,7 +318,7 @@ static int test_multiple_refs(struct btrfs_root *root)
        struct ulist *new_roots = NULL;
        int ret;
 
-       init_dummy_trans(&trans);
+       btrfs_init_dummy_trans(&trans);
 
        test_msg("Qgroup multiple refs test\n");
 
index fc82b02..b6031ce 100644 (file)
@@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
                        list_del_init(&em->list);
                        free_extent_map(em);
                }
+               /*
+                * If any block groups are found in ->deleted_bgs then it's
+                * because the transaction was aborted and a commit did not
+                * happen (things failed before writing the new superblock
+                * and calling btrfs_finish_extent_commit()), so we can not
+                * discard the physical locations of the block groups.
+                */
+               while (!list_empty(&transaction->deleted_bgs)) {
+                       struct btrfs_block_group_cache *cache;
+
+                       cache = list_first_entry(&transaction->deleted_bgs,
+                                                struct btrfs_block_group_cache,
+                                                bg_list);
+                       list_del_init(&cache->bg_list);
+                       btrfs_put_block_group_trimming(cache);
+                       btrfs_put_block_group(cache);
+               }
                kmem_cache_free(btrfs_transaction_cachep, transaction);
        }
 }
@@ -1324,17 +1341,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
        u64 root_flags;
        uuid_le new_uuid;
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               pending->error = -ENOMEM;
-               return 0;
-       }
+       ASSERT(pending->path);
+       path = pending->path;
 
-       new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
-       if (!new_root_item) {
-               pending->error = -ENOMEM;
-               goto root_item_alloc_fail;
-       }
+       ASSERT(pending->root_item);
+       new_root_item = pending->root_item;
 
        pending->error = btrfs_find_free_objectid(tree_root, &objectid);
        if (pending->error)
@@ -1567,8 +1578,10 @@ clear_skip_qgroup:
        btrfs_clear_skip_qgroup(trans);
 no_free_objectid:
        kfree(new_root_item);
-root_item_alloc_fail:
+       pending->root_item = NULL;
        btrfs_free_path(path);
+       pending->path = NULL;
+
        return ret;
 }
 
index 64c8221..72be51f 100644 (file)
@@ -137,8 +137,10 @@ struct btrfs_pending_snapshot {
        struct dentry *dentry;
        struct inode *dir;
        struct btrfs_root *root;
+       struct btrfs_root_item *root_item;
        struct btrfs_root *snap;
        struct btrfs_qgroup_inherit *inherit;
+       struct btrfs_path *path;
        /* block reservation for the operation */
        struct btrfs_block_rsv block_rsv;
        u64 qgroup_reserved;
index f31db43..cb65089 100644 (file)
@@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                goto out;
        }
        btrfs_release_path(path);
+       /*
+        * We don't need a lock on a leaf. btrfs_realloc_node() will lock all
+        * leafs from path->nodes[1], so set lowest_level to 1 to avoid later
+        * a deadlock (attempting to write lock an already write locked leaf).
+        */
+       path->lowest_level = 1;
        wret = btrfs_search_slot(trans, root, &key, path, 0, 1);
 
        if (wret < 0) {
@@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                ret = 0;
                goto out;
        }
-       path->slots[1] = btrfs_header_nritems(path->nodes[1]);
-       next_key_ret = btrfs_find_next_key(root, path, &key, 1,
-                                          min_trans);
+       /*
+        * The node at level 1 must always be locked when our path has
+        * keep_locks set and lowest_level is 1, regardless of the value of
+        * path->slots[1].
+        */
+       BUG_ON(path->locks[1] == 0);
        ret = btrfs_realloc_node(trans, root,
                                 path->nodes[1], 0,
                                 &last_ret,
@@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
                WARN_ON(ret == -EAGAIN);
                goto out;
        }
+       /*
+        * Now that we reallocated the node we can find the next key. Note that
+        * btrfs_find_next_key() can release our path and do another search
+        * without COWing, this is because even with path->keep_locks = 1,
+        * btrfs_search_slot() / ctree.c:unlock_up() does not keep a lock on a
+        * node when path->slots[node_level - 1] does not point to the last
+        * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore
+        * we search for the next key after reallocating our node.
+        */
+       path->slots[1] = btrfs_header_nritems(path->nodes[1]);
+       next_key_ret = btrfs_find_next_key(root, path, &key, 1,
+                                          min_trans);
        if (next_key_ret == 0) {
                memcpy(&root->defrag_progress, &key, sizeof(key));
                ret = -EAGAIN;
index adb79ad..544636e 100644 (file)
@@ -125,6 +125,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root);
 static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
 static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
+static void btrfs_close_one_device(struct btrfs_device *device);
 
 DEFINE_MUTEX(uuid_mutex);
 static LIST_HEAD(fs_uuids);
@@ -1102,7 +1103,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 2;
+       path->reada = READA_FORWARD;
 
        key.objectid = device->devid;
        key.offset = start;
@@ -1271,7 +1272,7 @@ again:
                goto out;
        }
 
-       path->reada = 2;
+       path->reada = READA_FORWARD;
        path->search_commit_root = 1;
        path->skip_locking = 1;
 
@@ -3722,14 +3723,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                goto out;
        }
 
-       /* allow dup'ed data chunks only in mixed mode */
-       if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-           (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) {
-               btrfs_err(fs_info, "dup for data is not allowed");
-               ret = -EINVAL;
-               goto out;
-       }
-
        /* allow to reduce meta or sys integrity only if force set */
        allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
                        BTRFS_BLOCK_GROUP_RAID10 |
@@ -3755,6 +3748,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
                }
        } while (read_seqretry(&fs_info->profiles_lock, seq));
 
+       if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) <
+               btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) {
+               btrfs_warn(fs_info,
+       "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx",
+                       bctl->meta.target, bctl->data.target);
+       }
+
        if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                fs_info->num_tolerated_disk_barrier_failures = min(
                        btrfs_calc_num_tolerated_disk_barrier_failures(fs_info),
@@ -4267,7 +4267,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
        if (!path)
                return -ENOMEM;
 
-       path->reada = 2;
+       path->reada = READA_FORWARD;
 
        lock_chunks(root);
 
@@ -4792,7 +4792,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
        u64 dev_offset;
        u64 stripe_size;
        int i = 0;
-       int ret;
+       int ret = 0;
 
        em_tree = &extent_root->fs_info->mapping_tree.map_tree;
        read_lock(&em_tree->lock);
@@ -4823,20 +4823,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                goto out;
        }
 
+       /*
+        * Take the device list mutex to prevent races with the final phase of
+        * a device replace operation that replaces the device object associated
+        * with the map's stripes, because the device object's id can change
+        * at any time during that final phase of the device replace operation
+        * (dev-replace.c:btrfs_dev_replace_finishing()).
+        */
+       mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex);
        for (i = 0; i < map->num_stripes; i++) {
                device = map->stripes[i].dev;
                dev_offset = map->stripes[i].physical;
 
                ret = btrfs_update_device(trans, device);
                if (ret)
-                       goto out;
+                       break;
                ret = btrfs_alloc_dev_extent(trans, device,
                                             chunk_root->root_key.objectid,
                                             BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                             chunk_offset, dev_offset,
                                             stripe_size);
                if (ret)
-                       goto out;
+                       break;
+       }
+       if (ret) {
+               mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
+               goto out;
        }
 
        stripe = &chunk->stripe;
@@ -4849,6 +4861,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
                memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
                stripe++;
        }
+       mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex);
 
        btrfs_set_stack_chunk_length(chunk, chunk_size);
        btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
@@ -6464,11 +6477,11 @@ int btrfs_read_sys_array(struct btrfs_root *root)
        sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET);
        if (!sb)
                return -ENOMEM;
-       btrfs_set_buffer_uptodate(sb);
+       set_extent_buffer_uptodate(sb);
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
        /*
         * The sb extent buffer is artifical and just used to read the system array.
-        * btrfs_set_buffer_uptodate() call does not properly mark all it's
+        * set_extent_buffer_uptodate() call does not properly mark all its
         * pages up-to-date when the page is larger: extent does not cover the
         * whole page and consequently check_page_uptodate does not find all
         * the page's extents up-to-date (the hole beyond sb),
@@ -6511,6 +6524,14 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                                goto out_short_read;
 
                        num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+                       if (!num_stripes) {
+                               printk(KERN_ERR
+           "BTRFS: invalid number of stripes %u in sys_array at offset %u\n",
+                                       num_stripes, cur_offset);
+                               ret = -EIO;
+                               break;
+                       }
+
                        len = btrfs_chunk_item_size(num_stripes);
                        if (cur_offset + len > array_size)
                                goto out_short_read;
@@ -6519,6 +6540,9 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                        if (ret)
                                break;
                } else {
+                       printk(KERN_ERR
+               "BTRFS: unexpected item type %u in sys_array at offset %u\n",
+                               (u32)key.type, cur_offset);
                        ret = -EIO;
                        break;
                }
@@ -6948,7 +6972,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
        }
 }
 
-void btrfs_close_one_device(struct btrfs_device *device)
+static void btrfs_close_one_device(struct btrfs_device *device)
 {
        struct btrfs_fs_devices *fs_devices = device->fs_devices;
        struct btrfs_device *new_device;
index 30bbc3e..1939ebd 100644 (file)
@@ -566,6 +566,5 @@ static inline void unlock_chunks(struct btrfs_root *root)
 struct list_head *btrfs_get_fs_uuids(void);
 void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
-void btrfs_close_one_device(struct btrfs_device *device);
 
 #endif
index 1fcd7b6..608552e 100644 (file)
@@ -283,7 +283,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
-       path->reada = 2;
+       path->reada = READA_FORWARD;
 
        /* search for our xattrs */
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -494,7 +494,7 @@ static int btrfs_initxattrs(struct inode *inode,
 
        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                name = kmalloc(XATTR_SECURITY_PREFIX_LEN +
-                              strlen(xattr->name) + 1, GFP_NOFS);
+                              strlen(xattr->name) + 1, GFP_KERNEL);
                if (!name) {
                        err = -ENOMEM;
                        break;
index b4473da..d866f21 100644 (file)
@@ -45,7 +45,8 @@ struct btrfs_qgroup_operation;
                { BTRFS_TREE_LOG_OBJECTID,      "TREE_LOG"      },      \
                { BTRFS_QUOTA_TREE_OBJECTID,    "QUOTA_TREE"    },      \
                { BTRFS_TREE_RELOC_OBJECTID,    "TREE_RELOC"    },      \
-               { BTRFS_UUID_TREE_OBJECTID,     "UUID_RELOC"    },      \
+               { BTRFS_UUID_TREE_OBJECTID,     "UUID_TREE"     },      \
+               { BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },  \
                { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
 
 #define show_root_type(obj)                                            \