btrfs: Replace fs_info->readahead_workers workqueue with btrfs_workqueue.
[cascardo/linux.git] / fs / btrfs / disk-io.c
index 8072cfa..c0b003b 100644 (file)
@@ -26,7 +26,6 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
-#include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
@@ -35,6 +34,7 @@
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "hash.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "raid56.h"
+#include "sysfs.h"
 
 #ifdef CONFIG_X86
 #include <asm/cpufeature.h>
 #endif
 
 static struct extent_io_ops btree_extent_io_ops;
-static void end_workqueue_fn(struct btrfs_work *work);
+static void end_workqueue_fn(struct btrfs_work_struct *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
                                    int read_only);
@@ -85,7 +86,7 @@ struct end_io_wq {
        int error;
        int metadata;
        struct list_head list;
-       struct btrfs_work work;
+       struct btrfs_work_struct work;
 };
 
 /*
@@ -107,7 +108,7 @@ struct async_submit_bio {
         * can't tell us where in the file the bio should go
         */
        u64 bio_offset;
-       struct btrfs_work work;
+       struct btrfs_work_struct work;
        int error;
 };
 
@@ -243,7 +244,7 @@ out:
 
 u32 btrfs_csum_data(char *data, u32 seed, size_t len)
 {
-       return crc32c(seed, data, len);
+       return btrfs_crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, char *result)
@@ -299,11 +300,11 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                        memcpy(&found, result, csum_size);
 
                        read_extent_buffer(buf, &val, 0, csum_size);
-                       printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
-                                      "failed on %llu wanted %X found %X "
-                                      "level %d\n",
-                                      root->fs_info->sb->s_id, buf->start,
-                                      val, found, btrfs_header_level(buf));
+                       printk_ratelimited(KERN_INFO
+                               "BTRFS: %s checksum verify failed on %llu wanted %X found %X "
+                               "level %d\n",
+                               root->fs_info->sb->s_id, buf->start,
+                               val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
                        return 1;
@@ -382,13 +383,14 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
                        ret = 1;
 
                if (ret && btrfs_super_generation(disk_sb) < 10) {
-                       printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+                       printk(KERN_WARNING
+                               "BTRFS: super block crcs don't match, older mkfs detected\n");
                        ret = 0;
                }
        }
 
        if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
-               printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+               printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
                                csum_type);
                ret = 1;
        }
@@ -464,13 +466,10 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 
 static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
-       struct extent_io_tree *tree;
        u64 start = page_offset(page);
        u64 found_start;
        struct extent_buffer *eb;
 
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
-
        eb = (struct extent_buffer *)page->private;
        if (page != eb->pages[0])
                return 0;
@@ -500,8 +499,8 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 }
 
 #define CORRUPT(reason, eb, root, slot)                                \
-       printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
-              "root=%llu, slot=%d\n", reason,                  \
+       btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu,"       \
+                  "root=%llu, slot=%d", reason,                        \
               btrfs_header_bytenr(eb), root->objectid, slot)
 
 static noinline int check_leaf(struct btrfs_root *root,
@@ -569,7 +568,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
                                      u64 phy_offset, struct page *page,
                                      u64 start, u64 end, int mirror)
 {
-       struct extent_io_tree *tree;
        u64 found_start;
        int found_level;
        struct extent_buffer *eb;
@@ -580,7 +578,6 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
        if (!page->private)
                goto out;
 
-       tree = &BTRFS_I(page->mapping->host)->io_tree;
        eb = (struct extent_buffer *)page->private;
 
        /* the pending IO might have been the only thing that kept this buffer
@@ -600,21 +597,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 
        found_start = btrfs_header_bytenr(eb);
        if (found_start != eb->start) {
-               printk_ratelimited(KERN_INFO "btrfs bad tree block start "
+               printk_ratelimited(KERN_INFO "BTRFS: bad tree block start "
                               "%llu %llu\n",
                               found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
-               printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
+               printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n",
                               eb->start);
                ret = -EIO;
                goto err;
        }
        found_level = btrfs_header_level(eb);
        if (found_level >= BTRFS_MAX_LEVEL) {
-               btrfs_info(root->fs_info, "bad tree block level %d\n",
+               btrfs_info(root->fs_info, "bad tree block level %d",
                           (int)btrfs_header_level(eb));
                ret = -EIO;
                goto err;
@@ -681,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err)
 
        fs_info = end_io_wq->info;
        end_io_wq->error = err;
-       end_io_wq->work.func = end_workqueue_fn;
-       end_io_wq->work.flags = 0;
+       btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 
        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
-                       btrfs_queue_worker(&fs_info->endio_meta_write_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_meta_write_workers,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
-                       btrfs_queue_worker(&fs_info->endio_freespace_worker,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_freespace_worker,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_raid56_workers,
+                                        &end_io_wq->work);
                else
-                       btrfs_queue_worker(&fs_info->endio_write_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_write_workers,
+                                        &end_io_wq->work);
        } else {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_raid56_workers,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata)
-                       btrfs_queue_worker(&fs_info->endio_meta_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_meta_workers,
+                                        &end_io_wq->work);
                else
-                       btrfs_queue_worker(&fs_info->endio_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_workers,
+                                        &end_io_wq->work);
        }
 }
 
@@ -741,12 +737,12 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 {
        unsigned long limit = min_t(unsigned long,
-                                   info->workers.max_workers,
+                                   info->thread_pool_size,
                                    info->fs_devices->open_devices);
        return 256 * limit;
 }
 
-static void run_one_async_start(struct btrfs_work *work)
+static void run_one_async_start(struct btrfs_work_struct *work)
 {
        struct async_submit_bio *async;
        int ret;
@@ -759,7 +755,7 @@ static void run_one_async_start(struct btrfs_work *work)
                async->error = ret;
 }
 
-static void run_one_async_done(struct btrfs_work *work)
+static void run_one_async_done(struct btrfs_work_struct *work)
 {
        struct btrfs_fs_info *fs_info;
        struct async_submit_bio *async;
@@ -786,7 +782,7 @@ static void run_one_async_done(struct btrfs_work *work)
                               async->bio_offset);
 }
 
-static void run_one_async_free(struct btrfs_work *work)
+static void run_one_async_free(struct btrfs_work_struct *work)
 {
        struct async_submit_bio *async;
 
@@ -814,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        async->submit_bio_start = submit_bio_start;
        async->submit_bio_done = submit_bio_done;
 
-       async->work.func = run_one_async_start;
-       async->work.ordered_func = run_one_async_done;
-       async->work.ordered_free = run_one_async_free;
+       btrfs_init_work(&async->work, run_one_async_start,
+                       run_one_async_done, run_one_async_free);
 
-       async->work.flags = 0;
        async->bio_flags = bio_flags;
        async->bio_offset = bio_offset;
 
@@ -827,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        atomic_inc(&fs_info->nr_async_submits);
 
        if (rw & REQ_SYNC)
-               btrfs_set_work_high_prio(&async->work);
+               btrfs_set_work_high_priority(&async->work);
 
-       btrfs_queue_worker(&fs_info->workers, &async->work);
+       btrfs_queue_work(fs_info->workers, &async->work);
 
        while (atomic_read(&fs_info->async_submit_draining) &&
              atomic_read(&fs_info->nr_async_submits)) {
@@ -967,11 +961,9 @@ static int btree_migratepage(struct address_space *mapping,
 static int btree_writepages(struct address_space *mapping,
                            struct writeback_control *wbc)
 {
-       struct extent_io_tree *tree;
        struct btrfs_fs_info *fs_info;
        int ret;
 
-       tree = &BTRFS_I(mapping->host)->io_tree;
        if (wbc->sync_mode == WB_SYNC_NONE) {
 
                if (wbc->for_kupdate)
@@ -1010,8 +1002,9 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
        extent_invalidatepage(tree, page, offset);
        btree_releasepage(page, GFP_NOFS);
        if (PagePrivate(page)) {
-               printk(KERN_WARNING "btrfs warning page private not zero "
-                      "on page %llu\n", (unsigned long long)page_offset(page));
+               btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info,
+                          "page private not zero on page %llu",
+                          (unsigned long long)page_offset(page));
                ClearPagePrivate(page);
                set_page_private(page, 0);
                page_cache_release(page);
@@ -1095,21 +1088,13 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize)
 {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-       eb = find_extent_buffer(&BTRFS_I(btree_inode)->io_tree, bytenr);
-       return eb;
+       return find_extent_buffer(root->fs_info, bytenr);
 }
 
 struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
                                                 u64 bytenr, u32 blocksize)
 {
-       struct inode *btree_inode = root->fs_info->btree_inode;
-       struct extent_buffer *eb;
-
-       eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree,
-                                bytenr, blocksize);
-       return eb;
+       return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
 }
 
 
@@ -1212,6 +1197,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
+       INIT_LIST_HEAD(&root->log_ctxs[0]);
+       INIT_LIST_HEAD(&root->log_ctxs[1]);
        atomic_set(&root->log_commit[0], 0);
        atomic_set(&root->log_commit[1], 0);
        atomic_set(&root->log_writers, 0);
@@ -1219,6 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        atomic_set(&root->orphan_inodes, 0);
        atomic_set(&root->refs, 1);
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        if (fs_info)
                extent_io_tree_init(&root->dirty_log_pages,
@@ -1273,7 +1261,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        struct btrfs_root *root;
        struct btrfs_key key;
        int ret = 0;
-       u64 bytenr;
        uuid_le uuid;
 
        root = btrfs_alloc_root(fs_info);
@@ -1295,7 +1282,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
                goto fail;
        }
 
-       bytenr = leaf->start;
        memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
        btrfs_set_header_bytenr(leaf, leaf->start);
        btrfs_set_header_generation(leaf, trans->transid);
@@ -1434,6 +1420,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        WARN_ON(root->log_root);
        root->log_root = log_root;
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        return 0;
 }
@@ -1616,7 +1603,8 @@ again:
        if (ret)
                goto fail;
 
-       ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
+       ret = btrfs_find_item(fs_info->tree_root, NULL, BTRFS_ORPHAN_OBJECTID,
+                       location->objectid, BTRFS_ORPHAN_ITEM_KEY, NULL);
        if (ret < 0)
                goto fail;
        if (ret == 0)
@@ -1680,16 +1668,14 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
  * called by the kthread helper functions to finally call the bio end_io
  * functions.  This is where read checksum verification actually happens
  */
-static void end_workqueue_fn(struct btrfs_work *work)
+static void end_workqueue_fn(struct btrfs_work_struct *work)
 {
        struct bio *bio;
        struct end_io_wq *end_io_wq;
-       struct btrfs_fs_info *fs_info;
        int error;
 
        end_io_wq = container_of(work, struct end_io_wq, work);
        bio = end_io_wq->bio;
-       fs_info = end_io_wq->info;
 
        error = end_io_wq->error;
        bio->bi_private = end_io_wq->private;
@@ -2010,20 +1996,20 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 {
        btrfs_stop_workers(&fs_info->generic_worker);
        btrfs_stop_workers(&fs_info->fixup_workers);
-       btrfs_stop_workers(&fs_info->delalloc_workers);
-       btrfs_stop_workers(&fs_info->workers);
-       btrfs_stop_workers(&fs_info->endio_workers);
-       btrfs_stop_workers(&fs_info->endio_meta_workers);
-       btrfs_stop_workers(&fs_info->endio_raid56_workers);
-       btrfs_stop_workers(&fs_info->rmw_workers);
-       btrfs_stop_workers(&fs_info->endio_meta_write_workers);
-       btrfs_stop_workers(&fs_info->endio_write_workers);
-       btrfs_stop_workers(&fs_info->endio_freespace_worker);
-       btrfs_stop_workers(&fs_info->submit_workers);
+       btrfs_destroy_workqueue(fs_info->delalloc_workers);
+       btrfs_destroy_workqueue(fs_info->workers);
+       btrfs_destroy_workqueue(fs_info->endio_workers);
+       btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+       btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+       btrfs_destroy_workqueue(fs_info->rmw_workers);
+       btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
+       btrfs_destroy_workqueue(fs_info->endio_write_workers);
+       btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
+       btrfs_destroy_workqueue(fs_info->submit_workers);
        btrfs_stop_workers(&fs_info->delayed_workers);
-       btrfs_stop_workers(&fs_info->caching_workers);
-       btrfs_stop_workers(&fs_info->readahead_workers);
-       btrfs_stop_workers(&fs_info->flush_workers);
+       btrfs_destroy_workqueue(fs_info->caching_workers);
+       btrfs_destroy_workqueue(fs_info->readahead_workers);
+       btrfs_destroy_workqueue(fs_info->flush_workers);
        btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
 }
 
@@ -2080,6 +2066,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
                for (i = 0; i < ret; i++)
                        btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
+
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+               btrfs_free_log_root_tree(NULL, fs_info);
+               btrfs_destroy_pinned_extent(fs_info->tree_root,
+                                           fs_info->pinned_extents);
+       }
 }
 
 int open_ctree(struct super_block *sb,
@@ -2109,6 +2101,8 @@ int open_ctree(struct super_block *sb,
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
+       int max_active;
+       int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
        bool create_uuid_tree;
        bool check_uuid_tree;
 
@@ -2145,15 +2139,22 @@ int open_ctree(struct super_block *sb,
                goto fail_dirty_metadata_bytes;
        }
 
+       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       if (ret) {
+               err = ret;
+               goto fail_delalloc_bytes;
+       }
+
        fs_info->btree_inode = new_inode(sb);
        if (!fs_info->btree_inode) {
                err = -ENOMEM;
-               goto fail_delalloc_bytes;
+               goto fail_bio_counter;
        }
 
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
 
        INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2167,6 +2168,7 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->free_chunk_lock);
        spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
+       spin_lock_init(&fs_info->buffer_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->reloc_mutex);
        seqlock_init(&fs_info->profiles_lock);
@@ -2198,7 +2200,7 @@ int open_ctree(struct super_block *sb,
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-
+       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
@@ -2221,6 +2223,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->scrub_pause_req, 0);
        atomic_set(&fs_info->scrubs_paused, 0);
        atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->replace_wait);
        init_waitqueue_head(&fs_info->scrub_pause_wait);
        fs_info->scrub_workers_refcnt = 0;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
@@ -2337,7 +2340,7 @@ int open_ctree(struct super_block *sb,
         * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
         */
        if (btrfs_check_super_csum(bh->b_data)) {
-               printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+               printk(KERN_ERR "BTRFS: superblock checksum mismatch\n");
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2356,7 +2359,7 @@ int open_ctree(struct super_block *sb,
 
        ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
        if (ret) {
-               printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
+               printk(KERN_ERR "BTRFS: superblock contains fatal errors\n");
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2421,7 +2424,7 @@ int open_ctree(struct super_block *sb,
                features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
 
        if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
-               printk(KERN_ERR "btrfs: has skinny extents\n");
+               printk(KERN_ERR "BTRFS: has skinny extents\n");
 
        /*
         * flag our filesystem as having big metadata blocks if
@@ -2429,7 +2432,7 @@ int open_ctree(struct super_block *sb,
         */
        if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) {
                if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
-                       printk(KERN_INFO "btrfs flagging fs with big metadata feature\n");
+                       printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
                features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
        }
 
@@ -2446,7 +2449,7 @@ int open_ctree(struct super_block *sb,
         */
        if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
            (sectorsize != leafsize)) {
-               printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes "
+               printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes "
                                "are not allowed for mixed block groups on %s\n",
                                sb->s_id);
                goto fail_alloc;
@@ -2468,107 +2471,84 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
+       max_active = fs_info->thread_pool_size;
        btrfs_init_workers(&fs_info->generic_worker,
                           "genwork", 1, NULL);
 
-       btrfs_init_workers(&fs_info->workers, "worker",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-
-       btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->workers =
+               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
+                                     max_active, 16);
 
-       btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->delalloc_workers =
+               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
 
-       btrfs_init_workers(&fs_info->submit_workers, "submit",
-                          min_t(u64, fs_devices->num_devices,
-                          fs_info->thread_pool_size), NULL);
+       fs_info->flush_workers =
+               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
 
-       btrfs_init_workers(&fs_info->caching_workers, "cache",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->caching_workers =
+               btrfs_alloc_workqueue("cache", flags, max_active, 0);
 
-       /* a higher idle thresh on the submit workers makes it much more
+       /*
+        * a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
         * devices
         */
-       fs_info->submit_workers.idle_thresh = 64;
-
-       fs_info->workers.idle_thresh = 16;
-       fs_info->workers.ordered = 1;
-
-       fs_info->delalloc_workers.idle_thresh = 2;
-       fs_info->delalloc_workers.ordered = 1;
+       fs_info->submit_workers =
+               btrfs_alloc_workqueue("submit", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 64);
 
        btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
                           &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_workers, "endio",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_meta_write_workers,
-                          "endio-meta-write", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_raid56_workers,
-                          "endio-raid56", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->rmw_workers,
-                          "rmw", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
-                          1, &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->readahead_workers, "readahead",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
-                          &fs_info->generic_worker);
 
        /*
         * endios are largely parallel and should have a very
         * low idle thresh
         */
-       fs_info->endio_workers.idle_thresh = 4;
-       fs_info->endio_meta_workers.idle_thresh = 4;
-       fs_info->endio_raid56_workers.idle_thresh = 4;
-       fs_info->rmw_workers.idle_thresh = 2;
-
-       fs_info->endio_write_workers.idle_thresh = 2;
-       fs_info->endio_meta_write_workers.idle_thresh = 2;
-       fs_info->readahead_workers.idle_thresh = 2;
+       fs_info->endio_workers =
+               btrfs_alloc_workqueue("endio", flags, max_active, 4);
+       fs_info->endio_meta_workers =
+               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+       fs_info->endio_meta_write_workers =
+               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+       fs_info->endio_raid56_workers =
+               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+       fs_info->rmw_workers =
+               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+       fs_info->endio_write_workers =
+               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+       fs_info->endio_freespace_worker =
+               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+       btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
+                          fs_info->thread_pool_size,
+                          &fs_info->generic_worker);
+       fs_info->readahead_workers =
+               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+       btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+                          &fs_info->generic_worker);
 
        /*
         * btrfs_start_workers can really only fail because of ENOMEM so just
         * return -ENOMEM if any of these fail.
         */
-       ret = btrfs_start_workers(&fs_info->workers);
-       ret |= btrfs_start_workers(&fs_info->generic_worker);
-       ret |= btrfs_start_workers(&fs_info->submit_workers);
-       ret |= btrfs_start_workers(&fs_info->delalloc_workers);
+       ret = btrfs_start_workers(&fs_info->generic_worker);
        ret |= btrfs_start_workers(&fs_info->fixup_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
-       ret |= btrfs_start_workers(&fs_info->rmw_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_write_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
        ret |= btrfs_start_workers(&fs_info->delayed_workers);
-       ret |= btrfs_start_workers(&fs_info->caching_workers);
-       ret |= btrfs_start_workers(&fs_info->readahead_workers);
-       ret |= btrfs_start_workers(&fs_info->flush_workers);
        ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
        if (ret) {
                err = -ENOMEM;
                goto fail_sb_buffer;
        }
+       if (!(fs_info->workers && fs_info->delalloc_workers &&
+             fs_info->submit_workers && fs_info->flush_workers &&
+             fs_info->endio_workers && fs_info->endio_meta_workers &&
+             fs_info->endio_meta_write_workers &&
+             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
+             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
+             fs_info->caching_workers && fs_info->readahead_workers)) {
+               err = -ENOMEM;
+               goto fail_sb_buffer;
+       }
 
        fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
        fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2583,12 +2563,12 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
        if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
-               printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
+               printk(KERN_INFO "BTRFS: valid FS not found on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
 
        if (sectorsize != PAGE_SIZE) {
-               printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+               printk(KERN_WARNING "BTRFS: Incompatible sector size(%lu) "
                       "found on %s\n", (unsigned long)sectorsize, sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2597,7 +2577,7 @@ int open_ctree(struct super_block *sb,
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read the system "
+               printk(KERN_WARNING "BTRFS: failed to read the system "
                       "array on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2614,7 +2594,7 @@ int open_ctree(struct super_block *sb,
                                           blocksize, generation);
        if (!chunk_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2626,7 +2606,7 @@ int open_ctree(struct super_block *sb,
 
        ret = btrfs_read_chunk_tree(chunk_root);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read chunk tree on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2638,7 +2618,7 @@ int open_ctree(struct super_block *sb,
        btrfs_close_extra_devices(fs_info, fs_devices, 0);
 
        if (!fs_devices->latest_bdev) {
-               printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+               printk(KERN_CRIT "BTRFS: failed to read devices on %s\n",
                       sb->s_id);
                goto fail_tree_roots;
        }
@@ -2653,7 +2633,7 @@ retry_root_backup:
                                          blocksize, generation);
        if (!tree_root->node ||
            !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
-               printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+               printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n",
                       sb->s_id);
 
                goto recovery_tree_root;
@@ -2724,50 +2704,56 @@ retry_root_backup:
 
        ret = btrfs_recover_balance(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to recover balance\n");
+               printk(KERN_WARNING "BTRFS: failed to recover balance\n");
                goto fail_block_groups;
        }
 
        ret = btrfs_init_dev_stats(fs_info);
        if (ret) {
-               printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n",
+               printk(KERN_ERR "BTRFS: failed to init dev_stats: %d\n",
                       ret);
                goto fail_block_groups;
        }
 
        ret = btrfs_init_dev_replace(fs_info);
        if (ret) {
-               pr_err("btrfs: failed to init dev_replace: %d\n", ret);
+               pr_err("BTRFS: failed to init dev_replace: %d\n", ret);
                goto fail_block_groups;
        }
 
        btrfs_close_extra_devices(fs_info, fs_devices, 1);
 
-       ret = btrfs_init_space_info(fs_info);
+       ret = btrfs_sysfs_add_one(fs_info);
        if (ret) {
-               printk(KERN_ERR "Failed to initial space info: %d\n", ret);
+               pr_err("BTRFS: failed to init sysfs interface: %d\n", ret);
                goto fail_block_groups;
        }
 
+       ret = btrfs_init_space_info(fs_info);
+       if (ret) {
+               printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
+               goto fail_sysfs;
+       }
+
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
-               printk(KERN_ERR "Failed to read block groups: %d\n", ret);
-               goto fail_block_groups;
+               printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
+               goto fail_sysfs;
        }
        fs_info->num_tolerated_disk_barrier_failures =
                btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
        if (fs_info->fs_devices->missing_devices >
             fs_info->num_tolerated_disk_barrier_failures &&
            !(sb->s_flags & MS_RDONLY)) {
-               printk(KERN_WARNING
-                      "Btrfs: too many missing devices, writeable mount is not allowed\n");
-               goto fail_block_groups;
+               printk(KERN_WARNING "BTRFS: "
+                       "too many missing devices, writeable mount is not allowed\n");
+               goto fail_sysfs;
        }
 
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
-               goto fail_block_groups;
+               goto fail_sysfs;
 
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
@@ -2778,11 +2764,15 @@ retry_root_backup:
        if (!btrfs_test_opt(tree_root, SSD) &&
            !btrfs_test_opt(tree_root, NOSSD) &&
            !fs_info->fs_devices->rotating) {
-               printk(KERN_INFO "Btrfs detected SSD devices, enabling SSD "
+               printk(KERN_INFO "BTRFS: detected SSD devices, enabling SSD "
                       "mode\n");
                btrfs_set_opt(fs_info->mount_opt, SSD);
        }
 
+       /* Set the real inode map cache flag */
+       if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
+               btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
+
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
                ret = btrfsic_mount(tree_root, fs_devices,
@@ -2791,7 +2781,7 @@ retry_root_backup:
                                    1 : 0,
                                    fs_info->check_integrity_print_mask);
                if (ret)
-                       printk(KERN_WARNING "btrfs: failed to initialize"
+                       printk(KERN_WARNING "BTRFS: failed to initialize"
                               " integrity check module %s\n", sb->s_id);
        }
 #endif
@@ -2804,7 +2794,7 @@ retry_root_backup:
                u64 bytenr = btrfs_super_log_root(disk_super);
 
                if (fs_devices->rw_devices == 0) {
-                       printk(KERN_WARNING "Btrfs log replay required "
+                       printk(KERN_WARNING "BTRFS: log replay required "
                               "on RO media\n");
                        err = -EIO;
                        goto fail_qgroup;
@@ -2827,7 +2817,7 @@ retry_root_backup:
                                                      generation + 1);
                if (!log_tree_root->node ||
                    !extent_buffer_uptodate(log_tree_root->node)) {
-                       printk(KERN_ERR "btrfs: failed to read log tree\n");
+                       printk(KERN_ERR "BTRFS: failed to read log tree\n");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
                        goto fail_trans_kthread;
@@ -2861,7 +2851,7 @@ retry_root_backup:
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
                        printk(KERN_WARNING
-                              "btrfs: failed to recover relocation\n");
+                              "BTRFS: failed to recover relocation\n");
                        err = -EINVAL;
                        goto fail_qgroup;
                }
@@ -2891,14 +2881,14 @@ retry_root_backup:
 
        ret = btrfs_resume_balance_async(fs_info);
        if (ret) {
-               printk(KERN_WARNING "btrfs: failed to resume balance\n");
+               printk(KERN_WARNING "BTRFS: failed to resume balance\n");
                close_ctree(tree_root);
                return ret;
        }
 
        ret = btrfs_resume_dev_replace_async(fs_info);
        if (ret) {
-               pr_warn("btrfs: failed to resume dev_replace\n");
+               pr_warn("BTRFS: failed to resume dev_replace\n");
                close_ctree(tree_root);
                return ret;
        }
@@ -2906,20 +2896,20 @@ retry_root_backup:
        btrfs_qgroup_rescan_resume(fs_info);
 
        if (create_uuid_tree) {
-               pr_info("btrfs: creating UUID tree\n");
+               pr_info("BTRFS: creating UUID tree\n");
                ret = btrfs_create_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to create the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to create the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
                }
        } else if (check_uuid_tree ||
                   btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
-               pr_info("btrfs: checking UUID tree\n");
+               pr_info("BTRFS: checking UUID tree\n");
                ret = btrfs_check_uuid_tree(fs_info);
                if (ret) {
-                       pr_warn("btrfs: failed to check the UUID tree %d\n",
+                       pr_warn("BTRFS: failed to check the UUID tree %d\n",
                                ret);
                        close_ctree(tree_root);
                        return ret;
@@ -2945,6 +2935,9 @@ fail_cleaner:
         */
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
+fail_sysfs:
+       btrfs_sysfs_remove_one(fs_info);
+
 fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
@@ -2960,6 +2953,8 @@ fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        iput(fs_info->btree_inode);
+fail_bio_counter:
+       percpu_counter_destroy(&fs_info->bio_counter);
 fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
@@ -3000,7 +2995,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
                struct btrfs_device *device = (struct btrfs_device *)
                        bh->b_private;
 
-               printk_ratelimited_in_rcu(KERN_WARNING "lost page write due to "
+               printk_ratelimited_in_rcu(KERN_WARNING "BTRFS: lost page write due to "
                                          "I/O error on %s\n",
                                          rcu_str_deref(device->name));
                /* note, we dont' set_buffer_write_io_error because we have
@@ -3119,7 +3114,7 @@ static int write_dev_supers(struct btrfs_device *device,
                        bh = __getblk(device->bdev, bytenr / 4096,
                                      BTRFS_SUPER_INFO_SIZE);
                        if (!bh) {
-                               printk(KERN_ERR "btrfs: couldn't get super "
+                               printk(KERN_ERR "BTRFS: couldn't get super "
                                       "buffer head for bytenr %Lu\n", bytenr);
                                errors++;
                                continue;
@@ -3140,7 +3135,10 @@ static int write_dev_supers(struct btrfs_device *device,
                 * we fua the first super.  The others we allow
                 * to go down lazy.
                 */
-               ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               if (i == 0)
+                       ret = btrfsic_submit_bh(WRITE_FUA, bh);
+               else
+                       ret = btrfsic_submit_bh(WRITE_SYNC, bh);
                if (ret)
                        errors++;
        }
@@ -3186,7 +3184,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
                wait_for_completion(&device->flush_wait);
 
                if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
-                       printk_in_rcu("btrfs: disabling barriers on dev %s\n",
+                       printk_in_rcu("BTRFS: disabling barriers on dev %s\n",
                                      rcu_str_deref(device->name));
                        device->nobarriers = 1;
                } else if (!bio_flagged(bio, BIO_UPTODATE)) {
@@ -3238,6 +3236,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        /* send down all the barriers */
        head = &info->fs_devices->devices;
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_send++;
                        continue;
@@ -3252,6 +3252,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 
        /* wait for all the barriers */
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_wait++;
                        continue;
@@ -3407,7 +3409,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                        total_errors++;
        }
        if (total_errors > max_errors) {
-               printk(KERN_ERR "btrfs: %d errors while writing supers\n",
+               btrfs_err(root->fs_info, "%d errors while writing supers",
                       total_errors);
                mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
@@ -3455,10 +3457,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_free_log(NULL, root);
-               btrfs_free_log_root_tree(NULL, fs_info);
-       }
 
        __btrfs_remove_free_space_cache(root->free_ino_pinned);
        __btrfs_remove_free_space_cache(root->free_ino_ctl);
@@ -3563,14 +3563,12 @@ int close_ctree(struct btrfs_root *root)
        if (!(fs_info->sb->s_flags & MS_RDONLY)) {
                ret = btrfs_commit_super(root);
                if (ret)
-                       printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
+                       btrfs_err(root->fs_info, "commit super ret %d", ret);
        }
 
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_error_commit_super(root);
 
-       btrfs_put_block_group_cache(fs_info);
-
        kthread_stop(fs_info->transaction_kthread);
        kthread_stop(fs_info->cleaner_kthread);
 
@@ -3580,12 +3578,16 @@ int close_ctree(struct btrfs_root *root)
        btrfs_free_qgroup_config(root->fs_info);
 
        if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
-               printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+               btrfs_info(root->fs_info, "at unmount delalloc count %lld",
                       percpu_counter_sum(&fs_info->delalloc_bytes));
        }
 
+       btrfs_sysfs_remove_one(fs_info);
+
        del_fs_roots(fs_info);
 
+       btrfs_put_block_group_cache(fs_info);
+
        btrfs_free_block_groups(fs_info);
 
        btrfs_stop_all_workers(fs_info);
@@ -3604,6 +3606,7 @@ int close_ctree(struct btrfs_root *root)
 
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        percpu_counter_destroy(&fs_info->delalloc_bytes);
+       percpu_counter_destroy(&fs_info->bio_counter);
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
@@ -3785,9 +3788,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
                list_move_tail(&root->ordered_root,
                               &fs_info->ordered_roots);
 
+               spin_unlock(&fs_info->ordered_root_lock);
                btrfs_destroy_ordered_extents(root);
 
-               cond_resched_lock(&fs_info->ordered_root_lock);
+               cond_resched();
+               spin_lock(&fs_info->ordered_root_lock);
        }
        spin_unlock(&fs_info->ordered_root_lock);
 }
@@ -3803,55 +3808,54 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        delayed_refs = &trans->delayed_refs;
 
        spin_lock(&delayed_refs->lock);
-       if (delayed_refs->num_entries == 0) {
+       if (atomic_read(&delayed_refs->num_entries) == 0) {
                spin_unlock(&delayed_refs->lock);
-               printk(KERN_INFO "delayed_refs has NO entry\n");
+               btrfs_info(root->fs_info, "delayed_refs has NO entry");
                return ret;
        }
 
-       while ((node = rb_first(&delayed_refs->root)) != NULL) {
-               struct btrfs_delayed_ref_head *head = NULL;
+       while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+               struct btrfs_delayed_ref_head *head;
                bool pin_bytes = false;
 
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               atomic_set(&ref->refs, 1);
-               if (btrfs_delayed_ref_is_head(ref)) {
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
 
-                       head = btrfs_delayed_node_to_head(ref);
-                       if (!mutex_trylock(&head->mutex)) {
-                               atomic_inc(&ref->refs);
-                               spin_unlock(&delayed_refs->lock);
-
-                               /* Need to wait for the delayed ref to run */
-                               mutex_lock(&head->mutex);
-                               mutex_unlock(&head->mutex);
-                               btrfs_put_delayed_ref(ref);
-
-                               spin_lock(&delayed_refs->lock);
-                               continue;
-                       }
-
-                       if (head->must_insert_reserved)
-                               pin_bytes = true;
-                       btrfs_free_delayed_extent_op(head->extent_op);
-                       delayed_refs->num_heads--;
-                       if (list_empty(&head->cluster))
-                               delayed_refs->num_heads_ready--;
-                       list_del_init(&head->cluster);
-               }
-
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               delayed_refs->num_entries--;
-               spin_unlock(&delayed_refs->lock);
-               if (head) {
-                       if (pin_bytes)
-                               btrfs_pin_extent(root, ref->bytenr,
-                                                ref->num_bytes, 1);
+                       mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       spin_lock(&delayed_refs->lock);
+                       continue;
                }
-               btrfs_put_delayed_ref(ref);
+               spin_lock(&head->lock);
+               while ((node = rb_first(&head->ref_root)) != NULL) {
+                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                                      rb_node);
+                       ref->in_tree = 0;
+                       rb_erase(&ref->rb_node, &head->ref_root);
+                       atomic_dec(&delayed_refs->num_entries);
+                       btrfs_put_delayed_ref(ref);
+               }
+               if (head->must_insert_reserved)
+                       pin_bytes = true;
+               btrfs_free_delayed_extent_op(head->extent_op);
+               delayed_refs->num_heads--;
+               if (head->processing == 0)
+                       delayed_refs->num_heads_ready--;
+               atomic_dec(&delayed_refs->num_entries);
+               head->node.in_tree = 0;
+               rb_erase(&head->href_node, &delayed_refs->href_root);
+               spin_unlock(&head->lock);
+               spin_unlock(&delayed_refs->lock);
+               mutex_unlock(&head->mutex);
 
+               if (pin_bytes)
+                       btrfs_pin_extent(root, head->node.bytenr,
+                                        head->node.num_bytes, 1);
+               btrfs_put_delayed_ref(&head->node);
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }