Btrfs: just wait or commit our own log sub-transaction
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9850a51..dd52146 100644
@@ -26,7 +26,6 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
-#include <linux/crc32c.h>
 #include <linux/slab.h>
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
@@ -35,6 +34,7 @@
 #include <asm/unaligned.h>
 #include "ctree.h"
 #include "disk-io.h"
+#include "hash.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
 #include "volumes.h"
@@ -244,7 +244,7 @@ out:
 
 u32 btrfs_csum_data(char *data, u32 seed, size_t len)
 {
-       return crc32c(seed, data, len);
+       return btrfs_crc32c(seed, data, len);
 }
 
 void btrfs_csum_final(u32 crc, char *result)
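
The checksum helper now goes through a btrfs-local wrapper declared by the
new "hash.h" include instead of calling crc32c() from <linux/crc32c.h>
directly. A minimal sketch of what such a wrapper could look like follows;
the in-tree btrfs_crc32c() may well be implemented differently (for
example via the crypto API), so treat this as illustrative only.

    /* Hypothetical stand-in for the wrapper hash.h declares; the real
     * one may not simply forward to crc32c(). */
    #include <linux/crc32c.h>

    static inline u32 btrfs_crc32c(u32 seed, const void *data, size_t len)
    {
            return crc32c(seed, data, len);
    }
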
@@ -1200,6 +1200,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
+       INIT_LIST_HEAD(&root->log_ctxs[0]);
+       INIT_LIST_HEAD(&root->log_ctxs[1]);
        atomic_set(&root->log_commit[0], 0);
        atomic_set(&root->log_commit[1], 0);
        atomic_set(&root->log_writers, 0);
@@ -1207,6 +1209,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        atomic_set(&root->orphan_inodes, 0);
        atomic_set(&root->refs, 1);
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        if (fs_info)
                extent_io_tree_init(&root->dirty_log_pages,
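
Btrfs keeps two log sub-transactions in flight at once, with per-index
state selected by log_transid % 2, which is why the wait queues and commit
counters come in pairs; this hunk adds a matching pair of context lists
plus a log_transid_committed field, initialized to -1 because nothing has
been committed yet. Per the commit title, this lets a syncing task notice
that its own sub-transaction was already committed by another task. A
hedged sketch of that fast path (the helper name is hypothetical; the real
check lives in fs/btrfs/tree-log.c, not in this file):

    /* Illustrative only: if the sub-transaction we logged into has
     * already been committed by someone else, there is nothing left
     * to wait for. */
    static bool log_subtrans_committed(struct btrfs_root *root, int transid)
    {
            return root->log_transid_committed >= transid;
    }
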
@@ -1420,6 +1423,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        WARN_ON(root->log_root);
        root->log_root = log_root;
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        return 0;
 }
@@ -2065,6 +2069,12 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
                for (i = 0; i < ret; i++)
                        btrfs_drop_and_free_fs_root(fs_info, gang[i]);
        }
+
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+               btrfs_free_log_root_tree(NULL, fs_info);
+               btrfs_destroy_pinned_extent(fs_info->tree_root,
+                                           fs_info->pinned_extents);
+       }
 }
 
 int open_ctree(struct super_block *sb,
@@ -2130,10 +2140,16 @@ int open_ctree(struct super_block *sb,
                goto fail_dirty_metadata_bytes;
        }
 
+       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       if (ret) {
+               err = ret;
+               goto fail_delalloc_bytes;
+       }
+
        fs_info->btree_inode = new_inode(sb);
        if (!fs_info->btree_inode) {
                err = -ENOMEM;
-               goto fail_delalloc_bytes;
+               goto fail_bio_counter;
        }
 
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
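
The new bio_counter gets its own error label, keeping open_ctree()'s goto
ladder unwinding in reverse order of setup (see the fail_bio_counter hunks
further down). A minimal sketch of the pattern using just the two
counters; the helper name is hypothetical, and the two-argument
percpu_counter_init() of this kernel version is assumed.

    #include <linux/percpu_counter.h>

    /* Hypothetical helper showing the unwind pattern open_ctree()
     * follows: each resource acquired later jumps to a label that tears
     * down the one acquired just before it. */
    static int init_fs_counters(struct btrfs_fs_info *fs_info)
    {
            int ret;

            ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
            if (ret)
                    return ret;

            ret = percpu_counter_init(&fs_info->bio_counter, 0);
            if (ret)
                    goto fail_delalloc_bytes;   /* unwind in reverse order */

            return 0;

    fail_delalloc_bytes:
            percpu_counter_destroy(&fs_info->delalloc_bytes);
            return ret;
    }
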
@@ -2185,7 +2201,7 @@ int open_ctree(struct super_block *sb,
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
        fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
-
+       fs_info->avg_delayed_ref_runtime = div64_u64(NSEC_PER_SEC, 64);
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
        spin_lock_init(&fs_info->reada_lock);
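
The new field seeds the average delayed-ref runtime estimate at
div64_u64(NSEC_PER_SEC, 64), i.e. 15,625,000 ns or roughly 15.6 ms per
run. Presumably this is just a conservative starting point that measured
runtimes later refine; nothing in this hunk depends on the exact value.
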
@@ -2208,6 +2224,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->scrub_pause_req, 0);
        atomic_set(&fs_info->scrubs_paused, 0);
        atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->replace_wait);
        init_waitqueue_head(&fs_info->scrub_pause_wait);
        fs_info->scrub_workers_refcnt = 0;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
@@ -2739,13 +2756,13 @@ retry_root_backup:
        ret = btrfs_init_space_info(fs_info);
        if (ret) {
                printk(KERN_ERR "BTRFS: Failed to initial space info: %d\n", ret);
-               goto fail_block_groups;
+               goto fail_sysfs;
        }
 
        ret = btrfs_read_block_groups(extent_root);
        if (ret) {
                printk(KERN_ERR "BTRFS: Failed to read block groups: %d\n", ret);
-               goto fail_block_groups;
+               goto fail_sysfs;
        }
        fs_info->num_tolerated_disk_barrier_failures =
                btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
@@ -2754,13 +2771,13 @@ retry_root_backup:
            !(sb->s_flags & MS_RDONLY)) {
                printk(KERN_WARNING "BTRFS: "
                        "too many missing devices, writeable mount is not allowed\n");
-               goto fail_block_groups;
+               goto fail_sysfs;
        }
 
        fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
                                               "btrfs-cleaner");
        if (IS_ERR(fs_info->cleaner_kthread))
-               goto fail_block_groups;
+               goto fail_sysfs;
 
        fs_info->transaction_kthread = kthread_run(transaction_kthread,
                                                   tree_root,
@@ -2776,6 +2793,10 @@ retry_root_backup:
                btrfs_set_opt(fs_info->mount_opt, SSD);
        }
 
+       /* Set the real inode map cache flag */
+       if (btrfs_test_opt(tree_root, CHANGE_INODE_CACHE))
+               btrfs_set_opt(tree_root->fs_info->mount_opt, INODE_MAP_CACHE);
+
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
        if (btrfs_test_opt(tree_root, CHECK_INTEGRITY)) {
                ret = btrfsic_mount(tree_root, fs_devices,
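
The mount-option parser apparently records an inode_cache request only as
CHANGE_INODE_CACHE; this is the point where it becomes the live
INODE_MAP_CACHE flag, once open_ctree() has progressed far enough for the
cache to actually be usable.
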
@@ -2938,6 +2959,9 @@ fail_cleaner:
         */
        filemap_write_and_wait(fs_info->btree_inode->i_mapping);
 
+fail_sysfs:
+       btrfs_sysfs_remove_one(fs_info);
+
 fail_block_groups:
        btrfs_put_block_group_cache(fs_info);
        btrfs_free_block_groups(fs_info);
@@ -2953,6 +2977,8 @@ fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        iput(fs_info->btree_inode);
+fail_bio_counter:
+       percpu_counter_destroy(&fs_info->bio_counter);
 fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
@@ -3234,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        /* send down all the barriers */
        head = &info->fs_devices->devices;
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_send++;
                        continue;
@@ -3248,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 
        /* wait for all the barriers */
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_wait++;
                        continue;
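
Both the submit loop and the wait loop now skip devices flagged missing.
Previously a missing device (which has no bdev) fell into the !dev->bdev
branch and was counted toward errors_send/errors_wait, so a filesystem
mounted degraded could trip the barrier-failure accounting on devices it
already knew were gone.
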
@@ -3451,10 +3481,8 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
        if (btrfs_root_refs(&root->root_item) == 0)
                synchronize_srcu(&fs_info->subvol_srcu);
 
-       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_free_log(NULL, root);
-               btrfs_free_log_root_tree(NULL, fs_info);
-       }
 
        __btrfs_remove_free_space_cache(root->free_ino_pinned);
        __btrfs_remove_free_space_cache(root->free_ino_ctl);
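
Per-root log trees are still torn down here when the filesystem is in an
error state, but freeing the shared log root tree moves up to
del_fs_roots() (see the earlier hunk), where it now runs once per
filesystem, alongside the pinned-extent cleanup, rather than once for
every root that gets dropped.
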
@@ -3565,8 +3593,6 @@ int close_ctree(struct btrfs_root *root)
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                btrfs_error_commit_super(root);
 
-       btrfs_put_block_group_cache(fs_info);
-
        kthread_stop(fs_info->transaction_kthread);
        kthread_stop(fs_info->cleaner_kthread);
 
@@ -3584,6 +3610,8 @@ int close_ctree(struct btrfs_root *root)
 
        del_fs_roots(fs_info);
 
+       btrfs_put_block_group_cache(fs_info);
+
        btrfs_free_block_groups(fs_info);
 
        btrfs_stop_all_workers(fs_info);
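
btrfs_put_block_group_cache() moves from before the kthreads are stopped
to after del_fs_roots(), so the block-group cache now outlives both the
worker threads and the root teardown; presumably the earlier placement
risked later shutdown steps touching block groups after the cache had
already been dropped.
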
@@ -3602,6 +3630,7 @@ int close_ctree(struct btrfs_root *root)
 
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        percpu_counter_destroy(&fs_info->delalloc_bytes);
+       percpu_counter_destroy(&fs_info->bio_counter);
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
@@ -3783,9 +3812,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
                list_move_tail(&root->ordered_root,
                               &fs_info->ordered_roots);
 
+               spin_unlock(&fs_info->ordered_root_lock);
                btrfs_destroy_ordered_extents(root);
 
-               cond_resched_lock(&fs_info->ordered_root_lock);
+               cond_resched();
+               spin_lock(&fs_info->ordered_root_lock);
        }
        spin_unlock(&fs_info->ordered_root_lock);
 }
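
btrfs_destroy_ordered_extents() can block, so it may no longer run under
ordered_root_lock: the lock is dropped around the call and plain
cond_resched() replaces cond_resched_lock(). The list_move_tail() back
onto ordered_roots implies the loop walks a stack-local splice of the list
(the splice itself sits above the visible context), which is what makes
dropping the lock mid-walk safe. A minimal sketch of the whole idiom; the
function name is hypothetical.

    /* Illustrative: move the shared list onto a local head under the
     * lock, then the lock can be dropped around each blocking call. */
    static void destroy_all_ordered_extents_sketch(struct btrfs_fs_info *fs_info)
    {
            struct btrfs_root *root;
            LIST_HEAD(splice);

            spin_lock(&fs_info->ordered_root_lock);
            list_splice_init(&fs_info->ordered_roots, &splice);
            while (!list_empty(&splice)) {
                    root = list_first_entry(&splice, struct btrfs_root,
                                            ordered_root);
                    list_move_tail(&root->ordered_root,
                                   &fs_info->ordered_roots);

                    spin_unlock(&fs_info->ordered_root_lock);
                    btrfs_destroy_ordered_extents(root);    /* may block */

                    cond_resched();
                    spin_lock(&fs_info->ordered_root_lock);
            }
            spin_unlock(&fs_info->ordered_root_lock);
    }
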
@@ -3801,58 +3832,54 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
        delayed_refs = &trans->delayed_refs;
 
        spin_lock(&delayed_refs->lock);
-       if (delayed_refs->num_entries == 0) {
+       if (atomic_read(&delayed_refs->num_entries) == 0) {
                spin_unlock(&delayed_refs->lock);
                btrfs_info(root->fs_info, "delayed_refs has NO entry");
                return ret;
        }
 
-       while ((node = rb_first(&delayed_refs->root)) != NULL) {
-               struct btrfs_delayed_ref_head *head = NULL;
+       while ((node = rb_first(&delayed_refs->href_root)) != NULL) {
+               struct btrfs_delayed_ref_head *head;
                bool pin_bytes = false;
 
-               ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
-               atomic_set(&ref->refs, 1);
-               if (btrfs_delayed_ref_is_head(ref)) {
-
-                       head = btrfs_delayed_node_to_head(ref);
-                       if (!mutex_trylock(&head->mutex)) {
-                               atomic_inc(&ref->refs);
-                               spin_unlock(&delayed_refs->lock);
-
-                               /* Need to wait for the delayed ref to run */
-                               mutex_lock(&head->mutex);
-                               mutex_unlock(&head->mutex);
-                               btrfs_put_delayed_ref(ref);
+               head = rb_entry(node, struct btrfs_delayed_ref_head,
+                               href_node);
+               if (!mutex_trylock(&head->mutex)) {
+                       atomic_inc(&head->node.refs);
+                       spin_unlock(&delayed_refs->lock);
 
-                               spin_lock(&delayed_refs->lock);
-                               continue;
-                       }
-
-                       if (head->must_insert_reserved)
-                               pin_bytes = true;
-                       btrfs_free_delayed_extent_op(head->extent_op);
-                       delayed_refs->num_heads--;
-                       if (list_empty(&head->cluster))
-                               delayed_refs->num_heads_ready--;
-                       list_del_init(&head->cluster);
-               }
-
-               ref->in_tree = 0;
-               rb_erase(&ref->rb_node, &delayed_refs->root);
-               if (head)
-                       rb_erase(&head->href_node, &delayed_refs->href_root);
-
-               delayed_refs->num_entries--;
-               spin_unlock(&delayed_refs->lock);
-               if (head) {
-                       if (pin_bytes)
-                               btrfs_pin_extent(root, ref->bytenr,
-                                                ref->num_bytes, 1);
+                       mutex_lock(&head->mutex);
                        mutex_unlock(&head->mutex);
+                       btrfs_put_delayed_ref(&head->node);
+                       spin_lock(&delayed_refs->lock);
+                       continue;
                }
-               btrfs_put_delayed_ref(ref);
+               spin_lock(&head->lock);
+               while ((node = rb_first(&head->ref_root)) != NULL) {
+                       ref = rb_entry(node, struct btrfs_delayed_ref_node,
+                                      rb_node);
+                       ref->in_tree = 0;
+                       rb_erase(&ref->rb_node, &head->ref_root);
+                       atomic_dec(&delayed_refs->num_entries);
+                       btrfs_put_delayed_ref(ref);
+               }
+               if (head->must_insert_reserved)
+                       pin_bytes = true;
+               btrfs_free_delayed_extent_op(head->extent_op);
+               delayed_refs->num_heads--;
+               if (head->processing == 0)
+                       delayed_refs->num_heads_ready--;
+               atomic_dec(&delayed_refs->num_entries);
+               head->node.in_tree = 0;
+               rb_erase(&head->href_node, &delayed_refs->href_root);
+               spin_unlock(&head->lock);
+               spin_unlock(&delayed_refs->lock);
+               mutex_unlock(&head->mutex);
 
+               if (pin_bytes)
+                       btrfs_pin_extent(root, head->node.bytenr,
+                                        head->node.num_bytes, 1);
+               btrfs_put_delayed_ref(&head->node);
                cond_resched();
                spin_lock(&delayed_refs->lock);
        }