Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Apr 2012 16:30:07 +0000 (09:30 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Apr 2012 16:30:07 +0000 (09:30 -0700)
Pull btrfs fixes from Chris Mason:
 "This has our collection of bug fixes.  I missed the last rc because I
  thought our patches were making NFS crash during my xfs test runs.
  Turns out it was an NFS client bug fixed by someone else while I tried
  to bisect it.

  All of these fixes are small, but some are fairly high impact.  The
  biggest are fixes for our mount -o remount handling, a deadlock due to
  GFP_KERNEL allocations in readdir, and a RAID10 error handling bug.

  This was tested against both 3.3 and Linus' master as of this morning."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (26 commits)
  Btrfs: reduce lock contention during extent insertion
  Btrfs: avoid deadlocks from GFP_KERNEL allocations during btrfs_real_readdir
  Btrfs: Fix space checking during fs resize
  Btrfs: fix block_rsv and space_info lock ordering
  Btrfs: Prevent root_list corruption
  Btrfs: fix repair code for RAID10
  Btrfs: do not start delalloc inodes during sync
  Btrfs: fix that check_int_data mount option was ignored
  Btrfs: don't count CRC or header errors twice while scrubbing
  Btrfs: fix btrfs_ioctl_dev_info() crash on missing device
  btrfs: don't return EINTR
  Btrfs: double unlock bug in error handling
  Btrfs: always store the mirror we read the eb from
  fs/btrfs/volumes.c: add missing free_fs_devices
  btrfs: fix early abort in 'remount'
  Btrfs: fix max chunk size check in chunk allocator
  Btrfs: add missing read locks in backref.c
  Btrfs: don't call free_extent_buffer twice in iterate_irefs
  Btrfs: Make free_ipath() deal gracefully with NULL pointers
  Btrfs: avoid possible use-after-free in clear_extent_bit()
  ...

15 files changed:
fs/btrfs/backref.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/reada.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/volumes.c

index f4e9074..bcec067 100644 (file)
@@ -22,6 +22,7 @@
 #include "ulist.h"
 #include "transaction.h"
 #include "delayed-ref.h"
+#include "locking.h"
 
 /*
  * this structure records all encountered refs on the way up to the root
@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
        s64 bytes_left = size - 1;
        struct extent_buffer *eb = eb_in;
        struct btrfs_key found_key;
+       int leave_spinning = path->leave_spinning;
 
        if (bytes_left >= 0)
                dest[bytes_left] = '\0';
 
+       path->leave_spinning = 1;
        while (1) {
                len = btrfs_inode_ref_name_len(eb, iref);
                bytes_left -= len;
                if (bytes_left >= 0)
                        read_extent_buffer(eb, dest + bytes_left,
                                                (unsigned long)(iref + 1), len);
-               if (eb != eb_in)
+               if (eb != eb_in) {
+                       btrfs_tree_read_unlock_blocking(eb);
                        free_extent_buffer(eb);
+               }
                ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
                if (ret > 0)
                        ret = -ENOENT;
@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                slot = path->slots[0];
                eb = path->nodes[0];
                /* make sure we can use eb after releasing the path */
-               if (eb != eb_in)
+               if (eb != eb_in) {
                        atomic_inc(&eb->refs);
+                       btrfs_tree_read_lock(eb);
+                       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+               }
                btrfs_release_path(path);
 
                iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
        }
 
        btrfs_release_path(path);
+       path->leave_spinning = leave_spinning;
 
        if (ret)
                return ERR_PTR(ret);
@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
                                struct btrfs_path *path,
                                iterate_irefs_t *iterate, void *ctx)
 {
-       int ret;
+       int ret = 0;
        int slot;
        u32 cur;
        u32 len;
@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
        struct btrfs_inode_ref *iref;
        struct btrfs_key found_key;
 
-       while (1) {
+       while (!ret) {
+               path->leave_spinning = 1;
                ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
                                        &found_key);
                if (ret < 0)
@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
                eb = path->nodes[0];
                /* make sure we can use eb after releasing the path */
                atomic_inc(&eb->refs);
+               btrfs_tree_read_lock(eb);
+               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
                btrfs_release_path(path);
 
                item = btrfs_item_nr(eb, slot);
@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
                                 (unsigned long long)found_key.objectid,
                                 (unsigned long long)fs_root->objectid);
                        ret = iterate(parent, iref, eb, ctx);
-                       if (ret) {
-                               free_extent_buffer(eb);
+                       if (ret)
                                break;
-                       }
                        len = sizeof(*iref) + name_len;
                        iref = (struct btrfs_inode_ref *)((char *)iref + len);
                }
+               btrfs_tree_read_unlock_blocking(eb);
                free_extent_buffer(eb);
        }
 
@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
 
 void free_ipath(struct inode_fs_paths *ipath)
 {
+       if (!ipath)
+               return;
        kfree(ipath->fspath);
        kfree(ipath);
 }
index 3f65a81..8fd7233 100644 (file)
@@ -1078,7 +1078,7 @@ struct btrfs_fs_info {
         * is required instead of the faster short fsync log commits
         */
        u64 last_trans_log_full_commit;
-       unsigned long mount_opt:21;
+       unsigned long mount_opt;
        unsigned long compress_type:4;
        u64 max_inline;
        u64 alloc_start;
index 20196f4..d0c969b 100644 (file)
@@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
                        break;
 
-               if (!failed_mirror) {
-                       failed = 1;
-                       printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
-                       failed_mirror = eb->failed_mirror;
-               }
-
                num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
                                              eb->start, eb->len);
                if (num_copies == 1)
                        break;
 
+               if (!failed_mirror) {
+                       failed = 1;
+                       failed_mirror = eb->read_mirror;
+               }
+
                mirror_num++;
                if (mirror_num == failed_mirror)
                        mirror_num++;
@@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
 }
 
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state)
+                              struct extent_state *state, int mirror)
 {
        struct extent_io_tree *tree;
        u64 found_start;
@@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        if (!reads_done)
                goto err;
 
+       eb->read_mirror = mirror;
        if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
                ret = -EIO;
                goto err;
@@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
 
        eb = (struct extent_buffer *)page->private;
        set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
-       eb->failed_mirror = failed_mirror;
+       eb->read_mirror = failed_mirror;
        if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
                btree_readahead_hook(root, eb, eb->start, -EIO);
        return -EIO;    /* we fixed nothing */
@@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb,
                goto fail_sb_buffer;
        }
 
-       if (sectorsize < PAGE_SIZE) {
-               printk(KERN_WARNING "btrfs: Incompatible sector size "
-                      "found on %s\n", sb->s_id);
+       if (sectorsize != PAGE_SIZE) {
+               printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
+                      "found on %s\n", (unsigned long)sectorsize, sb->s_id);
                goto fail_sb_buffer;
        }
 
index 2b35f8d..6fc2e6f 100644 (file)
@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
                                if (ret) {
                                        printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
+                                       spin_lock(&delayed_refs->lock);
                                        return ret;
                                }
 
@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
 
                if (ret) {
                        printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
+                       spin_lock(&delayed_refs->lock);
                        return ret;
                }
 
@@ -3769,13 +3771,10 @@ again:
                 */
                if (current->journal_info)
                        return -EAGAIN;
-               ret = wait_event_interruptible(space_info->wait,
-                                              !space_info->flush);
-               /* Must have been interrupted, return */
-               if (ret) {
-                       printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
+               ret = wait_event_killable(space_info->wait, !space_info->flush);
+               /* Must have been killed, return */
+               if (ret)
                        return -EINTR;
-               }
 
                spin_lock(&space_info->lock);
        }
@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 
        num_bytes = calc_global_metadata_size(fs_info);
 
-       spin_lock(&block_rsv->lock);
        spin_lock(&sinfo->lock);
+       spin_lock(&block_rsv->lock);
 
        block_rsv->size = num_bytes;
 
@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
                block_rsv->full = 1;
        }
 
-       spin_unlock(&sinfo->lock);
        spin_unlock(&block_rsv->lock);
+       spin_unlock(&sinfo->lock);
 }
 
 static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
index cd4b5e4..198c2ba 100644 (file)
@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
        return 0;
 }
 
+static struct extent_state *next_state(struct extent_state *state)
+{
+       struct rb_node *next = rb_next(&state->rb_node);
+       if (next)
+               return rb_entry(next, struct extent_state, rb_node);
+       else
+               return NULL;
+}
+
 /*
  * utility function to clear some bits in an extent state struct.
- * it will optionally wake up any one waiting on this state (wake == 1), or
- * forcibly remove the state from the tree (delete == 1).
+ * it will optionally wake up any one waiting on this state (wake == 1)
  *
  * If no bits are set on the state struct after clearing things, the
  * struct is freed and removed from the tree
  */
-static int clear_state_bit(struct extent_io_tree *tree,
-                           struct extent_state *state,
-                           int *bits, int wake)
+static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
+                                           struct extent_state *state,
+                                           int *bits, int wake)
 {
+       struct extent_state *next;
        int bits_to_clear = *bits & ~EXTENT_CTLBITS;
-       int ret = state->state & bits_to_clear;
 
        if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
                u64 range = state->end - state->start + 1;
@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
        if (wake)
                wake_up(&state->wq);
        if (state->state == 0) {
+               next = next_state(state);
                if (state->tree) {
                        rb_erase(&state->rb_node, &tree->state);
                        state->tree = NULL;
@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
                }
        } else {
                merge_state(tree, state);
+               next = next_state(state);
        }
-       return ret;
+       return next;
 }
 
 static struct extent_state *
@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
        struct extent_state *state;
        struct extent_state *cached;
        struct extent_state *prealloc = NULL;
-       struct rb_node *next_node;
        struct rb_node *node;
        u64 last_end;
        int err;
@@ -528,14 +537,11 @@ hit_next:
        WARN_ON(state->end < start);
        last_end = state->end;
 
-       if (state->end < end && !need_resched())
-               next_node = rb_next(&state->rb_node);
-       else
-               next_node = NULL;
-
        /* the state doesn't have the wanted bits, go ahead */
-       if (!(state->state & bits))
+       if (!(state->state & bits)) {
+               state = next_state(state);
                goto next;
+       }
 
        /*
         *     | ---- desired range ---- |
@@ -593,16 +599,13 @@ hit_next:
                goto out;
        }
 
-       clear_state_bit(tree, state, &bits, wake);
+       state = clear_state_bit(tree, state, &bits, wake);
 next:
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
-       if (start <= end && next_node) {
-               state = rb_entry(next_node, struct extent_state,
-                                rb_node);
+       if (start <= end && state && !need_resched())
                goto hit_next;
-       }
        goto search_again;
 
 out:
@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        u64 start;
        u64 end;
        int whole_page;
-       int failed_mirror;
+       int mirror;
        int ret;
 
        if (err)
@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                }
                spin_unlock(&tree->lock);
 
+               mirror = (int)(unsigned long)bio->bi_bdev;
                if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
                        ret = tree->ops->readpage_end_io_hook(page, start, end,
-                                                             state);
+                                                             state, mirror);
                        if (ret)
                                uptodate = 0;
                        else
                                clean_io_failure(start, page);
                }
 
-               if (!uptodate)
-                       failed_mirror = (int)(unsigned long)bio->bi_bdev;
-
                if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
-                       ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
+                       ret = tree->ops->readpage_io_failed_hook(page, mirror);
                        if (!ret && !err &&
                            test_bit(BIO_UPTODATE, &bio->bi_flags))
                                uptodate = 1;
@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                         * can't handle the error it will return -EIO and we
                         * remain responsible for that page.
                         */
-                       ret = bio_readpage_error(bio, page, start, end,
-                                                       failed_mirror, NULL);
+                       ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
                        if (ret == 0) {
                                uptodate =
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
        }
 
        clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
-       eb->failed_mirror = 0;
+       eb->read_mirror = 0;
        atomic_set(&eb->io_pages, num_reads);
        for (i = start_i; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
index faf10eb..b516c3b 100644 (file)
@@ -79,7 +79,7 @@ struct extent_io_ops {
                                        u64 start, u64 end,
                                       struct extent_state *state);
        int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
-                                   struct extent_state *state);
+                                   struct extent_state *state, int mirror);
        int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
                                      struct extent_state *state, int uptodate);
        void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -135,7 +135,7 @@ struct extent_buffer {
        spinlock_t refs_lock;
        atomic_t refs;
        atomic_t io_pages;
-       int failed_mirror;
+       int read_mirror;
        struct list_head leak_list;
        struct rcu_head rcu_head;
        pid_t lock_owner;
index d83260d..53bf2d7 100644 (file)
@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
        int extent_type;
        int recow;
        int ret;
+       int modify_tree = -1;
 
        if (drop_cache)
                btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
        if (!path)
                return -ENOMEM;
 
+       if (start >= BTRFS_I(inode)->disk_i_size)
+               modify_tree = 0;
+
        while (1) {
                recow = 0;
                ret = btrfs_lookup_file_extent(trans, root, path, ino,
-                                              search_start, -1);
+                                              search_start, modify_tree);
                if (ret < 0)
                        break;
                if (ret > 0 && path->slots[0] > 0 && search_start == start) {
@@ -634,7 +638,8 @@ next_slot:
                }
 
                search_start = max(key.offset, start);
-               if (recow) {
+               if (recow || !modify_tree) {
+                       modify_tree = -1;
                        btrfs_release_path(path);
                        continue;
                }
index 115bc05..61b16c6 100644 (file)
@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
  * extent_io.c will try to find good copies for us.
  */
 static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state)
+                              struct extent_state *state, int mirror)
 {
        size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
        struct inode *inode = page->mapping->host;
@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
        BTRFS_I(inode)->dummy_inode = 1;
 
        inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
-       inode->i_op = &simple_dir_inode_operations;
+       inode->i_op = &btrfs_dir_ro_inode_operations;
        inode->i_fop = &simple_dir_operations;
        inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
 static int btrfs_dentry_delete(const struct dentry *dentry)
 {
        struct btrfs_root *root;
+       struct inode *inode = dentry->d_inode;
 
-       if (!dentry->d_inode && !IS_ROOT(dentry))
-               dentry = dentry->d_parent;
+       if (!inode && !IS_ROOT(dentry))
+               inode = dentry->d_parent->d_inode;
 
-       if (dentry->d_inode) {
-               root = BTRFS_I(dentry->d_inode)->root;
+       if (inode) {
+               root = BTRFS_I(inode)->root;
                if (btrfs_root_refs(&root->root_item) == 0)
                        return 1;
+
+               if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
+                       return 1;
        }
        return 0;
 }
@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
        struct btrfs_path *path;
        struct list_head ins_list;
        struct list_head del_list;
-       struct qstr q;
        int ret;
        struct extent_buffer *leaf;
        int slot;
@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
 
                while (di_cur < di_total) {
                        struct btrfs_key location;
-                       struct dentry *tmp;
 
                        if (verify_dir_item(root, leaf, di))
                                break;
@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
                        d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
                        btrfs_dir_item_key_to_cpu(leaf, di, &location);
 
-                       q.name = name_ptr;
-                       q.len = name_len;
-                       q.hash = full_name_hash(q.name, q.len);
-                       tmp = d_lookup(filp->f_dentry, &q);
-                       if (!tmp) {
-                               struct btrfs_key *newkey;
-
-                               newkey = kzalloc(sizeof(struct btrfs_key),
-                                                GFP_NOFS);
-                               if (!newkey)
-                                       goto no_dentry;
-                               tmp = d_alloc(filp->f_dentry, &q);
-                               if (!tmp) {
-                                       kfree(newkey);
-                                       dput(tmp);
-                                       goto no_dentry;
-                               }
-                               memcpy(newkey, &location,
-                                      sizeof(struct btrfs_key));
-                               tmp->d_fsdata = newkey;
-                               tmp->d_flags |= DCACHE_NEED_LOOKUP;
-                               d_rehash(tmp);
-                               dput(tmp);
-                       } else {
-                               dput(tmp);
-                       }
-no_dentry:
+
                        /* is this a reference to our own snapshot? If so
-                        * skip it
+                        * skip it.
+                        *
+                        * In contrast to old kernels, we insert the snapshot's
+                        * dir item and dir index after it has been created, so
+                        * we won't find a reference to our own snapshot. We
+                        * still keep the following code for backward
+                        * compatibility.
                         */
                        if (location.type == BTRFS_ROOT_ITEM_KEY &&
                            location.objectid == root->root_key.objectid) {
index 18cc23d..14f8e1f 100644 (file)
@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
        di_args->bytes_used = dev->bytes_used;
        di_args->total_bytes = dev->total_bytes;
        memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
-       strncpy(di_args->path, dev->name, sizeof(di_args->path));
+       if (dev->name)
+               strncpy(di_args->path, dev->name, sizeof(di_args->path));
+       else
+               di_args->path[0] = '\0';
 
 out:
        if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
index dc5d331..ac5d010 100644 (file)
@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
                                          struct btrfs_bio *bbio)
 {
        int ret;
-       int looped = 0;
        struct reada_zone *zone;
        struct btrfs_block_group_cache *cache = NULL;
        u64 start;
        u64 end;
        int i;
 
-again:
        zone = NULL;
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
@@ -274,9 +272,6 @@ again:
                spin_unlock(&fs_info->reada_lock);
        }
 
-       if (looped)
-               return NULL;
-
        cache = btrfs_lookup_block_group(fs_info, logical);
        if (!cache)
                return NULL;
@@ -307,13 +302,15 @@ again:
        ret = radix_tree_insert(&dev->reada_zones,
                                (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
                                zone);
-       spin_unlock(&fs_info->reada_lock);
 
-       if (ret) {
+       if (ret == -EEXIST) {
                kfree(zone);
-               looped = 1;
-               goto again;
+               ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
+                                            logical >> PAGE_CACHE_SHIFT, 1);
+               if (ret == 1)
+                       kref_get(&zone->refcnt);
        }
+       spin_unlock(&fs_info->reada_lock);
 
        return zone;
 }
@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                                              struct btrfs_key *top, int level)
 {
        int ret;
-       int looped = 0;
        struct reada_extent *re = NULL;
+       struct reada_extent *re_exist = NULL;
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
        struct btrfs_bio *bbio = NULL;
        struct btrfs_device *dev;
+       struct btrfs_device *prev_dev;
        u32 blocksize;
        u64 length;
        int nzones = 0;
        int i;
        unsigned long index = logical >> PAGE_CACHE_SHIFT;
 
-again:
        spin_lock(&fs_info->reada_lock);
        re = radix_tree_lookup(&fs_info->reada_tree, index);
        if (re)
                kref_get(&re->refcnt);
        spin_unlock(&fs_info->reada_lock);
 
-       if (re || looped)
+       if (re)
                return re;
 
        re = kzalloc(sizeof(*re), GFP_NOFS);
@@ -398,16 +395,31 @@ again:
        /* insert extent in reada_tree + all per-device trees, all or nothing */
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_insert(&fs_info->reada_tree, index, re);
+       if (ret == -EEXIST) {
+               re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
+               BUG_ON(!re_exist);
+               kref_get(&re_exist->refcnt);
+               spin_unlock(&fs_info->reada_lock);
+               goto error;
+       }
        if (ret) {
                spin_unlock(&fs_info->reada_lock);
-               if (ret != -ENOMEM) {
-                       /* someone inserted the extent in the meantime */
-                       looped = 1;
-               }
                goto error;
        }
+       prev_dev = NULL;
        for (i = 0; i < nzones; ++i) {
                dev = bbio->stripes[i].dev;
+               if (dev == prev_dev) {
+                       /*
+                        * in case of DUP, just add the first zone. As both
+                        * are on the same device, there's nothing to gain
+                        * from adding both.
+                        * Also, it wouldn't work, as the tree is per device
+                        * and adding would fail with EEXIST
+                        */
+                       continue;
+               }
+               prev_dev = dev;
                ret = radix_tree_insert(&dev->reada_extents, index, re);
                if (ret) {
                        while (--i >= 0) {
@@ -450,9 +462,7 @@ error:
        }
        kfree(bbio);
        kfree(re);
-       if (looped)
-               goto again;
-       return NULL;
+       return re_exist;
 }
 
 static void reada_kref_dummy(struct kref *kr)
index 017281d..646ee21 100644 (file)
@@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
                if (rb_node)
                        backref_tree_panic(rb_node, -EEXIST, node->bytenr);
        } else {
+               spin_lock(&root->fs_info->trans_lock);
                list_del_init(&root->root_list);
+               spin_unlock(&root->fs_info->trans_lock);
                kfree(node);
        }
        return 0;
@@ -3811,7 +3813,7 @@ restart:
 
                ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
                if (ret < 0) {
-                       if (ret != -EAGAIN) {
+                       if (ret != -ENOSPC) {
                                err = ret;
                                WARN_ON(1);
                                break;
index bc015f7..4f76fc3 100644 (file)
@@ -1257,12 +1257,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
        if (memcmp(csum, on_disk_csum, sdev->csum_size))
                fail = 1;
 
-       if (fail) {
-               spin_lock(&sdev->stat_lock);
-               ++sdev->stat.csum_errors;
-               spin_unlock(&sdev->stat_lock);
-       }
-
        return fail;
 }
 
@@ -1335,15 +1329,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
        if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
                ++crc_fail;
 
-       if (crc_fail || fail) {
-               spin_lock(&sdev->stat_lock);
-               if (crc_fail)
-                       ++sdev->stat.csum_errors;
-               if (fail)
-                       ++sdev->stat.verify_errors;
-               spin_unlock(&sdev->stat_lock);
-       }
-
        return fail || crc_fail;
 }
 
index 8d5d380..c5f8fca 100644 (file)
@@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
                return 0;
        }
 
-       btrfs_start_delalloc_inodes(root, 0);
        btrfs_wait_ordered_extents(root, 0, 0);
 
        trans = btrfs_start_transaction(root, 0);
@@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
                if (ret)
                        goto restore;
        } else {
-               if (fs_info->fs_devices->rw_devices == 0)
+               if (fs_info->fs_devices->rw_devices == 0) {
                        ret = -EACCES;
                        goto restore;
+               }
 
-               if (btrfs_super_log_root(fs_info->super_copy) != 0)
+               if (btrfs_super_log_root(fs_info->super_copy) != 0) {
                        ret = -EINVAL;
                        goto restore;
+               }
 
                ret = btrfs_cleanup_fs_roots(fs_info);
                if (ret)
index 11b77a5..3642225 100644 (file)
@@ -73,8 +73,10 @@ loop:
 
        cur_trans = root->fs_info->running_transaction;
        if (cur_trans) {
-               if (cur_trans->aborted)
+               if (cur_trans->aborted) {
+                       spin_unlock(&root->fs_info->trans_lock);
                        return cur_trans->aborted;
+               }
                atomic_inc(&cur_trans->use_count);
                atomic_inc(&cur_trans->num_writers);
                cur_trans->num_joined++;
@@ -1400,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = commit_fs_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
+               mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }
 
@@ -1411,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        ret = commit_cowonly_roots(trans, root);
        if (ret) {
                mutex_unlock(&root->fs_info->tree_log_mutex);
+               mutex_unlock(&root->fs_info->reloc_mutex);
                goto cleanup_transaction;
        }
 
index 759d024..1411b99 100644 (file)
@@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        stripe_size = devices_info[ndevs-1].max_avail;
        num_stripes = ndevs * dev_stripes;
 
-       if (stripe_size * num_stripes > max_chunk_size * ncopies) {
+       if (stripe_size * ndevs > max_chunk_size * ncopies) {
                stripe_size = max_chunk_size * ncopies;
-               do_div(stripe_size, num_stripes);
+               do_div(stripe_size, ndevs);
        }
 
        do_div(stripe_size, dev_stripes);
+
+       /* align to BTRFS_STRIPE_LEN */
        do_div(stripe_size, BTRFS_STRIPE_LEN);
        stripe_size *= BTRFS_STRIPE_LEN;
 
@@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
                else if (mirror_num)
                        stripe_index += mirror_num - 1;
                else {
+                       int old_stripe_index = stripe_index;
                        stripe_index = find_live_mirror(map, stripe_index,
                                              map->sub_stripes, stripe_index +
                                              current->pid % map->sub_stripes);
-                       mirror_num = stripe_index + 1;
+                       mirror_num = stripe_index - old_stripe_index + 1;
                }
        } else {
                /*
@@ -4350,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
 
        ret = __btrfs_open_devices(fs_devices, FMODE_READ,
                                   root->fs_info->bdev_holder);
-       if (ret)
+       if (ret) {
+               free_fs_devices(fs_devices);
                goto out;
+       }
 
        if (!fs_devices->seeding) {
                __btrfs_close_devices(fs_devices);